diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 721eeec4c0a4..0938408bf868 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2577,6 +2577,8 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] + no_uaccess_flush [PPC] Exceptions: This does not have any effect on @@ -2887,6 +2889,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. + noexec [IA-64] noexec [X86] @@ -2936,6 +2940,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -5299,6 +5306,14 @@ with /sys/devices/system/xen_memory/xen_memory0/scrub_pages. Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT. + xen.event_eoi_delay= [XEN] + How long to delay EOI handling in case of event + storms (jiffies). Default is 10. + + xen.event_loop_timeout= [XEN] + After which time (jiffies) the event handling loop + should start to delay EOI handling. Default is 2. + xirc2ps_cs= [NET,PCMCIA] Format: ,,,,,[,[,[,]]] diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index f24615544792..16e46bec8093 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -29,8 +29,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum :c:type:`v4l2_hsv_encoding` specifies which encoding is used. .. note:: The default R'G'B' quantization is full range for all - colorspaces except for BT.2020 which uses limited range R'G'B' - quantization. + colorspaces. HSV formats are always full range. .. tabularcolumns:: |p{6.0cm}|p{11.5cm}| @@ -162,8 +161,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Details * - ``V4L2_QUANTIZATION_DEFAULT`` - Use the default quantization encoding as defined by the - colorspace. This is always full range for R'G'B' (except for the - BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr. + colorspace. This is always full range for R'G'B' and HSV. + It is usually limited range for Y'CbCr. * - ``V4L2_QUANTIZATION_FULL_RANGE`` - Use the full range quantization encoding. I.e. the range [0…1] is mapped to [0…255] (with possible clipping to [1…254] to avoid the @@ -173,4 +172,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum * - ``V4L2_QUANTIZATION_LIM_RANGE`` - Use the limited range quantization encoding. I.e. the range [0…1] is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to - [16…240]. + [16…240]. Limited Range cannot be used with HSV. diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index 09fabf4cd412..ca7176cae8dd 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -370,9 +370,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020) The :ref:`itu2020` standard defines the colorspace used by Ultra-high definition television (UHDTV). The default transfer function is ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited -range (!), and so is the default Y'CbCr quantization. The chromaticities -of the primary colors and the white reference are: +``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range. +The chromaticities of the primary colors and the white reference are: diff --git a/Makefile b/Makefile index aed13b0f080d..57aceb4646d6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 154 +SUBLEVEL = 161 EXTRAVERSION = NAME = "People's Front" @@ -525,11 +525,7 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TRIPLE ?= $(CROSS_COMPILE) -CLANG_FLAGS += --target=$(notdir $(CLANG_TRIPLE:%-=%)) -ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y) -$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") -endif +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index e7b884328a84..e9815f9c5414 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -3,6 +3,9 @@ + + + @@ -182,6 +185,7 @@ + @@ -251,9 +255,12 @@ + + + @@ -414,6 +421,8 @@ + + @@ -633,6 +642,8 @@ + + @@ -679,8 +690,10 @@ + + @@ -1397,7 +1410,9 @@ + + @@ -2497,6 +2512,7 @@ + @@ -2813,7 +2829,7 @@ - + @@ -3312,7 +3328,7 @@ - + @@ -3565,7 +3581,7 @@ - + @@ -5059,7 +5075,7 @@ - + @@ -5273,7 +5289,7 @@ - + @@ -5818,7 +5834,7 @@ - + @@ -7100,7 +7116,7 @@ - + @@ -7254,6 +7270,10 @@ + + + + @@ -7493,7 +7513,7 @@ - + @@ -7540,7 +7560,7 @@ - + @@ -7589,7 +7609,7 @@ - + @@ -8058,6 +8078,11 @@ + + + + + @@ -8523,7 +8548,7 @@ - + @@ -8679,7 +8704,7 @@ - + @@ -9387,7 +9412,6 @@ - @@ -10374,7 +10398,7 @@ - + @@ -10655,7 +10679,7 @@ - + @@ -11035,7 +11059,7 @@ - + @@ -11154,7 +11178,7 @@ - + @@ -11716,7 +11740,7 @@ - + @@ -12302,7 +12326,7 @@ - + @@ -12422,7 +12446,7 @@ - + @@ -12431,7 +12455,7 @@ - + @@ -14537,6 +14561,17 @@ + + + + + + + + + + + @@ -15620,6 +15655,7 @@ + @@ -16308,30 +16344,30 @@ - - - - - + + + + + - - - - + + + + - - + + - - + + - - - + + + @@ -16611,7 +16647,7 @@ - + @@ -16885,19 +16921,19 @@ - - + + - - + + - - + + - + @@ -17872,7 +17908,7 @@ - + @@ -17927,7 +17963,7 @@ - + @@ -18646,6 +18682,10 @@ + + + + @@ -18811,10 +18851,10 @@ - - - - + + + + @@ -19668,7 +19708,7 @@ - + @@ -19760,7 +19800,7 @@ - + @@ -20078,7 +20118,7 @@ - + @@ -20698,58 +20738,58 @@ - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - + + + + - - + + - - - + + + - - - + + + - - - + + + @@ -22065,7 +22105,7 @@ - + @@ -22228,7 +22268,7 @@ - + @@ -22825,29 +22865,29 @@ - - + + - - + + - - + + - - + + - - - + + + - - + + @@ -22873,13 +22913,12 @@ - - + + - @@ -22929,7 +22968,6 @@ - @@ -23117,7 +23155,7 @@ - + @@ -23878,7 +23916,7 @@ - + @@ -23921,7 +23959,7 @@ - + @@ -23951,7 +23989,7 @@ - + @@ -23990,7 +24028,7 @@ - + @@ -24129,7 +24167,7 @@ - + @@ -25235,7 +25273,7 @@ - + @@ -25392,14 +25430,14 @@ - + - + @@ -25419,7 +25457,7 @@ - + @@ -25495,7 +25533,7 @@ - + @@ -25528,7 +25566,7 @@ - + @@ -25571,7 +25609,7 @@ - + @@ -25685,13 +25723,13 @@ - + - + @@ -25725,7 +25763,7 @@ - + @@ -26023,7 +26061,7 @@ - + @@ -26111,7 +26149,6 @@ - @@ -26174,7 +26211,7 @@ - + @@ -26286,7 +26323,7 @@ - + @@ -26566,7 +26603,7 @@ - + @@ -26575,12 +26612,12 @@ - + - + @@ -26618,7 +26655,7 @@ - + @@ -26643,7 +26680,7 @@ - + @@ -26878,11 +26915,11 @@ - + - + @@ -27053,7 +27090,7 @@ - + @@ -27158,23 +27195,23 @@ - + - + - + - + @@ -27458,6 +27495,7 @@ + @@ -28048,6 +28086,7 @@ + @@ -28181,7 +28220,7 @@ - + @@ -28248,7 +28287,7 @@ - + @@ -28432,7 +28471,7 @@ - + @@ -28452,7 +28491,7 @@ - + @@ -28475,25 +28514,6 @@ - - - - - - - - - - - - - - - - - - - @@ -28515,7 +28535,7 @@ - + @@ -28524,7 +28544,7 @@ - + @@ -28619,17 +28639,6 @@ - - - - - - - - - - - @@ -28720,24 +28729,41 @@ - + - + - + - + - + - - + + + + - - + + + + + + + + + + + + + + + + + @@ -28781,27 +28807,18 @@ - + - + - + - - + + - - - - - - - - - - - + + @@ -28924,7 +28941,7 @@ - + @@ -28995,7 +29012,10 @@ + + + @@ -29018,14 +29038,14 @@ - + - + @@ -29036,14 +29056,14 @@ - + - + @@ -29064,44 +29084,44 @@ - - + + - + - + - + - + - + - + - + @@ -29113,27 +29133,27 @@ - + - - + + - + - + - + @@ -29144,11 +29164,11 @@ - + - + @@ -29172,7 +29192,7 @@ - + @@ -29271,7 +29291,7 @@ - + @@ -29381,7 +29401,7 @@ - + @@ -29439,7 +29459,9 @@ + + @@ -29719,30 +29741,30 @@ - - - + + + - - - - - + + + + + - - - + + + - - - + + + @@ -30156,7 +30178,7 @@ - + @@ -30186,7 +30208,7 @@ - + @@ -30195,7 +30217,7 @@ - + @@ -30360,7 +30382,7 @@ - + @@ -30697,7 +30719,7 @@ - + @@ -30875,13 +30897,13 @@ - + - + @@ -31242,7 +31264,7 @@ - + @@ -31268,7 +31290,7 @@ - + @@ -31281,7 +31303,7 @@ - + @@ -31944,7 +31966,7 @@ - + @@ -32001,7 +32023,7 @@ - + @@ -32027,7 +32049,7 @@ - + @@ -32036,7 +32058,7 @@ - + @@ -32060,7 +32082,7 @@ - + @@ -32121,7 +32143,7 @@ - + @@ -32320,6 +32342,7 @@ + @@ -32331,6 +32354,8 @@ + + @@ -32357,7 +32382,18 @@ - + + + + + + + + + + + + @@ -32395,9 +32431,10 @@ - + + @@ -33002,7 +33039,7 @@ - + @@ -33228,9 +33265,9 @@ - - - + + + @@ -33575,7 +33612,7 @@ - + @@ -34651,7 +34688,7 @@ - + @@ -34948,7 +34985,7 @@ - + @@ -35118,7 +35155,7 @@ - + @@ -35145,7 +35182,7 @@ - + @@ -35406,7 +35443,7 @@ - + @@ -35931,11 +35968,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -35950,10 +36212,18 @@ + + + + + + + + @@ -35975,6 +36245,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -36048,22 +36422,11 @@ - + - + - - - - - - - - - - - @@ -36375,7 +36738,7 @@ - + @@ -36574,11 +36937,6 @@ - - - - - @@ -37095,26 +37453,26 @@ - - + + - + - + - + - + - + @@ -37152,105 +37510,105 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37337,13 +37695,13 @@ - - + + - + - + @@ -37357,82 +37715,82 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37444,7 +37802,7 @@ - + @@ -37488,75 +37846,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37627,7 +37985,7 @@ - + @@ -37725,18 +38083,18 @@ - + - + - + - + - + @@ -37862,69 +38220,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37947,7 +38305,7 @@ - + @@ -37985,7 +38343,7 @@ - + @@ -38081,7 +38439,7 @@ - + @@ -38091,7 +38449,7 @@ - + @@ -38455,289 +38813,289 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38749,150 +39107,150 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38908,586 +39266,586 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -40396,6 +40754,11 @@ + + + + + @@ -40558,17 +40921,6 @@ - - - - - - - - - - - @@ -40777,8 +41129,8 @@ - - + + @@ -40793,7 +41145,7 @@ - + @@ -40801,7 +41153,7 @@ - + @@ -41008,7 +41360,7 @@ - + @@ -41038,7 +41390,7 @@ - + @@ -41148,107 +41500,107 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -41436,7 +41788,7 @@ - + @@ -41449,6 +41801,17 @@ + + + + + + + + + + + @@ -41732,7 +42095,7 @@ - + @@ -41972,8 +42335,8 @@ - - + + @@ -41982,7 +42345,7 @@ - + @@ -42276,6 +42639,17 @@ + + + + + + + + + + + @@ -42421,6 +42795,7 @@ + @@ -44492,7 +44867,7 @@ - + @@ -44517,7 +44892,7 @@ - + @@ -45036,7 +45411,7 @@ - + @@ -45303,7 +45678,7 @@ - + @@ -47038,7 +47413,7 @@ - + @@ -47047,7 +47422,7 @@ - + @@ -47121,7 +47496,7 @@ - + @@ -47129,7 +47504,7 @@ - + @@ -49390,8 +49765,8 @@ - - + + @@ -49435,7 +49810,7 @@ - + @@ -49569,7 +49944,7 @@ - + @@ -49688,7 +50063,7 @@ - + @@ -50099,7 +50474,7 @@ - + @@ -50169,7 +50544,7 @@ - + @@ -50535,7 +50910,7 @@ - + @@ -50610,7 +50985,7 @@ - + @@ -51253,7 +51628,7 @@ - + @@ -51278,7 +51653,7 @@ - + @@ -51510,8 +51885,8 @@ - - + + @@ -51520,50 +51895,50 @@ - - - - + + + + - - - + + + - - + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - - + + + + @@ -51571,24 +51946,35 @@ - - + + - - + + - - + + - - + + + + + + + + + + + + + @@ -51614,6 +52000,7 @@ + @@ -52161,8 +52548,19 @@ + + + + + + + + + + + - + @@ -52292,6 +52690,7 @@ + @@ -53147,8 +53546,8 @@ - - + + @@ -54113,7 +54512,7 @@ - + @@ -54132,7 +54531,7 @@ - + @@ -54159,7 +54558,7 @@ - + @@ -54170,34 +54569,34 @@ - + - + - + - + - + - + - + - + - + - + @@ -54558,7 +54957,7 @@ - + @@ -55092,12 +55491,12 @@ - + - + @@ -55107,7 +55506,7 @@ - + @@ -55193,7 +55592,7 @@ - + @@ -55438,7 +55837,7 @@ - + @@ -55584,7 +55983,7 @@ - + @@ -56533,7 +56932,7 @@ - + @@ -57085,7 +57484,7 @@ - + @@ -57095,7 +57494,7 @@ - + @@ -57134,7 +57533,7 @@ - + @@ -57155,7 +57554,7 @@ - + @@ -57188,12 +57587,12 @@ - + - + @@ -57282,7 +57681,7 @@ - + @@ -57294,7 +57693,7 @@ - + @@ -58257,7 +58656,7 @@ - + @@ -58383,7 +58782,7 @@ - + @@ -58391,7 +58790,7 @@ - + @@ -58434,7 +58833,7 @@ - + @@ -58460,7 +58859,7 @@ - + @@ -58468,7 +58867,7 @@ - + @@ -59358,6 +59757,17 @@ + + + + + + + + + + + @@ -59477,17 +59887,6 @@ - - - - - - - - - - - @@ -60419,7 +60818,7 @@ - + @@ -60456,7 +60855,7 @@ - + @@ -60643,7 +61042,7 @@ - + @@ -60689,7 +61088,7 @@ - + @@ -61727,7 +62126,7 @@ - + @@ -61739,7 +62138,7 @@ - + @@ -62278,7 +62677,7 @@ - + @@ -62296,7 +62695,7 @@ - + @@ -62627,7 +63026,7 @@ - + @@ -62650,15 +63049,15 @@ - + - + - + - + @@ -62825,67 +63224,67 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -63135,7 +63534,7 @@ - + @@ -63509,6 +63908,17 @@ + + + + + + + + + + + @@ -63626,17 +64036,17 @@ - + - + - + @@ -63647,7 +64057,7 @@ - + @@ -63804,6 +64214,7 @@ + @@ -63813,7 +64224,7 @@ - + @@ -63884,11 +64295,11 @@ - - - - - + + + + + @@ -63941,30 +64352,30 @@ - + - + - + - + - + @@ -64032,13 +64443,13 @@ - + - + @@ -64379,7 +64790,7 @@ - + @@ -65464,6 +65875,17 @@ + + + + + + + + + + + @@ -65497,7 +65919,7 @@ - + @@ -65513,7 +65935,7 @@ - + @@ -65537,7 +65959,7 @@ - + @@ -65579,6 +66001,7 @@ + @@ -65627,11 +66050,23 @@ + + + + + + + + + + + + @@ -65648,24 +66083,24 @@ - + - + - + - - - - + + + + - + @@ -65684,7 +66119,7 @@ - + @@ -65772,6 +66207,14 @@ + + + + + + + + @@ -65929,6 +66372,17 @@ + + + + + + + + + + + @@ -65978,6 +66432,7 @@ + @@ -66164,6 +66619,17 @@ + + + + + + + + + + + @@ -66217,7 +66683,7 @@ - + @@ -66233,7 +66699,7 @@ - + @@ -66337,6 +66803,7 @@ + @@ -66649,7 +67116,7 @@ - + @@ -66670,27 +67137,31 @@ - + - + - + + + + + - + - + - + @@ -66699,6 +67170,15 @@ + + + + + + + + + @@ -66709,14 +67189,34 @@ + + + + + + + + + + + + + + + + + + + - + - + + @@ -66732,7 +67232,7 @@ - + @@ -66742,7 +67242,7 @@ - + @@ -66869,7 +67369,7 @@ - + @@ -67144,6 +67644,17 @@ + + + + + + + + + + + @@ -67213,6 +67724,14 @@ + + + + + + + + @@ -67416,7 +67935,7 @@ - + @@ -67426,7 +67945,7 @@ - + @@ -67467,6 +67986,17 @@ + + + + + + + + + + + @@ -67685,6 +68215,7 @@ + @@ -67861,7 +68392,7 @@ - + @@ -68146,6 +68677,10 @@ + + + + @@ -68162,12 +68697,24 @@ + + + + + + + + + + + + @@ -68256,6 +68803,17 @@ + + + + + + + + + + + @@ -68268,6 +68826,7 @@ + @@ -68280,23 +68839,7 @@ - - - - - - - - - - - - - - - - - + @@ -68325,7 +68868,7 @@ - + @@ -68353,7 +68896,7 @@ - + @@ -68899,10 +69442,10 @@ - - - - + + + + @@ -69006,9 +69549,9 @@ - - - + + + @@ -69103,7 +69646,7 @@ - + @@ -69487,32 +70030,32 @@ - - - - + + + + - - + + - - - - + + + + - - + + - - + + - - + + @@ -69580,6 +70123,10 @@ + + + + @@ -69888,6 +70435,14 @@ + + + + + + + + @@ -69939,7 +70494,7 @@ - + @@ -70230,75 +70785,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -71944,7 +72499,7 @@ - + @@ -72443,7 +72998,7 @@ - + @@ -72483,7 +73038,7 @@ - + @@ -74309,7 +74864,7 @@ - + @@ -74543,7 +75098,7 @@ - + @@ -75392,7 +75947,7 @@ - + @@ -76274,7 +76829,7 @@ - + @@ -76418,7 +76973,7 @@ - + @@ -76521,7 +77076,7 @@ - + @@ -76695,7 +77250,7 @@ - + @@ -76826,7 +77381,7 @@ - + @@ -76958,7 +77513,7 @@ - + @@ -77198,7 +77753,7 @@ - + @@ -77465,7 +78020,7 @@ - + @@ -77791,7 +78346,7 @@ - + @@ -77826,7 +78381,7 @@ - + @@ -78054,7 +78609,7 @@ - + @@ -78128,7 +78683,7 @@ - + @@ -78352,7 +78907,7 @@ - + @@ -78360,7 +78915,7 @@ - + @@ -78501,7 +79056,7 @@ - + @@ -78871,7 +79426,7 @@ - + @@ -79723,7 +80278,7 @@ - + @@ -80120,7 +80675,7 @@ - + @@ -80177,18 +80732,29 @@ - + - + - + - + + + + + + + + + + + + @@ -80259,7 +80825,7 @@ - + @@ -80435,7 +81001,7 @@ - + @@ -80512,7 +81078,7 @@ - + @@ -80534,7 +81100,7 @@ - + @@ -80556,7 +81122,7 @@ - + @@ -80564,7 +81130,7 @@ - + @@ -80573,7 +81139,7 @@ - + @@ -80602,7 +81168,7 @@ - + @@ -80719,8 +81285,8 @@ - - + + @@ -81176,6 +81742,7 @@ + @@ -82436,6 +83003,11 @@ + + + + + @@ -82799,7 +83371,7 @@ - + @@ -82815,7 +83387,7 @@ - + @@ -82823,7 +83395,7 @@ - + @@ -82874,7 +83446,7 @@ - + @@ -82906,7 +83478,7 @@ - + @@ -82963,7 +83535,8 @@ - + + @@ -83045,10 +83618,10 @@ - - - - + + + + @@ -83357,6 +83930,17 @@ + + + + + + + + + + + @@ -83378,12 +83962,12 @@ - + - + @@ -83448,7 +84032,7 @@ - + @@ -83477,7 +84061,7 @@ - + @@ -83487,12 +84071,12 @@ - + - - + + @@ -83570,12 +84154,12 @@ - - + + - - + + @@ -83589,8 +84173,8 @@ - - + + @@ -83659,17 +84243,6 @@ - - - - - - - - - - - @@ -83740,7 +84313,6 @@ - @@ -83790,7 +84362,7 @@ - + @@ -83816,7 +84388,7 @@ - + @@ -84149,46 +84721,46 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -85181,6 +85753,364 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -85268,6 +86198,13 @@ + + + + + + + @@ -85313,12 +86250,6 @@ - - - - - - @@ -85358,7 +86289,7 @@ - + @@ -85440,41 +86371,41 @@ - - - + + + - - - + + + - - - - - + + + + + - - - - - + + + + + - - - + + + - - - + + + - + @@ -85503,6 +86434,15 @@ + + + + + + + + + @@ -85764,7 +86704,7 @@ - + @@ -85772,6 +86712,23 @@ + + + + + + + + + + + + + + + + + @@ -85837,7 +86794,7 @@ - + @@ -85887,7 +86844,7 @@ - + @@ -86262,7 +87219,7 @@ - + @@ -86649,7 +87606,7 @@ - + @@ -86673,8 +87630,8 @@ - - + + @@ -86822,7 +87779,7 @@ - + @@ -86838,7 +87795,7 @@ - + @@ -88084,7 +89041,7 @@ - + @@ -88466,76 +89423,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -88776,6 +89663,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -88996,6 +89961,10 @@ + + + + @@ -89450,7 +90419,7 @@ - + @@ -90545,7 +91514,7 @@ - + @@ -90825,7 +91794,7 @@ - + @@ -90858,7 +91827,7 @@ - + @@ -90981,7 +91950,7 @@ - + @@ -91027,7 +91996,7 @@ - + @@ -91164,7 +92133,7 @@ - + @@ -91204,7 +92173,7 @@ - + @@ -91223,7 +92192,7 @@ - + @@ -91450,27 +92419,27 @@ - + - + - + - + @@ -92073,7 +93042,7 @@ - + @@ -92120,7 +93089,7 @@ - + @@ -92134,7 +93103,7 @@ - + @@ -92153,7 +93122,7 @@ - + @@ -92177,7 +93146,7 @@ - + @@ -92269,7 +93238,7 @@ - + @@ -92739,7 +93708,7 @@ - + @@ -93028,7 +93997,7 @@ - + @@ -93042,7 +94011,7 @@ - + @@ -93115,7 +94084,7 @@ - + @@ -93216,7 +94185,7 @@ - + @@ -93235,7 +94204,7 @@ - + @@ -93265,7 +94234,7 @@ - + @@ -93411,7 +94380,7 @@ - + @@ -93461,7 +94430,7 @@ - + @@ -94318,7 +95287,7 @@ - + @@ -94423,7 +95392,7 @@ - + @@ -94569,7 +95538,7 @@ - + @@ -95016,7 +95985,7 @@ - + @@ -95037,7 +96006,7 @@ - + @@ -95069,7 +96038,7 @@ - + @@ -95537,17 +96506,17 @@ - - - - - - + + + + + + - - - + + + @@ -95557,7 +96526,7 @@ - + @@ -95578,8 +96547,8 @@ - - + + @@ -95646,7 +96615,7 @@ - + @@ -95703,6 +96672,17 @@ + + + + + + + + + + + @@ -95762,16 +96742,6 @@ - - - - - - - - - - @@ -96443,14 +97413,14 @@ - + - + - + @@ -97319,13 +98289,13 @@ - - + + - + @@ -97335,8 +98305,8 @@ - - + + @@ -97381,7 +98351,7 @@ - + @@ -97626,6 +98596,17 @@ + + + + + + + + + + + @@ -97721,7 +98702,7 @@ - + @@ -97807,6 +98788,7 @@ + @@ -97817,27 +98799,27 @@ - - + + - + - + - + - + @@ -97845,7 +98827,7 @@ - + @@ -97853,14 +98835,14 @@ - + - + @@ -97868,7 +98850,7 @@ - + @@ -97877,7 +98859,7 @@ - + @@ -97886,11 +98868,11 @@ - + - + @@ -98244,14 +99226,14 @@ - + - + @@ -98468,7 +99450,7 @@ - + @@ -99050,7 +100032,7 @@ - + @@ -99090,7 +100072,7 @@ - + @@ -99098,7 +100080,7 @@ - + @@ -99129,13 +100111,13 @@ - + - + @@ -101225,10 +102207,10 @@ - - - - + + + + @@ -101278,6 +102260,6 @@ diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index 07ff4483e2be..bbc7c01ccb07 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -41,6 +41,7 @@ bus_for_each_dev bus_register bus_unregister + call_rcu cancel_delayed_work cancel_delayed_work_sync cancel_work_sync @@ -287,7 +288,6 @@ get_cpu_device get_device __get_free_pages - get_next_event_cpu get_pid_task get_random_bytes __get_task_comm @@ -467,6 +467,7 @@ __ll_sc_atomic64_fetch_or __ll_sc_atomic64_or __ll_sc_atomic64_sub + __ll_sc_atomic64_sub_return __ll_sc_atomic_add __ll_sc_atomic_add_return __ll_sc_atomic_sub @@ -491,6 +492,10 @@ __memcpy_toio memdup_user memmove + mempool_alloc + mempool_free + mempool_kfree + mempool_kmalloc memremap memset __memset_io @@ -506,6 +511,7 @@ module_put __msecs_to_jiffies msleep + msleep_interruptible __mutex_init mutex_lock mutex_lock_interruptible @@ -1210,15 +1216,24 @@ crc_ccitt_table kernel_restart kernel_setsockopt - mempool_alloc mempool_create mempool_destroy - mempool_free - mempool_kfree - mempool_kmalloc send_sig_info time64_to_tm +# required by dm-user.ko + bio_advance + bio_endio + bio_put + _copy_from_iter + copy_page_from_iter + copy_page_to_iter + _copy_to_iter + dm_register_target + dm_unregister_target + mempool_exit + mempool_init + # required by dwc3-haps.ko pcim_enable_device __pci_register_driver @@ -1315,9 +1330,6 @@ proc_create seq_write -# required by google-battery.ko - simple_strtoull - # required by google-bms.ko full_name_hash @@ -1362,7 +1374,6 @@ __break_lease clear_inode __close_fd - crc32_le d_add d_drop deactivate_locked_super @@ -1370,6 +1381,8 @@ dget_parent d_instantiate d_make_root + down_read_killable + down_write_killable dput flush_dcache_page fs_kobj @@ -1419,6 +1432,7 @@ user_path_at_empty vfs_create vfs_fallocate + vfs_fsync vfs_getattr vfs_getxattr vfs_link @@ -1428,19 +1442,19 @@ vfs_rmdir vfs_setxattr vfs_unlink + ZSTD_decompressStream + ZSTD_DStreamWorkspaceBound + ZSTD_initDStream # required by ion-alloc.ko dma_buf_export dma_get_device_base dma_get_size - __ll_sc_atomic64_sub_return mm_event_count __next_zones_zonelist ptr_to_hashval sched_setattr split_page - vm_map_ram - vm_unmap_ram zone_watermark_ok_safe # required by ipa3.ko @@ -2223,7 +2237,6 @@ # required by sctp.ko __bitmap_shift_right __bitmap_weight - call_rcu compat_ip_getsockopt compat_ip_setsockopt compat_ipv6_getsockopt @@ -2518,100 +2531,6 @@ trace_output_call utf16s_to_utf8s -# required by wlan.ko - bitmap_print_to_pagebuf - __cfg80211_alloc_event_skb - __cfg80211_alloc_reply_skb - cfg80211_ap_stopped - cfg80211_calculate_bitrate - cfg80211_chandef_create - cfg80211_ch_switch_notify - cfg80211_connect_done - cfg80211_del_sta_sinfo - cfg80211_disconnected - cfg80211_external_auth_request - cfg80211_ft_event - cfg80211_get_bss - cfg80211_gtk_rekey_notify - cfg80211_ibss_joined - cfg80211_inform_bss_frame_data - cfg80211_mgmt_tx_status - cfg80211_michael_mic_failure - cfg80211_new_sta - cfg80211_pmksa_candidate_notify - cfg80211_put_bss - cfg80211_ready_on_channel - cfg80211_remain_on_channel_expired - cfg80211_roamed - cfg80211_rx_mgmt - cfg80211_rx_unprot_mlme_mgmt - cfg80211_scan_done - cfg80211_sched_scan_results - __cfg80211_send_event_skb - cfg80211_tdls_oper_request - cfg80211_unlink_bss - cfg80211_update_owe_info_event - cfg80211_vendor_cmd_reply - complete_and_exit - cpufreq_quick_get_max - __cpuhp_remove_state - cpu_topology - crypto_aead_setauthsize - crypto_aead_setkey - crypto_alloc_aead - crypto_alloc_base - crypto_alloc_skcipher - crypto_shash_final - crypto_shash_update - dev_alloc_name - dump_stack - hex2bin - hex_to_bin - ieee80211_channel_to_frequency - ieee80211_frequency_to_channel - ieee80211_get_channel - ieee80211_hdrlen - iommu_iova_to_phys - irq_set_affinity_hint - mac_pton - msleep_interruptible - netif_tx_stop_all_queues - netlink_broadcast - __netlink_kernel_create - netlink_kernel_release - nla_put_64bit - nla_strlcpy - param_get_string - param_ops_byte - param_set_copystring - pci_read_config_dword - pci_read_config_word - pci_write_config_dword - pci_write_config_word - PDE_DATA - pm_freezing - pm_system_wakeup - proc_create_data - proc_mkdir - register_netevent_notifier - register_sysctl_table - regulatory_set_wiphy_regd - rtnl_lock - save_stack_trace_tsk - schedule_timeout_interruptible - seq_vprintf - set_cpus_allowed_ptr - skip_spaces - strchrnul - unregister_netevent_notifier - unregister_sysctl_table - vprintk - wiphy_free - wiphy_new_nm - wiphy_register - wiphy_unregister - wireless_send_event - # required by usb-audio-qmi.ko find_snd_usb_substream snd_usb_enable_audio_stream @@ -2684,6 +2603,99 @@ irq_create_mapping regmap_irq_get_virq +# required by wlan.ko + bitmap_print_to_pagebuf + __cfg80211_alloc_event_skb + __cfg80211_alloc_reply_skb + cfg80211_ap_stopped + cfg80211_calculate_bitrate + cfg80211_chandef_create + cfg80211_ch_switch_notify + cfg80211_connect_done + cfg80211_del_sta_sinfo + cfg80211_disconnected + cfg80211_external_auth_request + cfg80211_ft_event + cfg80211_get_bss + cfg80211_gtk_rekey_notify + cfg80211_ibss_joined + cfg80211_inform_bss_frame_data + cfg80211_mgmt_tx_status + cfg80211_michael_mic_failure + cfg80211_new_sta + cfg80211_pmksa_candidate_notify + cfg80211_put_bss + cfg80211_ready_on_channel + cfg80211_remain_on_channel_expired + cfg80211_roamed + cfg80211_rx_mgmt + cfg80211_rx_unprot_mlme_mgmt + cfg80211_scan_done + cfg80211_sched_scan_results + __cfg80211_send_event_skb + cfg80211_tdls_oper_request + cfg80211_unlink_bss + cfg80211_update_owe_info_event + cfg80211_vendor_cmd_reply + complete_and_exit + cpufreq_quick_get_max + __cpuhp_remove_state + cpu_topology + crypto_aead_setauthsize + crypto_aead_setkey + crypto_alloc_aead + crypto_alloc_base + crypto_alloc_skcipher + crypto_shash_final + crypto_shash_update + dev_alloc_name + dump_stack + hex2bin + hex_to_bin + ieee80211_channel_to_frequency + ieee80211_frequency_to_channel + ieee80211_get_channel + ieee80211_hdrlen + iommu_iova_to_phys + irq_set_affinity_hint + mac_pton + netif_tx_stop_all_queues + netlink_broadcast + __netlink_kernel_create + netlink_kernel_release + nla_put_64bit + nla_strlcpy + param_get_string + param_ops_byte + param_set_copystring + pci_read_config_dword + pci_read_config_word + pci_write_config_dword + pci_write_config_word + PDE_DATA + pm_freezing + pm_system_wakeup + proc_create_data + proc_mkdir + register_netevent_notifier + register_sysctl_table + regulatory_set_wiphy_regd + rtnl_lock + save_stack_trace_tsk + schedule_timeout_interruptible + seq_vprintf + set_cpus_allowed_ptr + skip_spaces + strchrnul + unregister_netevent_notifier + unregister_sysctl_table + vprintk + wiphy_free + wiphy_new_nm + wiphy_register + wiphy_unregister + wireless_send_event + # required by wsa883x_dlkm.ko snd_info_create_module_entry snd_soc_component_exit_regmap @@ -2702,3 +2714,10 @@ xhci_resume xhci_run xhci_suspend + +# preserved by --additions-only + crc32_le + get_next_event_cpu + simple_strtoull + vm_map_ram + vm_unmap_ram diff --git a/arch/Kconfig b/arch/Kconfig index 6b6c827132a4..01684e9ee402 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -366,6 +366,13 @@ config HAVE_RCU_TABLE_FREE config HAVE_RCU_TABLE_INVALIDATE bool +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 705a68208423..85d9ea4a0acc 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -156,6 +156,7 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed + lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -198,9 +199,15 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc + + ; stop_pc info by gdb needs this info + lr r0, [efa] mov r1, sp + ; Now that we have read EFA, it is safe to do "fake" rtie + ; and get out of CPU exception mode + FAKE_RET_FROM_EXCPN + ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -227,10 +234,6 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE - lr r12, [efa] - - FAKE_RET_FROM_EXCPN - ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR) bmsk.f 0, r9, 7 @@ -238,6 +241,9 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level + ; First return from Exception to pure K mode (Exception/IRQs renabled) + FAKE_RET_FROM_EXCPN + ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index bf40e06f3fb8..0fed32b95923 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -115,7 +115,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; @@ -135,6 +135,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index dc10dfb94390..4af7afc8bb5c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -592,8 +592,10 @@ config ARCH_S3C24XX select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_IO_H + select S3C2410_WATCHDOG select SAMSUNG_ATAGS select USE_OF + select WATCHDOG help Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 SoCs based systems, such as the Simtec Electronics BAST diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 216e1d1a69c7..473d6721b788 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -35,8 +35,8 @@ interrupts = , ; interrupt-names = "int0", "int1"; - clocks = <&mcan_clk>, <&l3_iclk_div>; - clock-names = "cclk", "hclk"; + clocks = <&l3_iclk_div>, <&mcan_clk>; + clock-names = "hclk", "cclk"; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; }; }; diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index a25da415cb02..907339bc81e5 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 4f27861bbb32..4cc9858f7ff8 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -97,7 +97,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts index 2b760f90f38c..5375c6699843 100644 --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts @@ -192,6 +192,7 @@ fixed-link { speed = <1000>; full-duplex; + pause; }; }; }; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 1a96d4317c97..8f907c235b02 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -516,7 +516,7 @@ status = "disabled"; }; - target-module@56000000 { + sgx_module: target-module@56000000 { compatible = "ti,sysc-omap4", "ti,sysc"; ti,hwmods = "gpu"; reg = <0x5601fc00 0x4>, diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi index cbcdcb4e7d1c..86b9caf461df 100644 --- a/arch/arm/boot/dts/omap443x.dtsi +++ b/arch/arm/boot/dts/omap443x.dtsi @@ -74,3 +74,13 @@ }; /include/ "omap443x-clocks.dtsi" + +/* + * Use dpll_per for sgx at 153.6MHz like droid4 stock v3.0.8 Android kernel + */ +&sgx_module { + assigned-clocks = <&l3_gfx_clkctrl OMAP4_GPU_CLKCTRL 24>, + <&dpll_per_m7x2_ck>; + assigned-clock-rates = <0>, <153600000>; + assigned-clock-parents = <&dpll_per_m7x2_ck>; +}; diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 67358562a6ea..020a864623ff 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -98,19 +98,16 @@ }; clocks: clock-controller@e0100000 { - compatible = "samsung,s5pv210-clock", "simple-bus"; + compatible = "samsung,s5pv210-clock"; reg = <0xe0100000 0x10000>; clock-names = "xxti", "xusbxti"; clocks = <&xxti>, <&xusbxti>; #clock-cells = <1>; - #address-cells = <1>; - #size-cells = <1>; - ranges; + }; - pmu_syscon: syscon@e0108000 { - compatible = "samsung-s5pv210-pmu", "syscon"; - reg = <0xe0108000 0x8000>; - }; + pmu_syscon: syscon@e0108000 { + compatible = "samsung-s5pv210-pmu", "syscon"; + reg = <0xe0108000 0x8000>; }; pinctrl0: pinctrl@e0200000 { @@ -126,35 +123,28 @@ }; }; - amba { - #address-cells = <1>; - #size-cells = <1>; - compatible = "simple-bus"; - ranges; + pdma0: dma@e0900000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0900000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <19>; + clocks = <&clocks CLK_PDMA0>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + }; - pdma0: dma@e0900000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0900000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <19>; - clocks = <&clocks CLK_PDMA0>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; - - pdma1: dma@e0a00000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0a00000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <20>; - clocks = <&clocks CLK_PDMA1>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma1: dma@e0a00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0a00000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <20>; + clocks = <&clocks CLK_PDMA1>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; spi0: spi@e1300000 { @@ -227,43 +217,36 @@ status = "disabled"; }; - audio-subsystem { - compatible = "samsung,s5pv210-audss", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges; + clk_audss: clock-controller@eee10000 { + compatible = "samsung,s5pv210-audss-clock"; + reg = <0xeee10000 0x1000>; + clock-names = "hclk", "xxti", + "fout_epll", + "sclk_audio0"; + clocks = <&clocks DOUT_HCLKP>, <&xxti>, + <&clocks FOUT_EPLL>, + <&clocks SCLK_AUDIO0>; + #clock-cells = <1>; + }; - clk_audss: clock-controller@eee10000 { - compatible = "samsung,s5pv210-audss-clock"; - reg = <0xeee10000 0x1000>; - clock-names = "hclk", "xxti", - "fout_epll", - "sclk_audio0"; - clocks = <&clocks DOUT_HCLKP>, <&xxti>, - <&clocks FOUT_EPLL>, - <&clocks SCLK_AUDIO0>; - #clock-cells = <1>; - }; - - i2s0: i2s@eee30000 { - compatible = "samsung,s5pv210-i2s"; - reg = <0xeee30000 0x1000>; - interrupt-parent = <&vic2>; - interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; - clock-names = "iis", - "i2s_opclk0", - "i2s_opclk1"; - clocks = <&clk_audss CLK_I2S>, - <&clk_audss CLK_I2S>, - <&clk_audss CLK_DOUT_AUD_BUS>; - samsung,idma-addr = <0xc0010000>; - pinctrl-names = "default"; - pinctrl-0 = <&i2s0_bus>; - #sound-dai-cells = <0>; - status = "disabled"; - }; + i2s0: i2s@eee30000 { + compatible = "samsung,s5pv210-i2s"; + reg = <0xeee30000 0x1000>; + interrupt-parent = <&vic2>; + interrupts = <16>; + dma-names = "rx", "tx", "tx-sec"; + dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + clock-names = "iis", + "i2s_opclk0", + "i2s_opclk1"; + clocks = <&clk_audss CLK_I2S>, + <&clk_audss CLK_I2S>, + <&clk_audss CLK_DOUT_AUD_BUS>; + samsung,idma-addr = <0xc0010000>; + pinctrl-names = "default"; + pinctrl-0 = <&i2s0_bus>; + #sound-dai-cells = <0>; + status = "disabled"; }; i2s1: i2s@e2100000 { diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5d46bb0139fa..707ad5074878 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index f250b20af493..9be1c4a3d95f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index 7e74ba83f809..75396993195d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -168,7 +168,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index b2a773a718e1..5e5223a48ac7 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -121,7 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index e9bc88937b1e..0f6d1fe1f1c2 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -167,6 +167,7 @@ CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y CONFIG_TEST_KSTRTOX=y +CONFIG_DEBUG_FS=y CONFIG_KGDB=y CONFIG_KGDB_KDB=y CONFIG_STRICT_DEVMEM=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 371fca4e1ab7..e5e45314af2a 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -159,6 +159,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_FUNCTION_TRACER=y diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 82290f212d8e..e1eb662e0f9e 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -52,20 +52,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8a8470d36c65..97fa9c167757 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -688,6 +688,40 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } +/* + * Arm32 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See this same function in the arm64 platform code, which has the same + * problem. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u32 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_fault_on_uaccess(struct pt_regs *regs, struct arch_hw_breakpoint *info) { @@ -697,23 +731,25 @@ static int watchpoint_fault_on_uaccess(struct pt_regs *regs, static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int i, access; - u32 val, ctrl_reg, alignment_mask; + int i, access, closest_match = 0; + u32 min_dist = -1, dist; + u32 val, ctrl_reg; struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; + continue; - info = counter_arch_bp(wp); /* * The DFAR is an unknown value on debug architectures prior * to 7.1. Since we only allow a single watchpoint on these @@ -722,33 +758,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, */ if (debug_arch < ARM_DEBUG_ARCH_V7_1) { BUG_ON(i > 0); + info = counter_arch_bp(wp); info->trigger = wp->attr.bp_addr; } else { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(ARM_BASE_WVR + i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select. */ - ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; - /* Check that the access type matches. */ if (debug_exception_updates_fsr()) { access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; } + val = read_wb_reg(ARM_BASE_WVR + i); + ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + /* We have a winner. */ + info = counter_arch_bp(wp); info->trigger = addr; } @@ -770,13 +804,23 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * we can single-step over the watchpoint trigger. */ if (!is_default_overflow_handler(wp)) - goto unlock; - + continue; step: enable_single_step(wp, instruction_pointer(regs)); -unlock: - rcu_read_unlock(); } + + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + perf_bp_event(wp, regs); + if (is_default_overflow_handler(wp)) + enable_single_step(wp, instruction_pointer(regs)); + } + + rcu_read_unlock(); } static void watchpoint_single_step_handler(unsigned long pc) diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 377ff9cda667..c83baa51289f 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -240,6 +240,7 @@ config SAMSUNG_PM_DEBUG bool "Samsung PM Suspend debug" depends on PM && DEBUG_KERNEL depends on DEBUG_EXYNOS_UART || DEBUG_S3C24XX_UART || DEBUG_S3C2410_UART + depends on DEBUG_LL && MMU help Say Y here if you want verbose debugging from the PM Suspend and Resume code. See diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 0dc23fc227ed..cf08cb726767 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -98,21 +98,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 777a7d2a58dc..fc362eca8083 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -46,6 +46,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index eb9da0b0b2b1..5b3908880485 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -155,6 +155,7 @@ dtbs_install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@ # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 094cfed13df9..13ce24e922ee 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -97,7 +97,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index d5b6e8159a33..5d0905f0f1c1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -52,7 +52,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 3e0d5a9c096d..5fbfa76daae2 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -157,7 +157,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index b75ca4d7d001..7a30211d59ef 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -164,7 +164,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index 6cbdd66921aa..1a3e6e3b04eb 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -21,6 +21,10 @@ aliases { ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; serial0 = &uart0; serial1 = &uart1; }; @@ -136,25 +140,25 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0ead552d7eae..600adc25eaef 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -430,6 +430,7 @@ bus-width = <8>; mmc-hs200-1_8v; non-removable; + full-pwr-cycle-in-suspend; status = "okay"; }; diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 483a7130cf0e..496c243de4ac 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -232,17 +232,19 @@ AES_ENTRY(aes_ctr_encrypt) bmi .Lctr1x cmn w6, #4 /* 32 bit overflow? */ bcs .Lctr1x - ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w6 + add w7, w6, #1 mov v0.16b, v4.16b - add v7.4s, v7.4s, v8.4s + add w8, w6, #2 mov v1.16b, v4.16b - rev32 v8.16b, v7.16b + add w9, w6, #3 mov v2.16b, v4.16b + rev w7, w7 mov v3.16b, v4.16b - mov v1.s[3], v8.s[0] - mov v2.s[3], v8.s[1] - mov v3.s[3], v8.s[2] + rev w8, w8 + mov v1.s[3], w7 + rev w9, w9 + mov v2.s[3], w8 + mov v3.s[3], w9 ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */ bl aes_encrypt_block4x eor v0.16b, v5.16b, v0.16b diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e720742d647..c67cae9d5229 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -192,6 +192,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 291fed0a9df8..f43519b71061 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -107,8 +107,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -116,9 +114,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* * p??_access_permitted() is true for valid user mappings (subject to the @@ -144,13 +145,6 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } -static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -176,6 +170,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -668,12 +676,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbe0c5855b60..a41a5c1a1330 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,16 +86,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) -#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ - (!!x)<<8 | 0x1f) +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 9b197e5ea759..a08cc3cb0f8e 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -32,9 +32,10 @@ #include #endif -#define VDSO_SYMBOL(base, name) \ -({ \ - (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ +#define VDSO_SYMBOL(base, name) \ +({ \ + (void *)((vdso_offset_##name & ~1UL) - VDSO_LBASE + \ + (unsigned long)(base)); \ }) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index d44313a5d033..3fd8fd6d8fc2 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -18,14 +18,7 @@ #undef dmb #endif -#if __LINUX_ARM_ARCH__ >= 7 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#elif __LINUX_ARM_ARCH__ == 6 -#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory") -#else -#define dmb(x) __asm__ __volatile__ ("" : : : "memory") -#endif #if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f8f4434054b5..dbf2e630bffa 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -606,6 +606,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static void +cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) +{ + cap->matches(cap, SCOPE_LOCAL_CPU); +} + static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -800,9 +806,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Branch predictor hardening", .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, + .cpu_enable = cpu_enable_branch_predictor_hardening, }, #ifdef CONFIG_HARDEN_EL2_VECTORS { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 81690e65fc94..1be1faa7cc05 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1098,7 +1098,7 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) if (user_mode(regs)) return 1; - if (instr & BIT(CRm_shift)) + if (instr & BIT(PSTATE_Imm_shift)) regs->pstate |= PSR_SSBS_BIT; else regs->pstate &= ~PSR_SSBS_BIT; @@ -1108,8 +1108,8 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) } static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << CRm_shift), - .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, .fn = ssbs_emulation_handler, }; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 73d5ac36803d..cd0638ed1113 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -70,7 +70,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -78,9 +77,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 6106c49f84bc..655a308af9e3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -272,21 +272,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ad3a81b2c7ce..3c90b381ac21 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -271,18 +271,7 @@ static int __aarch32_alloc_vdso_pages(void) if (ret) return ret; - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) { - unsigned long c_vvar = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); - unsigned long c_vdso = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); - - free_page(c_vvar); - free_page(c_vdso); - } - - return ret; + return aarch32_alloc_kuser_vdso_page(); } #else static int __aarch32_alloc_vdso_pages(void) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 7155a6ff320d..5fb656a0b7d7 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 8fddb291c0e2..d6ade5f80fd0 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -202,7 +202,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0c073f3ca122..b53d0ebb87fc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1555,9 +1555,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1572,7 +1572,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 54529b4ed513..15eaf1e09d0c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -58,7 +58,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index d0c0ccdd656a..03ee3ff3cefa 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -42,7 +42,7 @@ obj-y += esi_stub.o # must be in kernel proper endif obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index aa41bd5cf9b7..8207b897b49d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -409,83 +409,9 @@ static void kretprobe_trampoline(void) { } -/* - * At this point the target function has been tricked into - * returning into our trampoline. Lookup the associated instance - * and then: - * - call the handler function - * - cleanup by marking the instance as unused - * - long jump back to the original return address - */ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = - ((struct fnptr *)kretprobe_trampoline)->ip; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more than one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - regs->cr_iip = orig_ret_address; - - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - if (ri->rp && ri->rp->handler) - ri->rp->handler(ri, regs); - - orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - kretprobe_hash_unlock(current, &flags); - - hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } + regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -498,6 +424,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->b0; + ri->fp = NULL; /* Replace the return addr with trampoline addr */ regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index d129475fd40d..4254ba13c5c5 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index 048d12fbb925..e45beb525f29 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ -13,6 +13,7 @@ #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT) /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */ +#define BUILD_VDSO32_64 #undef CONFIG_64BIT #define CONFIG_32BIT 1 #ifndef __ASSEMBLY__ diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 0596505770db..11985399c469 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -424,6 +424,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 920ec7caee96..5c00f8d3d3d6 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -60,7 +60,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO endif endif diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8e6d83f79e72..7644b6518b6a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -35,6 +35,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE + select ARCH_DISCARD_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select VIRT_TO_BUS select MODULES_USE_ELF_RELA @@ -311,21 +312,16 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on 64BIT -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y depends on 64BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config NODES_SHIFT - int - default "3" - depends on NEED_MULTIPLE_NODES + depends on ARCH_SPARSEMEM_ENABLE source "kernel/Kconfig.hz" diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index fafa3893fd70..8d390406d862 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -2,62 +2,6 @@ #ifndef _PARISC_MMZONE_H #define _PARISC_MMZONE_H -#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ -#ifdef CONFIG_DISCONTIGMEM - -extern int npmem_ranges; - -struct node_map_data { - pg_data_t pg_data; -}; - -extern struct node_map_data node_data[]; - -#define NODE_DATA(nid) (&node_data[nid].pg_data) - -/* We have these possible memory map layouts: - * Astro: 0-3.75, 67.75-68, 4-64 - * zx1: 0-1, 257-260, 4-256 - * Stretch (N-class): 0-2, 4-32, 34-xxx - */ - -/* Since each 1GB can only belong to one region (node), we can create - * an index table for pfn to nid lookup; each entry in pfnnid_map - * represents 1GB, and contains the node that the memory belongs to. */ - -#define PFNNID_SHIFT (30 - PAGE_SHIFT) -#define PFNNID_MAP_MAX 512 /* support 512GB */ -extern signed char pfnnid_map[PFNNID_MAP_MAX]; - -#ifndef CONFIG_64BIT -#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) -#else -/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ -#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) -#endif - -static inline int pfn_to_nid(unsigned long pfn) -{ - unsigned int i; - - if (unlikely(pfn_is_io(pfn))) - return 0; - - i = pfn >> PFNNID_SHIFT; - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); - - return pfnnid_map[i]; -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#endif #endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index af00fe9bf846..936d43999b63 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -145,9 +145,9 @@ extern int npmem_ranges; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_DISCONTIGMEM */ +#endif #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h new file mode 100644 index 000000000000..b5c3a79045b4 --- /dev/null +++ b/arch/parisc/include/asm/sparsemem.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_PARISC_SPARSEMEM_H +#define ASM_PARISC_SPARSEMEM_H + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +#define MAX_PHYSMEM_BITS 39 /* 512 GB */ +#define SECTION_SIZE_BITS 27 /* 128 MB */ + +#endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa2265d439..174213b1716e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -138,12 +138,6 @@ extern void $$dyncall(void); EXPORT_SYMBOL($$dyncall); #endif -#ifdef CONFIG_DISCONTIGMEM -#include -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(pfnnid_map); -#endif - #ifdef CONFIG_FUNCTION_TRACER extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 10a52664e29f..45ba4ac504ab 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -33,6 +33,7 @@ #include #include #include +#include extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -49,11 +50,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); -#ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; -#endif - static struct resource data_resource = { .name = "Kernel data", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -77,8 +73,8 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; * information retrieved in kernel/inventory.c. */ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; -int npmem_ranges __read_mostly; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; +int npmem_ranges __initdata; /* * get_memblock() allocates pages via memblock. @@ -111,7 +107,7 @@ static void * __init get_memblock(unsigned long size) } #ifdef CONFIG_64BIT -#define MAX_MEM (~0UL) +#define MAX_MEM (1UL << MAX_PHYSMEM_BITS) #else /* !CONFIG_64BIT */ #define MAX_MEM (3584U*1024U*1024U) #endif /* !CONFIG_64BIT */ @@ -150,7 +146,7 @@ static void __init mem_limit_func(void) static void __init setup_bootmem(void) { unsigned long mem_max; -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; int npmem_holes; #endif @@ -168,23 +164,20 @@ static void __init setup_bootmem(void) int j; for (j = i; j > 0; j--) { - unsigned long tmp; + physmem_range_t tmp; if (pmem_ranges[j-1].start_pfn < pmem_ranges[j].start_pfn) { break; } - tmp = pmem_ranges[j-1].start_pfn; - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; - pmem_ranges[j].start_pfn = tmp; - tmp = pmem_ranges[j-1].pages; - pmem_ranges[j-1].pages = pmem_ranges[j].pages; - pmem_ranges[j].pages = tmp; + tmp = pmem_ranges[j-1]; + pmem_ranges[j-1] = pmem_ranges[j]; + pmem_ranges[j] = tmp; } } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* * Throw out ranges that are too far apart (controlled by * MAX_GAP). @@ -196,7 +189,7 @@ static void __init setup_bootmem(void) pmem_ranges[i-1].pages) > MAX_GAP) { npmem_ranges = i; printk("Large gap in memory detected (%ld pages). " - "Consider turning on CONFIG_DISCONTIGMEM\n", + "Consider turning on CONFIG_SPARSEMEM\n", pmem_ranges[i].start_pfn - (pmem_ranges[i-1].start_pfn + pmem_ranges[i-1].pages)); @@ -261,9 +254,8 @@ static void __init setup_bootmem(void) printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* Merge the ranges, keeping track of the holes */ - { unsigned long end_pfn; unsigned long hole_pages; @@ -286,18 +278,6 @@ static void __init setup_bootmem(void) } #endif -#ifdef CONFIG_DISCONTIGMEM - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - } - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); - - for (i = 0; i < npmem_ranges; i++) { - node_set_state(i, N_NORMAL_MEMORY); - node_set_online(i); - } -#endif - /* * Initialize and free the full range of memory in each range. */ @@ -338,7 +318,7 @@ static void __init setup_bootmem(void) memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* reserve the holes */ @@ -384,6 +364,9 @@ static void __init setup_bootmem(void) /* Initialize Page Deallocation Table (PDT) and check for bad memory. */ pdc_pdt_init(); + + memblock_allow_resize(); + memblock_dump_all(); } static int __init parisc_text_address(unsigned long vaddr) @@ -711,37 +694,46 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } -void __init paging_init(void) +static void __init parisc_bootmem_free(void) { + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; int i; + for (i = 0; i < npmem_ranges; i++) { + unsigned long start = pmem_ranges[i].start_pfn; + unsigned long size = pmem_ranges[i].pages; + unsigned long end = start + size; + + if (mem_start_pfn > start) + mem_start_pfn = start; + if (mem_end_pfn < end) + mem_end_pfn = end; + mem_size_pfn += size; + } + + zones_size[0] = mem_end_pfn - mem_start_pfn; + holes_size[0] = zones_size[0] - mem_size_pfn; + + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); +} + +void __init paging_init(void) +{ setup_bootmem(); pagetable_init(); gateway_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); - for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; - -#ifdef CONFIG_DISCONTIGMEM - /* Need to initialize the pfnnid_map before we can initialize - the zone */ - { - int j; - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); - j++) { - pfnnid_map[j] = i; - } - } -#endif - - free_area_init_node(i, zones_size, - pmem_ranges[i].start_pfn, NULL); - } + /* + * Mark all memblocks as present for sparsemem using + * memory_present() and then initialize sparsemem. + */ + memblocks_present(); + sparse_init(); + parisc_bootmem_free(); } #ifdef CONFIG_PA20 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f38d153d2586..d18ea3c1f4fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_EXTABLE_SORT @@ -1009,6 +1010,19 @@ config FSL_RIO source "drivers/rapidio/Kconfig" +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. + endmenu config NONSTATIC_KERNEL diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..aa54ac2e5659 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); +} + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 668d8a121f1a..8b196720e9a0 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -24,7 +24,7 @@ struct drmem_lmb { struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; @@ -83,7 +83,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index a86feddddad0..35fb5b11955a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -90,11 +90,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..5bf3f0779b93 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,22 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +253,11 @@ label##3: \ #include extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a7b01f97a56..1eabc20dddd3 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -61,6 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -74,6 +76,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -94,6 +97,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h new file mode 100644 index 000000000000..f0f8e36ad71f --- /dev/null +++ b/arch/powerpc/include/asm/kup.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_H_ +#define _ASM_POWERPC_KUP_H_ + +#ifndef __ASSEMBLY__ + +#include + +#ifdef CONFIG_PPC_BOOK3S_64 +#include +#else +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) { } +#endif /* CONFIG_PPC_BOOK3S_64 */ + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + allow_user_access(NULL, from, size); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + allow_user_access(to, NULL, size); +} + +static inline void prevent_read_from_user(const void __user *from, unsigned long size) +{ + prevent_user_access(NULL, from, size); +} + +static inline void prevent_write_to_user(void __user *to, unsigned long size) +{ + prevent_user_access(to, NULL, size); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_KUP_H_ */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index ae953958c0f3..d93bdcaa4a46 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -204,7 +204,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index ccf44c135389..3b45a64e491e 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -84,12 +84,19 @@ static inline bool security_ftr_enabled(unsigned long feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..6f2f4497e13b 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,16 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 38a25ff8afb7..ab6612e35ace 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * The fs value determines whether argument validity checking should be @@ -91,9 +92,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size, __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true) + +#define __get_user_allowed(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) +#define __put_user_allowed(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false) #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) @@ -138,7 +144,7 @@ extern long __put_user_bad(void); : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __put_user_size(x, ptr, size, retval) \ +#define __put_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ switch (size) { \ @@ -150,14 +156,28 @@ do { \ } \ } while (0) -#define __put_user_nocheck(x, ptr, size) \ +#define __put_user_size(x, ptr, size, retval) \ +do { \ + allow_write_to_user(ptr, size); \ + __put_user_size_allowed(x, ptr, size, retval); \ + prevent_write_to_user(ptr, size); \ +} while (0) + +#define __put_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ - __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __chk_user_ptr(__pu_addr); \ + if (do_allow) \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + else \ + __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -165,9 +185,13 @@ do { \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ might_fault(); \ - if (access_ok(VERIFY_WRITE, __pu_addr, size)) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (access_ok(VERIFY_WRITE, __pu_addr, __pu_size)) \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -175,8 +199,12 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ + __chk_user_ptr(__pu_addr); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -234,7 +262,7 @@ extern long __get_user_bad(void); : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __get_user_size(x, ptr, size, retval) \ +#define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ __chk_user_ptr(ptr); \ @@ -249,6 +277,13 @@ do { \ } \ } while (0) +#define __get_user_size(x, ptr, size, retval) \ +do { \ + allow_read_from_user(ptr, size); \ + __get_user_size_allowed(x, ptr, size, retval); \ + prevent_read_from_user(ptr, size); \ +} while (0) + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -256,17 +291,23 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size) \ +#define __get_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + if (do_allow) \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ + else \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -275,12 +316,15 @@ do { \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(size) __gu_size = (size); \ + \ might_fault(); \ - if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ + if (access_ok(VERIFY_READ, __gu_addr, __gu_size)) { \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -289,10 +333,13 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -306,16 +353,22 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { + unsigned long ret; + barrier_nospec(); - return __copy_tofrom_user(to, from, n); + allow_user_access(to, from, n); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(to, from, n); + return ret; } #endif /* __powerpc64__ */ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -340,27 +393,30 @@ static inline unsigned long raw_copy_from_user(void *to, } barrier_nospec(); - return __copy_tofrom_user((__force void __user *)to, from, n); + allow_read_from_user(from, n); + ret = __copy_tofrom_user((__force void __user *)to, from, n); + prevent_read_from_user(from, n); + return ret; } -static inline unsigned long raw_copy_to_user(void __user *to, - const void *from, unsigned long n) +static inline unsigned long +raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) { if (__builtin_constant_p(n) && (n <= 8)) { unsigned long ret = 1; switch (n) { case 1: - __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret); + __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); break; case 2: - __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret); + __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); break; case 4: - __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret); + __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); break; case 8: - __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret); + __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); break; } if (ret == 0) @@ -370,14 +426,34 @@ static inline unsigned long raw_copy_to_user(void __user *to, return __copy_tofrom_user(to, (__force const void __user *)from, n); } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned long ret; + + allow_write_to_user(to, n); + ret = raw_copy_to_user_allowed(to, from, n); + prevent_write_to_user(to, n); + return ret; +} + +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { + unsigned long ret = size; might_fault(); - if (likely(access_ok(VERIFY_WRITE, addr, size))) - return __clear_user(addr, size); - return size; + if (likely(access_ok(VERIFY_WRITE, addr, size))) { + allow_write_to_user(addr, size); + ret = __arch_clear_user(addr, size); + prevent_write_to_user(addr, size); + } + return ret; +} + +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); } extern long strncpy_from_user(char *dst, const char __user *src, long count); @@ -388,4 +464,13 @@ extern long __copy_from_user_flushcache(void *dst, const void __user *src, extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); +#define user_access_begin(type, ptr, len) access_ok(type, ptr, len) +#define user_access_end() prevent_user_access(NULL, NULL, ~0ul) + +#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) +#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e) +#define unsafe_copy_to_user(d, s, l, e) \ + unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e) + #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e1dab9b1e447..344e2758b22d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -540,7 +540,7 @@ EXC_COMMON_BEGIN(unrecover_mce) b 1b -EXC_REAL(data_access, 0x300, 0x80) +EXC_REAL_OOL(data_access, 0x300, 0x80) EXC_VIRT(data_access, 0x4300, 0x80, 0x300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) @@ -572,13 +572,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) + b tramp_data_access_slb +EXC_REAL_END(data_access_slb, 0x380, 0x80) + +TRAMP_REAL_BEGIN(tramp_data_access_slb) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) mr r12,r3 /* save r3 */ mfspr r3,SPRN_DAR mfspr r11,SPRN_SRR1 crset 4*cr6+eq BRANCH_TO_COMMON(r10, slb_miss_common) -EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) SET_SCRATCH0(r13) @@ -593,7 +596,7 @@ EXC_VIRT_END(data_access_slb, 0x4380, 0x80) TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) -EXC_REAL(instruction_access, 0x400, 0x80) +EXC_REAL_OOL(instruction_access, 0x400, 0x80) EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) TRAMP_KVM(PACA_EXGEN, 0x400) @@ -616,13 +619,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) + b tramp_instruction_access_slb +EXC_REAL_END(instruction_access_slb, 0x480, 0x80) + +TRAMP_REAL_BEGIN(tramp_instruction_access_slb) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) mr r12,r3 /* save r3 */ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ mfspr r11,SPRN_SRR1 crclr 4*cr6+eq BRANCH_TO_COMMON(r10, slb_miss_common) -EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) SET_SCRATCH0(r13) @@ -883,13 +889,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) -EXC_REAL_HV(hdecrementer, 0x980, 0x80) -EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980) +EXC_REAL_OOL_HV(hdecrementer, 0x980, 0x80) +EXC_VIRT_OOL_HV(hdecrementer, 0x4980, 0x80, 0x980) TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) @@ -1523,15 +1529,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(rfi_flush_fallback) - SET_SCRATCH0(r13); - GET_PACA(r13); - std r1,PACA_EXRFI+EX_R12(r13) - ld r1,PACAKSAVE(r13) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -1542,7 +1541,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) sync /* - * The load adresses are at staggered offsets within cachelines, + * The load addresses are at staggered offsets within cachelines, * which suits some pipelines better (on others it should not * hurt). */ @@ -1557,7 +1556,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -1575,32 +1597,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -1609,6 +1606,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +USE_TEXT_SECTION() + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9fd2ff28b8ff..dc99258f2e8c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -356,11 +356,9 @@ _ENTRY(ITLBMiss_cmp) /* Load the MI_TWC with the attributes for this "segment." */ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. @@ -482,11 +480,9 @@ _ENTRY(DTLBMiss_jmp) * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 95d1264ba795..7e0722b62cae 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1057,6 +1057,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1094,6 +1235,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1155,6 +1299,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1190,6 +1337,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bd4996958b13..122365624d3d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -863,7 +863,13 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; +bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -873,6 +879,22 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -904,6 +926,32 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -957,10 +1005,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -988,9 +1054,63 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 755dc98a57ae..6b107de10ffa 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -29,29 +29,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -59,9 +57,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -69,16 +67,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 7781f0168ce8..1b2d84cb373b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -794,7 +794,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index d081d726ca8e..695432965f20 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -140,6 +140,20 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index a0cb63fb76a1..8d83c39be7e4 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, unsigned int csum; might_sleep(); + allow_read_from_user(src, len); *err_ptr = 0; @@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, } out: + prevent_read_from_user(src, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, unsigned int csum; might_sleep(); + allow_write_to_user(dst, len); *err_ptr = 0; @@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, } out: + prevent_write_to_user(dst, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index dbe478e7b8e0..065a3426f0eb 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -232,6 +232,110 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + patch_instruction((dest + 1), instrs[1]); + patch_instruction((dest + 2), instrs[2]); + patch_instruction((dest + 3), instrs[3]); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((dest + 1), instrs[1]); + + patch_instruction((dest + 2), instrs[2]); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..1ddb26394e8a 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 56aac4c22025..ea3798f4f25f 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -29,7 +29,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -70,7 +70,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -193,4 +193,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ba6e437abb4b..398a06631456 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -183,14 +183,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -223,18 +223,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index adddde023622..5068dd7f6e74 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -125,12 +125,29 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry or after user access: they fix a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) { + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8d49ba370c50..889c3dbec6fb 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -47,7 +47,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c2d318d1df02..2e0d38cafdd4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -565,6 +565,14 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 1376515547cd..ed7bf7c7add5 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -21,4 +21,7 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 1 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0e6d01225a67..c8e1e325215b 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2097,4 +2097,4 @@ out: return err; } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4bda9055daef..81cd304764b4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -907,6 +907,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8e31dfd85de3..888f247c9261 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -831,7 +831,7 @@ void __init smp_detect_cpus(void) */ static void smp_start_secondary(void *cpuvoid) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; @@ -844,6 +844,7 @@ static void smp_start_secondary(void *cpuvoid) set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8ea9db599d38..11c32b228f51 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -354,8 +354,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. It will get the current @@ -592,6 +593,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -613,8 +630,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -659,7 +675,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -696,10 +712,14 @@ static ssize_t stp_ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); @@ -708,9 +728,13 @@ static ssize_t stp_ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); @@ -719,9 +743,13 @@ static ssize_t stp_dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); @@ -730,9 +758,13 @@ static ssize_t stp_leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); @@ -741,9 +773,13 @@ static ssize_t stp_stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); @@ -752,9 +788,13 @@ static ssize_t stp_time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return -ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); @@ -763,9 +803,13 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_zone_offset, 0400, @@ -775,9 +819,13 @@ static ssize_t stp_timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); @@ -786,9 +834,13 @@ static ssize_t stp_timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3ea1f3c06a0..a7d7b7ade42f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. */ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index b5e0cbb34382..476ded92affa 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -36,14 +36,14 @@ int write_sigio_irq(int fd) } /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ -static DEFINE_SPINLOCK(sigio_spinlock); +static DEFINE_MUTEX(sigio_mutex); void sigio_lock(void) { - spin_lock(&sigio_spinlock); + mutex_lock(&sigio_mutex); } void sigio_unlock(void) { - spin_unlock(&sigio_spinlock); + mutex_unlock(&sigio_mutex); } diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 07bf5517d9d8..2410bd4bb48f 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -89,6 +89,7 @@ struct perf_ibs { u64 max_period; unsigned long offset_mask[1]; int offset_max; + unsigned int fetch_count_reset_broken : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -346,11 +347,15 @@ static u64 get_ibs_op_count(u64 config) { u64 count = 0; + /* + * If the internal 27-bit counter rolled over, the count is MaxCnt + * and the lower 7 bits of CurCnt are randomized. + * Otherwise CurCnt has the full 27-bit current counter value. + */ if (config & IBS_OP_VAL) - count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ - - if (ibs_caps & IBS_CAPS_RDWROPCNT) - count += (config & IBS_OP_CUR_CNT) >> 32; + count = (config & IBS_OP_MAX_CNT) << 4; + else if (ibs_caps & IBS_CAPS_RDWROPCNT) + count = (config & IBS_OP_CUR_CNT) >> 32; return count; } @@ -375,7 +380,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { - wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); + u64 tmp = hwc->config | config; + + if (perf_ibs->fetch_count_reset_broken) + wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + + wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -637,18 +647,24 @@ fail: perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + /* + * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately + * depending on their availability. + * Can't add to offset_max as they are staggered + */ if (event->attr.sample_type & PERF_SAMPLE_RAW) { - /* - * Read IbsBrTarget and IbsOpData4 separately - * depending on their availability. - * Can't add to offset_max as they are staggered - */ - if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); - size++; + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_BRNTRGT) { + rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + size++; + } + if (ibs_caps & IBS_CAPS_OPDATA4) { + rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + size++; + } } - if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { + rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } @@ -744,6 +760,13 @@ static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + /* + * Some chips fail to reset the fetch count when it is written; instead + * they need a 0-1 transition of IbsFetchEn. + */ + if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) + perf_ibs_fetch.fetch_count_reset_broken = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4a650eb3d94a..3b3cd12c0692 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -100,14 +100,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 2413169ce362..bc348663da94 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -115,18 +115,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -548,7 +536,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = NULL, /* patched at runtime */ }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7098b9b05d56..2f4ed5aa08ba 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -90,8 +90,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 40e040ec31b5..0fc86ac73b51 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -133,7 +133,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -153,8 +153,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ @@ -163,14 +163,14 @@ ssize_t uncore_event_show(struct kobject *kobj, } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4876411a072a..98b74711e6b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1472,6 +1472,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5bb11a8c245e..892af8ab95d7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -377,6 +377,7 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< #include #include @@ -12,6 +17,7 @@ typedef u64 gtod_long_t; #else typedef unsigned long gtod_long_t; #endif +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ extern int vclocks_used; static inline bool vclock_was_used(int vclock) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bf554ed2fd51..c524fa1f4c0e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -733,11 +733,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -751,8 +753,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* @@ -1240,6 +1240,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { @@ -1247,17 +1255,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; - /* - * Indirect branch speculation is always disabled in strict - * mode. It can neither be enabled if it was force-disabled - * by a previous prctl call. + /* + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + if (!is_spec_ib_user_controlled() || task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1270,10 +1287,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1336,20 +1353,17 @@ static int ib_prctl_get(struct task_struct *task) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) - return PR_SPEC_DISABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || - spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - } else + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else return PR_SPEC_NOT_AFFECTED; } diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index e62e416dd116..12083f200e09 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -515,6 +515,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -537,7 +555,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } preempt_disable(); @@ -1626,7 +1644,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1649,7 +1666,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1670,12 +1686,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1704,12 +1714,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -1973,8 +1977,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2025,7 +2028,6 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = ERR_PTR(ret); goto out_info; } - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); @@ -2033,7 +2035,6 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = ERR_PTR(ret); goto out_mongrp; } - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2185,7 +2186,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2227,7 +2228,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2326,11 +2327,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2622,8 +2618,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2664,6 +2660,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2676,7 +2673,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* @@ -2838,11 +2835,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -2854,11 +2846,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 16936a24795c..3aa0e5a45303 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -103,53 +103,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -167,7 +120,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -213,6 +166,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -347,13 +303,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -486,14 +443,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -937,7 +894,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 9490a2845f14..273687986a26 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -211,8 +211,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1d264ba1e56d..8fa9ca3c3bd7 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -300,19 +300,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { - struct task_struct *task = state->task; - if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; - if (task != current && state->sp == task->thread.sp) { - struct inactive_task_frame *frame = (void *)task->thread.sp; - return &frame->ret_addr; - } - if (state->sp) return (unsigned long *)state->sp - 1; @@ -634,7 +627,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } else { struct inactive_task_frame *frame = (void *)task->thread.sp; - state->sp = task->thread.sp; + state->sp = task->thread.sp + sizeof(*frame); state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); state->signal = (void *)state->ip == ret_from_fork; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 670c2aedcefa..3e182c7ae771 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3994,6 +3994,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4507,7 +4513,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 007bc654f928..295ebadb8f2c 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -52,29 +52,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * check if there is pending interrupt from * non-APIC source without intack. */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ - u8 accept = kvm_apic_accept_pic_intr(v); - - if (accept) { - if (irqchip_split(v->kvm)) - return pending_userspace_extint(v); - else - return v->kvm->arch.vpic->output; - } else - return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v) { /* - * FIXME: interrupt.injected represents an interrupt that it's + * FIXME: interrupt.injected represents an interrupt whose * side-effects have already been applied (e.g. bit from IRR * already moved to ISR). Therefore, it is incorrect to rely * on interrupt.injected to know if there is a pending @@ -87,6 +68,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (!lapic_in_kernel(v)) return v->arch.interrupt.injected; + if (!kvm_apic_accept_pic_intr(v)) + return 0; + + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ if (kvm_cpu_has_extint(v)) return 1; @@ -102,20 +100,6 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* - * FIXME: interrupt.injected represents an interrupt that it's - * side-effects have already been applied (e.g. bit from IRR - * already moved to ISR). Therefore, it is incorrect to rely - * on interrupt.injected to know if there is a pending - * interrupt in the user-mode LAPIC. - * This leads to nVMX/nSVM not be able to distinguish - * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on - * pending interrupt or should re-inject an injected - * interrupt. - */ - if (!lapic_in_kernel(v)) - return v->arch.interrupt.injected; - if (kvm_cpu_has_extint(v)) return 1; @@ -129,16 +113,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) { - if (irqchip_split(v->kvm)) { - int vector = v->arch.pending_external_vector; - - v->arch.pending_external_vector = -1; - return vector; - } else - return kvm_pic_read_irq(v->kvm); /* PIC */ - } else + if (!kvm_cpu_has_extint(v)) { + WARN_ON(!lapic_in_kernel(v)); return -1; + } + + if (!lapic_in_kernel(v)) + return v->arch.interrupt.nr; + + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ } /* @@ -146,13 +135,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - int vector; - - if (!lapic_in_kernel(v)) - return v->arch.interrupt.nr; - - vector = kvm_cpu_get_extint(v); - + int vector = kvm_cpu_get_extint(v); if (vector != -1) return vector; /* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bba2f76c356d..56a4b9762b0c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2284,7 +2284,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd182228be71..7096578ef737 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3351,21 +3351,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) { + /* + * We can accept userspace's request for interrupt injection + * as long as we have a place to store the interrupt number. + * The actual injection will happen when the CPU is able to + * deliver the interrupt. + */ + if (kvm_cpu_has_extint(vcpu)) + return false; + + /* Acknowledging ExtINT does not happen if LINT0 is masked. */ return (!lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu)); } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); } diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 43867bc85368..eea5a0f3b959 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -33,6 +33,7 @@ #include #include #include +#include #define PCIE_CAP_OFFSET 0x100 diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 52dd59af873e..77d05b56089a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -214,28 +214,30 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; } /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 76864ea59160..9f8995cd28f6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1383,6 +1383,15 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_init.mpparse.get_smp_config = x86_init_uint_noop; xen_boot_params_init_edd(); + +#ifdef CONFIG_ACPI + /* + * Disable selecting "Firmware First mode" for correctable + * memory errors, as this is the duty of the hypervisor to + * decide. + */ + acpi_disable_cmcff = 1; +#endif } if (!boot_params.screen_info.orig_video_isVGA) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 717b4847b473..6fffb86a32ad 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -101,10 +101,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index f1158b4c629c..da4effe27007 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -291,7 +291,7 @@ strncpy_from_user(char *dst, const char *src, long count) return -EFAULT; } #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count); #endif /* diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 9220dcde7520..d1ebe67c68d4 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -71,8 +71,10 @@ static inline void kmap_invalidate_coherent(struct page *page, kvaddr = TLBTEMP_BASE_1 + (page_to_phys(page) & DCACHE_ALIAS_MASK); + preempt_disable(); __invalidate_dcache_page_alias(kvaddr, page_to_phys(page)); + preempt_enable(); } } } @@ -157,6 +159,7 @@ void flush_dcache_page(struct page *page) if (!alias && !mapping) return; + preempt_disable(); virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(virt, phys); @@ -167,6 +170,7 @@ void flush_dcache_page(struct page *page) if (mapping) __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } /* There shouldn't be an entry in the cache for this page anymore. */ @@ -200,8 +204,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long phys = page_to_phys(pfn_to_page(pfn)); unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } EXPORT_SYMBOL(local_flush_cache_page); @@ -228,11 +234,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) unsigned long phys = page_to_phys(page); unsigned long tmp; + preempt_disable(); tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); __invalidate_icache_page_alias(tmp, phys); + preempt_enable(); clear_bit(PG_arch_1, &page->flags); } @@ -266,7 +274,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } /* Copy data */ @@ -281,9 +291,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_range((unsigned long) dst, len); if ((vma->vm_flags & VM_EXEC) != 0) __invalidate_icache_page_alias(t, phys); + preempt_enable(); } else if ((vma->vm_flags & VM_EXEC) != 0) { __flush_dcache_range((unsigned long)dst,len); @@ -305,7 +317,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } memcpy(dst, src, len); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a06547fe6f6b..85bd46e0a745 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -876,13 +876,20 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto fail; } + if (radix_tree_preload(GFP_KERNEL)) { + blkg_free(new_blkg); + ret = -ENOMEM; + goto fail; + } + rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_lookup_check(pos, pol, q); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); - goto fail_unlock; + blkg_free(new_blkg); + goto fail_preloaded; } if (blkg) { @@ -891,10 +898,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_create(pos, q, new_blkg); if (unlikely(IS_ERR(blkg))) { ret = PTR_ERR(blkg); - goto fail_unlock; + goto fail_preloaded; } } + radix_tree_preload_end(); + if (pos == blkcg) goto success; } @@ -904,6 +913,8 @@ success: ctx->body = body; return 0; +fail_preloaded: + radix_tree_preload_end(); fail_unlock: spin_unlock_irq(q->queue_lock); rcu_read_unlock(); diff --git a/build.config.aarch64 b/build.config.aarch64 index adc881d0cc30..569b9747661d 100644 --- a/build.config.aarch64 +++ b/build.config.aarch64 @@ -1,8 +1,7 @@ ARCH=arm64 -CLANG_TRIPLE=aarch64-linux-gnu- -CROSS_COMPILE=aarch64-linux-androidkernel- -CROSS_COMPILE_COMPAT=arm-linux-androidkernel- +CROSS_COMPILE=aarch64-linux-gnu- +CROSS_COMPILE_COMPAT=arm-linux-gnueabi- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 LINUX_GCC_CROSS_COMPILE_COMPAT_PREBUILTS_BIN=prebuilts/gas/linux-x86 diff --git a/build.config.allmodconfig b/build.config.allmodconfig index 4ce14e5a7c5a..103d1cdf4edc 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -10,5 +10,5 @@ function update_config() { -e UNWINDER_FRAME_POINTER \ (cd ${OUT_DIR} && \ - make O=${OUT_DIR} $archsubarch CLANG_TRIPLE=${CLANG_TRIPLE} CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) + make O=${OUT_DIR} $archsubarch CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) } diff --git a/build.config.arm b/build.config.arm index 35defb811b98..a9e9a6c2a418 100644 --- a/build.config.arm +++ b/build.config.arm @@ -1,7 +1,6 @@ ARCH=arm -CLANG_TRIPLE=arm-linux-gnueabi- -CROSS_COMPILE=arm-linux-androidkernel- +CROSS_COMPILE=arm-linux-gnueabi- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 FILES=" diff --git a/build.config.x86_64 b/build.config.x86_64 index 73405452fe83..b16ac8a68b25 100644 --- a/build.config.x86_64 +++ b/build.config.x86_64 @@ -1,7 +1,6 @@ ARCH=x86_64 -CLANG_TRIPLE=x86_64-linux-gnu- -CROSS_COMPILE=x86_64-linux-androidkernel- +CROSS_COMPILE=x86_64-linux-gnu- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 FILES=" diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index daf03beba9de..b42198de8b1a 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -31,7 +31,7 @@ static int chacha_stream_xor(struct skcipher_request *req, unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); + nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); chacha_crypt_generic(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index f21c99ec46ee..ed19d9822bc0 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -757,6 +757,9 @@ int __init acpi_aml_init(void) goto err_exit; } + if (acpi_disabled) + return -ENODEV; + /* Initialize AML IO interface */ mutex_init(&acpi_aml_io.lock); init_waitqueue_head(&acpi_aml_io.wait); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 560fdae8cc59..943b1dc2d0b3 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -224,9 +224,9 @@ static int __init extlog_init(void) u64 cap; int rc; - rdmsrl(MSR_IA32_MCG_CAP, cap); - - if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) || + !(cap & MCG_ELOG_P) || + !extlog_get_l1addr()) return -ENODEV; if (edac_get_report_status() == EDAC_REPORTING_FORCE) { diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index d5c19e25ddf5..abf101451c92 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -98,7 +98,18 @@ static const struct dmi_system_id lid_blacklst[] = { */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), - DMI_MATCH(DMI_PRODUCT_NAME, "E2215T MD60198"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2215T"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, + }, + { + /* + * Medion Akoya E2228T, notification of the LID device only + * happens on close, not on open and _LID always returns closed. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2228T"), }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, }, @@ -149,6 +160,7 @@ struct acpi_button { int last_state; ktime_t last_time; bool suspended; + bool lid_state_initialized; }; static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); @@ -404,6 +416,8 @@ static int acpi_lid_update_state(struct acpi_device *device, static void acpi_lid_initialize_state(struct acpi_device *device) { + struct acpi_button *button = acpi_driver_data(device); + switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); @@ -415,13 +429,14 @@ static void acpi_lid_initialize_state(struct acpi_device *device) default: break; } + + button->lid_state_initialized = true; } static void acpi_button_notify(struct acpi_device *device, u32 event) { struct acpi_button *button = acpi_driver_data(device); struct input_dev *input; - int users; switch (event) { case ACPI_FIXED_HARDWARE_EVENT: @@ -430,10 +445,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) case ACPI_BUTTON_NOTIFY_STATUS: input = button->input; if (button->type == ACPI_BUTTON_TYPE_LID) { - mutex_lock(&button->input->mutex); - users = button->input->users; - mutex_unlock(&button->input->mutex); - if (users) + if (button->lid_state_initialized) acpi_lid_update_state(device, true); } else { int keycode; @@ -478,7 +490,7 @@ static int acpi_button_resume(struct device *dev) struct acpi_button *button = acpi_driver_data(device); button->suspended = false; - if (button->type == ACPI_BUTTON_TYPE_LID && button->input->users) { + if (button->type == ACPI_BUTTON_TYPE_LID) { button->last_state = !!acpi_lid_evaluate_state(device); button->last_time = ktime_get(); acpi_lid_initialize_state(device); diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c index 73f6093a5c16..9f4b405a5c20 100644 --- a/drivers/acpi/evged.c +++ b/drivers/acpi/evged.c @@ -110,7 +110,7 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, switch (gsi) { case 0 ... 255: - sprintf(ev_name, "_%c%02hhX", + sprintf(ev_name, "_%c%02X", trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index bce135d23f53..77471ed36032 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1535,7 +1535,7 @@ static ssize_t format1_show(struct device *dev, le16_to_cpu(nfit_dcr->dcr->code)); break; } - if (rc != ENXIO) + if (rc != -ENXIO) break; } mutex_unlock(&acpi_desc->init_mutex); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 0da58f0bf7e5..a28ff3cfbc29 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -46,7 +46,7 @@ int acpi_numa __initdata; int pxm_to_node(int pxm) { - if (pxm < 0) + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) return NUMA_NO_NODE; return pxm_to_node_map[pxm]; } diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index ab1da5e6e7e3..86ffb4af4afc 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -274,6 +274,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "530U4E/540U4E"), }, }, + /* https://bugs.launchpad.net/bugs/1894667 */ + { + .callback = video_detect_force_video, + .ident = "HP 635 Notebook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP 635 Notebook PC"), + }, + }, /* Non win8 machines which need native backlight nevertheless */ { diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 110e41f920c2..f303106b3362 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -448,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->miscdev.minor = minor; diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 798d549435cc..2248a40631bf 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -2122,7 +2122,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) pp->dhfis_bits &= ~done_mask; pp->dmafis_bits &= ~done_mask; pp->sdbfis_bits |= done_mask; - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); if (!ap->qc_active) { DPRINTK("over\n"); diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 8323f88d17a5..4dcdf8ee0055 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -124,7 +124,7 @@ /* Descriptor table word 0 bit (when DTA32M = 1) */ #define SATA_RCAR_DTEND BIT(0) -#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL +#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL /* Gen2 Physical Layer Control Registers */ #define RCAR_GEN2_PHY_CTL1_REG 0x1704 diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index cbec9adc01c7..0d3754a4ac20 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1705,6 +1705,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (push_scqe(card, vc, scq, &scqe, skb) != 0) { atomic_inc(&vcc->stats->tx_err); + dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(skb); return -EIO; } diff --git a/drivers/base/base.h b/drivers/base/base.h index e6d4d55df615..92803ce31257 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -138,7 +138,6 @@ extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); extern void device_block_probing(void); extern void device_unblock_probing(void); -extern void driver_deferred_probe_force_trigger(void); /* /sys/devices directory */ extern struct kset *devices_kset; diff --git a/drivers/base/core.c b/drivers/base/core.c index f6153654968c..39aee721d4db 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1272,7 +1272,6 @@ void fw_devlink_resume(void) goto out; device_link_add_missing_supplier_links(); - driver_deferred_probe_force_trigger(); out: mutex_unlock(&defer_fw_devlink_lock); } @@ -3777,6 +3776,7 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { + struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { @@ -3791,7 +3791,8 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; - fn->secondary = NULL; + if (!(parent && fn == parent->fwnode)) + fn->secondary = ERR_PTR(-ENODEV); } else { dev->fwnode = NULL; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 2346481fb2d1..363efcaaed04 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -164,11 +164,6 @@ static void driver_deferred_probe_trigger(void) if (!driver_deferred_probe_enable) return; - driver_deferred_probe_force_trigger(); -} - -void driver_deferred_probe_force_trigger(void) -{ /* * A successful probe means that all the devices in the pending list * should be triggered to be reprobed. Move all the deferred devices @@ -1054,6 +1049,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv = dev->driver; if (drv) { + pm_runtime_get_sync(dev); + while (device_links_busy(dev)) { __device_driver_unlock(dev, parent); @@ -1065,11 +1062,12 @@ static void __device_release_driver(struct device *dev, struct device *parent) * have released the driver successfully while this one * was waiting, so check for that. */ - if (dev->driver != drv) + if (dev->driver != drv) { + pm_runtime_put(dev); return; + } } - pm_runtime_get_sync(dev); pm_runtime_clean_up_links(dev); driver_sysfs_remove(dev); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index c248412f1e34..0e8cad8ec4c3 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -244,7 +244,9 @@ void wakeup_source_unregister(struct wakeup_source *ws) { if (ws) { wakeup_source_remove(ws); - wakeup_source_sysfs_remove(ws); + if (ws->dev) + wakeup_source_sysfs_remove(ws); + wakeup_source_destroy(ws); } } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d7c7232e438c..517318bb350c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -276,7 +276,7 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd) +static void nbd_size_update(struct nbd_device *nbd, bool start) { struct nbd_config *config = nbd->config; struct block_device *bdev = bdget_disk(nbd->disk, 0); @@ -292,7 +292,8 @@ static void nbd_size_update(struct nbd_device *nbd) if (bdev) { if (bdev->bd_disk) { bd_set_size(bdev, config->bytesize); - set_blocksize(bdev, config->blksize); + if (start) + set_blocksize(bdev, config->blksize); } else bdev->bd_invalidated = 1; bdput(bdev); @@ -307,7 +308,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; if (nbd->task_recv != NULL) - nbd_size_update(nbd); + nbd_size_update(nbd, false); } static void nbd_complete_rq(struct request *req) @@ -740,9 +741,9 @@ static void recv_work(struct work_struct *work) blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); - nbd_config_put(nbd); kfree(args); } @@ -1244,7 +1245,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd); + nbd_size_update(nbd, true); return error; } @@ -1447,6 +1448,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode) if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && bdev->bd_openers == 0) nbd_disconnect_and_put(nbd); + bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 3666afa639d1..b18f0162cb9c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -202,7 +202,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num) #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) -static int do_block_io_op(struct xen_blkif_ring *ring); +static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags); static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct blkif_request *req, struct pending_req *pending_req); @@ -615,6 +615,8 @@ int xen_blkif_schedule(void *arg) struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; int ret; + bool do_eoi; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; set_freezable(); while (!kthread_should_stop()) { @@ -639,16 +641,23 @@ int xen_blkif_schedule(void *arg) if (timeout == 0) goto purge_gnt_list; + do_eoi = ring->waiting_reqs; + ring->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - ret = do_block_io_op(ring); + ret = do_block_io_op(ring, &eoi_flags); if (ret > 0) ring->waiting_reqs = 1; if (ret == -EACCES) wait_event_interruptible(ring->shutdown_wq, kthread_should_stop()); + if (do_eoi && !ring->waiting_reqs) { + xen_irq_lateeoi(ring->irq, eoi_flags); + eoi_flags |= XEN_EOI_FLAG_SPURIOUS; + } + purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && time_after(jiffies, ring->next_lru)) { @@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio) * and transmute it to the block API to hand it over to the proper block disk. */ static int -__do_block_io_op(struct xen_blkif_ring *ring) +__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; struct blkif_request req; @@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + /* We've seen a request, so clear spurious eoi flag. */ + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (kthread_should_stop()) { more_to_do = 1; break; @@ -1202,13 +1214,13 @@ done: } static int -do_block_io_op(struct xen_blkif_ring *ring) +do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; int more_to_do; do { - more_to_do = __do_block_io_op(ring); + more_to_do = __do_block_io_op(ring, eoi_flags); if (more_to_do) break; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 25c41ce070a7..93896c992245 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -237,9 +237,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref, BUG(); } - err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, - xen_blkif_be_int, 0, - "blkif-backend", ring); + err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid, + evtchn, xen_blkif_be_int, 0, "blkif-backend", ring); if (err < 0) { xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring); ring->blk_rings.common.sring = NULL; diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c index 7226cfc49b6f..3f806599748a 100644 --- a/drivers/bus/fsl-mc/mc-io.c +++ b/drivers/bus/fsl-mc/mc-io.c @@ -129,7 +129,12 @@ error_destroy_mc_io: */ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io) { - struct fsl_mc_device *dpmcp_dev = mc_io->dpmcp_dev; + struct fsl_mc_device *dpmcp_dev; + + if (!mc_io) + return; + + dpmcp_dev = mc_io->dpmcp_dev; if (dpmcp_dev) fsl_mc_io_unset_dpmcp(mc_io); diff --git a/drivers/char/random.c b/drivers/char/random.c index f9adad9382cc..ab0371c6c1ca 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1249,7 +1249,6 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]); if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c index 3e673ab22cb4..abd3beeb5158 100644 --- a/drivers/char/tpm/eventlog/efi.c +++ b/drivers/char/tpm/eventlog/efi.c @@ -43,6 +43,11 @@ int tpm_read_log_efi(struct tpm_chip *chip) log_size = log_tbl->size; memunmap(log_tbl); + if (!log_size) { + pr_warn("UEFI TPM log area empty\n"); + return -EIO; + } + log_tbl = memremap(efi.tpm_log, sizeof(*log_tbl) + log_size, MEMREMAP_WB); if (!log_tbl) { diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f08949a5f678..5a3a4f095391 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "tpm.h" #include "tpm_tis_core.h" @@ -53,8 +54,8 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); } -static bool interrupts = true; -module_param(interrupts, bool, 0444); +static int interrupts = -1; +module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); static bool itpm; @@ -67,6 +68,28 @@ module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry"); #endif +static int tpm_tis_disable_irq(const struct dmi_system_id *d) +{ + if (interrupts == -1) { + pr_notice("tpm_tis: %s detected: disabling interrupts.\n", d->ident); + interrupts = 0; + } + + return 0; +} + +static const struct dmi_system_id tpm_tis_dmi_table[] = { + { + .callback = tpm_tis_disable_irq, + .ident = "ThinkPad T490s", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"), + }, + }, + {} +}; + #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int has_hid(struct acpi_device *dev, const char *hid) { @@ -196,6 +219,8 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info) int irq = -1; int rc; + dmi_check_system(tpm_tis_dmi_table); + rc = check_acpi_tpm2(dev); if (rc) return rc; diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c index 07a805125e98..11d92311e162 100644 --- a/drivers/clk/ti/clockdomain.c +++ b/drivers/clk/ti/clockdomain.c @@ -146,10 +146,12 @@ static void __init of_ti_clockdomain_setup(struct device_node *node) if (clk_hw_get_flags(clk_hw) & CLK_IS_BASIC) { pr_warn("can't setup clkdm for basic clk %s\n", __clk_get_name(clk)); + clk_put(clk); continue; } to_clk_hw_omap(clk_hw)->clkdm_name = clkdm_name; omap2_init_clk_clkdm(clk_hw); + clk_put(clk); } } diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index aca30f45172e..9e86404a361f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -701,7 +701,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } - if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 && + !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 47105735df12..6b5d241c30b7 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -144,7 +144,8 @@ static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = { static const struct reg_field *sti_cpufreq_match(void) { if (of_machine_is_compatible("st,stih407") || - of_machine_is_compatible("st,stih410")) + of_machine_is_compatible("st,stih410") || + of_machine_is_compatible("st,stih418")) return sti_stih407_dvfs_regfields; return NULL; @@ -261,7 +262,8 @@ static int sti_cpufreq_init(void) int ret; if ((!of_machine_is_compatible("st,stih407")) && - (!of_machine_is_compatible("st,stih410"))) + (!of_machine_is_compatible("st,stih410")) && + (!of_machine_is_compatible("st,stih418"))) return -ENODEV; ddata.cpu = get_cpu_device(0); diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index f1ca66147c28..35116cdb5e38 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -175,7 +175,7 @@ static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len) { if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { __skb_trim(skb, 0); - refcount_add(2, &skb->users); + refcount_inc(&skb->users); } else { skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); } @@ -696,14 +696,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) @@ -720,15 +719,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static void chtls_purge_wr_queue(struct sock *sk) @@ -1348,7 +1345,6 @@ static void add_to_reap_list(struct sock *sk) struct chtls_sock *csk = sk->sk_user_data; local_bh_disable(); - bh_lock_sock(sk); release_tcp_port(sk); /* release the port immediately */ spin_lock(&reap_list_lock); @@ -1357,7 +1353,6 @@ static void add_to_reap_list(struct sock *sk) if (!csk->passive_reap_next) schedule_work(&reap_task); spin_unlock(&reap_list_lock); - bh_unlock_sock(sk); local_bh_enable(); } diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c index 64d24823c65a..2294fb06bef3 100644 --- a/drivers/crypto/chelsio/chtls/chtls_hw.c +++ b/drivers/crypto/chelsio/chtls/chtls_hw.c @@ -368,6 +368,9 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) if (ret) goto out_notcb; + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + goto out_notcb; + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid); csk->wr_credits -= DIV_ROUND_UP(len, 16); csk->wr_unacked += DIV_ROUND_UP(len, 16); diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index e9573e7f9e80..f9874da23a29 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1549,6 +1549,7 @@ skip_copy: tp->urg_data = 0; if ((avail + offset) >= skb->len) { + struct sk_buff *next_skb; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; @@ -1558,8 +1559,10 @@ skip_copy: chtls_free_skb(sk, skb); buffers_freed++; hws->copied_seq = 0; - if (copied >= target && - !skb_peek(&sk->sk_receive_queue)) + next_skb = skb_peek(&sk->sk_receive_queue); + if (copied >= target && !next_skb) + break; + if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) break; } } while (len > 0); diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index edff93aacad3..0d0850b35e45 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -574,11 +574,11 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, enum dma_status status; unsigned long flags; + spin_lock_irqsave(&jzchan->vchan.lock, flags); + status = dma_cookie_status(chan, cookie, txstate); if ((status == DMA_COMPLETE) || (txstate == NULL)) - return status; - - spin_lock_irqsave(&jzchan->vchan.lock, flags); + goto out_unlock_irqrestore; vdesc = vchan_find_desc(&jzchan->vchan, cookie); if (vdesc) { @@ -595,6 +595,7 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) status = DMA_ERROR; +out_unlock_irqrestore: spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; } diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index d56b6b0e22a8..28592137fb67 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -453,8 +453,8 @@ struct xilinx_dma_device { #define to_dma_tx_descriptor(tx) \ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ - readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ - cond, delay_us, timeout_us) + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) /* IO accessors */ static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index f953541e7890..634125747a03 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -28,6 +28,47 @@ #include #include +/* + * PLX PEX8311 PCI LCS_INTCSR Interrupt Control/Status + * + * Bit: Description + * 0: Enable Interrupt Sources (Bit 0) + * 1: Enable Interrupt Sources (Bit 1) + * 2: Generate Internal PCI Bus Internal SERR# Interrupt + * 3: Mailbox Interrupt Enable + * 4: Power Management Interrupt Enable + * 5: Power Management Interrupt + * 6: Slave Read Local Data Parity Check Error Enable + * 7: Slave Read Local Data Parity Check Error Status + * 8: Internal PCI Wire Interrupt Enable + * 9: PCI Express Doorbell Interrupt Enable + * 10: PCI Abort Interrupt Enable + * 11: Local Interrupt Input Enable + * 12: Retry Abort Enable + * 13: PCI Express Doorbell Interrupt Active + * 14: PCI Abort Interrupt Active + * 15: Local Interrupt Input Active + * 16: Local Interrupt Output Enable + * 17: Local Doorbell Interrupt Enable + * 18: DMA Channel 0 Interrupt Enable + * 19: DMA Channel 1 Interrupt Enable + * 20: Local Doorbell Interrupt Active + * 21: DMA Channel 0 Interrupt Active + * 22: DMA Channel 1 Interrupt Active + * 23: Built-In Self-Test (BIST) Interrupt Active + * 24: Direct Master was the Bus Master during a Master or Target Abort + * 25: DMA Channel 0 was the Bus Master during a Master or Target Abort + * 26: DMA Channel 1 was the Bus Master during a Master or Target Abort + * 27: Target Abort after internal 256 consecutive Master Retrys + * 28: PCI Bus wrote data to LCS_MBOX0 + * 29: PCI Bus wrote data to LCS_MBOX1 + * 30: PCI Bus wrote data to LCS_MBOX2 + * 31: PCI Bus wrote data to LCS_MBOX3 + */ +#define PLX_PEX8311_PCI_LCS_INTCSR 0x68 +#define INTCSR_INTERNAL_PCI_WIRE BIT(8) +#define INTCSR_LOCAL_INPUT BIT(11) + /** * struct idio_24_gpio_reg - GPIO device registers structure * @out0_7: Read: FET Outputs 0-7 @@ -92,6 +133,7 @@ struct idio_24_gpio_reg { struct idio_24_gpio { struct gpio_chip chip; raw_spinlock_t lock; + __u8 __iomem *plx; struct idio_24_gpio_reg __iomem *reg; unsigned long irq_mask; }; @@ -360,13 +402,13 @@ static void idio_24_irq_mask(struct irq_data *data) unsigned long flags; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; unsigned char new_irq_mask; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - idio24gpio->irq_mask &= BIT(bit_offset); - new_irq_mask = idio24gpio->irq_mask >> bank_offset; + idio24gpio->irq_mask &= ~BIT(bit_offset); + new_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; if (!new_irq_mask) { cos_enable_state = ioread8(&idio24gpio->reg->cos_enable); @@ -389,12 +431,12 @@ static void idio_24_irq_unmask(struct irq_data *data) unsigned long flags; unsigned char prev_irq_mask; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - prev_irq_mask = idio24gpio->irq_mask >> bank_offset; + prev_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; idio24gpio->irq_mask |= BIT(bit_offset); if (!prev_irq_mask) { @@ -481,6 +523,7 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *const dev = &pdev->dev; struct idio_24_gpio *idio24gpio; int err; + const size_t pci_plx_bar_index = 1; const size_t pci_bar_index = 2; const char *const name = pci_name(pdev); @@ -494,12 +537,13 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pcim_iomap_regions(pdev, BIT(pci_bar_index), name); + err = pcim_iomap_regions(pdev, BIT(pci_plx_bar_index) | BIT(pci_bar_index), name); if (err) { dev_err(dev, "Unable to map PCI I/O addresses (%d)\n", err); return err; } + idio24gpio->plx = pcim_iomap_table(pdev)[pci_plx_bar_index]; idio24gpio->reg = pcim_iomap_table(pdev)[pci_bar_index]; idio24gpio->chip.label = name; @@ -520,6 +564,12 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Software board reset */ iowrite8(0, &idio24gpio->reg->soft_reset); + /* + * enable PLX PEX8311 internal PCI wire interrupt and local interrupt + * input + */ + iowrite8((INTCSR_INTERNAL_PCI_WIRE | INTCSR_LOCAL_INPUT) >> 8, + idio24gpio->plx + PLX_PEX8311_PCI_LCS_INTCSR + 1); err = devm_gpiochip_add_data(dev, &idio24gpio->chip, idio24gpio); if (err) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b54238406192..98d20efe54d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -565,6 +565,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx ticket; struct list_head list, duplicates; uint64_t va_flags; + uint64_t vm_size; int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { @@ -585,6 +586,15 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_VA_HOLE_MASK; + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + vm_size -= AMDGPU_VA_RESERVED_SIZE; + if (args->va_address + args->map_size > vm_size) { + dev_dbg(&dev->pdev->dev, + "va_address 0x%llx is in top reserved area 0x%llx\n", + args->va_address + args->map_size, vm_size); + return -EINVAL; + } + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", args->flags); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index d0fa2aac2388..ca66c2f79758 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1086,22 +1086,19 @@ static int cik_sdma_soft_reset(void *handle) { u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 tmp = RREG32(mmSRBM_STATUS2); + u32 tmp; - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - } - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; - } + /* sdma0 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; + + /* sdma1 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; if (srbm_soft_reset) { tmp = RREG32(mmSRBM_SOFT_RESET); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 2fb2c683ad54..fa0e6c8e2447 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2009,7 +2009,7 @@ enum dc_status dc_link_validate_mode_timing( /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle */ - if (link->remote_sinks[0]) + if (link->remote_sinks[0] && link->remote_sinks[0]->sink_signal == SIGNAL_TYPE_VIRTUAL) return DC_OK; /* Passive Dongle */ diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index a407892905af..d4cb7db89192 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -57,7 +57,7 @@ * general debug capabilities * */ -#if defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB) +#if defined(CONFIG_DEBUG_KERNEL_DC) && (defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB)) #define ASSERT_CRITICAL(expr) do { \ if (WARN_ON(!(expr))) { \ kgdb_breakpoint(); \ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 058898b321b8..d8e624d64ae3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1531,6 +1531,10 @@ int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to reset to default!", result = tmp_result); + tmp_result = smum_stop_smc(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to stop smc!", result = tmp_result); + tmp_result = smu7_force_switch_to_arbf0(hwmgr); PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to force to switch arbf0!", result = tmp_result); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 6ee864455a12..f59e1e737735 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -216,6 +216,7 @@ struct pp_smumgr_func { bool (*is_hw_avfs_present)(struct pp_hwmgr *hwmgr); int (*update_dpm_settings)(struct pp_hwmgr *hwmgr, void *profile_setting); int (*smc_table_manager)(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw); /*rw: true for read, false for write */ + int (*stop_smc)(struct pp_hwmgr *hwmgr); }; struct pp_hwmgr_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 82550a8a3a3f..ef4f2392e2e7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -113,4 +113,6 @@ extern int smum_update_dpm_settings(struct pp_hwmgr *hwmgr, void *profile_settin extern int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw); +extern int smum_stop_smc(struct pp_hwmgr *hwmgr); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index db87cb8930d2..c05bec5effb2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2723,10 +2723,7 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, - VOLTAGE_CONTROLLER_ON)) - ? true : false; + return ci_is_smc_ram_running(hwmgr); } static int ci_smu_init(struct pp_hwmgr *hwmgr) @@ -2934,6 +2931,29 @@ static int ci_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) return 0; } +static void ci_reset_smc(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); +} + + +static void ci_stop_smc_clock(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, + ck_disable, 1); +} + +static int ci_stop_smc(struct pp_hwmgr *hwmgr) +{ + ci_reset_smc(hwmgr); + ci_stop_smc_clock(hwmgr); + + return 0; +} + const struct pp_smumgr_func ci_smu_funcs = { .smu_init = ci_smu_init, .smu_fini = ci_smu_fini, @@ -2957,4 +2977,5 @@ const struct pp_smumgr_func ci_smu_funcs = { .is_dpm_running = ci_is_dpm_running, .update_dpm_settings = ci_update_dpm_settings, .update_smc_table = ci_update_smc_table, + .stop_smc = ci_stop_smc, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index a6edd5df33b0..20ecf994d47f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -213,3 +213,11 @@ int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t tabl return -EINVAL; } + +int smum_stop_smc(struct pp_hwmgr *hwmgr) +{ + if (hwmgr->smumgr_funcs->stop_smc) + return hwmgr->smumgr_funcs->stop_smc(hwmgr); + + return 0; +} diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 2136c97aeb8e..dcf091f9d843 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -306,8 +306,12 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp4028_i2c->dev; + int ret; - ge_b850v3_lvds_init(dev); + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); @@ -365,8 +369,12 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp2690_i2c->dev; + int ret; - ge_b850v3_lvds_init(dev); + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index fd7999642cf8..8b5f9241a887 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -326,7 +326,6 @@ static void dw_mipi_message_config(struct dw_mipi_dsi *dsi, if (lpm) val |= CMD_MODE_ALL_LP; - dsi_write(dsi, DSI_LPCLK_CTRL, lpm ? 0 : PHY_TXREQUESTCLKHS); dsi_write(dsi, DSI_CMD_MODE_CFG, val); } @@ -488,16 +487,22 @@ static void dw_mipi_dsi_video_mode_config(struct dw_mipi_dsi *dsi) static void dw_mipi_dsi_set_mode(struct dw_mipi_dsi *dsi, unsigned long mode_flags) { + u32 val; + dsi_write(dsi, DSI_PWR_UP, RESET); if (mode_flags & MIPI_DSI_MODE_VIDEO) { dsi_write(dsi, DSI_MODE_CFG, ENABLE_VIDEO_MODE); dw_mipi_dsi_video_mode_config(dsi); - dsi_write(dsi, DSI_LPCLK_CTRL, PHY_TXREQUESTCLKHS); } else { dsi_write(dsi, DSI_MODE_CFG, ENABLE_CMD_MODE); } + val = PHY_TXREQUESTCLKHS; + if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) + val |= AUTO_CLKLANE_CTRL; + dsi_write(dsi, DSI_LPCLK_CTRL, val); + dsi_write(dsi, DSI_PWR_UP, POWERUP); } diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b152c108d06a..8a2a1965e7c2 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -293,7 +293,6 @@ EXPORT_SYMBOL(drm_connector_init); /** * drm_connector_attach_edid_property - attach edid property. - * @dev: DRM device * @connector: the connector * * Some connector types like DRM_MODE_CONNECTOR_VIRTUAL do not get a diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 2f09f3473375..0183b85bc1b2 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -715,24 +715,33 @@ EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); */ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { - /* Used by drm_gem_mmap() to lookup the GEM object */ - struct drm_file priv = { - .minor = obj->dev->primary, - }; - struct file fil = { - .private_data = &priv, - }; + struct drm_file *priv; + struct file *fil; int ret; - ret = drm_vma_node_allow(&obj->vma_node, &priv); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + fil = kzalloc(sizeof(*fil), GFP_KERNEL); + if (!priv || !fil) { + ret = -ENOMEM; + goto out; + } + + /* Used by drm_gem_mmap() to lookup the GEM object */ + priv->minor = obj->dev->primary; + fil->private_data = priv; + + ret = drm_vma_node_allow(&obj->vma_node, priv); if (ret) - return ret; + goto out; vma->vm_pgoff += drm_vma_node_start(&obj->vma_node); - ret = obj->dev->driver->fops->mmap(&fil, vma); + ret = obj->dev->driver->fops->mmap(fil, vma); - drm_vma_node_revoke(&obj->vma_node, &priv); + drm_vma_node_revoke(&obj->vma_node, priv); +out: + kfree(priv); + kfree(fil); return ret; } diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 78eb10902809..076b6da44f46 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -350,6 +350,7 @@ int psb_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -362,20 +363,12 @@ int psb_irq_postinstall(struct drm_device *dev) PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R); PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + else + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } if (dev_priv->ops->hotplug_enable) dev_priv->ops->hotplug_enable(dev, true); @@ -388,6 +381,7 @@ void psb_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -396,14 +390,10 @@ void psb_irq_uninstall(struct drm_device *dev) PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG | _PSB_IRQ_MSVDX_FLAG | diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db2e9af49ae6..37c80cfecd09 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -2683,7 +2685,9 @@ static inline bool intel_vtd_active(void) if (intel_iommu_gfx_mapped) return true; #endif - return false; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return !hypervisor_is_type(X86_HYPER_NATIVE); } static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a262a64f5625..ba24ac698e8b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -268,6 +268,8 @@ static int compress_page(struct compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + cond_resched(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -347,6 +349,7 @@ static int compress_page(struct compress *c, if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; + cond_resched(); return 0; } diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 31875b636434..5073622cbb56 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -140,6 +140,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, phy_node = of_parse_phandle(dev->of_node, "phys", 0); if (!phy_node) { dev_err(dev, "Can't found PHY phandle\n"); + ret = -EINVAL; goto err_disable_clk_tmds; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 268f5a3b3122..81e076662c7a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -671,7 +671,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, /* Don't evict this BO if it's outside of the * requested placement range */ - if (place->fpfn >= (bo->mem.start + bo->mem.size) || + if (place->fpfn >= (bo->mem.start + bo->mem.num_pages) || (place->lpfn && place->lpfn <= bo->mem.start)) return false; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 04270a14fcaa..868dd1ef3b69 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -312,6 +312,7 @@ unbind_all: component_unbind_all(dev, drm); gem_destroy: vc4_gem_destroy(drm); + drm_mode_config_cleanup(drm); vc4_bo_cache_destroy(drm); dev_put: drm_dev_put(drm); diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 1689568b597d..12c5d7c96527 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -26,19 +26,17 @@ #define CP_2WHEEL_MOUSE_HACK 0x02 #define CP_2WHEEL_MOUSE_HACK_ON 0x04 +#define VA_INVAL_LOGICAL_BOUNDARY 0x08 + /* * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, +static __u8 *cp_rdesc_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; - if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return rdesc; - if (*rsize < 4) return rdesc; @@ -51,6 +49,40 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static __u8 *va_logical_boundary_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* + * Varmilo VA104M (with VID Cypress and device ID 07B1) incorrectly + * reports Logical Minimum of its Consumer Control device as 572 + * (0x02 0x3c). Fix this by setting its Logical Minimum to zero. + */ + if (*rsize == 25 && + rdesc[0] == 0x05 && rdesc[1] == 0x0c && + rdesc[2] == 0x09 && rdesc[3] == 0x01 && + rdesc[6] == 0x19 && rdesc[7] == 0x00 && + rdesc[11] == 0x16 && rdesc[12] == 0x3c && rdesc[13] == 0x02) { + hid_info(hdev, + "fixing up varmilo VA104M consumer control report descriptor\n"); + rdesc[12] = 0x00; + rdesc[13] = 0x00; + } + return rdesc; +} + +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if (quirks & CP_RDESC_SWAPPED_MIN_MAX) + rdesc = cp_rdesc_fixup(hdev, rdesc, rsize); + if (quirks & VA_INVAL_LOGICAL_BOUNDARY) + rdesc = va_logical_boundary_fixup(hdev, rdesc, rsize); + + return rdesc; +} + static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -131,6 +163,8 @@ static const struct hid_device_id cp_devices[] = { .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE), .driver_data = CP_2WHEEL_MOUSE_HACK }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1), + .driver_data = VA_INVAL_LOGICAL_BOUNDARY }, { } }; MODULE_DEVICE_TABLE(hid, cp_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2c055e60e590..bef096a253dc 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -330,6 +330,8 @@ #define USB_DEVICE_ID_CYPRESS_BARCODE_4 0xed81 #define USB_DEVICE_ID_CYPRESS_TRUETOUCH 0xc001 +#define USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1 0X07b1 + #define USB_VENDOR_ID_DATA_MODUL 0x7374 #define USB_VENDOR_ID_DATA_MODUL_EASYMAXTOUCH 0x1201 @@ -442,6 +444,10 @@ #define USB_VENDOR_ID_FRUCTEL 0x25B6 #define USB_DEVICE_ID_GAMETEL_MT_MODE 0x0002 +#define USB_VENDOR_ID_GAMEVICE 0x27F8 +#define USB_DEVICE_ID_GAMEVICE_GV186 0x0BBE +#define USB_DEVICE_ID_GAMEVICE_KISHI 0x0BBF + #define USB_VENDOR_ID_GAMERON 0x0810 #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001 #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002 @@ -477,6 +483,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR 0x3010 #define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 @@ -726,6 +733,7 @@ #define USB_VENDOR_ID_LOGITECH 0x046d #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e #define USB_DEVICE_ID_LOGITECH_T651 0xb00c +#define USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD 0xb309 #define USB_DEVICE_ID_LOGITECH_C007 0xc007 #define USB_DEVICE_ID_LOGITECH_C077 0xc077 #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 11bd2ca22a2e..13deb9a67685 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -331,6 +331,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, + USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), + HID_BATTERY_QUIRK_IGNORE }, {} }; diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2d8d20a7f457..10cb42a00fe8 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -85,7 +85,12 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, + { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 4256fdc5cd6d..21fbdcde1faa 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -496,7 +496,8 @@ static int sensor_hub_raw_event(struct hid_device *hdev, return 1; ptr = raw_data; - ptr++; /* Skip report id */ + if (report->id) + ptr++; /* Skip report id */ spin_lock_irqsave(&pdata->lock, flags); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 77bb46948eea..da83884b90d2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2729,7 +2729,9 @@ static int wacom_wac_collection(struct hid_device *hdev, struct hid_report *repo if (report->type != HID_INPUT_REPORT) return -1; - if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) + if (WACOM_PAD_FIELD(field)) + return 0; + else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) wacom_wac_pen_report(hdev, report); else if (WACOM_FINGER_FIELD(field) && wacom->wacom_wac.touch_input) wacom_wac_finger_report(hdev, report); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 9ca0706a9d40..e5fc719a34e7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1275,7 +1275,7 @@ static void balloon_up(struct work_struct *dummy) /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { - pr_warn("Balloon request will be partially fulfilled. %s\n", + pr_info("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 84f1dcb69827..9b0c5d719232 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -126,10 +126,10 @@ static void free_sink_buffer(struct etm_event_data *event_data) cpumask_t *mask = &event_data->mask; struct coresight_device *sink; - if (WARN_ON(cpumask_empty(mask))) + if (!event_data->snk_config) return; - if (!event_data->snk_config) + if (WARN_ON(cpumask_empty(mask))) return; cpu = cpumask_first(mask); @@ -310,6 +310,16 @@ static void etm_event_start(struct perf_event *event, int flags) if (!event_data) goto fail; + /* + * Check if this ETM is allowed to trace, as decided + * at etm_setup_aux(). This could be due to an unreachable + * sink from this ETM. We can't do much in this case if + * the sink was specified or hinted to the driver. For + * now, simply don't record anything on this ETM. + */ + if (!cpumask_test_cpu(cpu, &event_data->mask)) + goto fail_end_stop; + path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ sink = coresight_get_sink(path); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 3e22cfc0bc48..66a9c6c9dd30 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1102,7 +1102,6 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } -#ifdef CONFIG_CPU_PM static int etm4_cpu_save(struct etmv4_drvdata *drvdata) { int i, ret = 0; @@ -1155,7 +1154,7 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR); state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR); - for (i = 0; i < drvdata->nrseqstate; i++) + for (i = 0; i < drvdata->nrseqstate - 1; i++) state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i)); state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR); @@ -1178,8 +1177,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { - state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i)); - state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i)); + state->trcacvr[i] = readq(drvdata->base + TRCACVRn(i)); + state->trcacatr[i] = readq(drvdata->base + TRCACATRn(i)); } /* @@ -1190,16 +1189,16 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) */ for (i = 0; i < drvdata->numcidc; i++) - state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i)); + state->trccidcvr[i] = readq(drvdata->base + TRCCIDCVRn(i)); for (i = 0; i < drvdata->numvmidc; i++) - state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i)); + state->trcvmidcvr[i] = readq(drvdata->base + TRCVMIDCVRn(i)); state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0); state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0); - state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1); + state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1); state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); @@ -1260,7 +1259,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR); writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR); - for (i = 0; i < drvdata->nrseqstate; i++) + for (i = 0; i < drvdata->nrseqstate - 1; i++) writel_relaxed(state->trcseqevr[i], drvdata->base + TRCSEQEVRn(i)); @@ -1291,25 +1290,25 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { - writel_relaxed(state->trcacvr[i], + writeq_relaxed(state->trcacvr[i], drvdata->base + TRCACVRn(i)); - writel_relaxed(state->trcacatr[i], + writeq_relaxed(state->trcacatr[i], drvdata->base + TRCACATRn(i)); } for (i = 0; i < drvdata->numcidc; i++) - writel_relaxed(state->trccidcvr[i], + writeq_relaxed(state->trccidcvr[i], drvdata->base + TRCCIDCVRn(i)); for (i = 0; i < drvdata->numvmidc; i++) - writel_relaxed(state->trcvmidcvr[i], + writeq_relaxed(state->trcvmidcvr[i], drvdata->base + TRCVMIDCVRn(i)); writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0); writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0); - writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1); + writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1); writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); @@ -1372,17 +1371,17 @@ static struct notifier_block etm4_cpu_pm_nb = { static int etm4_cpu_pm_register(void) { - return cpu_pm_register_notifier(&etm4_cpu_pm_nb); + if (IS_ENABLED(CONFIG_CPU_PM)) + return cpu_pm_register_notifier(&etm4_cpu_pm_nb); + + return 0; } static void etm4_cpu_pm_unregister(void) { - cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); + if (IS_ENABLED(CONFIG_CPU_PM)) + cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); } -#else -static int etm4_cpu_pm_register(void) { return 0; } -static void etm4_cpu_pm_unregister(void) { } -#endif static int etm4_probe(struct amba_device *adev, const struct amba_id *id) { @@ -1494,6 +1493,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) return 0; err_arch_supported: + etmdrvdata[drvdata->cpu] = NULL; if (--etm4_count == 0) { etm4_cpu_pm_unregister(); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 570baf4e15dc..a0a953ba335d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -325,7 +325,7 @@ struct etmv4_save_state { u64 trcacvr[ETM_MAX_SINGLE_ADDR_CMP]; u64 trcacatr[ETM_MAX_SINGLE_ADDR_CMP]; u64 trccidcvr[ETMv4_MAX_CTXID_CMP]; - u32 trcvmidcvr[ETM_MAX_VMID_CMP]; + u64 trcvmidcvr[ETM_MAX_VMID_CMP]; u32 trccidcctlr0; u32 trccidcctlr1; u32 trcvmidcctlr0; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index a16273d997a4..8c819deb2bbc 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -90,7 +90,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata) static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) { - coresight_disclaim_device(drvdata); + coresight_disclaim_device(drvdata->base); __tmc_etb_disable_hw(drvdata); } diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index d4b72e4ffd71..0e7f9bd17a91 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1101,14 +1101,6 @@ static int i2c_imx_probe(struct platform_device *pdev) return ret; } - /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, - pdev->name, i2c_imx); - if (ret) { - dev_err(&pdev->dev, "can't claim irq %d\n", irq); - goto clk_disable; - } - /* Init queue */ init_waitqueue_head(&i2c_imx->queue); @@ -1127,6 +1119,14 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto rpm_disable; + /* Request IRQ */ + ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED, + pdev->name, i2c_imx); + if (ret) { + dev_err(&pdev->dev, "can't claim irq %d\n", irq); + goto rpm_disable; + } + /* Set up clock divider */ i2c_imx->bitrate = IMX_I2C_BIT_RATE; ret = of_property_read_u32(pdev->dev.of_node, @@ -1169,13 +1169,12 @@ static int i2c_imx_probe(struct platform_device *pdev) clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + free_irq(irq, i2c_imx); rpm_disable: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - -clk_disable: clk_disable_unprepare(i2c_imx->clk); return ret; } @@ -1183,7 +1182,7 @@ clk_disable: static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); - int ret; + int irq, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -1203,6 +1202,9 @@ static int i2c_imx_remove(struct platform_device *pdev) imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) + free_irq(irq, i2c_imx); clk_disable_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index e5fdca74a630..c22afc979206 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -134,6 +134,12 @@ enum kx_chipset { KX_MAX_CHIPS /* this must be last */ }; +enum kx_acpi_type { + ACPI_GENERIC, + ACPI_SMO8500, + ACPI_KIOX010A, +}; + struct kxcjk1013_data { struct i2c_client *client; struct iio_trigger *dready_trig; @@ -150,7 +156,7 @@ struct kxcjk1013_data { bool motion_trigger_on; int64_t timestamp; enum kx_chipset chipset; - bool is_smo8500_device; + enum kx_acpi_type acpi_type; }; enum kxcjk1013_axis { @@ -277,6 +283,32 @@ static const struct { {19163, 1, 0}, {38326, 0, 1} }; +#ifdef CONFIG_ACPI +enum kiox010a_fn_index { + KIOX010A_SET_LAPTOP_MODE = 1, + KIOX010A_SET_TABLET_MODE = 2, +}; + +static int kiox010a_dsm(struct device *dev, int fn_index) +{ + acpi_handle handle = ACPI_HANDLE(dev); + guid_t kiox010a_dsm_guid; + union acpi_object *obj; + + if (!handle) + return -ENODEV; + + guid_parse("1f339696-d475-4e26-8cad-2e9f8e6d7a91", &kiox010a_dsm_guid); + + obj = acpi_evaluate_dsm(handle, &kiox010a_dsm_guid, 1, fn_index, NULL); + if (!obj) + return -EIO; + + ACPI_FREE(obj); + return 0; +} +#endif + static int kxcjk1013_set_mode(struct kxcjk1013_data *data, enum kxcjk1013_mode mode) { @@ -354,6 +386,13 @@ static int kxcjk1013_chip_init(struct kxcjk1013_data *data) { int ret; +#ifdef CONFIG_ACPI + if (data->acpi_type == ACPI_KIOX010A) { + /* Make sure the kbd and touchpad on 2-in-1s using 2 KXCJ91008-s work */ + kiox010a_dsm(&data->client->dev, KIOX010A_SET_LAPTOP_MODE); + } +#endif + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_WHO_AM_I); if (ret < 0) { dev_err(&data->client->dev, "Error reading who_am_i\n"); @@ -1241,7 +1280,7 @@ static irqreturn_t kxcjk1013_data_rdy_trig_poll(int irq, void *private) static const char *kxcjk1013_match_acpi_device(struct device *dev, enum kx_chipset *chipset, - bool *is_smo8500_device) + enum kx_acpi_type *acpi_type) { const struct acpi_device_id *id; @@ -1250,7 +1289,9 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, return NULL; if (strcmp(id->id, "SMO8500") == 0) - *is_smo8500_device = true; + *acpi_type = ACPI_SMO8500; + else if (strcmp(id->id, "KIOX010A") == 0) + *acpi_type = ACPI_KIOX010A; *chipset = (enum kx_chipset)id->driver_data; @@ -1286,7 +1327,7 @@ static int kxcjk1013_probe(struct i2c_client *client, } else if (ACPI_HANDLE(&client->dev)) { name = kxcjk1013_match_acpi_device(&client->dev, &data->chipset, - &data->is_smo8500_device); + &data->acpi_type); } else return -ENODEV; @@ -1304,7 +1345,7 @@ static int kxcjk1013_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &kxcjk1013_info; - if (client->irq > 0 && !data->is_smo8500_device) { + if (client->irq > 0 && data->acpi_type != ACPI_SMO8500) { ret = devm_request_threaded_irq(&client->dev, client->irq, kxcjk1013_data_rdy_trig_poll, kxcjk1013_event_handler, diff --git a/drivers/iio/adc/ti-adc0832.c b/drivers/iio/adc/ti-adc0832.c index 188dae705bf7..a408d97e2d2e 100644 --- a/drivers/iio/adc/ti-adc0832.c +++ b/drivers/iio/adc/ti-adc0832.c @@ -31,6 +31,12 @@ struct adc0832 { struct regulator *reg; struct mutex lock; u8 mux_bits; + /* + * Max size needed: 16x 1 byte ADC data + 8 bytes timestamp + * May be shorter if not all channels are enabled subject + * to the timestamp remaining 8 byte aligned. + */ + u8 data[24] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -202,7 +208,6 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc0832 *adc = iio_priv(indio_dev); - u8 data[24] = { }; /* 16x 1 byte ADC data + 8 bytes timestamp */ int scan_index; int i = 0; @@ -220,10 +225,10 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) goto out; } - data[i] = ret; + adc->data[i] = ret; i++; } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 703d68ae96b7..4517d7742bc3 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -50,6 +50,12 @@ struct adc12138 { struct completion complete; /* The number of cclk periods for the S/H's acquisition time */ unsigned int acquisition_time; + /* + * Maximum size needed: 16x 2 bytes ADC data + 8 bytes timestamp. + * Less may be need if not all channels are enabled, as long as + * the 8 byte alignment of the timestamp is maintained. + */ + __be16 data[20] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -332,7 +338,6 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc12138 *adc = iio_priv(indio_dev); - __be16 data[20] = { }; /* 16x 2 bytes ADC data + 8 bytes timestamp */ __be16 trash; int ret; int scan_index; @@ -348,7 +353,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) reinit_completion(&adc->complete); ret = adc12138_start_and_read_conv(adc, scan_chan, - i ? &data[i - 1] : &trash); + i ? &adc->data[i - 1] : &trash); if (ret) { dev_warn(&adc->spi->dev, "failed to start conversion\n"); @@ -365,7 +370,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } if (i) { - ret = adc12138_read_conv_data(adc, &data[i - 1]); + ret = adc12138_read_conv_data(adc, &adc->data[i - 1]); if (ret) { dev_warn(&adc->spi->dev, "failed to get conversion data\n"); @@ -373,7 +378,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 59770e5b6660..b080362a8766 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -49,13 +49,20 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct itg3200 *st = iio_priv(indio_dev); - __be16 buf[ITG3200_SCAN_ELEMENTS + sizeof(s64)/sizeof(u16)]; + /* + * Ensure correct alignment and padding including for the + * timestamp that may be inserted. + */ + struct { + __be16 buf[ITG3200_SCAN_ELEMENTS]; + s64 ts __aligned(8); + } scan; - int ret = itg3200_read_all_channels(st->i2c, buf); + int ret = itg3200_read_all_channels(st->i2c, scan.buf); if (ret < 0) goto error_ret; - iio_push_to_buffers_with_timestamp(indio_dev, buf, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/light/si1145.c b/drivers/iio/light/si1145.c index 76f16f9c7616..31f78fd7f915 100644 --- a/drivers/iio/light/si1145.c +++ b/drivers/iio/light/si1145.c @@ -172,6 +172,7 @@ struct si1145_part_info { * @part_info: Part information * @trig: Pointer to iio trigger * @meas_rate: Value of MEAS_RATE register. Only set in HW in auto mode + * @buffer: Used to pack data read from sensor. */ struct si1145_data { struct i2c_client *client; @@ -183,6 +184,14 @@ struct si1145_data { bool autonomous; struct iio_trigger *trig; int meas_rate; + /* + * Ensure timestamp will be naturally aligned if present. + * Maximum buffer size (may be only partly used if not all + * channels are enabled): + * 6*2 bytes channels data + 4 bytes alignment + + * 8 bytes timestamp + */ + u8 buffer[24] __aligned(8); }; /** @@ -444,12 +453,6 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) struct iio_poll_func *pf = private; struct iio_dev *indio_dev = pf->indio_dev; struct si1145_data *data = iio_priv(indio_dev); - /* - * Maximum buffer size: - * 6*2 bytes channels data + 4 bytes alignment + - * 8 bytes timestamp - */ - u8 buffer[24]; int i, j = 0; int ret; u8 irq_status = 0; @@ -482,7 +485,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) ret = i2c_smbus_read_i2c_block_data_or_emulated( data->client, indio_dev->channels[i].address, - sizeof(u16) * run, &buffer[j]); + sizeof(u16) * run, &data->buffer[j]); if (ret < 0) goto done; j += run * sizeof(u16); @@ -497,7 +500,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) goto done; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, iio_get_time_ns(indio_dev)); done: diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index df8f5ceea2dd..30385ba7c5d9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -571,13 +571,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index a6531ffe29a6..a5694dec3f2e 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -808,8 +808,10 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_arm; + } cq_context = mailbox->buf; @@ -851,9 +853,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } spin_lock_irq(&dev->cq_table.lock); - if (mthca_array_set(&dev->cq_table.cq, - cq->cqn & (dev->limits.num_cqs - 1), - cq)) { + err = mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), cq); + if (err) { spin_unlock_irq(&dev->cq_table.lock); goto err_out_free_mr; } diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index e908dfbaa137..1f1d6a000e5c 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -677,6 +677,7 @@ int qedr_iw_destroy_listen(struct iw_cm_id *cm_id) listener->qed_handle); cm_id->rem_ref(cm_id); + kfree(listener); return rc; } diff --git a/drivers/input/input.c b/drivers/input/input.c index 57072d5a6ae0..f96c40aede85 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -193,6 +193,7 @@ static void input_repeat_key(struct timer_list *t) input_value_sync }; + input_set_timestamp(dev, ktime_get()); input_pass_values(dev, vals, ARRAY_SIZE(vals)); if (dev->rep[REP_PERIOD]) diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index ad5d7f94f95a..1c7aa86c92ab 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -111,7 +111,8 @@ static irqreturn_t sunkbd_interrupt(struct serio *serio, switch (data) { case SUNKBD_RET_RESET: - schedule_work(&sunkbd->tq); + if (sunkbd->enabled) + schedule_work(&sunkbd->tq); sunkbd->reset = -1; break; @@ -212,16 +213,12 @@ static int sunkbd_initialize(struct sunkbd *sunkbd) } /* - * sunkbd_reinit() sets leds and beeps to a state the computer remembers they - * were in. + * sunkbd_set_leds_beeps() sets leds and beeps to a state the computer remembers + * they were in. */ -static void sunkbd_reinit(struct work_struct *work) +static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd) { - struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); - - wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ); - serio_write(sunkbd->serio, SUNKBD_CMD_SETLED); serio_write(sunkbd->serio, (!!test_bit(LED_CAPSL, sunkbd->dev->led) << 3) | @@ -234,11 +231,39 @@ static void sunkbd_reinit(struct work_struct *work) SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd)); } + +/* + * sunkbd_reinit() wait for the keyboard reset to complete and restores state + * of leds and beeps. + */ + +static void sunkbd_reinit(struct work_struct *work) +{ + struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); + + /* + * It is OK that we check sunkbd->enabled without pausing serio, + * as we only want to catch true->false transition that will + * happen once and we will be woken up for it. + */ + wait_event_interruptible_timeout(sunkbd->wait, + sunkbd->reset >= 0 || !sunkbd->enabled, + HZ); + + if (sunkbd->reset >= 0 && sunkbd->enabled) + sunkbd_set_leds_beeps(sunkbd); +} + static void sunkbd_enable(struct sunkbd *sunkbd, bool enable) { serio_pause_rx(sunkbd->serio); sunkbd->enabled = enable; serio_continue_rx(sunkbd->serio); + + if (!enable) { + wake_up_interruptible(&sunkbd->wait); + cancel_work_sync(&sunkbd->tq); + } } /* diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index a3e79bf5a04b..3695dd7dbb9b 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -696,7 +696,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq, struct input_dev *input_dev; const struct adxl34x_platform_data *pdata; int err, range, i; - unsigned char revid; + int revid; if (!irq) { dev_err(dev, "no IRQ?\n"); diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index e1423f7648d6..4c039e4125d9 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(hil_mlc_unregister); static LIST_HEAD(hil_mlcs); static DEFINE_RWLOCK(hil_mlcs_lock); static struct timer_list hil_mlcs_kicker; -static int hil_mlcs_probe; +static int hil_mlcs_probe, hil_mlc_stop; static void hil_mlcs_process(unsigned long unused); static DECLARE_TASKLET_DISABLED(hil_mlcs_tasklet, hil_mlcs_process, 0); @@ -702,9 +702,13 @@ static int hilse_donode(hil_mlc *mlc) if (!mlc->ostarted) { mlc->ostarted = 1; mlc->opacket = pack; - mlc->out(mlc); + rc = mlc->out(mlc); nextidx = HILSEN_DOZE; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; } mlc->ostarted = 0; @@ -715,8 +719,13 @@ static int hilse_donode(hil_mlc *mlc) case HILSE_CTS: write_lock_irqsave(&mlc->lock, flags); - nextidx = mlc->cts(mlc) ? node->bad : node->good; + rc = mlc->cts(mlc); + nextidx = rc ? node->bad : node->good; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; default: @@ -780,6 +789,12 @@ static void hil_mlcs_process(unsigned long unused) static void hil_mlcs_timer(struct timer_list *unused) { + if (hil_mlc_stop) { + /* could not send packet - stop immediately. */ + pr_warn(PREFIX "HIL seems stuck - Disabling HIL MLC.\n"); + return; + } + hil_mlcs_probe = 1; tasklet_schedule(&hil_mlcs_tasklet); /* Re-insert the periodic task. */ diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 232d30c825bd..3e85e9039374 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -210,7 +210,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) priv->tseq[2] = 1; priv->tseq[3] = 0; priv->tseq[4] = 0; - __hp_sdc_enqueue_transaction(&priv->trans); + return __hp_sdc_enqueue_transaction(&priv->trans); busy: return 1; done: @@ -219,7 +219,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) return 0; } -static void hp_sdc_mlc_out(hil_mlc *mlc) +static int hp_sdc_mlc_out(hil_mlc *mlc) { struct hp_sdc_mlc_priv_s *priv; @@ -234,7 +234,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) do_data: if (priv->emtestmode) { up(&mlc->osem); - return; + return 0; } /* Shouldn't be sending commands when loop may be busy */ BUG_ON(down_trylock(&mlc->csem)); @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) BUG_ON(down_trylock(&mlc->csem)); } enqueue: - hp_sdc_enqueue_transaction(&priv->trans); + return hp_sdc_enqueue_transaction(&priv->trans); } static int __init hp_sdc_mlc_init(void) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 95a78ccbd847..fef3b4064f18 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -125,6 +125,7 @@ module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600); MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]"); #endif +static bool i8042_present; static bool i8042_bypass_aux_irq_test; static char i8042_kbd_firmware_id[128]; static char i8042_aux_firmware_id[128]; @@ -345,6 +346,9 @@ int i8042_command(unsigned char *param, int command) unsigned long flags; int retval; + if (!i8042_present) + return -1; + spin_lock_irqsave(&i8042_lock, flags); retval = __i8042_command(param, command); spin_unlock_irqrestore(&i8042_lock, flags); @@ -1613,12 +1617,15 @@ static int __init i8042_init(void) err = i8042_platform_init(); if (err) - return err; + return (err == -ENODEV) ? 0 : err; err = i8042_controller_check(); if (err) goto err_platform_exit; + /* Set this before creating the dev to allow i8042_command to work right away */ + i8042_present = true; + pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); @@ -1637,6 +1644,9 @@ static int __init i8042_init(void) static void __exit i8042_exit(void) { + if (!i8042_present) + return; + platform_device_unregister(i8042_platform_device); platform_driver_unregister(&i8042_driver); i8042_platform_exit(); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 08afffd5bc12..b74e4a897f07 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -95,6 +95,7 @@ config TOUCHSCREEN_AD7879_SPI config TOUCHSCREEN_ADC tristate "Generic ADC based resistive touchscreen" depends on IIO + select IIO_BUFFER select IIO_BUFFER_CB help Say Y here if you want to use the generic ADC diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 859b06424e5c..df6f3cc958e5 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -410,7 +410,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT 128 struct irq_remap_table { diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c index 2cfd9389ee96..b944ae828004 100644 --- a/drivers/leds/leds-bcm6328.c +++ b/drivers/leds/leds-bcm6328.c @@ -336,7 +336,7 @@ static int bcm6328_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6328_led_set; led->cdev.blink_set = bcm6328_blink_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; diff --git a/drivers/leds/leds-bcm6358.c b/drivers/leds/leds-bcm6358.c index b2cc06618abe..a86ab6197a4e 100644 --- a/drivers/leds/leds-bcm6358.c +++ b/drivers/leds/leds-bcm6358.c @@ -141,7 +141,7 @@ static int bcm6358_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6358_led_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index fd8607124bdb..503f5e06fa86 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1371,7 +1371,7 @@ __acquires(bitmap->lock) if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + - PAGE_COUNTER_SHIFT - 1); + PAGE_COUNTER_SHIFT); else csize = ((sector_t)1) << bitmap->chunkshift; *blocks = csize - (offset & (csize - 1)); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d91154d65455..c7bda4b0bced 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2417,8 +2417,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; - mutex_unlock(&conf->cache_size_mutex); - conf->slab_cache = sc; conf->active_name = 1-conf->active_name; @@ -2441,6 +2439,8 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!err) conf->pool_size = newsize; + mutex_unlock(&conf->cache_size_mutex); + return err; } diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c index 8cc3bdb7f608..0fe8b869245b 100644 --- a/drivers/media/i2c/imx274.c +++ b/drivers/media/i2c/imx274.c @@ -1239,6 +1239,8 @@ static int imx274_s_frame_interval(struct v4l2_subdev *sd, ret = imx274_set_frame_interval(imx274, fi->interval); if (!ret) { + fi->interval = imx274->frame_interval; + /* * exposure time range is decided by frame interval * need to update it after frame interval changes @@ -1760,9 +1762,9 @@ static int imx274_set_frame_interval(struct stimx274 *priv, __func__, frame_interval.numerator, frame_interval.denominator); - if (frame_interval.numerator == 0) { - err = -EINVAL; - goto fail; + if (frame_interval.numerator == 0 || frame_interval.denominator == 0) { + frame_interval.denominator = IMX274_DEF_FRAME_RATE; + frame_interval.numerator = 1; } req_frame_rate = (u32)(frame_interval.denominator diff --git a/drivers/media/pci/tw5864/tw5864-video.c b/drivers/media/pci/tw5864/tw5864-video.c index 6c40e60ac993..b0f8d1532b70 100644 --- a/drivers/media/pci/tw5864/tw5864-video.c +++ b/drivers/media/pci/tw5864/tw5864-video.c @@ -776,6 +776,9 @@ static int tw5864_enum_frameintervals(struct file *file, void *priv, fintv->type = V4L2_FRMIVAL_TYPE_STEPWISE; ret = tw5864_frameinterval_get(input, &frameinterval); + if (ret) + return ret; + fintv->stepwise.step = frameinterval; fintv->stepwise.min = frameinterval; fintv->stepwise.max = frameinterval; @@ -794,6 +797,9 @@ static int tw5864_g_parm(struct file *file, void *priv, cp->capability = V4L2_CAP_TIMEPERFRAME; ret = tw5864_frameinterval_get(input, &cp->timeperframe); + if (ret) + return ret; + cp->timeperframe.numerator *= input->frame_interval; cp->capturemode = 0; cp->readbuffers = 2; diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c index 11429633b2fb..f0bca30a0a80 100644 --- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c @@ -579,6 +579,13 @@ static int mtk_jpeg_queue_setup(struct vb2_queue *q, if (!q_data) return -EINVAL; + if (*num_planes) { + for (i = 0; i < *num_planes; i++) + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; + return 0; + } + *num_planes = q_data->fmt->colplanes; for (i = 0; i < q_data->fmt->colplanes; i++) { sizes[i] = q_data->sizeimage[i]; diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index abfc49901222..cb6046481aed 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1853,30 +1853,35 @@ int uvc_xu_ctrl_query(struct uvc_video_chain *chain, { struct uvc_entity *entity; struct uvc_control *ctrl; - unsigned int i, found = 0; + unsigned int i; + bool found; u32 reqflags; u16 size; u8 *data = NULL; int ret; /* Find the extension unit. */ + found = false; list_for_each_entry(entity, &chain->entities, chain) { if (UVC_ENTITY_TYPE(entity) == UVC_VC_EXTENSION_UNIT && - entity->id == xqry->unit) + entity->id == xqry->unit) { + found = true; break; + } } - if (entity->id != xqry->unit) { + if (!found) { uvc_trace(UVC_TRACE_CONTROL, "Extension unit %u not found.\n", xqry->unit); return -ENOENT; } /* Find the control and perform delayed initialization if needed. */ + found = false; for (i = 0; i < entity->ncontrols; ++i) { ctrl = &entity->controls[i]; if (ctrl->index == xqry->selector - 1) { - found = 1; + found = true; break; } } @@ -2033,13 +2038,6 @@ static int uvc_ctrl_add_info(struct uvc_device *dev, struct uvc_control *ctrl, goto done; } - /* - * Retrieve control flags from the device. Ignore errors and work with - * default flag values from the uvc_ctrl array when the device doesn't - * properly implement GET_INFO on standard controls. - */ - uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); - ctrl->initialized = 1; uvc_trace(UVC_TRACE_CONTROL, "Added control %pUl/%u to device %s " @@ -2262,6 +2260,13 @@ static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl) if (uvc_entity_match_guid(ctrl->entity, info->entity) && ctrl->index == info->index) { uvc_ctrl_add_info(dev, ctrl, info); + /* + * Retrieve control flags from the device. Ignore errors + * and work with default flag values from the uvc_ctrl + * array when the device doesn't properly implement + * GET_INFO on standard controls. + */ + uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); break; } } diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 2f214440008c..1c6b2cc6269a 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -165,35 +165,12 @@ static const struct file_operations emif_mr4_fops = { static int __init_or_module emif_debugfs_init(struct emif_data *emif) { - struct dentry *dentry; - int ret; - - dentry = debugfs_create_dir(dev_name(emif->dev), NULL); - if (!dentry) { - ret = -ENOMEM; - goto err0; - } - emif->debugfs_root = dentry; - - dentry = debugfs_create_file("regcache_dump", S_IRUGO, - emif->debugfs_root, emif, &emif_regdump_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - - dentry = debugfs_create_file("mr4", S_IRUGO, - emif->debugfs_root, emif, &emif_mr4_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - + emif->debugfs_root = debugfs_create_dir(dev_name(emif->dev), NULL); + debugfs_create_file("regcache_dump", S_IRUGO, emif->debugfs_root, emif, + &emif_regdump_fops); + debugfs_create_file("mr4", S_IRUGO, emif->debugfs_root, emif, + &emif_mr4_fops); return 0; -err1: - debugfs_remove_recursive(emif->debugfs_root); -err0: - return ret; } static void __exit emif_debugfs_exit(struct emif_data *emif) diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 2af7ae13449d..cec867c10968 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1174,8 +1174,10 @@ mptscsih_remove(struct pci_dev *pdev) MPT_SCSI_HOST *hd; int sz1; - if((hd = shost_priv(host)) == NULL) - return; + if (host == NULL) + hd = NULL; + else + hd = shost_priv(host); mptscsih_shutdown(pdev); @@ -1191,14 +1193,15 @@ mptscsih_remove(struct pci_dev *pdev) "Free'd ScsiLookup (%d) memory\n", ioc->name, sz1)); - kfree(hd->info_kbuf); + if (hd) + kfree(hd->info_kbuf); /* NULL the Scsi_Host pointer */ ioc->sh = NULL; - scsi_host_put(host); - + if (host) + scsi_host_put(host); mpt_detach(pdev); } diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 69df27769c21..3ba8cfa4b3b7 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -212,7 +212,7 @@ static int sprd_pmic_probe(struct spi_device *spi) } ret = devm_regmap_add_irq_chip(&spi->dev, ddata->regmap, ddata->irq, - IRQF_ONESHOT | IRQF_NO_SUSPEND, 0, + IRQF_ONESHOT, 0, &ddata->irq_chip, &ddata->irq_data); if (ret) { dev_err(&spi->dev, "Failed to add PMIC irq chip %d\n", ret); @@ -228,9 +228,34 @@ static int sprd_pmic_probe(struct spi_device *spi) return ret; } + device_init_wakeup(&spi->dev, true); return 0; } +#ifdef CONFIG_PM_SLEEP +static int sprd_pmic_suspend(struct device *dev) +{ + struct sprd_pmic *ddata = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(ddata->irq); + + return 0; +} + +static int sprd_pmic_resume(struct device *dev) +{ + struct sprd_pmic *ddata = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(ddata->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(sprd_pmic_pm_ops, sprd_pmic_suspend, sprd_pmic_resume); + static const struct of_device_id sprd_pmic_match[] = { { .compatible = "sprd,sc2731", .data = &sc2731_data }, {}, @@ -242,6 +267,7 @@ static struct spi_driver sprd_pmic_driver = { .name = "sc27xx-pmic", .bus = &spi_bus_type, .of_match_table = sprd_pmic_match, + .pm = &sprd_pmic_pm_ops, }, .probe = sprd_pmic_probe, }; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 787a69a2a726..b9d3c87e9318 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -397,8 +397,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, *capp_unit_id = get_capp_unit_id(np, *phb_index); of_node_put(np); if (!*capp_unit_id) { - pr_err("cxl: invalid capp unit id (phb_index: %d)\n", - *phb_index); + pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", + *chipid, *phb_index); return -ENODEV; } diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 64e318f589b4..0d23efcc74ff 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -138,11 +138,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl) * * @cl: host client * - * Return: mtu + * Return: mtu or 0 if client is not connected */ static inline size_t mei_cl_mtu(const struct mei_cl *cl) { - return cl->me_cl->props.max_msg_length; + return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; } /** diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 61f0faddfd88..e8ab582551f8 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -764,6 +764,7 @@ int renesas_sdhi_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); renesas_sdhi_clk_disable(host); + tmio_mmc_host_free(host); return 0; } diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 145143b6a0e6..6cc187ce3a32 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -546,6 +546,43 @@ static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev, (host->mmc->caps & MMC_CAP_1_8V_DDR)) host->mmc->caps2 = MMC_CAP2_HS400_1_8V; + /* + * There are two types of presets out in the wild: + * 1) Default/broken presets. + * These presets have two sets of problems: + * a) The clock divisor for SDR12, SDR25, and SDR50 is too small. + * This results in clock frequencies that are 2x higher than + * acceptable. i.e., SDR12 = 25 MHz, SDR25 = 50 MHz, SDR50 = + * 100 MHz.x + * b) The HS200 and HS400 driver strengths don't match. + * By default, the SDR104 preset register has a driver strength of + * A, but the (internal) HS400 preset register has a driver + * strength of B. As part of initializing HS400, HS200 tuning + * needs to be performed. Having different driver strengths + * between tuning and operation is wrong. It results in different + * rise/fall times that lead to incorrect sampling. + * 2) Firmware with properly initialized presets. + * These presets have proper clock divisors. i.e., SDR12 => 12MHz, + * SDR25 => 25 MHz, SDR50 => 50 MHz. Additionally the HS200 and + * HS400 preset driver strengths match. + * + * Enabling presets for HS400 doesn't work for the following reasons: + * 1) sdhci_set_ios has a hard coded list of timings that are used + * to determine if presets should be enabled. + * 2) sdhci_get_preset_value is using a non-standard register to + * read out HS400 presets. The AMD controller doesn't support this + * non-standard register. In fact, it doesn't expose the HS400 + * preset register anywhere in the SDHCI memory map. This results + * in reading a garbage value and using the wrong presets. + * + * Since HS400 and HS200 presets must be identical, we could + * instead use the the SDR104 preset register. + * + * If the above issues are resolved we could remove this quirk for + * firmware that that has valid presets (i.e., SDR12 <= 12 MHz). + */ + host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN; + host->mmc_host_ops.select_drive_strength = amd_select_drive_strength; host->mmc_host_ops.set_ios = amd_set_ios; return 0; diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index 246dc6255e69..9fdb92729c28 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -1273,11 +1273,14 @@ static void via_init_sdc_pm(struct via_crdr_mmc_host *host) static int via_sd_suspend(struct pci_dev *pcidev, pm_message_t state) { struct via_crdr_mmc_host *host; + unsigned long flags; host = pci_get_drvdata(pcidev); + spin_lock_irqsave(&host->lock, flags); via_save_pcictrlreg(host); via_save_sdcreg(host); + spin_unlock_irqrestore(&host->lock, flags); pci_save_state(pcidev); pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 80d64d7e7a8b..ac336164f625 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1471,6 +1471,19 @@ int ubi_thread(void *u) !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); + + /* + * Check kthread_should_stop() after we set the task + * state to guarantee that we either see the stop bit + * and exit or the task state is reset to runnable such + * that it's not scheduled out indefinitely and detects + * the stop bit at kthread_should_stop(). + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + break; + } + schedule(); continue; } diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 1545f2b299d0..f88590074569 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -493,9 +493,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 */ struct sk_buff *skb = priv->echo_skb[idx]; struct canfd_frame *cf = (struct canfd_frame *)skb->data; - u8 len = cf->len; - *len_ptr = len; + /* get the real payload length for netdev statistics */ + if (cf->can_id & CAN_RTR_FLAG) + *len_ptr = 0; + else + *len_ptr = cf->len; + priv->echo_skb[idx] = NULL; return skb; @@ -520,7 +524,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) if (!skb) return 0; - netif_rx(skb); + skb_get(skb); + if (netif_rx(skb) == NET_RX_SUCCESS) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); return len; } @@ -571,7 +579,7 @@ static void can_restart(struct net_device *dev) } cf->can_id |= CAN_ERR_RESTARTED; - netif_rx(skb); + netif_rx_ni(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0be8db6ab319..92fe345e48ab 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -301,8 +301,7 @@ static const struct flexcan_devtype_data fsl_vf610_devtype_data = { static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE | - FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, }; static const struct can_bittiming_const flexcan_bittiming_const = { diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index deb274a19ba0..fbb970220c2d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -675,7 +675,7 @@ static int m_can_handle_state_change(struct net_device *dev, unsigned int ecr; switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; priv->can.state = CAN_STATE_ERROR_WARNING; @@ -704,7 +704,7 @@ static int m_can_handle_state_change(struct net_device *dev, __m_can_get_berr_counter(dev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? @@ -976,7 +976,7 @@ static const struct can_bittiming_const m_can_bittiming_const_31X = { .name = KBUILD_MODNAME, .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ .tseg1_max = 256, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_min = 2, /* Time segment 2 = phase_seg2 */ .tseg2_max = 128, .sjw_max = 128, .brp_min = 1, diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 5696d7e80751..4bc5d522c74b 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -256,8 +256,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, cf_len = get_can_dlc(pucan_msg_get_dlc(msg)); /* if this frame is an echo, */ - if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) && - !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) { + if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) { unsigned long flags; spin_lock_irqsave(&priv->echo_lock, flags); @@ -271,7 +270,13 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, netif_wake_queue(priv->ndev); spin_unlock_irqrestore(&priv->echo_lock, flags); - return 0; + + /* if this frame is only an echo, stop here. Otherwise, + * continue to push this application self-received frame into + * its own rx queue. + */ + if (!(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) + return 0; } /* otherwise, it should be pushed into rx fifo */ diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 5f7e97d54733..5cf4171df1f4 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -281,7 +281,7 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { - kfree_skb(skb); + dev_kfree_skb_any(skb); return -ENOBUFS; } @@ -326,7 +326,7 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, { if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { - kfree_skb(skb); + dev_kfree_skb_any(skb); return -ENOBUFS; } diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index db6ea936dc3f..81a3fdd5e010 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -903,7 +903,8 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->base)) { dev_err(&pdev->dev, "hecc ioremap failed\n"); - return PTR_ERR(priv->base); + err = PTR_ERR(priv->base); + goto probe_exit_candev; } /* handle hecc-ram memory */ @@ -916,7 +917,8 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->hecc_ram = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->hecc_ram)) { dev_err(&pdev->dev, "hecc-ram ioremap failed\n"); - return PTR_ERR(priv->hecc_ram); + err = PTR_ERR(priv->hecc_ram); + goto probe_exit_candev; } /* handle mbx memory */ @@ -929,13 +931,14 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->mbx = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->mbx)) { dev_err(&pdev->dev, "mbx ioremap failed\n"); - return PTR_ERR(priv->mbx); + err = PTR_ERR(priv->mbx); + goto probe_exit_candev; } irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!irq) { dev_err(&pdev->dev, "No irq resource\n"); - goto probe_exit; + goto probe_exit_candev; } priv->ndev = ndev; @@ -988,7 +991,7 @@ probe_exit_clk: clk_put(priv->clk); probe_exit_candev: free_candev(ndev); -probe_exit: + return err; } diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index cc2e224661b3..6a57169c2715 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -71,21 +71,27 @@ enum gs_can_identify_mode { }; /* data types passed between host and device */ -struct gs_host_config { - u32 byte_order; -} __packed; -/* All data exchanged between host and device is exchanged in host byte order, - * thanks to the struct gs_host_config byte_order member, which is sent first - * to indicate the desired byte order. + +/* The firmware on the original USB2CAN by Geschwister Schneider + * Technologie Entwicklungs- und Vertriebs UG exchanges all data + * between the host and the device in host byte order. This is done + * with the struct gs_host_config::byte_order member, which is sent + * first to indicate the desired byte order. + * + * The widely used open source firmware candleLight doesn't support + * this feature and exchanges the data in little endian byte order. */ +struct gs_host_config { + __le32 byte_order; +} __packed; struct gs_device_config { u8 reserved1; u8 reserved2; u8 reserved3; u8 icount; - u32 sw_version; - u32 hw_version; + __le32 sw_version; + __le32 hw_version; } __packed; #define GS_CAN_MODE_NORMAL 0 @@ -95,26 +101,26 @@ struct gs_device_config { #define GS_CAN_MODE_ONE_SHOT BIT(3) struct gs_device_mode { - u32 mode; - u32 flags; + __le32 mode; + __le32 flags; } __packed; struct gs_device_state { - u32 state; - u32 rxerr; - u32 txerr; + __le32 state; + __le32 rxerr; + __le32 txerr; } __packed; struct gs_device_bittiming { - u32 prop_seg; - u32 phase_seg1; - u32 phase_seg2; - u32 sjw; - u32 brp; + __le32 prop_seg; + __le32 phase_seg1; + __le32 phase_seg2; + __le32 sjw; + __le32 brp; } __packed; struct gs_identify_mode { - u32 mode; + __le32 mode; } __packed; #define GS_CAN_FEATURE_LISTEN_ONLY BIT(0) @@ -125,23 +131,23 @@ struct gs_identify_mode { #define GS_CAN_FEATURE_IDENTIFY BIT(5) struct gs_device_bt_const { - u32 feature; - u32 fclk_can; - u32 tseg1_min; - u32 tseg1_max; - u32 tseg2_min; - u32 tseg2_max; - u32 sjw_max; - u32 brp_min; - u32 brp_max; - u32 brp_inc; + __le32 feature; + __le32 fclk_can; + __le32 tseg1_min; + __le32 tseg1_max; + __le32 tseg2_min; + __le32 tseg2_max; + __le32 sjw_max; + __le32 brp_min; + __le32 brp_max; + __le32 brp_inc; } __packed; #define GS_CAN_FLAG_OVERFLOW 1 struct gs_host_frame { u32 echo_id; - u32 can_id; + __le32 can_id; u8 can_dlc; u8 channel; @@ -337,13 +343,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) if (!skb) return; - cf->can_id = hf->can_id; + cf->can_id = le32_to_cpu(hf->can_id); cf->can_dlc = get_can_dlc(hf->can_dlc); memcpy(cf->data, hf->data, 8); /* ERROR frames tell us information about the controller */ - if (hf->can_id & CAN_ERR_FLAG) + if (le32_to_cpu(hf->can_id) & CAN_ERR_FLAG) gs_update_state(dev, cf); netdev->stats.rx_packets++; @@ -426,11 +432,11 @@ static int gs_usb_set_bittiming(struct net_device *netdev) if (!dbt) return -ENOMEM; - dbt->prop_seg = bt->prop_seg; - dbt->phase_seg1 = bt->phase_seg1; - dbt->phase_seg2 = bt->phase_seg2; - dbt->sjw = bt->sjw; - dbt->brp = bt->brp; + dbt->prop_seg = cpu_to_le32(bt->prop_seg); + dbt->phase_seg1 = cpu_to_le32(bt->phase_seg1); + dbt->phase_seg2 = cpu_to_le32(bt->phase_seg2); + dbt->sjw = cpu_to_le32(bt->sjw); + dbt->brp = cpu_to_le32(bt->brp); /* request bit timings */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -511,7 +517,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, cf = (struct can_frame *)skb->data; - hf->can_id = cf->can_id; + hf->can_id = cpu_to_le32(cf->can_id); hf->can_dlc = cf->can_dlc; memcpy(hf->data, cf->data, cf->can_dlc); @@ -581,6 +587,7 @@ static int gs_can_open(struct net_device *netdev) int rc, i; struct gs_device_mode *dm; u32 ctrlmode; + u32 flags = 0; rc = open_candev(netdev); if (rc) @@ -648,24 +655,24 @@ static int gs_can_open(struct net_device *netdev) /* flags */ ctrlmode = dev->can.ctrlmode; - dm->flags = 0; if (ctrlmode & CAN_CTRLMODE_LOOPBACK) - dm->flags |= GS_CAN_MODE_LOOP_BACK; + flags |= GS_CAN_MODE_LOOP_BACK; else if (ctrlmode & CAN_CTRLMODE_LISTENONLY) - dm->flags |= GS_CAN_MODE_LISTEN_ONLY; + flags |= GS_CAN_MODE_LISTEN_ONLY; /* Controller is not allowed to retry TX * this mode is unavailable on atmels uc3c hardware */ if (ctrlmode & CAN_CTRLMODE_ONE_SHOT) - dm->flags |= GS_CAN_MODE_ONE_SHOT; + flags |= GS_CAN_MODE_ONE_SHOT; if (ctrlmode & CAN_CTRLMODE_3_SAMPLES) - dm->flags |= GS_CAN_MODE_TRIPLE_SAMPLE; + flags |= GS_CAN_MODE_TRIPLE_SAMPLE; /* finally start device */ - dm->mode = GS_CAN_MODE_START; + dm->mode = cpu_to_le32(GS_CAN_MODE_START); + dm->flags = cpu_to_le32(flags); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), 0), GS_USB_BREQ_MODE, @@ -745,9 +752,9 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) return -ENOMEM; if (do_identify) - imode->mode = GS_CAN_IDENTIFY_ON; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_ON); else - imode->mode = GS_CAN_IDENTIFY_OFF; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_OFF); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -798,6 +805,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, struct net_device *netdev; int rc; struct gs_device_bt_const *bt_const; + u32 feature; bt_const = kmalloc(sizeof(*bt_const), GFP_KERNEL); if (!bt_const) @@ -838,14 +846,14 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* dev settup */ strcpy(dev->bt_const.name, "gs_usb"); - dev->bt_const.tseg1_min = bt_const->tseg1_min; - dev->bt_const.tseg1_max = bt_const->tseg1_max; - dev->bt_const.tseg2_min = bt_const->tseg2_min; - dev->bt_const.tseg2_max = bt_const->tseg2_max; - dev->bt_const.sjw_max = bt_const->sjw_max; - dev->bt_const.brp_min = bt_const->brp_min; - dev->bt_const.brp_max = bt_const->brp_max; - dev->bt_const.brp_inc = bt_const->brp_inc; + dev->bt_const.tseg1_min = le32_to_cpu(bt_const->tseg1_min); + dev->bt_const.tseg1_max = le32_to_cpu(bt_const->tseg1_max); + dev->bt_const.tseg2_min = le32_to_cpu(bt_const->tseg2_min); + dev->bt_const.tseg2_max = le32_to_cpu(bt_const->tseg2_max); + dev->bt_const.sjw_max = le32_to_cpu(bt_const->sjw_max); + dev->bt_const.brp_min = le32_to_cpu(bt_const->brp_min); + dev->bt_const.brp_max = le32_to_cpu(bt_const->brp_max); + dev->bt_const.brp_inc = le32_to_cpu(bt_const->brp_inc); dev->udev = interface_to_usbdev(intf); dev->iface = intf; @@ -862,28 +870,29 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* can settup */ dev->can.state = CAN_STATE_STOPPED; - dev->can.clock.freq = bt_const->fclk_can; + dev->can.clock.freq = le32_to_cpu(bt_const->fclk_can); dev->can.bittiming_const = &dev->bt_const; dev->can.do_set_bittiming = gs_usb_set_bittiming; dev->can.ctrlmode_supported = 0; - if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY) + feature = le32_to_cpu(bt_const->feature); + if (feature & GS_CAN_FEATURE_LISTEN_ONLY) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; - if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK) + if (feature & GS_CAN_FEATURE_LOOP_BACK) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK; - if (bt_const->feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) + if (feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; - if (bt_const->feature & GS_CAN_FEATURE_ONE_SHOT) + if (feature & GS_CAN_FEATURE_ONE_SHOT) dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; SET_NETDEV_DEV(netdev, &intf->dev); - if (dconf->sw_version > 1) - if (bt_const->feature & GS_CAN_FEATURE_IDENTIFY) + if (le32_to_cpu(dconf->sw_version) > 1) + if (feature & GS_CAN_FEATURE_IDENTIFY) netdev->ethtool_ops = &gs_usb_ethtool_ops; kfree(bt_const); @@ -918,7 +927,7 @@ static int gs_usb_probe(struct usb_interface *intf, if (!hconf) return -ENOMEM; - hconf->byte_order = 0x0000beef; + hconf->byte_order = cpu_to_le32(0x0000beef); /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 7ab87a758754..218fadc91155 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -367,7 +367,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .tseg2_max = 32, .sjw_max = 16, .brp_min = 1, - .brp_max = 4096, + .brp_max = 8192, .brp_inc = 1, }; diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 1b0afeaf1a3c..896f5b022729 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -337,8 +337,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (!ctx) return NETDEV_TX_BUSY; - can_put_echo_skb(skb, priv->netdev, ctx->ndx); - if (cf->can_id & CAN_EFF_FLAG) { /* SIDH | SIDL | EIDH | EIDL * 28 - 21 | 20 19 18 x x x 17 16 | 15 - 8 | 7 - 0 @@ -368,6 +366,8 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (cf->can_id & CAN_RTR_FLAG) usb_msg.dlc |= MCBA_DLC_RTR_MASK; + can_put_echo_skb(skb, priv->netdev, ctx->ndx); + err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx); if (err) goto xmit_failed; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index afc8d978124e..f7d653d48a1e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -138,14 +138,55 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) /* protect from getting time before setting now */ if (ktime_to_ns(time_ref->tv_host)) { u64 delta_us; + s64 delta_ts = 0; - delta_us = ts - time_ref->ts_dev_2; - if (ts < time_ref->ts_dev_2) - delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1; + /* General case: dev_ts_1 < dev_ts_2 < ts, with: + * + * - dev_ts_1 = previous sync timestamp + * - dev_ts_2 = last sync timestamp + * - ts = event timestamp + * - ts_period = known sync period (theoretical) + * ~ dev_ts2 - dev_ts1 + * *but*: + * + * - time counters wrap (see adapter->ts_used_bits) + * - sometimes, dev_ts_1 < ts < dev_ts2 + * + * "normal" case (sync time counters increase): + * must take into account case when ts wraps (tsw) + * + * < ts_period > < > + * | | | + * ---+--------+----+-------0-+--+--> + * ts_dev_1 | ts_dev_2 | + * ts tsw + */ + if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { + /* case when event time (tsw) wraps */ + if (ts < time_ref->ts_dev_1) + delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits); - delta_us += time_ref->ts_total; + /* Otherwise, sync time counter (ts_dev_2) has wrapped: + * handle case when event time (tsn) hasn't. + * + * < ts_period > < > + * | | | + * ---+--------+--0-+---------+--+--> + * ts_dev_1 | ts_dev_2 | + * tsn ts + */ + } else if (time_ref->ts_dev_1 < ts) { + delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits); + } - delta_us *= time_ref->adapter->us_per_ts_scale; + /* add delay between last sync and event timestamps */ + delta_ts += (signed int)(ts - time_ref->ts_dev_2); + + /* add time from beginning to last sync */ + delta_ts += time_ref->ts_total; + + /* convert ticks number into microseconds */ + delta_us = delta_ts * time_ref->adapter->us_per_ts_scale; delta_us >>= time_ref->adapter->us_per_ts_shift; *time = ktime_add_us(time_ref->tv_host_0, delta_us); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 41988358f63c..19600d35aac5 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -476,12 +476,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)]; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct canfd_frame *cfd; struct sk_buff *skb; const u16 rx_msg_flags = le16_to_cpu(rm->flags); + if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev)) + return -ENOMEM; + + dev = usb_if->dev[pucan_msg_get_channel(rm)]; + netdev = dev->netdev; + if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) { /* CANFD frame case */ skb = alloc_canfd_skb(netdev, &cfd); @@ -527,15 +533,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; - struct pcan_usb_fd_device *pdev = - container_of(dev, struct pcan_usb_fd_device, dev); + struct pcan_usb_fd_device *pdev; enum can_state new_state = CAN_STATE_ERROR_ACTIVE; enum can_state rx_state, tx_state; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; + if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev)) + return -ENOMEM; + + dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; + pdev = container_of(dev, struct pcan_usb_fd_device, dev); + netdev = dev->netdev; + /* nothing should be sent while in BUS_OFF state */ if (dev->can.state == CAN_STATE_BUS_OFF) return 0; @@ -587,9 +599,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)]; - struct pcan_usb_fd_device *pdev = - container_of(dev, struct pcan_usb_fd_device, dev); + struct pcan_usb_fd_device *pdev; + struct peak_usb_device *dev; + + if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev)) + return -EINVAL; + + dev = usb_if->dev[pucan_ermsg_get_channel(er)]; + pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* keep a trace of tx and rx error counters for later use */ pdev->bec.txerr = er->tx_err_cnt; @@ -603,11 +620,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)]; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; + if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev)) + return -EINVAL; + + dev = usb_if->dev[pufd_omsg_get_channel(ov)]; + netdev = dev->netdev; + /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(netdev, &cf); if (!skb) @@ -724,6 +747,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev, u16 tx_msg_size, tx_msg_flags; u8 can_dlc; + if (cfd->len > CANFD_MAX_DLEN) + return -EINVAL; + tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4); tx_msg->size = cpu_to_le16(tx_msg_size); tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX); diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 7a6667e0b9f9..e17158139aa8 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -127,11 +127,9 @@ static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip, * Offset 0x08: VTU/STU Data Register 2 * Offset 0x09: VTU/STU Data Register 3 */ - -static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip, + u16 *regs) { - u16 regs[3]; int i; /* Read all 3 VTU/STU Data registers */ @@ -144,12 +142,45 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, return err; } - /* Extract MemberTag and PortState data */ + return 0; +} + +static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract MemberTag data */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int member_offset = (i % 4) * 4; - unsigned int state_offset = member_offset + 2; entry->member[i] = (regs[i / 4] >> member_offset) & 0x3; + } + + return 0; +} + +static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract PortState data */ + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { + unsigned int state_offset = (i % 4) * 4 + 2; + entry->state[i] = (regs[i / 4] >> state_offset) & 0x3; } @@ -322,6 +353,10 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; + /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 */ @@ -347,11 +382,6 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; if (entry->valid) { - /* Fetch (and mask) VLAN PortState data from the STU */ - err = mv88e6xxx_g1_vtu_stu_get(chip, entry); - if (err) - return err; - err = mv88e6185_g1_vtu_data_read(chip, entry); if (err) return err; @@ -359,6 +389,15 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, err = mv88e6xxx_g1_vtu_fid_read(chip, entry); if (err) return err; + + /* Fetch VLAN PortState data from the STU */ + err = mv88e6xxx_g1_vtu_stu_get(chip, entry); + if (err) + return err; + + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; } return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 3c3222e2dcfc..9aea4cf19d0c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2433,16 +2433,9 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, goto err_mmio_read_less; } - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width)); if (rc) { - dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); - goto err_mmio_read_less; - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (rc) { - dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", - rc); + dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc); goto err_mmio_read_less; } @@ -3183,6 +3176,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS)); + if (rc) { + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc); + goto err_disable_device; + } + pci_set_master(pdev); ena_dev = vzalloc(sizeof(*ena_dev)); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 88f8d31e4c83..7aeb2805fec4 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2389,7 +2389,8 @@ static int b44_init_one(struct ssb_device *sdev, goto err_out_free_dev; } - if (dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30))) { + err = dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30)); + if (err) { dev_err(sdev->dev, "Required 30BIT DMA mask unsupported by the system\n"); goto err_out_powerdown; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c3f04fb31955..db1a23b8d531 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6326,6 +6326,11 @@ static void bnxt_report_link(struct bnxt *bp) u16 fec; netif_carrier_on(bp->dev); + speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); + if (speed == SPEED_UNKNOWN) { + netdev_info(bp->dev, "NIC Link is Up, speed unknown\n"); + return; + } if (bp->link_info.duplex == BNXT_LINK_DUPLEX_FULL) duplex = "full"; else @@ -6338,7 +6343,6 @@ static void bnxt_report_link(struct bnxt *bp) flow_ctrl = "ON - receive"; else flow_ctrl = "none"; - speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n", speed, duplex, flow_ctrl); if (bp->flags & BNXT_FLAG_EEE_CAP) @@ -8017,7 +8021,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0 && dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { dev_err(&pdev->dev, "System does not support DMA, aborting\n"); - goto init_err_disable; + rc = -EIO; + goto init_err_release; } pci_set_master(pdev); @@ -9116,6 +9121,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) create_singlethread_workqueue("bnxt_pf_wq"); if (!bnxt_pf_wq) { dev_err(&pdev->dev, "Unable to create workqueue.\n"); + rc = -ENOMEM; goto init_err_pci_clean; } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1ea81c23039f..511240e8246f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2300,7 +2300,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 0, start, length, data); } return rc; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index acae87f548a1..0374a1ba1010 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1704,7 +1704,8 @@ static inline int macb_clear_csum(struct sk_buff *skb) static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) { - bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb); + bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || + skb_is_nonlinear(*skb); int padlen = ETH_ZLEN - (*skb)->len; int headroom = skb_headroom(*skb); int tailroom = skb_tailroom(*skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index bb3ee55cb72c..9160b44c68bb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) int err; /* do a set-tcb for smac-sel and CWR bit.. */ - err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); - if (err) - goto smac_err; - err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), TCB_SMAC_SEL_V(f->smt->idx), 1); + if (err) + goto smac_err; + + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); if (!err) return 0; @@ -608,6 +608,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -625,7 +626,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; + if (f->fs.newsmac) + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); @@ -1019,11 +1021,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_F | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, @@ -1059,11 +1058,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_F | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static int cxgb4_set_hash_filter(struct net_device *dev, @@ -1722,6 +1718,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) } return; } + switch (f->fs.action) { + case FILTER_PASS: + if (f->fs.dirsteer) + set_tcb_tflag(adap, f, tid, + TF_DIRECT_STEER_S, 1, 1); + break; + case FILTER_DROP: + set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1); + break; + case FILTER_SWITCH: + set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1); + break; + } + break; default: @@ -1781,22 +1791,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) if (ctx) ctx->result = 0; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - int err = 0; - - if (f->fs.newsmac) - err = configure_filter_smac(adap, f); - - if (!err) { - f->pending = 0; /* async setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; - } - } else { - clear_filter(adap, f); - if (ctx) - ctx->result = err; + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; } } else { /* Something went wrong. Issue a warning about the diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 3297ce025e8b..6ddc2baa7481 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -42,6 +42,10 @@ #define TCB_T_FLAGS_W 1 +#define TF_DROP_S 22 +#define TF_DIRECT_STEER_S 23 +#define TF_LPBK_S 59 + #define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 #define TF_CCTRL_RFR_S 62 diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e4fc38cbe853..acf27c395286 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1884,6 +1884,8 @@ static int ftgmac100_probe(struct platform_device *pdev) return 0; err_ncsi_dev: + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); err_register_netdev: ftgmac100_destroy_mdio(netdev); err_setup_mdio: @@ -1904,6 +1906,8 @@ static int ftgmac100_remove(struct platform_device *pdev) netdev = platform_get_drvdata(pdev); priv = netdev_priv(netdev); + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); unregister_netdev(netdev); clk_disable_unprepare(priv->clk); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 8243501c3757..8db0924ec681 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1388,7 +1388,7 @@ static int gfar_probe(struct platform_device *ofdev) if (dev->features & NETIF_F_IP_CSUM || priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - dev->needed_headroom = GMAC_FCB_LEN; + dev->needed_headroom = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* Initializing some of the rx/tx queue level parameters */ for (i = 0; i < priv->num_tx_queues; i++) { @@ -2370,20 +2370,12 @@ static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* make space for additional header when fcb is needed */ - if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) { - struct sk_buff *skb_new; - - skb_new = skb_realloc_headroom(skb, fcb_len); - if (!skb_new) { + if (fcb_len) { + if (unlikely(skb_cow_head(skb, fcb_len))) { dev->stats.tx_errors++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; } - - if (skb->sk) - skb_set_owner_w(skb_new, skb->sk); - dev_consume_skb_any(skb); - skb = skb_new; } /* total number of fragments in the SKB */ diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d8115a9333e0..2938ac440fb3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2560,6 +2560,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); @@ -4459,6 +4462,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 04ebce738db9..926407f0bbd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1861,8 +1861,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) -#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) -#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13) +#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x17) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 @@ -1870,7 +1870,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_DRIVER_VERSION_SZ 0x40 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) -#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x13) #define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 650ae08c71de..8f020f26ebf5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -182,8 +182,8 @@ struct mlx4_init_hca_param { u64 cmpt_base; u64 mtt_base; u64 global_caps; - u16 log_mc_entry_sz; - u16 log_mc_hash_sz; + u8 log_mc_entry_sz; + u8 log_mc_hash_sz; u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */ u8 log_num_qps; u8 log_num_srqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7366033cd31c..2190daace873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1999,12 +1999,15 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) max_guarantee = evport->info.min_rate; } - return max_t(u32, max_guarantee / fw_max_bw_share, 1); + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + return 0; } -static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +static int normalize_vports_min_rate(struct mlx5_eswitch *esw) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = calculate_vports_min_rate_divider(esw); struct mlx5_vport *evport; u32 vport_max_rate; u32 vport_min_rate; @@ -2018,9 +2021,9 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) continue; vport_min_rate = evport->info.min_rate; vport_max_rate = evport->info.max_rate; - bw_share = MLX5_MIN_BW_SHARE; + bw_share = 0; - if (vport_min_rate) + if (divider) bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, divider, fw_max_bw_share); @@ -2045,7 +2048,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, struct mlx5_vport *evport; u32 fw_max_bw_share; u32 previous_min_rate; - u32 divider; bool min_rate_supported; bool max_rate_supported; int err = 0; @@ -2071,8 +2073,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, previous_min_rate = evport->info.min_rate; evport->info.min_rate = min_rate; - divider = calculate_vports_min_rate_divider(esw); - err = normalize_vports_min_rate(esw, divider); + err = normalize_vports_min_rate(esw); if (err) { evport->info.min_rate = previous_min_rate; goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d8e7ca48753f..049ca4ba49de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -439,7 +439,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) if (trans->core->fw_flash_in_progress) timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); - queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, + timeout << trans->retries); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, @@ -488,6 +489,9 @@ static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, err = mlxsw_emad_transmit(trans->core, trans); if (err == 0) return; + + if (!atomic_dec_and_test(&trans->active)) + return; } else { err = -EIO; } @@ -1111,6 +1115,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + if (!reload) + devlink_free(devlink); return; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 208341541087..df4519c04ba0 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -145,7 +145,8 @@ static void lan743x_intr_software_isr(void *context) int_sts = lan743x_csr_read(adapter, INT_STS); if (int_sts & INT_BIT_SW_GP_) { - lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_); + /* disable the interrupt to prevent repeated re-triggering */ + lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_); intr->software_isr_flag = 1; } } @@ -672,14 +673,12 @@ clean_up: static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 select, u32 addr, u32 length, u32 *buf) { - int ret = -EIO; u32 dp_sel; int i; - mutex_lock(&adapter->dp_lock); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; dp_sel |= select; @@ -691,13 +690,10 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; } - ret = 0; -unlock: - mutex_unlock(&adapter->dp_lock); - return ret; + return 0; } static u32 lan743x_mac_mii_access(u16 id, u16 index, int read) @@ -1250,13 +1246,13 @@ clean_up_data_descriptor: goto clear_active; if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED)) { - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); goto clear_skb; } if (cleanup) { lan743x_ptp_unrequest_tx_timestamp(tx->adapter); - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); } else { ignore_sync = (buffer_info->flags & TX_BUFFER_INFO_FLAG_IGNORE_SYNC) != 0; @@ -1566,7 +1562,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, if (required_number_of_descriptors > lan743x_tx_get_avail_desc(tx)) { if (required_number_of_descriptors > (tx->ring_size - 1)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); } else { /* save to overflow buffer */ tx->overflow_skb = skb; @@ -1599,7 +1595,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, start_frame_length, do_timestamp, skb->ip_summed == CHECKSUM_PARTIAL)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } @@ -1619,7 +1615,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, * frame assembler clean up was performed inside * lan743x_tx_frame_add_fragment */ - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } } @@ -2679,7 +2675,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->intr.irq = adapter->pdev->irq; lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF); - mutex_init(&adapter->dp_lock); ret = lan743x_gpio_init(adapter); if (ret) diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 2d6eea18973e..77273be2d1ee 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -702,9 +702,6 @@ struct lan743x_adapter { struct lan743x_csr csr; struct lan743x_intr intr; - /* lock, used to prevent concurrent access to data port */ - struct mutex dp_lock; - struct lan743x_gpio gpio; struct lan743x_ptp ptp; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 39787bb885c8..80afc8f36e00 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2737,14 +2737,18 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps, sizeof(*iwarp_info->partial_fpdus), GFP_KERNEL); - if (!iwarp_info->partial_fpdus) + if (!iwarp_info->partial_fpdus) { + rc = -ENOMEM; goto err; + } iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL); - if (!iwarp_info->mpa_intermediate_buf) + if (!iwarp_info->mpa_intermediate_buf) { + rc = -ENOMEM; goto err; + } /* The mpa_bufs array serves for pending RX packets received on the * mpa ll2 that don't have place on the tx ring and require later @@ -2754,8 +2758,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc, sizeof(*iwarp_info->mpa_bufs), GFP_KERNEL); - if (!iwarp_info->mpa_bufs) + if (!iwarp_info->mpa_bufs) { + rc = -ENOMEM; goto err; + } INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list); INIT_LIST_HEAD(&iwarp_info->mpa_buf_list); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index cda5b0a9e948..10286215092f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2251,7 +2251,8 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter) /* Boot either flash image or firmware image from host file system */ if (qlcnic_load_fw_file == 1) { - if (qlcnic_83xx_load_fw_image_from_host(adapter)) + err = qlcnic_83xx_load_fw_image_from_host(adapter); + if (err) return err; } else { QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID, diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index c9d43bad1e2f..9b1804a228d9 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -197,6 +197,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port_rcu(dev); + if (unlikely(!port)) { + atomic_long_inc(&skb->dev->rx_nohandler); + kfree_skb(skb); + goto done; + } switch (port->rmnet_mode) { case RMNET_EPMODE_VND: diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 126e3a1cf0eb..1df551afc207 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6275,7 +6275,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return !eth_skb_pad(skb); + /* eth_skb_pad would free the skb on error */ + return !__skb_put_padto(skb, ETH_ZLEN, false); } return true; @@ -6631,7 +6632,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) return IRQ_NONE; rtl_irq_disable(tp); - napi_schedule_irqoff(&tp->napi); + napi_schedule(&tp->napi); return IRQ_HANDLED; } @@ -6887,7 +6888,7 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt, - IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp); + IRQF_SHARED, dev->name, tp); if (retval < 0) goto err_release_fw_2; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 569e698b5c80..c24b7ea37e39 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1732,12 +1732,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req) config.flags = 0; config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) + switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { + case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) + break; + case RAVB_RXTSTAMP_TYPE_ALL: config.rx_filter = HWTSTAMP_FILTER_ALL; - else + break; + default: config.rx_filter = HWTSTAMP_FILTER_NONE; + } return copy_to_user(req->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d0b5844c8a31..2e2afc824a6a 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -223,8 +223,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (gnvh->oam ? TUNNEL_OAM : 0) | + flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f2fecb684220..bb9cd1262cc9 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -667,10 +667,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, gtp = netdev_priv(dev); - err = gtp_encap_enable(gtp, data); - if (err < 0) - return err; - if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { @@ -681,12 +677,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, err = gtp_hashtable_new(gtp, hashsize); if (err < 0) - goto out_encap; + return err; + + err = gtp_encap_enable(gtp, data); + if (err < 0) + goto out_hashtable; err = register_netdevice(dev); if (err < 0) { netdev_dbg(dev, "failed to register new netdev %d\n", err); - goto out_hashtable; + goto out_encap; } gn = net_generic(dev_net(dev), gtp_net_id); @@ -697,11 +697,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, return 0; +out_encap: + gtp_encap_disable(gtp); out_hashtable: kfree(gtp->addr_hash); kfree(gtp->tid_hash); -out_encap: - gtp_encap_disable(gtp); return err; } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 998d08ae7431..47d518e6d5d4 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1886,7 +1886,8 @@ static int sfp_probe(struct platform_device *pdev) continue; irq = gpiod_to_irq(sfp->gpio[i]); - if (!irq) { + if (irq < 0) { + irq = 0; poll = true; continue; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d2612b69257e..ebd630a94571 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1029,7 +1029,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9011, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9021, 1)}, {QMI_FIXED_INTF(0x05c6, 0x9022, 2)}, - {QMI_FIXED_INTF(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ + {QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ {QMI_FIXED_INTF(0x05c6, 0x9026, 3)}, {QMI_FIXED_INTF(0x05c6, 0x902e, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9031, 5)}, @@ -1268,6 +1268,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b55eeb8f8fa3..93899a7be9c5 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -336,8 +336,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static int vrf_finish_direct(struct net *net, struct sock *sk, - struct sk_buff *skb) +static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; @@ -356,7 +355,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk, skb_pull(skb, ETH_HLEN); } - return 1; + /* reset skb device */ + nf_reset(skb); } #if IS_ENABLED(CONFIG_IPV6) @@ -435,15 +435,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output6_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip6_local_out(net, sk, skb); +} + static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IPV6); - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output6_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output6_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip6_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, @@ -456,18 +482,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output6_direct); + skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, @@ -649,15 +672,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip_local_out(net, sk, skb); +} + static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, @@ -670,18 +719,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output_direct); + skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index f6b000ddcd15..b7bfc0caa5dc 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -902,6 +902,7 @@ static ssize_t cosa_write(struct file *file, chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); up(&chan->wsem); + kfree(kbuf); return -ERESTARTSYS; } } diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 03b5f5cce6f4..96b4ce13f3a5 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -276,63 +276,69 @@ static inline struct net_device **get_dev_p(struct pvc_device *pvc, static int fr_hard_header(struct sk_buff **skb_p, u16 dlci) { - u16 head_len; struct sk_buff *skb = *skb_p; - switch (skb->protocol) { - case cpu_to_be16(NLPID_CCITT_ANSI_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CCITT_ANSI_LMI; - break; + if (!skb->dev) { /* Control packets */ + switch (dlci) { + case LMI_CCITT_ANSI_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CCITT_ANSI_LMI; + break; - case cpu_to_be16(NLPID_CISCO_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CISCO_LMI; - break; + case LMI_CISCO_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CISCO_LMI; + break; - case cpu_to_be16(ETH_P_IP): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IP; - break; + default: + return -EINVAL; + } - case cpu_to_be16(ETH_P_IPV6): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IPV6; - break; + } else if (skb->dev->type == ARPHRD_DLCI) { + switch (skb->protocol) { + case htons(ETH_P_IP): + skb_push(skb, 4); + skb->data[3] = NLPID_IP; + break; - case cpu_to_be16(ETH_P_802_3): - head_len = 10; - if (skb_headroom(skb) < head_len) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, - head_len); + case htons(ETH_P_IPV6): + skb_push(skb, 4); + skb->data[3] = NLPID_IPV6; + break; + + default: + skb_push(skb, 10); + skb->data[3] = FR_PAD; + skb->data[4] = NLPID_SNAP; + /* OUI 00-00-00 indicates an Ethertype follows */ + skb->data[5] = 0x00; + skb->data[6] = 0x00; + skb->data[7] = 0x00; + /* This should be an Ethertype: */ + *(__be16 *)(skb->data + 8) = skb->protocol; + } + + } else if (skb->dev->type == ARPHRD_ETHER) { + if (skb_headroom(skb) < 10) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, 10); if (!skb2) return -ENOBUFS; dev_kfree_skb(skb); skb = *skb_p = skb2; } - skb_push(skb, head_len); + skb_push(skb, 10); skb->data[3] = FR_PAD; skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; + /* OUI 00-80-C2 stands for the 802.1 organization */ + skb->data[5] = 0x00; skb->data[6] = 0x80; skb->data[7] = 0xC2; + /* PID 00-07 stands for Ethernet frames without FCS */ skb->data[8] = 0x00; - skb->data[9] = 0x07; /* bridged Ethernet frame w/out FCS */ - break; + skb->data[9] = 0x07; - default: - head_len = 10; - skb_push(skb, head_len); - skb->data[3] = FR_PAD; - skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; - skb->data[6] = FR_PAD; - skb->data[7] = FR_PAD; - *(__be16*)(skb->data + 8) = skb->protocol; + } else { + return -EINVAL; } dlci_to_q922(skb->data, dlci); @@ -428,8 +434,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) skb_put(skb, pad); memset(skb->data + len, 0, pad); } - skb->protocol = cpu_to_be16(ETH_P_802_3); } + skb->dev = dev; if (!fr_hard_header(&skb, pvc->dlci)) { dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; @@ -497,10 +503,8 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) memset(skb->data, 0, len); skb_reserve(skb, 4); if (lmi == LMI_CISCO) { - skb->protocol = cpu_to_be16(NLPID_CISCO_LMI); fr_hard_header(&skb, LMI_CISCO_DLCI); } else { - skb->protocol = cpu_to_be16(NLPID_CCITT_ANSI_LMI); fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI); } data = skb_tail_pointer(skb); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 7d15f6208b46..68cda1564c77 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -767,6 +767,7 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, u8 preamble = 0; u8 group_id; u32 info1, info2, info3; + u32 stbc, nsts_su; info1 = __le32_to_cpu(rxd->ppdu_start.info1); info2 = __le32_to_cpu(rxd->ppdu_start.info2); @@ -811,11 +812,16 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, */ bw = info2 & 3; sgi = info3 & 1; + stbc = (info2 >> 3) & 1; group_id = (info2 >> 4) & 0x3F; if (GROUP_ID_IS_SU_MIMO(group_id)) { mcs = (info3 >> 4) & 0x0F; - nss = ((info2 >> 10) & 0x07) + 1; + nsts_su = ((info2 >> 10) & 0x07); + if (stbc) + nss = (nsts_su >> 2) + 1; + else + nss = (nsts_su + 1); } else { /* Hardware doesn't decode VHT-SIG-B into Rx descriptor * so it's impossible to decode MCS. Also since diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 0cdaecb0e28a..28d86da65c05 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -561,6 +561,10 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar, le16_to_cpu(htc_hdr->len), ATH10K_HTC_MBOX_MAX_PAYLOAD_LENGTH); ret = -ENOMEM; + + queue_work(ar->workqueue, &ar->restart_work); + ath10k_warn(ar, "exceeds length, start recovery\n"); + goto err; } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index f19393e584dc..d567fbe79cff 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -973,7 +973,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, struct ath_htc_rx_status *rxstatus; struct ath_rx_status rx_stats; bool decrypt_error = false; - __be16 rs_datalen; + u16 rs_datalen; bool is_phyerr; if (skb->len < HTC_RX_FRAME_HEADER_SIZE) { diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c index 57ad56435dda..8bc0286b4f8c 100644 --- a/drivers/net/wireless/intersil/p54/p54pci.c +++ b/drivers/net/wireless/intersil/p54/p54pci.c @@ -332,10 +332,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) struct p54p_desc *desc; dma_addr_t mapping; u32 idx, i; + __le32 device_addr; spin_lock_irqsave(&priv->lock, flags); idx = le32_to_cpu(ring_control->host_idx[1]); i = idx % ARRAY_SIZE(ring_control->tx_data); + device_addr = ((struct p54_hdr *)skb->data)->req_id; mapping = pci_map_single(priv->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); @@ -349,7 +351,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) desc = &ring_control->tx_data[i]; desc->host_addr = cpu_to_le32(mapping); - desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; + desc->device_addr = device_addr; desc->len = cpu_to_le16(skb->len); desc->flags = 0; diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index c9baf4eab960..075e25d65634 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -478,7 +478,6 @@ static void virt_wifi_net_device_destructor(struct net_device *dev) */ kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; - free_netdev(dev); } /* No lock interaction. */ @@ -486,7 +485,7 @@ static void virt_wifi_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &virt_wifi_ops; - dev->priv_destructor = virt_wifi_net_device_destructor; + dev->needs_free_netdev = true; } /* Called in a RCU read critical section from netif_receive_skb */ @@ -572,10 +571,12 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, goto unregister_netdev; } + dev->priv_destructor = virt_wifi_net_device_destructor; priv->being_deleted = false; priv->is_connected = false; priv->is_up = false; INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); + __module_get(THIS_MODULE); return 0; unregister_netdev: @@ -606,6 +607,7 @@ static void virt_wifi_dellink(struct net_device *dev, netdev_upper_dev_unlink(priv->lowerdev, dev); unregister_netdevice_queue(dev, head); + module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. */ } @@ -618,6 +620,42 @@ static struct rtnl_link_ops virt_wifi_link_ops = { .priv_size = sizeof(struct virt_wifi_netdev_priv), }; +static bool netif_is_virt_wifi_dev(const struct net_device *dev) +{ + return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler; +} + +static int virt_wifi_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr); + struct virt_wifi_netdev_priv *priv; + struct net_device *upper_dev; + LIST_HEAD(list_kill); + + if (!netif_is_virt_wifi_dev(lower_dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + priv = rtnl_dereference(lower_dev->rx_handler_data); + if (!priv) + return NOTIFY_DONE; + + upper_dev = priv->upperdev; + + upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); + unregister_netdevice_many(&list_kill); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block virt_wifi_notifier = { + .notifier_call = virt_wifi_event, +}; + /* Acquires and releases the rtnl lock. */ static int __init virt_wifi_init_module(void) { @@ -626,14 +664,25 @@ static int __init virt_wifi_init_module(void) /* Guaranteed to be locallly-administered and not multicast. */ eth_random_addr(fake_router_bssid); + err = register_netdevice_notifier(&virt_wifi_notifier); + if (err) + return err; + + err = -ENOMEM; common_wiphy = virt_wifi_make_wiphy(); if (!common_wiphy) - return -ENOMEM; + goto notifier; err = rtnl_link_register(&virt_wifi_link_ops); if (err) - virt_wifi_destroy_wiphy(common_wiphy); + goto destroy_wiphy; + return 0; + +destroy_wiphy: + virt_wifi_destroy_wiphy(common_wiphy); +notifier: + unregister_netdevice_notifier(&virt_wifi_notifier); return err; } @@ -643,6 +692,7 @@ static void __exit virt_wifi_cleanup_module(void) /* Will delete any devices that depend on the wiphy. */ rtnl_link_unregister(&virt_wifi_link_ops); virt_wifi_destroy_wiphy(common_wiphy); + unregister_netdevice_notifier(&virt_wifi_notifier); } int virt_wifi_register_network_simulation diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 936c0b3e0ba2..86d23d0f563c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ struct xenvif *vif; /* Parent VIF */ + /* + * TX/RX common EOI handling. + * When feature-split-event-channels = 0, interrupt handler sets + * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set + * by the RX and TX interrupt handlers. + * RX and TX handler threads will issue an EOI when either + * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or + * NETBK_TX_EOI) are set and they will reset those bits. + */ + atomic_t eoi_pending; +#define NETBK_RX_EOI 0x01 +#define NETBK_TX_EOI 0x02 +#define NETBK_COMMON_EOI 0x04 + /* Use NAPI for guest TX */ struct napi_struct napi; /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ @@ -357,6 +371,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 4cafc31b98b7..c960cb7e3251 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif) !vif->disabled; } +static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + if (rc) + napi_schedule(&queue->napi); + return rc; +} + static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)) - napi_schedule(&queue->napi); + old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending); + WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_tx_interrupt(queue)) { + atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget) return work_done; } +static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = xenvif_have_rx_work(queue, false); + if (rc) + xenvif_kick_thread(queue); + return rc; +} + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - xenvif_kick_thread(queue); + old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending); + WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_rx_interrupt(queue)) { + atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } irqreturn_t xenvif_interrupt(int irq, void *dev_id) { - xenvif_tx_interrupt(irq, dev_id); - xenvif_rx_interrupt(irq, dev_id); + struct xenvif_queue *queue = dev_id; + int old; + + old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); + WARN(old, "Interrupt while EOI pending\n"); + + /* Use bitwise or as we need to call both functions. */ + if ((!xenvif_handle_tx_interrupt(queue) | + !xenvif_handle_rx_interrupt(queue))) { + atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, queue->name, queue); if (err < 0) @@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, /* feature-split-event-channels == 1 */ snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, queue->tx_irq_name, queue); if (err < 0) @@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, queue->rx_irq_name, queue); if (err < 0) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1c849106b793..f228298c3bd0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) if (more_to_do) napi_schedule(&queue->napi); + else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_TX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->tx_irq, 0); } static void tx_add_credit(struct xenvif_queue *queue) @@ -1613,9 +1617,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; + unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS; - while (xenvif_ctrl_work_todo(vif)) + while (xenvif_ctrl_work_todo(vif)) { xenvif_ctrl_action(vif); + eoi_flag = 0; + } + + xen_irq_lateeoi(irq, eoi_flag); return IRQ_HANDLED; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index ef5887037b22..9b62f65b630e 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) return queue->stalled && prod - cons >= 1; } -static bool xenvif_have_rx_work(struct xenvif_queue *queue) +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) { return xenvif_rx_ring_slots_available(queue) || (queue->vif->stall_timeout && (xenvif_rx_queue_stalled(queue) || xenvif_rx_queue_ready(queue))) || - kthread_should_stop() || + (test_kthread && kthread_should_stop()) || queue->vif->disabled; } @@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) { DEFINE_WAIT(wait); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) return; for (;;) { long ret; prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) break; + if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_RX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->rx_irq, 0); + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); if (!ret) break; diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index 4da409e77a72..6c78529a8c89 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -37,8 +37,8 @@ struct s3fwrn5_i2c_phy { struct i2c_client *i2c_dev; struct nci_dev *ndev; - unsigned int gpio_en; - unsigned int gpio_fw_wake; + int gpio_en; + int gpio_fw_wake; struct mutex mutex; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3c68a5b35ec1..a52b2f15f372 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -276,9 +276,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev, nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; } +static void nvme_dbbuf_free(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return; + + nvmeq->dbbuf_sq_db = NULL; + nvmeq->dbbuf_cq_db = NULL; + nvmeq->dbbuf_sq_ei = NULL; + nvmeq->dbbuf_cq_ei = NULL; +} + static void nvme_dbbuf_set(struct nvme_dev *dev) { struct nvme_command c; + unsigned int i; if (!dev->dbbuf_dbs) return; @@ -292,6 +304,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); + + for (i = 1; i <= dev->online_queues; i++) + nvme_dbbuf_free(&dev->queues[i]); } } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 077c67816665..134e14e778f8 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1640,7 +1640,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, complete(&queue->cm_done); return 0; case RDMA_CM_EVENT_REJECTED: - nvme_rdma_destroy_queue_ib(queue); cm_error = nvme_rdma_conn_rejected(queue, ev); break; case RDMA_CM_EVENT_ROUTE_ERROR: diff --git a/drivers/of/address.c b/drivers/of/address.c index c42aebba35ab..30806dd35735 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -975,11 +975,13 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); */ bool of_dma_is_coherent(struct device_node *np) { - struct device_node *node = of_node_get(np); + struct device_node *node; if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) return true; + node = of_node_get(np); + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 779d62284b9d..06553d5cbf8c 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -221,6 +221,16 @@ static int __init __rmem_cmp(const void *a, const void *b) if (ra->base > rb->base) return 1; + /* + * Put the dynamic allocations (address == 0, size == 0) before static + * allocations at address 0x0 so that overlap detection works + * correctly. + */ + if (ra->size < rb->size) + return -1; + if (ra->size > rb->size) + return 1; + return 0; } @@ -238,8 +248,7 @@ static void __init __rmem_check_for_overlap(void) this = &reserved_mem[i]; next = &reserved_mem[i + 1]; - if (!(this->base && next->base)) - continue; + if (this->base + this->size > next->base) { phys_addr_t this_end, next_end; diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 379544697aa3..1dcf0e863f62 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -526,10 +526,11 @@ static int __init of_platform_default_populate_init(void) { struct device_node *node; + device_links_supplier_sync_state_pause(); + if (!of_have_populated_dt()) return -ENODEV; - device_links_supplier_sync_state_pause(); /* * Handle certain compatibles explicitly, since we don't want to create * platform_devices for every node in /reserved-memory with a @@ -555,8 +556,7 @@ arch_initcall_sync(of_platform_default_populate_init); static int __init of_platform_sync_state_init(void) { - if (of_have_populated_dt()) - device_links_supplier_sync_state_resume(); + device_links_supplier_sync_state_resume(); return 0; } late_initcall_sync(of_platform_sync_state_init); diff --git a/drivers/of/property.c b/drivers/of/property.c index 5a29428ac947..162c88d2691c 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1031,8 +1031,20 @@ static int of_link_to_phandle(struct device *dev, struct device_node *sup_np, * Find the device node that contains the supplier phandle. It may be * @sup_np or it may be an ancestor of @sup_np. */ - while (sup_np && !of_find_property(sup_np, "compatible", NULL)) + while (sup_np) { + + /* Don't allow linking to a disabled supplier */ + if (!of_device_is_available(sup_np)) { + of_node_put(sup_np); + sup_np = NULL; + } + + if (of_find_property(sup_np, "compatible", NULL)) + break; + sup_np = of_get_next_parent(sup_np); + } + if (!sup_np) { dev_dbg(dev, "Not linking to %pOFP - No device\n", tmp_np); return -ENODEV; @@ -1327,7 +1339,7 @@ static int of_link_to_suppliers(struct device *dev, if (of_link_property(dev, con_np, p->name)) ret = -ENODEV; - for_each_child_of_node(con_np, child) + for_each_available_child_of_node(con_np, child) if (of_link_to_suppliers(dev, child) && !ret) ret = -EAGAIN; diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index de1b4ebe4de2..39c01ef57d83 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -899,6 +899,7 @@ remove_pads: reset: reset_control_assert(padctl->rst); remove: + platform_set_drvdata(pdev, NULL); soc->ops->remove(padctl); return err; } diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index aefe3c33dffd..8dec302dc067 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -458,13 +458,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr) { /* - * The signal type is GPIO if the signal name has "GPIO" as a prefix. + * The signal type is GPIO if the signal name has "GPI" as a prefix. * strncmp (rather than strcmp) is used to implement the prefix * requirement. * - * expr->signal might look like "GPIOT3" in the GPIO case. + * expr->signal might look like "GPIOB1" in the GPIO case. + * expr->signal might look like "GPIT0" in the GPI case. */ - return strncmp(expr->signal, "GPIO", 4) == 0; + return strncmp(expr->signal, "GPI", 3) == 0; } static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 89ff2795a8b5..5e0adb00b430 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -621,6 +621,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin, value |= PADCFG1_TERM_UP; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; @@ -643,6 +647,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin, case PIN_CONFIG_BIAS_PULL_DOWN: value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK); + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b1ffdd3f6d07..d6255049e519 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -157,7 +157,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); } else if (debounce < 250000) { - time = debounce / 15600; + time = debounce / 15625; pin_reg |= time & DB_TMR_OUT_MASK; pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); @@ -167,14 +167,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); } else { - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); ret = -EINVAL; } } else { pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); pin_reg &= ~DB_TMR_OUT_MASK; - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); } writel(pin_reg, gpio_dev->base + offset * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 98bd8213b037..8cc01857bc5c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4251,6 +4251,7 @@ static void hotkey_resume(void) pr_err("error while attempting to reset the event firmware interface\n"); tpacpi_send_radiosw_update(); + tpacpi_input_send_tabletsw(); hotkey_tablet_mode_notify_change(); hotkey_wakeup_reason_notify_change(); hotkey_wakeup_hotunplug_complete_notify_change(); diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index e366977bda41..8c3e9bac4754 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1497,7 +1497,7 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); char *buffer; char *cmd; - int lcd_out, crt_out, tv_out; + int lcd_out = -1, crt_out = -1, tv_out = -1; int remain = count; int value; int ret; @@ -1529,7 +1529,6 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, kfree(cmd); - lcd_out = crt_out = tv_out = -1; ret = get_video_status(dev, &video_out); if (!ret) { unsigned int new_video_out = video_out; diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index ff02a917556a..93e3d9c747aa 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1680,8 +1680,6 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_FULL; else if (di->cache.flags & BQ27000_FLAG_CHGS) status = POWER_SUPPLY_STATUS_CHARGING; - else if (power_supply_am_i_supplied(di->bat) > 0) - status = POWER_SUPPLY_STATUS_NOT_CHARGING; else status = POWER_SUPPLY_STATUS_DISCHARGING; } else { @@ -1693,6 +1691,10 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_CHARGING; } + if ((status == POWER_SUPPLY_STATUS_DISCHARGING) && + (power_supply_am_i_supplied(di->bat) > 0)) + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + val->intval = status; return 0; diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c index 036eb726088e..1e5bc8ad0516 100644 --- a/drivers/power/supply/test_power.c +++ b/drivers/power/supply/test_power.c @@ -358,6 +358,7 @@ static int param_set_ac_online(const char *key, const struct kernel_param *kp) static int param_get_ac_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, ac_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -371,6 +372,7 @@ static int param_set_usb_online(const char *key, const struct kernel_param *kp) static int param_get_usb_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, usb_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -385,6 +387,7 @@ static int param_set_battery_status(const char *key, static int param_get_battery_status(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_status, battery_status, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -399,6 +402,7 @@ static int param_set_battery_health(const char *key, static int param_get_battery_health(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_health, battery_health, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -414,6 +418,7 @@ static int param_get_battery_present(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_present, battery_present, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -431,6 +436,7 @@ static int param_get_battery_technology(char *buffer, { strcpy(buffer, map_get_key(map_technology, battery_technology, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 9e2f274bd44f..60c8375c3c81 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -379,9 +379,9 @@ static void create_power_zone_common_attributes( &dev_attr_max_energy_range_uj.attr; if (power_zone->ops->get_energy_uj) { if (power_zone->ops->reset_energy_uj) - dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR; else - dev_attr_energy_uj.attr.mode = S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IRUSR; power_zone->zone_dev_attrs[count++] = &dev_attr_energy_uj.attr; } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 39ae6a5307fa..8bc62e949956 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1143,7 +1143,6 @@ static int _regulator_do_enable(struct regulator_dev *rdev); /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source - * @constraints: constraints to apply * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. NOTE: @@ -1151,21 +1150,11 @@ static int _regulator_do_enable(struct regulator_dev *rdev); * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. */ -static int set_machine_constraints(struct regulator_dev *rdev, - const struct regulation_constraints *constraints) +static int set_machine_constraints(struct regulator_dev *rdev) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; - if (constraints) - rdev->constraints = kmemdup(constraints, sizeof(*constraints), - GFP_KERNEL); - else - rdev->constraints = kzalloc(sizeof(*constraints), - GFP_KERNEL); - if (!rdev->constraints) - return -ENOMEM; - ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; @@ -1661,6 +1650,15 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } } + if (r == rdev) { + dev_err(dev, "Supply for %s (%s) resolved to itself\n", + rdev->desc->name, rdev->supply_name); + if (!have_full_constraints()) + return -EINVAL; + r = dummy_regulator_rdev; + get_device(&r->dev); + } + /* * If the supply's parent device is not the same as the * regulator's parent device, then ensure the parent device @@ -3450,6 +3448,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = _regulator_get_voltage(rdev->supply->rdev); + } else if (rdev->supply_name) { + return -EPROBE_DEFER; } else { return -EINVAL; } @@ -4640,7 +4640,6 @@ struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, const struct regulator_config *cfg) { - const struct regulation_constraints *constraints = NULL; const struct regulator_init_data *init_data; struct regulator_config *config = NULL; static atomic_t regulator_no = ATOMIC_INIT(-1); @@ -4741,14 +4740,23 @@ regulator_register(const struct regulator_desc *regulator_desc, /* set regulator constraints */ if (init_data) - constraints = &init_data->constraints; + rdev->constraints = kmemdup(&init_data->constraints, + sizeof(*rdev->constraints), + GFP_KERNEL); + else + rdev->constraints = kzalloc(sizeof(*rdev->constraints), + GFP_KERNEL); + if (!rdev->constraints) { + ret = -ENOMEM; + goto wash; + } if (init_data && init_data->supply_regulator) rdev->supply_name = init_data->supply_regulator; else if (regulator_desc->supply_name) rdev->supply_name = regulator_desc->supply_name; - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); if (ret == -EPROBE_DEFER) { /* Regulator might be in bypass mode and so needs its supply * to set the constraints */ @@ -4757,7 +4765,7 @@ regulator_register(const struct regulator_desc *regulator_desc, * that is just being created */ ret = regulator_resolve_supply(rdev); if (!ret) - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); else rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 30e92a9cc97e..4b8306594c3f 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -755,11 +755,14 @@ static int pfuze100_regulator_probe(struct i2c_client *client, * the switched regulator till yet. */ if (pfuze_chip->flags & PFUZE_FLAG_DISABLE_SW) { - if (pfuze_chip->regulator_descs[i].sw_reg) { - desc->ops = &pfuze100_sw_disable_regulator_ops; - desc->enable_val = 0x8; - desc->disable_val = 0x0; - desc->enable_time = 500; + if (pfuze_chip->chip_id == PFUZE100 || + pfuze_chip->chip_id == PFUZE200) { + if (pfuze_chip->regulator_descs[i].sw_reg) { + desc->ops = &pfuze100_sw_disable_regulator_ops; + desc->enable_val = 0x8; + desc->disable_val = 0x0; + desc->enable_time = 500; + } } } diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index 89b9314d64c9..016330f909c0 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -342,8 +342,17 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) return ret; } - /* If data is exactly the same, then just update index, no change */ info = &abb->info[sel]; + /* + * When Linux kernel is starting up, we are'nt sure of the + * Bias configuration that bootloader has configured. + * So, we get to know the actual setting the first time + * we are asked to transition. + */ + if (abb->current_info_idx == -EINVAL) + goto just_set_abb; + + /* If data is exactly the same, then just update index, no change */ oinfo = &abb->info[abb->current_info_idx]; if (!memcmp(info, oinfo, sizeof(*info))) { dev_dbg(dev, "%s: Same data new idx=%d, old idx=%d\n", __func__, @@ -351,6 +360,7 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) goto out; } +just_set_abb: ret = ti_abb_set_opp(rdev, abb, info); out: diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index facc577ab0ac..a755f85686e5 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -970,7 +970,7 @@ static int qcom_glink_rx_open_ack(struct qcom_glink *glink, unsigned int lcid) return -EINVAL; } - complete(&channel->open_ack); + complete_all(&channel->open_ack); return 0; } @@ -1178,7 +1178,7 @@ static int qcom_glink_announce_create(struct rpmsg_device *rpdev) __be32 *val = defaults; int size; - if (glink->intentless) + if (glink->intentless || !completion_done(&channel->open_ack)) return 0; prop = of_find_property(np, "qcom,intents", NULL); @@ -1413,7 +1413,7 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, channel->rcid = ret; spin_unlock_irqrestore(&glink->idr_lock, flags); - complete(&channel->open_req); + complete_all(&channel->open_req); if (create_device) { rpdev = kzalloc(sizeof(*rpdev), GFP_KERNEL); diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 7ddc22eb5b0f..f4db80f9c1b1 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -428,16 +428,26 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) } } -static struct rtc_class_ops rx8010_rtc_ops = { +static const struct rtc_class_ops rx8010_rtc_ops_default = { .read_time = rx8010_get_time, .set_time = rx8010_set_time, .ioctl = rx8010_ioctl, }; +static const struct rtc_class_ops rx8010_rtc_ops_alarm = { + .read_time = rx8010_get_time, + .set_time = rx8010_set_time, + .ioctl = rx8010_ioctl, + .read_alarm = rx8010_read_alarm, + .set_alarm = rx8010_set_alarm, + .alarm_irq_enable = rx8010_alarm_irq_enable, +}; + static int rx8010_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + const struct rtc_class_ops *rtc_ops; struct rx8010_data *rx8010; int err = 0; @@ -468,16 +478,16 @@ static int rx8010_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "unable to request IRQ\n"); - client->irq = 0; - } else { - rx8010_rtc_ops.read_alarm = rx8010_read_alarm; - rx8010_rtc_ops.set_alarm = rx8010_set_alarm; - rx8010_rtc_ops.alarm_irq_enable = rx8010_alarm_irq_enable; + return err; } + + rtc_ops = &rx8010_rtc_ops_alarm; + } else { + rtc_ops = &rx8010_rtc_ops_default; } rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8010_rtc_ops, THIS_MODULE); + rtc_ops, THIS_MODULE); if (IS_ERR(rx8010->rtc)) { dev_err(&client->dev, "unable to register the class device\n"); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a23e7d394a0a..157bbb13c26c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2833,6 +2833,12 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr) if (!block) return -EINVAL; + /* + * If the request is an ERP request there is nothing to requeue. + * This will be done with the remaining original request. + */ + if (cqr->refers) + return 0; spin_lock_irq(&cqr->dq->lock); req = (struct request *) cqr->callback_data; blk_mq_requeue_request(req, false); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5f59e2dfc7db..d0aaef937b0f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -470,12 +470,6 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, } } - if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) == - QETH_QDIO_BUF_HANDLED_DELAYED)) { - /* for recovery situations */ - qeth_init_qdio_out_buf(q, bidx); - QETH_CARD_TEXT(q->card, 2, "clprecov"); - } } diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index c95c782b93a5..60c48dc5d945 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -672,8 +672,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) rcu_read_lock(); list_for_each_entry_rcu(h, &tmp_pg->dh_list, node) { - /* h->sdev should always be valid */ - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = desc[0]; } rcu_read_unlock(); @@ -719,7 +719,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) pg->expiry = 0; rcu_read_lock(); list_for_each_entry_rcu(h, &pg->dh_list, node) { - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = (pg->state & SCSI_ACCESS_STATE_MASK); if (pg->pref) @@ -1160,7 +1161,6 @@ static void alua_bus_detach(struct scsi_device *sdev) spin_lock(&h->pg_lock); pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); rcu_assign_pointer(h->pg, NULL); - h->sdev = NULL; spin_unlock(&h->pg_lock); if (pg) { spin_lock_irq(&pg->lock); @@ -1169,6 +1169,7 @@ static void alua_bus_detach(struct scsi_device *sdev) kref_put(&pg->kref, release_port_group); } sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 11de2198bb87..0fe21cbdf0ca 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -8781,7 +8781,7 @@ reinit_after_soft_reset: /* hook into SCSI subsystem */ rc = hpsa_scsi_add_host(h); if (rc) - goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ + goto clean8; /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */ /* Monitor the controller for firmware lockups */ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; @@ -8796,6 +8796,8 @@ reinit_after_soft_reset: HPSA_EVENT_MONITOR_INTERVAL); return 0; +clean8: /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */ + kfree(h->lastlogicals); clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ hpsa_free_performant_mode(h); h->access.set_intr_mask(h, HPSA_INTR_OFF); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 7a05c7271766..1c69515e870c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -571,8 +571,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) if (conn->task == task) conn->task = NULL; - if (conn->ping_task == task) - conn->ping_task = NULL; + if (READ_ONCE(conn->ping_task) == task) + WRITE_ONCE(conn->ping_task, NULL); /* release get from queueing */ __iscsi_put_task(task); @@ -781,6 +781,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, task->conn->session->age); } + if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK)) + WRITE_ONCE(conn->ping_task, task); + if (!ihost->workq) { if (iscsi_prep_mgmt_task(conn, task)) goto free_task; @@ -988,8 +991,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) struct iscsi_nopout hdr; struct iscsi_task *task; - if (!rhdr && conn->ping_task) - return -EINVAL; + if (!rhdr) { + if (READ_ONCE(conn->ping_task)) + return -EINVAL; + WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK); + } memset(&hdr, 0, sizeof(struct iscsi_nopout)); hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; @@ -1004,11 +1010,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); if (!task) { + if (!rhdr) + WRITE_ONCE(conn->ping_task, NULL); iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); return -EIO; } else if (!rhdr) { /* only track our nops */ - conn->ping_task = task; conn->last_ping = jiffies; } @@ -1021,7 +1028,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task, struct iscsi_conn *conn = task->conn; int rc = 0; - if (conn->ping_task != task) { + if (READ_ONCE(conn->ping_task) != task) { /* * If this is not in response to one of our * nops then it must be from userspace. @@ -1961,7 +1968,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn) */ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) { - if (conn->ping_task && + if (READ_ONCE(conn->ping_task) && time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) return 1; @@ -2096,7 +2103,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Checking the transport already or nop from a cmd timeout still * running */ - if (conn->ping_task) { + if (READ_ONCE(conn->ping_task)) { task->have_checked_conn = true; rc = BLK_EH_RESET_TIMER; goto done; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 29b79e85fa7f..eb6112eb475e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1228,14 +1228,15 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) case DSC_DELETE_PEND: return; case DSC_DELETED: - if (tgt && tgt->tgt_stop && (tgt->sess_count == 0)) - wake_up_all(&tgt->waitQ); - if (sess->vha->fcport_count == 0) - wake_up_all(&sess->vha->fcport_waitQ); - if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] && - !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) + !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) { + if (tgt && tgt->tgt_stop && tgt->sess_count == 0) + wake_up_all(&tgt->waitQ); + + if (sess->vha->fcport_count == 0) + wake_up_all(&sess->vha->fcport_waitQ); return; + } break; case DSC_UPD_FCPORT: /* diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 9a7e3a3bd5ce..009a5b2aa3d0 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1722,15 +1722,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { - struct async_scan_data *data; + struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; + mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); - return NULL; + goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); @@ -1741,7 +1742,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) goto err; init_completion(&data->prev_finished); - mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); @@ -1756,6 +1756,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) return data; err: + mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a316cd693a83..f6d57badd952 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1608,12 +1608,12 @@ start: * work and to enable clocks. */ case CLKS_OFF: - ufshcd_scsi_block_requests(hba); hba->clk_gating.state = REQ_CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - queue_work(hba->clk_gating.clk_gating_workq, - &hba->clk_gating.ungate_work); + if (queue_work(hba->clk_gating.clk_gating_workq, + &hba->clk_gating.ungate_work)) + ufshcd_scsi_block_requests(hba); /* * fall through to check if we should wait for this * work to be done or not. @@ -7995,11 +7995,7 @@ int ufshcd_shutdown(struct ufs_hba *hba) if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; - if (pm_runtime_suspended(hba->dev)) { - ret = ufshcd_runtime_resume(hba); - if (ret) - goto out; - } + pm_runtime_get_sync(hba->dev); ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM); out: diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c index 8429d57087fd..9b716c696477 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas.c +++ b/drivers/staging/comedi/drivers/cb_pcidas.c @@ -1342,6 +1342,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, if (dev->irq && board->has_ao_fifo) { dev->write_subdev = s; s->subdev_flags |= SDF_CMD_WRITE; + s->len_chanlist = s->n_chan; s->do_cmdtest = cb_pcidas_ao_cmdtest; s->do_cmd = cb_pcidas_ao_cmd; s->cancel = cb_pcidas_ao_cancel; diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c index 7448744cc515..12a5be95457f 100644 --- a/drivers/staging/erofs/inode.c +++ b/drivers/staging/erofs/inode.c @@ -53,11 +53,9 @@ static int read_inode(struct inode *inode, void *data) i_gid_write(inode, le32_to_cpu(v2->i_gid)); set_nlink(inode, le32_to_cpu(v2->i_nlink)); - /* ns timestamp */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - le64_to_cpu(v2->i_ctime); - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - le32_to_cpu(v2->i_ctime_nsec); + /* extended inode has its own timestamp */ + inode->i_ctime.tv_sec = le64_to_cpu(v2->i_ctime); + inode->i_ctime.tv_nsec = le32_to_cpu(v2->i_ctime_nsec); inode->i_size = le64_to_cpu(v2->i_size); } else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) { @@ -83,11 +81,9 @@ static int read_inode(struct inode *inode, void *data) i_gid_write(inode, le16_to_cpu(v1->i_gid)); set_nlink(inode, le16_to_cpu(v1->i_nlink)); - /* use build time to derive all file time */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - sbi->build_time; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - sbi->build_time_nsec; + /* use build time for compact inodes */ + inode->i_ctime.tv_sec = sbi->build_time; + inode->i_ctime.tv_nsec = sbi->build_time_nsec; inode->i_size = le32_to_cpu(v1->i_size); } else { @@ -97,6 +93,11 @@ static int read_inode(struct inode *inode, void *data) return -EIO; } + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; + inode->i_atime.tv_sec = inode->i_ctime.tv_sec; + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + /* measure inode.i_blocks as the generic filesystem */ inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; return 0; diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index f67f95043887..5761a31e2318 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -152,12 +152,6 @@ int cvm_oct_phy_setup_device(struct net_device *dev) phy_node = of_parse_phandle(priv->of_node, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(priv->of_node)) { - int rc; - - rc = of_phy_register_fixed_link(priv->of_node); - if (rc) - return rc; - phy_node = of_node_get(priv->of_node); } if (!phy_node) diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 5e271245273c..6c644ef6f3ff 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -80,15 +80,17 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) else port = work->word1.cn38xx.ipprt; - if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) { + if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) /* * Ignore length errors on min size packets. Some * equipment incorrectly pads packets to 64+4FCS * instead of 60+4FCS. Note these packets still get * counted as frame errors. */ - } else if (work->word2.snoip.err_code == 5 || - work->word2.snoip.err_code == 7) { + return 0; + + if (work->word2.snoip.err_code == 5 || + work->word2.snoip.err_code == 7) { /* * We received a packet with either an alignment error * or a FCS error. This may be signalling that we are @@ -119,7 +121,10 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) /* Port received 0xd5 preamble */ work->packet_ptr.s.addr += i + 1; work->word1.len -= i + 5; - } else if ((*ptr & 0xf) == 0xd) { + return 0; + } + + if ((*ptr & 0xf) == 0xd) { /* Port received 0xd preamble */ work->packet_ptr.s.addr += i; work->word1.len -= i + 4; @@ -129,21 +134,20 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) ((*(ptr + 1) & 0xf) << 4); ptr++; } - } else { - printk_ratelimited("Port %d unknown preamble, packet dropped\n", - port); - cvm_oct_free_work(work); - return 1; + return 0; } + + printk_ratelimited("Port %d unknown preamble, packet dropped\n", + port); + cvm_oct_free_work(work); + return 1; } - } else { - printk_ratelimited("Port %d receive error code %d, packet dropped\n", - port, work->word2.snoip.err_code); - cvm_oct_free_work(work); - return 1; } - return 0; + printk_ratelimited("Port %d receive error code %d, packet dropped\n", + port, work->word2.snoip.err_code); + cvm_oct_free_work(work); + return 1; } static void copy_segments_to_skb(cvmx_wqe_t *work, struct sk_buff *skb) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index 9b15c9ed844b..b680e5785ae3 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -875,6 +876,14 @@ static int cvm_oct_probe(struct platform_device *pdev) break; } + if (priv->of_node && of_phy_is_fixed_link(priv->of_node)) { + if (of_phy_register_fixed_link(priv->of_node)) { + netdev_err(dev, "Failed to register fixed link for interface %d, port %d\n", + interface, priv->port); + dev->netdev_ops = NULL; + } + } + if (!dev->netdev_ops) { free_netdev(dev); } else if (register_netdev(dev) < 0) { diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 49ea780f9f42..dc69c41f3e38 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -20,6 +20,7 @@ static const struct sdio_device_id sdio_ids[] = { SDIO_DEVICE(0x024c, 0x0525), }, { SDIO_DEVICE(0x024c, 0x0623), }, { SDIO_DEVICE(0x024c, 0x0626), }, + { SDIO_DEVICE(0x024c, 0x0627), }, { SDIO_DEVICE(0x024c, 0xb723), }, { /* end: all zeroes */ }, }; diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index 93742dbdee77..6c754ddf1257 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -49,15 +49,25 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty) if (tty->ops->write == NULL) return -EOPNOTSUPP; + + mutex_lock(&speakup_tty_mutex); + if (speakup_tty) { + mutex_unlock(&speakup_tty_mutex); + return -EBUSY; + } speakup_tty = tty; ldisc_data = kmalloc(sizeof(struct spk_ldisc_data), GFP_KERNEL); - if (!ldisc_data) + if (!ldisc_data) { + speakup_tty = NULL; + mutex_unlock(&speakup_tty_mutex); return -ENOMEM; + } sema_init(&ldisc_data->sem, 0); ldisc_data->buf_free = true; speakup_tty->disc_data = ldisc_data; + mutex_unlock(&speakup_tty_mutex); return 0; } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 2602b57936d4..58ccded1be85 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -492,8 +492,7 @@ EXPORT_SYMBOL(iscsit_queue_rsp); void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { spin_lock_bh(&conn->cmd_lock); - if (!list_empty(&cmd->i_conn_node) && - !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP)) + if (!list_empty(&cmd->i_conn_node)) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); @@ -4054,12 +4053,22 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_splice_init(&conn->conn_cmd_list, &tmp_list); - list_for_each_entry(cmd, &tmp_list, i_conn_node) { + list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) { struct se_cmd *se_cmd = &cmd->se_cmd; if (se_cmd->se_tfo != NULL) { spin_lock_irq(&se_cmd->t_state_lock); - se_cmd->transport_state |= CMD_T_FABRIC_STOP; + if (se_cmd->transport_state & CMD_T_ABORTED) { + /* + * LIO's abort path owns the cleanup for this, + * so put it back on the list and let + * aborted_task handle it. + */ + list_move_tail(&cmd->i_conn_node, + &conn->conn_cmd_list); + } else { + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + } spin_unlock_irq(&se_cmd->t_state_lock); } } diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index a5afbe6dee68..7cb7efe62b01 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -538,7 +538,8 @@ void optee_free_pages_list(void *list, size_t num_entries) static bool is_normal_memory(pgprot_t p) { #if defined(CONFIG_ARM) - return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; + return (((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC) || + ((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK)); #elif defined(CONFIG_ARM64) return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL); #else diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index d436a1534fc2..384623c49cfe 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -408,12 +408,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend) ring->vector = ret; - ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector); - if (ring->irq < 0) - return ring->irq; + ret = pci_irq_vector(ring->nhi->pdev, ring->vector); + if (ret < 0) + goto err_ida_remove; + + ring->irq = ret; irqflags = no_suspend ? IRQF_NO_SUSPEND : 0; - return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + if (ret) + goto err_ida_remove; + + return 0; + +err_ida_remove: + ida_simple_remove(&nhi->msix_ida, ring->vector); + + return ret; } static void ring_release_msix(struct tb_ring *ring) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index befe75490697..4eb51a123a6f 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -774,6 +774,7 @@ static void enumerate_services(struct tb_xdomain *xd) id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); if (id < 0) { + kfree(svc->key); kfree(svc); break; } diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 159169a48deb..cc4f0c5e5ddf 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -47,7 +47,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, */ baud = tty_termios_baud_rate(termios); - serial8250_do_set_termios(port, termios, old); + serial8250_do_set_termios(port, termios, NULL); tty_termios_encode_baud_rate(termios, baud, baud); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 45e4f2952143..1306ce5c5d9b 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -313,8 +313,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap, */ static int pl011_fifo_to_tty(struct uart_amba_port *uap) { - u16 status; unsigned int ch, flag, fifotaken; + int sysrq; + u16 status; for (fifotaken = 0; fifotaken != 256; fifotaken++) { status = pl011_read(uap, REG_FR); @@ -349,10 +350,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) flag = TTY_FRAME; } - if (uart_handle_sysrq_char(&uap->port, ch & 255)) - continue; + spin_unlock(&uap->port.lock); + sysrq = uart_handle_sysrq_char(&uap->port, ch & 255); + spin_lock(&uap->port.lock); - uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); + if (!sysrq) + uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); } return fifotaken; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4066cb2b79cb..7a6e26b12bf6 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1915,16 +1915,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) unsigned int ucr1; unsigned long flags = 0; int locked = 1; - int retval; - - retval = clk_enable(sport->clk_per); - if (retval) - return; - retval = clk_enable(sport->clk_ipg); - if (retval) { - clk_disable(sport->clk_per); - return; - } if (sport->port.sysrq) locked = 0; @@ -1960,9 +1950,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) if (locked) spin_unlock_irqrestore(&sport->port.lock, flags); - - clk_disable(sport->clk_ipg); - clk_disable(sport->clk_per); } /* @@ -2063,15 +2050,14 @@ imx_uart_console_setup(struct console *co, char *options) retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); - clk_disable(sport->clk_ipg); if (retval) { - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); goto error_console; } - retval = clk_prepare(sport->clk_per); + retval = clk_prepare_enable(sport->clk_per); if (retval) - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); error_console: return retval; diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 1b4008d022bf..2f7ef64536a0 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -1284,6 +1284,9 @@ static int __init serial_txx9_init(void) #ifdef ENABLE_SERIAL_TXX9_PCI ret = pci_register_driver(&serial_txx9_pci_driver); + if (ret) { + platform_driver_unregister(&serial_txx9_plat_driver); + } #endif if (ret == 0) goto out; diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index a7455f8a4235..94cad9f86ff9 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -742,8 +742,13 @@ static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { + unsigned long flags; + + spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); + spin_unlock_irqrestore(&func_buf_lock, flags); + } else pr_err("k_fn called with value=%d\n", value); } @@ -1990,13 +1995,11 @@ out: #undef s #undef v -/* FIXME: This one needs untangling and locking */ +/* FIXME: This one needs untangling */ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { struct kbsentry *kbs; - char *p; u_char *q; - u_char __user *up; int sz, fnw_sz; int delta; char *first_free, *fj, *fnw; @@ -2022,23 +2025,19 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) i = kbs->kb_func; switch (cmd) { - case KDGKBSENT: - sz = sizeof(kbs->kb_string) - 1; /* sz should have been - a struct member */ - up = user_kdgkb->kb_string; - p = func_table[i]; - if(p) - for ( ; *p && sz; p++, sz--) - if (put_user(*p, up++)) { - ret = -EFAULT; - goto reterr; - } - if (put_user('\0', up)) { - ret = -EFAULT; - goto reterr; - } - kfree(kbs); - return ((p && *p) ? -EOVERFLOW : 0); + case KDGKBSENT: { + /* size should have been a struct member */ + ssize_t len = sizeof(user_kdgkb->kb_string); + + spin_lock_irqsave(&func_buf_lock, flags); + len = strlcpy(kbs->kb_string, func_table[i] ? : "", len); + spin_unlock_irqrestore(&func_buf_lock, flags); + + ret = copy_to_user(user_kdgkb->kb_string, kbs->kb_string, + len + 1) ? -EFAULT : 0; + + goto reterr; + } case KDSKBSENT: if (!perm) { ret = -EPERM; diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 758f522f331e..13ea0579f104 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4574,27 +4574,6 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) return rc; } -static int con_font_copy(struct vc_data *vc, struct console_font_op *op) -{ - int con = op->height; - int rc; - - - console_lock(); - if (vc->vc_mode != KD_TEXT) - rc = -EINVAL; - else if (!vc->vc_sw->con_font_copy) - rc = -ENOSYS; - else if (con < 0 || !vc_cons_allocated(con)) - rc = -ENOTTY; - else if (con == vc->vc_num) /* nothing to do */ - rc = 0; - else - rc = vc->vc_sw->con_font_copy(vc, con); - console_unlock(); - return rc; -} - int con_font_op(struct vc_data *vc, struct console_font_op *op) { switch (op->op) { @@ -4605,7 +4584,8 @@ int con_font_op(struct vc_data *vc, struct console_font_op *op) case KD_FONT_OP_SET_DEFAULT: return con_font_default(vc, op); case KD_FONT_OP_COPY: - return con_font_copy(vc, op); + /* was buggy and never really used */ + return -EINVAL; } return -ENOSYS; } diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 6a82030cf1ef..2e959563af53 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -244,7 +244,7 @@ int vt_waitactive(int n) static inline int -do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) +do_fontx_ioctl(struct vc_data *vc, int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) { struct consolefontdesc cfdarg; int i; @@ -262,15 +262,16 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - return con_font_op(vc_cons[fg_console].d, op); - case GIO_FONTX: { + return con_font_op(vc, op); + + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; op->width = 8; op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -278,7 +279,6 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) return -EFAULT; return 0; - } } return -EINVAL; } @@ -924,7 +924,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 0; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -935,7 +935,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 32; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -952,7 +952,7 @@ int vt_ioctl(struct tty_struct *tty, case PIO_FONTX: case GIO_FONTX: - ret = do_fontx_ioctl(cmd, up, perm, &op); + ret = do_fontx_ioctl(vc, cmd, up, perm, &op); break; case PIO_FONTRESET: @@ -969,11 +969,11 @@ int vt_ioctl(struct tty_struct *tty, { op.op = KD_FONT_OP_SET_DEFAULT; op.data = NULL; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); if (ret) break; console_lock(); - con_set_default_unimap(vc_cons[fg_console].d); + con_set_default_unimap(vc); console_unlock(); break; } @@ -1100,8 +1100,9 @@ struct compat_consolefontdesc { }; static inline int -compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) +compat_fontx_ioctl(struct vc_data *vc, int cmd, + struct compat_consolefontdesc __user *user_cfd, + int perm, struct console_font_op *op) { struct compat_consolefontdesc cfdarg; int i; @@ -1119,7 +1120,8 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc_cons[fg_console].d, op); + return con_font_op(vc, op); + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; @@ -1127,7 +1129,7 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -1218,7 +1220,7 @@ long vt_compat_ioctl(struct tty_struct *tty, */ case PIO_FONTX: case GIO_FONTX: - ret = compat_fontx_ioctl(cmd, up, perm, &op); + ret = compat_fontx_ioctl(vc, cmd, up, perm, &op); break; case KDFONTOP: diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 9c788748bdc6..0e3e16c51d3a 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -413,10 +413,10 @@ static int uio_get_minor(struct uio_device *idev) return retval; } -static void uio_free_minor(struct uio_device *idev) +static void uio_free_minor(unsigned long minor) { mutex_lock(&minor_lock); - idr_remove(&uio_idr, idev->minor); + idr_remove(&uio_idr, minor); mutex_unlock(&minor_lock); } @@ -988,7 +988,7 @@ err_request_irq: err_uio_dev_add_attributes: device_del(&idev->dev); err_device_create: - uio_free_minor(idev); + uio_free_minor(idev->minor); put_device(&idev->dev); return ret; } @@ -1002,13 +1002,13 @@ EXPORT_SYMBOL_GPL(__uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; + unsigned long minor; if (!info || !info->uio_dev) return; idev = info->uio_dev; - - uio_free_minor(idev); + minor = idev->minor; mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1021,6 +1021,8 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); + uio_free_minor(minor); + return; } EXPORT_SYMBOL_GPL(uio_unregister_device); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7376f74a4f04..e0d8da4e3967 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -508,6 +508,7 @@ static void acm_read_bulk_callback(struct urb *urb) "%s - cooling babbling device\n", __func__); usb_mark_last_busy(acm->dev); set_bit(rb->index, &acm->urbs_in_error_delay); + set_bit(ACM_ERROR_DELAY, &acm->flags); cooldown = true; break; default: @@ -533,7 +534,7 @@ static void acm_read_bulk_callback(struct urb *urb) if (stopped || stalled || cooldown) { if (stalled) - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); else if (cooldown) schedule_delayed_work(&acm->dwork, HZ / 2); return; @@ -568,13 +569,13 @@ static void acm_write_bulk(struct urb *urb) acm_write_done(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); set_bit(EVENT_TTY_WAKEUP, &acm->flags); - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); } static void acm_softint(struct work_struct *work) { int i; - struct acm *acm = container_of(work, struct acm, work); + struct acm *acm = container_of(work, struct acm, dwork.work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { smp_mb(); /* against acm_suspend() */ @@ -590,7 +591,7 @@ static void acm_softint(struct work_struct *work) if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { for (i = 0; i < acm->rx_buflimit; i++) if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) - acm_submit_read_urb(acm, i, GFP_NOIO); + acm_submit_read_urb(acm, i, GFP_KERNEL); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) @@ -1396,7 +1397,6 @@ made_compressed_probe: acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; - INIT_WORK(&acm->work, acm_softint); INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); @@ -1606,7 +1606,6 @@ static void acm_disconnect(struct usb_interface *intf) } acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); @@ -1649,7 +1648,6 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) return 0; acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); acm->urbs_in_error_delay = 0; @@ -1740,6 +1738,15 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 30380d28a504..d8f8651425c4 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -111,8 +111,7 @@ struct acm { # define ACM_ERROR_DELAY 3 unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */ struct usb_cdc_line_coding line; /* bits, stop, parity */ - struct work_struct work; /* work queue entry for various purposes*/ - struct delayed_work dwork; /* for cool downs needed in error recovery */ + struct delayed_work dwork; /* work queue entry for various purposes */ unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */ unsigned int ctrlout; /* output control lines (DTR, RTS) */ struct async_icount iocount; /* counters for control line changes */ diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 00204824bffd..732e7f1687dd 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -463,11 +463,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1927,7 +1927,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -1944,7 +1944,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2070,7 +2070,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2087,7 +2087,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2512,7 +2512,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %px\n", __func__, p); ret = proc_unlinkurb(ps, p); break; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a313e16c2987..56d8f93831d4 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -352,6 +352,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Guillemot Webcam Hercules Dualpix Exchange*/ { USB_DEVICE(0x06f8, 0x3005), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Guillemot Hercules DJ Console audio card (BZ 208357) */ + { USB_DEVICE(0x06f8, 0xb000), .driver_info = + USB_QUIRK_ENDPOINT_BLACKLIST }, + /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -429,6 +433,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ + { USB_DEVICE(0x17ef, 0xa012), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, @@ -529,6 +537,8 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. */ static const struct usb_device_id usb_endpoint_blacklist[] = { + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x01 }, + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x81 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0208, 1), .driver_info = 0x85 }, { } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 64dff7aa312d..fe68a35be3b9 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1690,6 +1690,17 @@ static int dwc3_probe(struct platform_device *pdev) err5: dwc3_event_buffers_cleanup(dwc); + + usb_phy_shutdown(dwc->usb2_phy); + usb_phy_shutdown(dwc->usb3_phy); + phy_exit(dwc->usb2_generic_phy); + phy_exit(dwc->usb3_generic_phy); + + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + phy_power_off(dwc->usb2_generic_phy); + phy_power_off(dwc->usb3_generic_phy); + dwc3_ulpi_exit(dwc); err4: @@ -1728,9 +1739,9 @@ static int dwc3_remove(struct platform_device *pdev) dwc3_core_exit(dwc); dwc3_ulpi_exit(dwc); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_allow(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); dwc3_free_event_buffers(dwc); dwc3_free_scratch_buffers(dwc); diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index d101eb706e4e..154dadcefbd9 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -957,12 +957,16 @@ static void dwc3_ep0_xfer_complete(struct dwc3 *dwc, static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req) { + unsigned int trb_length = 0; int ret; req->direction = !!dep->number; if (req->request.length == 0) { - dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 0, + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + + dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) @@ -1009,9 +1013,12 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->trb = &dwc->ep0_trb[dep->trb_enqueue - 1]; + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + /* Now prepare one extra TRB to align transfer size */ dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, - 0, DWC3_TRBCTL_CONTROL_DATA, + trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else { diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2cc7b9be451c..a028699b65bb 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1216,6 +1216,8 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, struct scatterlist *s; int i; unsigned int length = req->request.length; + unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); + unsigned int rem = length % maxp; unsigned int remaining = req->request.num_mapped_sgs - req->num_queued_sgs; @@ -1227,8 +1229,6 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, length -= sg_dma_len(s); for_each_sg(sg, s, remaining, i) { - unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); - unsigned int rem = length % maxp; unsigned int trb_length; unsigned chain = true; @@ -2759,6 +2759,12 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event, status); + req->request.actual = req->request.length - req->remaining; + + if (!dwc3_gadget_ep_request_completed(req) && + !usb_endpoint_xfer_isoc(dep->endpoint.desc)) + goto out; + if (req->needs_extra_trb) { unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); @@ -2774,27 +2780,22 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } - req->request.actual = req->request.length - req->remaining; + if (event->status & DEPEVT_STATUS_MISSED_ISOC && + usb_endpoint_xfer_isoc(dep->endpoint.desc)) { + /* + * unmap isoc request and move the request + * to the pending list to wait for kicking + * transfer again. + */ + req->remaining = 0; + req->needs_extra_trb = false; + dwc3_gadget_move_queued_request(req); + if (req->trb) + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, + req->direction); + req->trb = NULL; - if (!dwc3_gadget_ep_request_completed(req) || - event->status & DEPEVT_STATUS_MISSED_ISOC) { - if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) { - /* - * unmap isoc request and move the request - * to the pending list to wait for kicking - * transfer again. - */ - req->remaining = 0; - req->needs_extra_trb = false; - dwc3_gadget_move_queued_request(req); - if (req->trb) - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, - req->direction); - req->trb = NULL; - } else { - __dwc3_gadget_kick_transfer(dep); - } goto out; } @@ -2820,6 +2821,24 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, } } +static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) +{ + struct dwc3_request *req; + + if (!list_empty(&dep->pending_list)) + return true; + + /* + * We only need to check the first entry of the started list. We can + * assume the completed requests are removed from the started list. + */ + req = next_request(&dep->started_list); + if (!req) + return false; + + return !dwc3_gadget_ep_request_completed(req); +} + static void dwc3_gadget_endpoint_frame_from_event(struct dwc3_ep *dep, const struct dwc3_event_depevt *event) { @@ -2845,6 +2864,8 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep, if (event->status & DEPEVT_STATUS_MISSED_ISOC && list_empty(&dep->started_list)) dwc3_stop_active_transfer(dep, true, true); + else if (dwc3_gadget_ep_should_continue(dep)) + __dwc3_gadget_kick_transfer(dep); /* * WORKAROUND: This is the 2nd half of U1/U2 -> U0 workaround. diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index e92bfe0809d2..bf076d3b1cd0 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -213,7 +213,7 @@ void usb_ep_autoconfig_reset (struct usb_gadget *gadget) EXPORT_SYMBOL_GPL(usb_ep_autoconfig_reset); /** - * usb_ep_autoconfig_by_name - Used to pick the endpoint by name. eg ep1in-gsi + * usb_ep_autoconfig_by_name - Used to pick the endpoint by name. eg gsi-epin1 * @gadget: The device to which the endpoint must belong. * @desc: Endpoint descriptor, with endpoint direction and transfer mode * initialized. @@ -229,8 +229,12 @@ struct usb_ep *usb_ep_autoconfig_by_name( struct usb_ep *ep; bool ep_found = false; + if (!ep_name || !strlen(ep_name)) + goto err; + list_for_each_entry(ep, &gadget->ep_list, ep_list) - if (strcmp(ep->name, ep_name) == 0 && !ep->driver_data) { + if (strncmp(ep->name, ep_name, strlen(ep_name)) == 0 && + !ep->driver_data) { ep_found = true; break; } @@ -245,6 +249,7 @@ struct usb_ep *usb_ep_autoconfig_by_name( return ep; } +err: pr_err("%s:error finding ep %s\n", __func__, ep_name); return NULL; } diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 4713a1c7f622..822d45eda51d 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1380,7 +1380,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) midi->id = kstrdup(opts->id, GFP_KERNEL); if (opts->id && !midi->id) { status = -ENOMEM; - goto setup_fail; + goto midi_free; } midi->in_ports = opts->in_ports; midi->out_ports = opts->out_ports; @@ -1392,7 +1392,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) status = kfifo_alloc(&midi->in_req_fifo, midi->qlen, GFP_KERNEL); if (status) - goto setup_fail; + goto midi_free; spin_lock_init(&midi->transmit_lock); @@ -1409,9 +1409,13 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) fi->f = &midi->func; return &midi->func; +midi_free: + if (midi) + kfree(midi->id); + kfree(midi); setup_fail: mutex_unlock(&opts->lock); - kfree(midi); + return ERR_PTR(status); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 25d417ad9000..09ed5f02c24f 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -2039,6 +2039,9 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) return 0; Enomem: + kfree(CHIP); + CHIP = NULL; + return -ENOMEM; } diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index c3721225b61e..b706ad3034bc 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -1757,6 +1757,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + pci_set_drvdata(pdev, dev); spin_lock_init(&dev->lock); dev->pdev = pdev; dev->gadget.ops = &goku_ops; @@ -1790,7 +1791,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } dev->regs = (struct goku_udc_regs __iomem *) base; - pci_set_drvdata(pdev, dev); INFO(dev, "%s\n", driver_desc); INFO(dev, "version: " DRIVER_VERSION " %s\n", dmastr()); INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base); diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 677f9d592109..de922022b83a 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -94,10 +94,13 @@ static struct platform_device *fsl_usb2_device_register( pdev->dev.coherent_dma_mask = ofdev->dev.coherent_dma_mask; - if (!pdev->dev.dma_mask) + if (!pdev->dev.dma_mask) { pdev->dev.dma_mask = &ofdev->dev.coherent_dma_mask; - else - dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + } else { + retval = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (retval) + goto error; + } retval = platform_device_add_data(pdev, pdata, sizeof(*pdata)); if (retval) diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 3c4abb5a1c3f..73aba464b66a 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -241,7 +241,7 @@ static int xhci_histb_probe(struct platform_device *pdev) /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) - return ret; + goto disable_pm; hcd = usb_create_hcd(driver, dev, dev_name(dev)); if (!hcd) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1a6a23e57201..0c6b6f14b169 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -21,6 +21,8 @@ #define SSIC_PORT_CFG2_OFFSET 0x30 #define PROG_DONE (1 << 30) #define SSIC_PORT_UNUSED (1 << 31) +#define SPARSE_DISABLE_BIT 17 +#define SPARSE_CNTL_ENABLE 0xC12C /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 @@ -141,6 +143,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == 0x15e0 || pdev->device == 0x15e1)) xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) + xhci->quirks |= XHCI_DISABLE_SPARSE; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; @@ -441,6 +446,15 @@ static void xhci_pme_quirk(struct usb_hcd *hcd) readl(reg); } +static void xhci_sparse_control_quirk(struct usb_hcd *hcd) +{ + u32 reg; + + reg = readl(hcd->regs + SPARSE_CNTL_ENABLE); + reg &= ~BIT(SPARSE_DISABLE_BIT); + writel(reg, hcd->regs + SPARSE_CNTL_ENABLE); +} + static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -460,6 +474,9 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) xhci_ssic_port_unused_quirk(hcd, true); + if (xhci->quirks & XHCI_DISABLE_SPARSE) + xhci_sparse_control_quirk(hcd); + ret = xhci_suspend(xhci, do_wakeup); if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED)) xhci_ssic_port_unused_quirk(hcd, false); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8fc84db8b2f4..15313529f1f6 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1002,8 +1002,6 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) xhci->shared_hcd->state != HC_STATE_SUSPENDED) return -EINVAL; - xhci_dbc_suspend(xhci); - /* Clear root port wake on bits if wakeup not allowed. */ if (!do_wakeup) xhci_disable_port_wake_on_bits(xhci); @@ -1015,6 +1013,11 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) if (xhci->quirks & XHCI_WARM_RESET_ON_SUSPEND) xhci_warm_port_reset_quirk(xhci); + if (!HCD_HW_ACCESSIBLE(hcd)) + return 0; + + xhci_dbc_suspend(xhci); + /* Don't poll the roothubs on bus suspend. */ xhci_dbg(xhci, "%s: stopping port polling.\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 77f13a2d0245..56a618616323 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1887,7 +1887,8 @@ struct xhci_hcd { #define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) #define XHCI_TRB_ENT_QUIRK BIT_ULL(36) #define XHCI_DIS_AUTOSUSPEND BIT_ULL(37) -#define XHCI_WARM_RESET_ON_SUSPEND BIT_ULL(38) +#define XHCI_DISABLE_SPARSE BIT_ULL(38) +#define XHCI_WARM_RESET_ON_SUSPEND BIT_ULL(39) unsigned int num_active_eps; unsigned int limit_active_eps; diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index b8073f36ffdc..62fdfde4ad03 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -209,6 +209,7 @@ static void adu_interrupt_out_callback(struct urb *urb) if (status != 0) { if ((status != -ENOENT) && + (status != -ESHUTDOWN) && (status != -ECONNRESET)) { dev_dbg(&dev->udev->dev, "%s :nonzero status received: %d\n", __func__, diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index bbcd3332471d..3828ed6d38a2 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -573,6 +573,7 @@ static int mtu3_gadget_stop(struct usb_gadget *g) spin_unlock_irqrestore(&mtu->lock, flags); + synchronize_irq(mtu->irq); return 0; } diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index ebd76ab07b72..36dd688b5795 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -357,11 +357,12 @@ static void cyberjack_write_bulk_callback(struct urb *urb) struct device *dev = &port->dev; int status = urb->status; unsigned long flags; + bool resubmitted = false; - set_bit(0, &port->write_urbs_free); if (status) { dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); + set_bit(0, &port->write_urbs_free); return; } @@ -394,6 +395,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) goto exit; } + resubmitted = true; + dev_dbg(dev, "%s - priv->wrsent=%d\n", __func__, priv->wrsent); dev_dbg(dev, "%s - priv->wrfilled=%d\n", __func__, priv->wrfilled); @@ -410,6 +413,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) exit: spin_unlock_irqrestore(&priv->lock, flags); + if (!resubmitted) + set_bit(0, &port->write_urbs_free); usb_serial_port_softint(port); } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b8a2317b9b55..1eabca11b4c4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -250,6 +250,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_EC200T 0x6026 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1118,6 +1119,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -1190,6 +1192,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1054, 0xff), /* Telit FT980-KS */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -1202,6 +1206,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1216,6 +1222,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1230, 0xff), /* Telit LE910Cx (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c index 74cb3c2ecb34..c950171556d8 100644 --- a/drivers/usb/typec/bus.c +++ b/drivers/usb/typec/bus.c @@ -192,7 +192,10 @@ EXPORT_SYMBOL_GPL(typec_altmode_vdm); const struct typec_altmode * typec_altmode_get_partner(struct typec_altmode *adev) { - return adev ? &to_altmode(adev)->partner->adev : NULL; + if (!adev || !to_altmode(adev)->partner) + return NULL; + + return &to_altmode(adev)->partner->adev; } EXPORT_SYMBOL_GPL(typec_altmode_get_partner); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 73ddf1589b84..015623a9b485 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2724,12 +2724,12 @@ static void tcpm_reset_port(struct tcpm_port *port) static void tcpm_detach(struct tcpm_port *port) { - if (!port->attached) - return; - if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; + if (!port->attached) + return; + tcpm_reset_port(port); } @@ -3483,7 +3483,7 @@ static void run_state_machine(struct tcpm_port *port) */ tcpm_set_pwr_role(port, TYPEC_SOURCE); tcpm_pd_send_control(port, PD_CTRL_PS_RDY); - tcpm_set_state(port, SRC_STARTUP, 0); + tcpm_set_state(port, SRC_STARTUP, PD_T_SWAP_SRC_START); break; case VCONN_SWAP_ACCEPT: diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index c0cd824be2b7..460760d0becf 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -273,7 +273,7 @@ static int vfio_platform_open(void *device_data) ret = pm_runtime_get_sync(vdev->device); if (ret < 0) - goto err_pm; + goto err_rst; ret = vfio_platform_call_reset(vdev, &extra_dbg); if (ret && vdev->reset_required) { @@ -290,7 +290,6 @@ static int vfio_platform_open(void *device_data) err_rst: pm_runtime_put(vdev->device); -err_pm: vfio_platform_irq_cleanup(vdev); err_irq: vfio_platform_regions_cleanup(vdev); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index a94d700a4503..59c61744dcc1 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -273,13 +273,14 @@ __vringh_iov(struct vringh *vrh, u16 i, desc_max = vrh->vring.num; up_next = -1; + /* You must want something! */ + if (WARN_ON(!riov && !wiov)) + return -EINVAL; + if (riov) riov->i = riov->used = 0; - else if (wiov) + if (wiov) wiov->i = wiov->used = 0; - else - /* You must want something! */ - BUG(); for (;;) { void *addr; diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 403d8cd3e582..56e70f12c996 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -712,7 +712,12 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto err1; } - fb_virt = ioremap(par->mem->start, screen_fb_size); + /* + * Map the VRAM cacheable for performance. This is also required for + * VM Connect to display properly for ARM64 Linux VM, as the host also + * maps the VRAM cacheable. + */ + fb_virt = ioremap_cache(par->mem->start, screen_fb_size); if (!fb_virt) goto err2; diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 8a53d1de611d..3fd2cb4cdfa9 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -1027,6 +1027,8 @@ static int __init pvr2fb_setup(char *options) if (!options || !*options) return 0; + cable_arg[0] = output_arg[0] = 0; + while ((this_opt = strsep(&options, ","))) { if (!*this_opt) continue; diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 50b46c4399ea..075454053e5e 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -48,12 +48,12 @@ struct mxc_w1_device { static u8 mxc_w1_ds2_reset_bus(void *data) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_RPP, dev->regs + MXC_W1_CONTROL); /* Wait for reset sequence 511+512us, use 1500us for sure */ - timeout = jiffies + usecs_to_jiffies(1500); + timeout = ktime_add_us(ktime_get(), 1500); udelay(511 + 512); @@ -63,7 +63,7 @@ static u8 mxc_w1_ds2_reset_bus(void *data) /* PST bit is valid after the RPP bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_RPP)) return !(ctrl & MXC_W1_CONTROL_PST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 1; } @@ -76,12 +76,12 @@ static u8 mxc_w1_ds2_reset_bus(void *data) static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_WR(bit), dev->regs + MXC_W1_CONTROL); /* Wait for read/write bit (60us, Max 120us), use 200us for sure */ - timeout = jiffies + usecs_to_jiffies(200); + timeout = ktime_add_us(ktime_get(), 200); udelay(60); @@ -91,7 +91,7 @@ static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) /* RDST bit is valid after the WR1/RD bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_WR(bit))) return !!(ctrl & MXC_W1_CONTROL_RDST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 0; } diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index a281aa84bfb1..4c3b4ea4e17f 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -244,6 +244,8 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.sb_pdev = pdata->sb_pdev; rdc321x_wdt_device.base_reg = r->start; + rdc321x_wdt_device.queue = 0; + rdc321x_wdt_device.default_ticks = ticks; err = misc_register(&rdc321x_wdt_misc); if (err < 0) { @@ -258,14 +260,11 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.base_reg, RDC_WDT_RST); init_completion(&rdc321x_wdt_device.stop); - rdc321x_wdt_device.queue = 0; clear_bit(0, &rdc321x_wdt_device.inuse); timer_setup(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0); - rdc321x_wdt_device.default_ticks = ticks; - dev_info(&pdev->dev, "watchdog init success\n"); return 0; diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 8edef51c92e5..f026624898e7 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port) BUG_ON(!irqs_disabled()); + smp_wmb(); /* All writes before unmask must be visible. */ + if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; else { @@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu, * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. */ -static void evtchn_2l_handle_events(unsigned cpu) +static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) { int irq; xen_ulong_t pending_words; @@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu) /* Process port. */ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; - irq = get_evtchn_to_irq(port); - - if (irq != -1) - generic_handle_irq(irq); + handle_irq_for_port(port, ctrl); bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 95e5a9300ff0..aca845675279 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -32,6 +32,10 @@ #include #include #include +#include +#include +#include +#include #ifdef CONFIG_X86 #include @@ -61,6 +65,15 @@ #include "events_internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly event_loop_timeout = 2; +module_param(event_loop_timeout, uint, 0644); + +static uint __read_mostly event_eoi_delay = 10; +module_param(event_eoi_delay, uint, 0644); + const struct evtchn_ops *evtchn_ops; /* @@ -69,6 +82,24 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + * percpu eoi_list_lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -90,18 +121,23 @@ static bool (*pirq_needs_eoi)(unsigned irq); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) +static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + static struct irq_chip xen_dynamic_chip; +static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static DEFINE_PER_CPU(unsigned int, irq_epoch); + static void clear_evtchn_to_irq_row(unsigned row) { unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_to_irq[row][col], -1); } static void clear_evtchn_to_irq_all(void) @@ -138,7 +174,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -148,13 +184,24 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ struct irq_info *info_for_irq(unsigned irq) { - return irq_get_chip_data(irq); + if (irq < nr_legacy_irqs()) + return legacy_info_ptrs[irq]; + else + return irq_get_chip_data(irq); +} + +static void set_info_for_irq(unsigned int irq, struct irq_info *info) +{ + if (irq < nr_legacy_irqs()) + legacy_info_ptrs[irq] = info; + else + irq_set_chip_data(irq, info); } /* Constructors for packed IRQ information. */ @@ -246,10 +293,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -360,9 +411,157 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +struct lateeoi_work { + struct delayed_work delayed; + spinlock_t eoi_list_lock; + struct list_head eoi_list; +}; + +static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); + +static void lateeoi_list_del(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + unsigned long flags; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + list_del_init(&info->eoi_list); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void lateeoi_list_add(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + struct irq_info *elem; + u64 now = get_jiffies_64(); + unsigned long delay; + unsigned long flags; + + if (now < info->eoi_time) + delay = info->eoi_time - now; + else + delay = 1; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + + if (list_empty(&eoi->eoi_list)) { + list_add(&info->eoi_list, &eoi->eoi_list); + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, delay); + } else { + list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { + if (elem->eoi_time <= info->eoi_time) + break; + } + list_add(&info->eoi_list, &elem->eoi_list); + } + + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) +{ + evtchn_port_t evtchn; + unsigned int cpu; + unsigned int delay = 0; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) + return; + + if (spurious) { + if ((1 << info->spurious_cnt) < (HZ << 2)) + info->spurious_cnt++; + if (info->spurious_cnt > 1) { + delay = 1 << (info->spurious_cnt - 2); + if (delay > HZ) + delay = HZ; + if (!info->eoi_time) + info->eoi_cpu = smp_processor_id(); + info->eoi_time = get_jiffies_64() + delay; + } + } else { + info->spurious_cnt = 0; + } + + cpu = info->eoi_cpu; + if (info->eoi_time && + (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { + lateeoi_list_add(info); + return; + } + + info->eoi_time = 0; + unmask_evtchn(evtchn); +} + +static void xen_irq_lateeoi_worker(struct work_struct *work) +{ + struct lateeoi_work *eoi; + struct irq_info *info; + u64 now = get_jiffies_64(); + unsigned long flags; + + eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); + + read_lock_irqsave(&evtchn_rwlock, flags); + + while (true) { + spin_lock(&eoi->eoi_list_lock); + + info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + + if (info == NULL || now < info->eoi_time) { + spin_unlock(&eoi->eoi_list_lock); + break; + } + + list_del_init(&info->eoi_list); + + spin_unlock(&eoi->eoi_list_lock); + + info->eoi_time = 0; + + xen_irq_lateeoi_locked(info, false); + } + + if (info) + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, info->eoi_time - now); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} + +static void xen_cpu_init_eoi(unsigned int cpu) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); + + INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); + spin_lock_init(&eoi->eoi_list_lock); + INIT_LIST_HEAD(&eoi->eoi_list); +} + +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) +{ + struct irq_info *info; + unsigned long flags; + + read_lock_irqsave(&evtchn_rwlock, flags); + + info = info_for_irq(irq); + + if (info) + xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} +EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) { struct irq_info *info; + #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); @@ -375,8 +574,9 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; - irq_set_chip_data(irq, info); + set_info_for_irq(irq, info); + INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); } @@ -424,17 +624,25 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + + if (!list_empty(&info->eoi_list)) + lateeoi_list_del(info); + list_del(&info->list); - irq_set_chip_data(irq, NULL); + set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -601,7 +809,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (info->refcnt > 0) { info->refcnt--; @@ -826,7 +1034,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) { int irq; int ret; @@ -843,7 +1051,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); ret = xen_irq_info_evtchn_setup(irq, evtchn); @@ -864,8 +1072,19 @@ out: return irq; } + +int bind_evtchn_to_irq(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -907,8 +1126,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) +static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, + evtchn_port_t remote_port, + struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -919,10 +1139,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); - return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); + return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, + chip); +} + +int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); + static int find_virq(unsigned int virq, unsigned int cpu) { struct evtchn_status status; @@ -1018,14 +1254,15 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) +static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id, + struct irq_chip *chip) { int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_evtchn_to_irq_chip(evtchn, chip); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1036,31 +1273,76 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, return irq; } + +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); + +static int bind_interdomain_evtchn_to_irqhandler_chip( + unsigned int remote_domain, evtchn_port_t remote_port, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, struct irq_chip *chip) +{ + int irq, retval; + + irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + chip); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - int irq, retval; - - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); - if (irq < 0) - return irq; - - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1105,7 +1387,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; @@ -1139,7 +1421,7 @@ int evtchn_make_refcounted(unsigned int evtchn) if (irq == -1) return -ENOENT; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) return -ENOENT; @@ -1167,13 +1449,13 @@ int evtchn_get(unsigned int evtchn) if (irq == -1) goto done; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) goto done; err = -EINVAL; - if (info->refcnt <= 0) + if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) goto done; info->refcnt++; @@ -1212,6 +1494,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +struct evtchn_loop_ctrl { + ktime_t timeout; + unsigned count; + bool defer_eoi; +}; + +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) +{ + int irq; + struct irq_info *info; + + irq = get_evtchn_to_irq(port); + if (irq == -1) + return; + + /* + * Check for timeout every 256 events. + * We are setting the timeout value only after the first 256 + * events in order to not hurt the common case of few loop + * iterations. The 256 is basically an arbitrary value. + * + * In case we are hitting the timeout we need to defer all further + * EOIs in order to ensure to leave the event handling loop rather + * sooner than later. + */ + if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { + ktime_t kt = ktime_get(); + + if (!ctrl->timeout) { + kt = ktime_add_ms(kt, + jiffies_to_msecs(event_loop_timeout)); + ctrl->timeout = kt; + } else if (kt > ctrl->timeout) { + ctrl->defer_eoi = true; + } + } + + info = info_for_irq(irq); + + if (ctrl->defer_eoi) { + info->eoi_cpu = smp_processor_id(); + info->irq_epoch = __this_cpu_read(irq_epoch); + info->eoi_time = get_jiffies_64() + event_eoi_delay; + } + + generic_handle_irq(irq); +} + static DEFINE_PER_CPU(unsigned, xed_nesting_count); static void __xen_evtchn_do_upcall(void) @@ -1219,6 +1549,9 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = get_cpu(); unsigned count; + struct evtchn_loop_ctrl ctrl = { 0 }; + + read_lock(&evtchn_rwlock); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1226,7 +1559,7 @@ static void __xen_evtchn_do_upcall(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; - xen_evtchn_handle_events(cpu); + xen_evtchn_handle_events(cpu, &ctrl); BUG_ON(!irqs_disabled()); @@ -1235,6 +1568,14 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: + read_unlock(&evtchn_rwlock); + + /* + * Increment irq_epoch only now to defer EOIs only for + * xen_irq_lateeoi() invocations occurring from inside the loop + * above. + */ + __this_cpu_inc(irq_epoch); put_cpu(); } @@ -1601,6 +1942,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_retrigger = retrigger_dynirq, }; +static struct irq_chip xen_lateeoi_chip __read_mostly = { + /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ + .name = "xen-dyn-lateeoi", + + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = mask_ack_dynirq, + .irq_mask_ack = mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_pirq_chip __read_mostly = { .name = "xen-pirq", @@ -1667,12 +2023,31 @@ void xen_callback_vector(void) void xen_callback_vector(void) {} #endif -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "xen." - static bool fifo_events = true; module_param(fifo_events, bool, 0); +static int xen_evtchn_cpu_prepare(unsigned int cpu) +{ + int ret = 0; + + xen_cpu_init_eoi(cpu); + + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + + return ret; +} + +static int xen_evtchn_cpu_dead(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_deinit) + ret = evtchn_ops->percpu_deinit(cpu); + + return ret; +} + void __init xen_init_IRQ(void) { int ret = -EINVAL; @@ -1683,6 +2058,12 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + xen_cpu_init_eoi(smp_processor_id()); + + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, + "xen/evtchn:prepare", + xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 76b318e88382..33462521bfd0 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsigned port) return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } /* - * Clear MASKED, spinning if BUSY is set. + * Clear MASKED if not PENDING, spinning if BUSY is set. + * Return true if mask was cleared. */ -static void clear_masked(volatile event_word_t *word) +static bool clear_masked_cond(volatile event_word_t *word) { event_word_t new, old, w; w = *word; do { + if (w & (1 << EVTCHN_FIFO_PENDING)) + return false; + old = w & ~(1 << EVTCHN_FIFO_BUSY); new = old & ~(1 << EVTCHN_FIFO_MASKED); w = sync_cmpxchg(word, old, new); } while (w != old); + + return true; } static void evtchn_fifo_unmask(unsigned port) @@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned port) BUG_ON(!irqs_disabled()); - clear_masked(word); - if (evtchn_fifo_is_pending(port)) { + if (!clear_masked_cond(word)) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } @@ -270,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word) return w & EVTCHN_FIFO_LINK_MASK; } -static void handle_irq_for_port(unsigned port) -{ - int irq; - - irq = get_evtchn_to_irq(port); - if (irq != -1) - generic_handle_irq(irq); -} - -static void consume_one_event(unsigned cpu, +static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready, - bool drop) + unsigned priority, unsigned long *ready) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -315,16 +310,17 @@ static void consume_one_event(unsigned cpu, clear_bit(priority, ready); if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { - if (unlikely(drop)) + if (unlikely(!ctrl)) pr_warn("Dropping pending event for port %u\n", port); else - handle_irq_for_port(port); + handle_irq_for_port(port, ctrl); } q->head[priority] = head; } -static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) +static void __evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -336,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready, drop); + consume_one_event(cpu, ctrl, control_block, q, &ready); ready |= xchg(&control_block->ready, 0); } } -static void evtchn_fifo_handle_events(unsigned cpu) +static void evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - __evtchn_fifo_handle_events(cpu, false); + __evtchn_fifo_handle_events(cpu, ctrl); } static void evtchn_fifo_resume(void) @@ -380,21 +377,6 @@ static void evtchn_fifo_resume(void) event_array_pages = 0; } -static const struct evtchn_ops evtchn_ops_fifo = { - .max_channels = evtchn_fifo_max_channels, - .nr_channels = evtchn_fifo_nr_channels, - .setup = evtchn_fifo_setup, - .bind_to_cpu = evtchn_fifo_bind_to_cpu, - .clear_pending = evtchn_fifo_clear_pending, - .set_pending = evtchn_fifo_set_pending, - .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, - .mask = evtchn_fifo_mask, - .unmask = evtchn_fifo_unmask, - .handle_events = evtchn_fifo_handle_events, - .resume = evtchn_fifo_resume, -}; - static int evtchn_fifo_alloc_control_block(unsigned cpu) { void *control_block = NULL; @@ -417,19 +399,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu) return ret; } -static int xen_evtchn_cpu_prepare(unsigned int cpu) +static int evtchn_fifo_percpu_init(unsigned int cpu) { if (!per_cpu(cpu_control_block, cpu)) return evtchn_fifo_alloc_control_block(cpu); return 0; } -static int xen_evtchn_cpu_dead(unsigned int cpu) +static int evtchn_fifo_percpu_deinit(unsigned int cpu) { - __evtchn_fifo_handle_events(cpu, true); + __evtchn_fifo_handle_events(cpu, NULL); return 0; } +static const struct evtchn_ops evtchn_ops_fifo = { + .max_channels = evtchn_fifo_max_channels, + .nr_channels = evtchn_fifo_nr_channels, + .setup = evtchn_fifo_setup, + .bind_to_cpu = evtchn_fifo_bind_to_cpu, + .clear_pending = evtchn_fifo_clear_pending, + .set_pending = evtchn_fifo_set_pending, + .is_pending = evtchn_fifo_is_pending, + .test_and_set_mask = evtchn_fifo_test_and_set_mask, + .mask = evtchn_fifo_mask, + .unmask = evtchn_fifo_unmask, + .handle_events = evtchn_fifo_handle_events, + .resume = evtchn_fifo_resume, + .percpu_init = evtchn_fifo_percpu_init, + .percpu_deinit = evtchn_fifo_percpu_deinit, +}; + int __init xen_evtchn_fifo_init(void) { int cpu = smp_processor_id(); @@ -443,9 +442,5 @@ int __init xen_evtchn_fifo_init(void) evtchn_ops = &evtchn_ops_fifo; - cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, - "xen/evtchn:prepare", - xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); - return ret; } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 50c2050a1e32..b9b4f5919893 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -32,11 +32,16 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; - int refcnt; + struct list_head eoi_list; + short refcnt; + short spurious_cnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned int evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ + unsigned short eoi_cpu; /* EOI must happen on this cpu */ + unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ + u64 eoi_time; /* Time in jiffies when to EOI. */ union { unsigned short virq; @@ -55,6 +60,8 @@ struct irq_info { #define PIRQ_SHAREABLE (1 << 1) #define PIRQ_MSI_GROUP (1 << 2) +struct evtchn_loop_ctrl; + struct evtchn_ops { unsigned (*max_channels)(void); unsigned (*nr_channels)(void); @@ -69,14 +76,18 @@ struct evtchn_ops { void (*mask)(unsigned port); void (*unmask)(unsigned port); - void (*handle_events)(unsigned cpu); + void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl); void (*resume)(void); + + int (*percpu_init)(unsigned int cpu); + int (*percpu_deinit)(unsigned int cpu); }; extern const struct evtchn_ops *evtchn_ops; extern int **evtchn_to_irq; int get_evtchn_to_irq(unsigned int evtchn); +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); struct irq_info *info_for_irq(unsigned irq); unsigned cpu_from_irq(unsigned irq); @@ -134,9 +145,10 @@ static inline void unmask_evtchn(unsigned port) return evtchn_ops->unmask(port); } -static inline void xen_evtchn_handle_events(unsigned cpu) +static inline void xen_evtchn_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - return evtchn_ops->handle_events(cpu); + return evtchn_ops->handle_events(cpu, ctrl); } static inline void xen_evtchn_resume(void) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 47c70b826a6a..4b11e60e37a3 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) "Interrupt for port %d, but apparently not enabled; per-user %p\n", evtchn->port, u); - disable_irq_nosync(irq); evtchn->enabled = false; spin_lock(&u->ring_prod_lock); @@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, evtchn = find_evtchn(u, port); if (evtchn && !evtchn->enabled) { evtchn->enabled = true; - enable_irq(irq_from_evtchn(port)); + xen_irq_lateeoi(irq_from_evtchn(port), 0); } } @@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, - u->name, evtchn); + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + u->name, evtchn); if (rc < 0) goto err; diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 398fd8b1639d..f94bb6034a5a 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -75,6 +75,7 @@ struct sock_mapping { atomic_t write; atomic_t io; atomic_t release; + atomic_t eoi; void (*saved_data_ready)(struct sock *sk); struct pvcalls_ioworker ioworker; }; @@ -96,7 +97,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, struct sock_mapping *map); -static void pvcalls_conn_back_read(void *opaque) +static bool pvcalls_conn_back_read(void *opaque) { struct sock_mapping *map = (struct sock_mapping *)opaque; struct msghdr msg; @@ -116,17 +117,17 @@ static void pvcalls_conn_back_read(void *opaque) virt_mb(); if (error) - return; + return false; size = pvcalls_queued(prod, cons, array_size); if (size >= array_size) - return; + return false; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) { atomic_set(&map->read, 0); spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); - return; + return true; } spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); wanted = array_size - size; @@ -150,7 +151,7 @@ static void pvcalls_conn_back_read(void *opaque) ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT); WARN_ON(ret > wanted); if (ret == -EAGAIN) /* shouldn't happen */ - return; + return true; if (!ret) ret = -ENOTCONN; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); @@ -169,10 +170,10 @@ static void pvcalls_conn_back_read(void *opaque) virt_wmb(); notify_remote_via_irq(map->irq); - return; + return true; } -static void pvcalls_conn_back_write(struct sock_mapping *map) +static bool pvcalls_conn_back_write(struct sock_mapping *map) { struct pvcalls_data_intf *intf = map->ring; struct pvcalls_data *data = &map->data; @@ -189,7 +190,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) array_size = XEN_FLEX_RING_SIZE(map->ring_order); size = pvcalls_queued(prod, cons, array_size); if (size == 0) - return; + return false; memset(&msg, 0, sizeof(msg)); msg.msg_flags |= MSG_DONTWAIT; @@ -207,12 +208,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) atomic_set(&map->write, 0); ret = inet_sendmsg(map->sock, &msg, size); - if (ret == -EAGAIN || (ret >= 0 && ret < size)) { + if (ret == -EAGAIN) { atomic_inc(&map->write); atomic_inc(&map->io); + return true; } - if (ret == -EAGAIN) - return; /* write the data, then update the indexes */ virt_wmb(); @@ -225,9 +225,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) } /* update the indexes, then notify the other end */ virt_wmb(); - if (prod != cons + ret) + if (prod != cons + ret) { atomic_inc(&map->write); + atomic_inc(&map->io); + } notify_remote_via_irq(map->irq); + + return true; } static void pvcalls_back_ioworker(struct work_struct *work) @@ -236,6 +240,7 @@ static void pvcalls_back_ioworker(struct work_struct *work) struct pvcalls_ioworker, register_work); struct sock_mapping *map = container_of(ioworker, struct sock_mapping, ioworker); + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; while (atomic_read(&map->io) > 0) { if (atomic_read(&map->release) > 0) { @@ -243,10 +248,18 @@ static void pvcalls_back_ioworker(struct work_struct *work) return; } - if (atomic_read(&map->read) > 0) - pvcalls_conn_back_read(map); - if (atomic_read(&map->write) > 0) - pvcalls_conn_back_write(map); + if (atomic_read(&map->read) > 0 && + pvcalls_conn_back_read(map)) + eoi_flags = 0; + if (atomic_read(&map->write) > 0 && + pvcalls_conn_back_write(map)) + eoi_flags = 0; + + if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) { + atomic_set(&map->eoi, 0); + xen_irq_lateeoi(map->irq, eoi_flags); + eoi_flags = XEN_EOI_FLAG_SPURIOUS; + } atomic_dec(&map->io); } @@ -343,12 +356,9 @@ static struct sock_mapping *pvcalls_new_active_socket( goto out; map->bytes = page; - ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, - evtchn, - pvcalls_back_conn_event, - 0, - "pvcalls-backend", - map); + ret = bind_interdomain_evtchn_to_irqhandler_lateeoi( + fedata->dev->otherend_id, evtchn, + pvcalls_back_conn_event, 0, "pvcalls-backend", map); if (ret < 0) goto out; map->irq = ret; @@ -882,15 +892,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) { struct xenbus_device *dev = dev_id; struct pvcalls_fedata *fedata = NULL; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (dev == NULL) - return IRQ_HANDLED; + if (dev) { + fedata = dev_get_drvdata(&dev->dev); + if (fedata) { + pvcalls_back_work(fedata); + eoi_flags = 0; + } + } - fedata = dev_get_drvdata(&dev->dev); - if (fedata == NULL) - return IRQ_HANDLED; + xen_irq_lateeoi(irq, eoi_flags); - pvcalls_back_work(fedata); return IRQ_HANDLED; } @@ -900,12 +913,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) struct pvcalls_ioworker *iow; if (map == NULL || map->sock == NULL || map->sock->sk == NULL || - map->sock->sk->sk_user_data != map) + map->sock->sk->sk_user_data != map) { + xen_irq_lateeoi(irq, 0); return IRQ_HANDLED; + } iow = &map->ioworker; atomic_inc(&map->write); + atomic_inc(&map->eoi); atomic_inc(&map->io); queue_work(iow->wq, &iow->register_work); @@ -940,7 +956,7 @@ static int backend_connect(struct xenbus_device *dev) goto error; } - err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (err < 0) goto error; fedata->irq = err; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 097410a7cdb7..adf3aae2939f 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -733,10 +733,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, wmb(); notify_remote_via_irq(pdev->evtchn_irq); + /* Enable IRQ to signal "request done". */ + xen_pcibk_lateeoi(pdev, 0); + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, !(test_bit(_XEN_PCIB_active, (unsigned long *) &sh_info->flags)), 300*HZ); + /* Enable IRQ for pcifront request if not already active. */ + if (!test_bit(_PDEVF_op_active, &pdev->flags)) + xen_pcibk_lateeoi(pdev, 0); + if (!ret) { if (test_bit(_XEN_PCIB_active, (unsigned long *)&sh_info->flags)) { @@ -750,13 +757,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - if (test_bit(_XEN_PCIF_active, - (unsigned long *)&sh_info->flags)) { - dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in " DRV_NAME "\n"); - xen_pcibk_test_and_schedule_op(psdev->pdev); - } - res = (pci_ers_result_t)aer_op->err; return res; } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 263c059bff90..235cdfe13494 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #define DRV_NAME "xen-pciback" @@ -27,6 +28,8 @@ struct pci_dev_entry { #define PDEVF_op_active (1<<(_PDEVF_op_active)) #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) +#define _EOI_pending (2) +#define EOI_pending (1<<(_EOI_pending)) struct xen_pcibk_device { void *pci_dev_data; @@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); +static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev, + unsigned int eoi_flag) +{ + if (test_and_clear_bit(_EOI_pending, &pdev->flags)) + xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag); +} + int xen_pcibk_xenbus_register(void); void xen_pcibk_xenbus_unregister(void); extern int verbose_request; - -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); #endif /* Handles shared IRQs that can to device domain and control domain. */ diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 787966f44589..c4ed2c634ca7 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, return 0; } #endif + +static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) +{ + return test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags) && + !test_and_set_bit(_PDEVF_op_active, &pdev->flags); +} + /* * Now the same evtchn is used for both pcifront conf_read_write request * as well as pcie aer front end ack. We use a new work_queue to schedule * xen_pcibk conf_read_write service for avoiding confict with aer_core * do_recovery job which also use the system default work_queue */ -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) +static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) { + bool eoi = true; + /* Check that frontend is requesting an operation and that we are not * already processing a request */ - if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) - && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + if (xen_pcibk_test_op_pending(pdev)) { schedule_work(&pdev->op_work); + eoi = false; } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) && test_bit(_PCIB_op_pending, &pdev->flags)) { wake_up(&xen_pcibk_aer_wait_queue); + eoi = false; } + + /* EOI if there was nothing to do. */ + if (eoi) + xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); } /* Performing the configuration space reads/writes must not be done in atomic @@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * use of semaphores). This function is intended to be called from a work * queue in process context taking a struct xen_pcibk_device as a parameter */ -void xen_pcibk_do_op(struct work_struct *data) +static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) { - struct xen_pcibk_device *pdev = - container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; @@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct *data) smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ clear_bit(_PDEVF_op_active, &pdev->flags); smp_mb__after_atomic(); /* /before/ final check for work */ +} - /* Check to see if the driver domain tried to start another request in - * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. - */ - xen_pcibk_test_and_schedule_op(pdev); +void xen_pcibk_do_op(struct work_struct *data) +{ + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); + + do { + xen_pcibk_do_one_op(pdev); + } while (xen_pcibk_test_op_pending(pdev)); + + xen_pcibk_lateeoi(pdev, 0); } irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) { struct xen_pcibk_device *pdev = dev_id; + bool eoi; + + /* IRQs might come in before pdev->evtchn_irq is written. */ + if (unlikely(pdev->evtchn_irq != irq)) + pdev->evtchn_irq = irq; + + eoi = test_and_set_bit(_EOI_pending, &pdev->flags); + WARN(eoi, "IRQ while EOI pending\n"); xen_pcibk_test_and_schedule_op(pdev); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 581c4e1a8b82..3bbed47da3fa 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, pdev->sh_info = vaddr; - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, 0, DRV_NAME, pdev); if (err < 0) { diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 14a3d4cbc2a7..1abc0a55b8d9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -91,7 +91,6 @@ struct vscsibk_info { unsigned int irq; struct vscsiif_back_ring ring; - int ring_error; spinlock_t ring_lock; atomic_t nr_unreplied_reqs; @@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info, return pending_req; } -static int scsiback_do_cmd_fn(struct vscsibk_info *info) +static int scsiback_do_cmd_fn(struct vscsibk_info *info, + unsigned int *eoi_flags) { struct vscsiif_back_ring *ring = &info->ring; struct vscsiif_request ring_req; @@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) rc = ring->rsp_prod_pvt; pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", info->domid, rp, rc, rp - rc); - info->ring_error = 1; - return 0; + return -EINVAL; } while ((rc != rp)) { + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) break; @@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) { struct vscsibk_info *info = dev_id; + int rc; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (info->ring_error) - return IRQ_HANDLED; - - while (scsiback_do_cmd_fn(info)) + while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0) cond_resched(); + /* In case of a ring error we keep the event channel masked. */ + if (!rc) + xen_irq_lateeoi(irq, eoi_flags); + return IRQ_HANDLED; } @@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, sring = (struct vscsiif_sring *)area; BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); if (err < 0) goto unmap_page; @@ -1252,7 +1256,6 @@ static int scsiback_probe(struct xenbus_device *dev, info->domid = dev->otherend_id; spin_lock_init(&info->ring_lock); - info->ring_error = 0; atomic_set(&info->nr_unreplied_reqs, 0); init_waitqueue_head(&info->waiting_to_free); info->dev = dev; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 550d0b169d7c..61e0c552083f 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -624,9 +624,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, - .range_start = vma->vm_pgoff * PAGE_SIZE, + .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE, /* absolute end, byte at end included */ - .range_end = vma->vm_pgoff * PAGE_SIZE + + .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start - 1), }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8007b6aacec6..f36b2a386aae 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1110,6 +1110,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1117,6 +1119,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1149,6 +1153,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (last_ref) { ret = tree_mod_log_free_eb(buf); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 554727d82d43..4d1c12faada8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1459,6 +1459,21 @@ do { \ #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + struct btrfs_map_token { const struct extent_buffer *eb; char *kaddr; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 7374fb23381c..14855972dee3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -620,8 +620,7 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, - fs_info->nodesize, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1b9c8ffb038f..4d1d2657d70c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -54,6 +54,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: + /* + * We don't have a replace item or it's corrupted. If there is + * a replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "found replace target device without a valid replace item"); + ret = -EUCLEAN; + goto out; + } ret = 0; dev_replace->replace_state = BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED; @@ -107,8 +118,19 @@ no_valid_dev_replace_entry_found: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: - dev_replace->srcdev = NULL; - dev_replace->tgtdev = NULL; + /* + * We don't have an active replace item but if there is a + * replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "replace devid present without an active replace item"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; + dev_replace->tgtdev = NULL; + } break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: @@ -190,7 +212,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, int ret = 0; *device_out = NULL; - if (fs_info->fs_devices->seeding) { + if (srcdev->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 82d597b16152..dabf153843e9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -138,7 +138,61 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits, return ret; } -static void flush_write_bio(struct extent_page_data *epd); +static int __must_check submit_one_bio(struct bio *bio, int mirror_num, + unsigned long bio_flags) +{ + blk_status_t ret = 0; + struct bio_vec *bvec = bio_last_bvec_all(bio); + struct page *page = bvec->bv_page; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + + start = page_offset(page) + bvec->bv_offset; + + bio->bi_private = NULL; + + if (tree->ops) + ret = tree->ops->submit_bio_hook(tree->private_data, bio, + mirror_num, bio_flags, start); + else + btrfsic_submit_bio(bio); + + return blk_status_to_errno(ret); +} + +/* Cleanup unsubmitted bios */ +static void end_write_bio(struct extent_page_data *epd, int ret) +{ + if (epd->bio) { + epd->bio->bi_status = errno_to_blk_status(ret); + bio_endio(epd->bio); + epd->bio = NULL; + } +} + +/* + * Submit bio from extent page data via submit_one_bio + * + * Return 0 if everything is OK. + * Return <0 for error. + */ +static int __must_check flush_write_bio(struct extent_page_data *epd) +{ + int ret = 0; + + if (epd->bio) { + ret = submit_one_bio(epd->bio, 0, 0); + /* + * Clean up of epd->bio is handled by its endio function. + * And endio is either triggered by successful bio execution + * or the error handler of submit bio hook. + * So at this point, no matter what happened, we don't need + * to clean up epd->bio. + */ + epd->bio = NULL; + } + return ret; +} int __init extent_io_init(void) { @@ -2710,28 +2764,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) return bio; } -static int __must_check submit_one_bio(struct bio *bio, int mirror_num, - unsigned long bio_flags) -{ - blk_status_t ret = 0; - struct bio_vec *bvec = bio_last_bvec_all(bio); - struct page *page = bvec->bv_page; - struct extent_io_tree *tree = bio->bi_private; - u64 start; - - start = page_offset(page) + bvec->bv_offset; - - bio->bi_private = NULL; - - if (tree->ops) - ret = tree->ops->submit_bio_hook(tree->private_data, bio, - mirror_num, bio_flags, start); - else - btrfsic_submit_bio(bio); - - return blk_status_to_errno(ret); -} - /* * @opf: bio REQ_OP_* and REQ_* flags as one value * @tree: tree so we can call our merge_bio hook @@ -3439,6 +3471,9 @@ done: * records are inserted to lock ranges in the tree, and as dirty areas * are found, they are marked writeback. Then the lock bits are removed * and the end_io handler clears the writeback ranges + * + * Return 0 if everything goes well. + * Return <0 for error. */ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd) @@ -3506,6 +3541,7 @@ done: end_extent_writepage(page, ret, start, page_end); } unlock_page(page); + ASSERT(ret <= 0); return ret; done_unlocked: @@ -3518,18 +3554,34 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) TASK_UNINTERRUPTIBLE); } +static void end_extent_buffer_writeback(struct extent_buffer *eb) +{ + clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); +} + +/* + * Lock eb pages and flush the bio if we can't the locks + * + * Return 0 if nothing went wrong + * Return >0 is same as 0, except bio is not submitted + * Return <0 if something went wrong, no page is locked + */ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, struct btrfs_fs_info *fs_info, struct extent_page_data *epd) { - int i, num_pages; + int i, num_pages, failed_page_nr; int flush = 0; int ret = 0; if (!btrfs_try_tree_write_lock(eb)) { + ret = flush_write_bio(epd); + if (ret < 0) + return ret; flush = 1; - flush_write_bio(epd); btrfs_tree_lock(eb); } @@ -3538,7 +3590,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!epd->sync_io) return 0; if (!flush) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + if (ret < 0) + return ret; flush = 1; } while (1) { @@ -3579,7 +3633,14 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { - flush_write_bio(epd); + int err; + + err = flush_write_bio(epd); + if (err < 0) { + ret = err; + failed_page_nr = i; + goto err_unlock; + } flush = 1; } lock_page(p); @@ -3587,13 +3648,25 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, } return ret; -} - -static void end_extent_buffer_writeback(struct extent_buffer *eb) -{ - clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_atomic(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); +err_unlock: + /* Unlock already locked pages */ + for (i = 0; i < failed_page_nr; i++) + unlock_page(eb->pages[i]); + /* + * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it. + * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can + * be made and undo everything done before. + */ + btrfs_tree_lock(eb); + spin_lock(&eb->refs_lock); + set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + end_extent_buffer_writeback(eb); + spin_unlock(&eb->refs_lock); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len, + fs_info->dirty_metadata_batch); + btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_tree_unlock(eb); + return ret; } static void set_btree_ioerr(struct page *page) @@ -3840,6 +3913,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, fs_info, wbc, &epd); @@ -3869,7 +3946,44 @@ retry: index = 0; goto retry; } - flush_write_bio(&epd); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } + /* + * If something went wrong, don't allow any metadata write bio to be + * submitted. + * + * This would prevent use-after-free if we had dirty pages not + * cleaned up, which can still happen by fuzzed images. + * + * - Bad extent tree + * Allowing existing tree block to be allocated for other trees. + * + * - Log tree operations + * Exiting tree blocks get allocated to log tree, bumps its + * generation, then get cleaned in tree re-balance. + * Such tree block will not be written back, since it's clean, + * thus no WRITTEN flag set. + * And after log writes back, this tree block is not traced by + * any dirty extent_io_tree. + * + * - Offending tree block gets re-dirtied from its original owner + * Since it has bumped generation, no WRITTEN flag, it can be + * reused without COWing. This tree block will not be traced + * by btrfs_transaction::dirty_pages. + * + * Now such dirty tree block will not be cleaned by any dirty + * extent io tree. Thus we don't want to submit such wild eb + * if the fs already has error. + */ + if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + ret = flush_write_bio(&epd); + } else { + ret = -EUCLEAN; + end_write_bio(&epd, ret); + } return ret; } @@ -3966,7 +4080,8 @@ retry: * tmpfs file mapping */ if (!trylock_page(page)) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); lock_page(page); } @@ -3976,8 +4091,10 @@ retry: } if (wbc->sync_mode != WB_SYNC_NONE) { - if (PageWriteback(page)) - flush_write_bio(epd); + if (PageWriteback(page)) { + ret = flush_write_bio(epd); + BUG_ON(ret < 0); + } wait_on_page_writeback(page); } @@ -4022,8 +4139,9 @@ retry: * page in our current bio, and thus deadlock, so flush the * write bio here. */ - flush_write_bio(epd); - goto retry; + ret = flush_write_bio(epd); + if (!ret) + goto retry; } if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) @@ -4033,17 +4151,6 @@ retry: return ret; } -static void flush_write_bio(struct extent_page_data *epd) -{ - if (epd->bio) { - int ret; - - ret = submit_one_bio(epd->bio, 0, 0); - BUG_ON(ret < 0); /* -ENOMEM */ - epd->bio = NULL; - } -} - int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; @@ -4055,8 +4162,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) }; ret = __extent_writepage(page, wbc, &epd); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } - flush_write_bio(&epd); + ret = flush_write_bio(&epd); + ASSERT(ret <= 0); return ret; } @@ -4064,6 +4177,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int mode) { int ret = 0; + int flush_ret; struct address_space *mapping = inode->i_mapping; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *page; @@ -4098,7 +4212,8 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, start += PAGE_SIZE; } - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -4106,6 +4221,7 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret = 0; + int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(mapping->host)->io_tree, @@ -4114,7 +4230,8 @@ int extent_writepages(struct address_space *mapping, }; ret = extent_write_cache_pages(mapping, wbc, &epd); - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 01a90fa03c24..f3658d6ea657 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1239,6 +1239,7 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 page_start; u64 page_end; u64 page_cnt; + u64 start = (u64)start_index << PAGE_SHIFT; int ret; int i; int i_done; @@ -1255,8 +1256,7 @@ static int cluster_pages_for_defrag(struct inode *inode, page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(inode, &data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT); + start, page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1346,8 +1346,7 @@ again: btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, data_reserved, - start_index << PAGE_SHIFT, - (page_cnt - i_done) << PAGE_SHIFT, true); + start, (page_cnt - i_done) << PAGE_SHIFT, true); } @@ -1374,8 +1373,7 @@ out: put_page(pages[i]); } btrfs_delalloc_release_space(inode, data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT, true); + start, page_cnt << PAGE_SHIFT, true); btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; @@ -4217,6 +4215,8 @@ process_slot: ret = -EINTR; goto out; } + + cond_resched(); } ret = 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c8ed4db73b84..9936e4b991aa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -488,13 +488,13 @@ next2: break; } out: + btrfs_free_path(path); fs_info->qgroup_flags |= flags; if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && ret >= 0) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); - btrfs_free_path(path); if (ret < 0) { ulist_free(fs_info->qgroup_ulist); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 4c81ffe12385..368c349c5669 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -442,6 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, } have_zone = 1; } + if (!have_zone) + radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_read_unlock(&fs_info->dev_replace); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 5dec52bd2897..b26739d0e991 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -854,6 +854,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2bc80d0b56db..ed61c0daef41 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3810,6 +3810,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) return 0; } +/* + * When processing the new references for an inode we may orphanize an existing + * directory inode because its old name conflicts with one of the new references + * of the current inode. Later, when processing another new reference of our + * inode, we might need to orphanize another inode, but the path we have in the + * reference reflects the pre-orphanization name of the directory we previously + * orphanized. For example: + * + * parent snapshot looks like: + * + * . (ino 256) + * |----- f1 (ino 257) + * |----- f2 (ino 258) + * |----- d1/ (ino 259) + * |----- d2/ (ino 260) + * + * send snapshot looks like: + * + * . (ino 256) + * |----- d1 (ino 258) + * |----- f2/ (ino 259) + * |----- f2_link/ (ino 260) + * | |----- f1 (ino 257) + * | + * |----- d2 (ino 258) + * + * When processing inode 257 we compute the name for inode 259 as "d1", and we + * cache it in the name cache. Later when we start processing inode 258, when + * collecting all its new references we set a full path of "d1/d2" for its new + * reference with name "d2". When we start processing the new references we + * start by processing the new reference with name "d1", and this results in + * orphanizing inode 259, since its old reference causes a conflict. Then we + * move on the next new reference, with name "d2", and we find out we must + * orphanize inode 260, as its old reference conflicts with ours - but for the + * orphanization we use a source path corresponding to the path we stored in the + * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the + * receiver fail since the path component "d1/" no longer exists, it was renamed + * to "o259-6-0/" when processing the previous new reference. So in this case we + * must recompute the path in the new reference and use it for the new + * orphanization operation. + */ +static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) +{ + char *name; + int ret; + + name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + fs_path_reset(ref->full_path); + ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path); + if (ret < 0) + goto out; + + ret = fs_path_add(ref->full_path, name, ref->name_len); + if (ret < 0) + goto out; + + /* Update the reference's base name pointer. */ + set_ref_path(ref, ref->full_path); +out: + kfree(name); + return ret; +} + /* * This does all the move/link/unlink/rmdir magic. */ @@ -3940,6 +4006,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct name_cache_entry *nce; struct waiting_dir_move *wdm; + if (orphanized_dir) { + ret = refresh_ref_path(sctx, cur); + if (ret < 0) + goto out; + } + ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) @@ -6787,7 +6859,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); - sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL); + sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d98ec885b72a..9023e6b46396 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -448,6 +448,320 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, return 0; } +__printf(5, 6) +__cold +static void chunk_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, + const struct btrfs_chunk *chunk, u64 logical, + const char *fmt, ...) +{ + bool is_sb; + struct va_format vaf; + va_list args; + int i; + int slot = -1; + + /* Only superblock eb is able to have such small offset */ + is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); + + if (!is_sb) { + /* + * Get the slot number by iterating through all slots, this + * would provide better readability. + */ + for (i = 0; i < btrfs_header_nritems(leaf); i++) { + if (btrfs_item_ptr_offset(leaf, i) == + (unsigned long)chunk) { + slot = i; + break; + } + } + } + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (is_sb) + btrfs_crit(fs_info, + "corrupt superblock syschunk array: chunk_start=%llu, %pV", + logical, &vaf); + else + btrfs_crit(fs_info, + "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV", + BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot, + logical, &vaf); + va_end(args); +} + +/* + * The common chunk check which could also work on super block sys chunk array. + * + * Return -EUCLEAN if anything is corrupted. + * Return 0 if everything is OK. + */ +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + u64 features; + bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (!num_stripes) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk num_stripes, have %u", num_stripes); + return -EUCLEAN; + } + if (!IS_ALIGNED(logical, fs_info->sectorsize)) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk logical, have %llu should aligned to %u", + logical, fs_info->sectorsize); + return -EUCLEAN; + } + if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk sectorsize, have %u expect %u", + btrfs_chunk_sector_size(leaf, chunk), + fs_info->sectorsize); + return -EUCLEAN; + } + if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk length, have %llu", length); + return -EUCLEAN; + } + if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk stripe length: %llu", + stripe_len); + return -EUCLEAN; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + type) { + chunk_err(fs_info, leaf, chunk, logical, + "unrecognized chunk type: 0x%llx", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EUCLEAN; + } + + if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EUCLEAN; + } + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + chunk_err(fs_info, leaf, chunk, logical, + "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", + type, BTRFS_BLOCK_GROUP_TYPE_MASK); + return -EUCLEAN; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + chunk_err(fs_info, leaf, chunk, logical, + "system chunk with data or metadata type: 0x%llx", + type); + return -EUCLEAN; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + chunk_err(fs_info, leaf, chunk, logical, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EUCLEAN; + } + } + + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EUCLEAN; + } + + return 0; +} + +__printf(4, 5) +__cold +static void dev_item_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, &vaf); + va_end(args); +} + +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dev_item *ditem; + + if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { + dev_item_err(fs_info, leaf, slot, + "invalid objectid: has=%llu expect=%llu", + key->objectid, BTRFS_DEV_ITEMS_OBJECTID); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); + if (btrfs_device_id(leaf, ditem) != key->offset) { + dev_item_err(fs_info, leaf, slot, + "devid mismatch: key has=%llu item has=%llu", + key->offset, btrfs_device_id(leaf, ditem)); + return -EUCLEAN; + } + + /* + * For device total_bytes, we don't have reliable way to check it, as + * it can be 0 for device removal. Device size check can only be done + * by dev extents check. + */ + if (btrfs_device_bytes_used(leaf, ditem) > + btrfs_device_total_bytes(leaf, ditem)) { + dev_item_err(fs_info, leaf, slot, + "invalid bytes used: have %llu expect [0, %llu]", + btrfs_device_bytes_used(leaf, ditem), + btrfs_device_total_bytes(leaf, ditem)); + return -EUCLEAN; + } + /* + * Remaining members like io_align/type/gen/dev_group aren't really + * utilized. Skip them to make later usage of them easier. + */ + return 0; +} + +/* Inode item error output has the same format as dir_item_err() */ +#define inode_item_err(fs_info, eb, slot, fmt, ...) \ + dir_item_err(fs_info, eb, slot, fmt, __VA_ARGS__) + +static int check_inode_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_inode_item *iitem; + u64 super_gen = btrfs_super_generation(fs_info->super_copy); + u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); + u32 mode; + + if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID || + key->objectid > BTRFS_LAST_FREE_OBJECTID) && + key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && + key->objectid != BTRFS_FREE_INO_OBJECTID) { + generic_err(fs_info, leaf, slot, + "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", + key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, + BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID, + BTRFS_FREE_INO_OBJECTID); + return -EUCLEAN; + } + if (key->offset != 0) { + inode_item_err(fs_info, leaf, slot, + "invalid key offset: has %llu expect 0", + key->offset); + return -EUCLEAN; + } + iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); + + /* Here we use super block generation + 1 to handle log tree */ + if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode generation: has %llu expect (0, %llu]", + btrfs_inode_generation(leaf, iitem), + super_gen + 1); + return -EUCLEAN; + } + /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ + if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode transid: has %llu expect [0, %llu]", + btrfs_inode_transid(leaf, iitem), super_gen + 1); + return -EUCLEAN; + } + + /* + * For size and nbytes it's better not to be too strict, as for dir + * item its size/nbytes can easily get wrong, but doesn't affect + * anything in the fs. So here we skip the check. + */ + mode = btrfs_inode_mode(leaf, iitem); + if (mode & ~valid_mask) { + inode_item_err(fs_info, leaf, slot, + "unknown mode bit detected: 0x%x", + mode & ~valid_mask); + return -EUCLEAN; + } + + /* + * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2, + * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG. + * Only needs to check BLK, LNK and SOCKS + */ + if (!is_power_of_2(mode & S_IFMT)) { + if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) { + inode_item_err(fs_info, leaf, slot, + "invalid mode: has 0%o expect valid S_IF* bit(s)", + mode & S_IFMT); + return -EUCLEAN; + } + } + if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) { + inode_item_err(fs_info, leaf, slot, + "invalid nlink: has %u expect no more than 1 for dir", + btrfs_inode_nlink(leaf, iitem)); + return -EUCLEAN; + } + if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) { + inode_item_err(fs_info, leaf, slot, + "unknown flags detected: 0x%llx", + btrfs_inode_flags(leaf, iitem) & + ~BTRFS_INODE_FLAG_MASK); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -456,6 +770,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, struct btrfs_key *key, int slot) { int ret = 0; + struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: @@ -472,6 +787,17 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_BLOCK_GROUP_ITEM_KEY: ret = check_block_group_item(fs_info, leaf, key, slot); break; + case BTRFS_CHUNK_ITEM_KEY: + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, + key->offset); + break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, leaf, key, slot); + break; + case BTRFS_INODE_ITEM_KEY: + ret = check_inode_item(fs_info, leaf, key, slot); + break; } return ret; } diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index ff043275b784..4df45e8a6659 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -25,4 +25,8 @@ int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical); + #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3e903e6a3387..7b940264c7b9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3589,6 +3589,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * search and this search we'll not find the key again and can just * bail. */ +search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); if (ret != 0) goto done; @@ -3608,6 +3609,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, if (min_key.objectid != ino || min_key.type != key_type) goto done; + + if (need_resched()) { + btrfs_release_path(path); + cond_resched(); + goto search; + } + ret = overwrite_item(trans, log, dst_path, src, i, &min_key); if (ret) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 815b655b8f10..95b6a4ea18f7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,6 +28,7 @@ #include "math.h" #include "dev-replace.h" #include "sysfs.h" +#include "tree-checker.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -856,17 +857,25 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev != path_bdev) { bdput(path_bdev); mutex_unlock(&fs_devices->device_list_mutex); - btrfs_warn_in_rcu(device->fs_info, - "duplicate device fsid:devid for %pU:%llu old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + /* + * device->fs_info may not be reliable here, so + * pass in a NULL instead. This avoids a + * possible use-after-free when the fs_info and + * fs_info->sb are already torn down. + */ + btrfs_warn_in_rcu(NULL, + "duplicate device %s devid %llu generation %llu scanned by %s (%d)", + path, devid, found_transid, + current->comm, + task_pid_nr(current)); return ERR_PTR(-EEXIST); } bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, - "device fsid %pU devid %llu moved old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + "devid %llu device path %s changed to %s scanned by %s (%d)", + devid, rcu_str_deref(device->name), + path, current->comm, + task_pid_nr(current)); } name = rcu_string_strdup(path, GFP_NOFS); @@ -971,22 +980,13 @@ again: continue; } - if (device->devid == BTRFS_DEV_REPLACE_DEVID) { - /* - * In the first step, keep the device which has - * the correct fsid and the devid that is used - * for the dev_replace procedure. - * In the second step, the dev_replace state is - * read from the device tree and it is known - * whether the procedure is really active or - * not, which means whether this device is - * used or whether it should be removed. - */ - if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) { - continue; - } - } + /* + * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID, + * in btrfs_init_dev_replace() so just continue. + */ + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + continue; + if (device->bdev) { blkdev_put(device->bdev, device->mode); device->bdev = NULL; @@ -995,9 +995,6 @@ again: if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) - fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; @@ -2456,9 +2453,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_set_super_num_devices(fs_info->super_copy, orig_super_num_devices + 1); - /* add sysfs device entry */ - btrfs_sysfs_add_device_link(fs_devices, device); - /* * we've got more storage, clear any full flags on the space * infos @@ -2466,6 +2460,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_clear_space_info_full(fs_info); mutex_unlock(&fs_info->chunk_mutex); + + /* Add sysfs device entry */ + btrfs_sysfs_add_device_link(fs_devices, device); + mutex_unlock(&fs_devices->device_list_mutex); if (seeding_dev) { @@ -4603,15 +4601,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } -#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ - - sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - -#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ - - 2 * sizeof(struct btrfs_disk_key) \ - - 2 * sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 start, u64 type) { @@ -6368,99 +6357,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -/* Return -EIO if any error, otherwise return 0. */ -static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 logical) -{ - u64 length; - u64 stripe_len; - u16 num_stripes; - u16 sub_stripes; - u64 type; - u64 features; - bool mixed = false; - - length = btrfs_chunk_length(leaf, chunk); - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); - type = btrfs_chunk_type(leaf, chunk); - - if (!num_stripes) { - btrfs_err(fs_info, "invalid chunk num_stripes: %u", - num_stripes); - return -EIO; - } - if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk logical %llu", logical); - return -EIO; - } - if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - btrfs_err(fs_info, "invalid chunk sectorsize %u", - btrfs_chunk_sector_size(leaf, chunk)); - return -EIO; - } - if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk length %llu", length); - return -EIO; - } - if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(fs_info, "invalid chunk stripe length: %llu", - stripe_len); - return -EIO; - } - if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - type) { - btrfs_err(fs_info, "unrecognized chunk type: %llu", - ~(BTRFS_BLOCK_GROUP_TYPE_MASK | - BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && - (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - btrfs_err(fs_info, - "system chunk with data or metadata type: 0x%llx", type); - return -EIO; - } - - features = btrfs_super_incompat_flags(fs_info->super_copy); - if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) - mixed = true; - - if (!mixed) { - if ((type & BTRFS_BLOCK_GROUP_METADATA) && - (type & BTRFS_BLOCK_GROUP_DATA)) { - btrfs_err(fs_info, - "mixed chunk type in non-mixed mode: 0x%llx", type); - return -EIO; - } - } - - if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || - (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || - (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || - ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && - num_stripes != 1)) { - btrfs_err(fs_info, - "invalid num_stripes:sub_stripes %u:%u for profile %llu", - num_stripes, sub_stripes, - type & BTRFS_BLOCK_GROUP_PROFILE_MASK); - return -EIO; - } - - return 0; -} - static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, bool error) { @@ -6491,9 +6387,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, length = btrfs_chunk_length(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); - if (ret) - return ret; + /* + * Only need to verify chunk item if we're reading from sys chunk array, + * as chunk item in tree block is already verified by tree-checker. + */ + if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); + if (ret) + return ret; + } read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8e8bf3246de1..65cd023b097c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -257,6 +257,15 @@ struct btrfs_fs_devices { #define BTRFS_BIO_INLINE_CSUM_SIZE 64 +#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). diff --git a/fs/buffer.c b/fs/buffer.c index 30b8850b953c..2fa1ad58a3c2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2743,16 +2743,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ -#if 0 - /* Not really sure about this - do we need this ? */ - if (page->mapping->a_ops->invalidatepage) - page->mapping->a_ops->invalidatepage(page, offset); -#endif unlock_page(page); return 0; /* don't care */ } @@ -2947,12 +2937,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - do_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; /* don't care */ } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index f822ac9e3cb0..f5bf10729a87 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -125,7 +125,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, _debug("reissue read"); ret = bmapping->a_ops->readpage(NULL, backpage); if (ret < 0) - goto unlock_discard; + goto discard; } /* but the page may have been read before the monitor was installed, so @@ -142,6 +142,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, unlock_discard: unlock_page(backpage); +discard: spin_lock_irq(&object->work_lock); list_del(&monitor->op_link); spin_unlock_irq(&object->work_lock); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 476728bdae8c..e59b2f53a81f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1437,7 +1437,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_SHIFT; + loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index a2b2355e7f01..9986817532b1 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -501,7 +501,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, else if (map_chars == SFM_MAP_UNI_RSVD) { bool end_of_string; - if (i == srclen - 1) + /** + * Remap spaces and periods found at the end of every + * component of the path. The special cases of '.' and + * '..' do not need to be dealt with explicitly because + * they are addressed in namei.c:link_path_walk(). + **/ + if ((i == srclen - 1) || (source[i+1] == '\\')) end_of_string = true; else end_of_string = false; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4a38f16d944d..d30eb4350656 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2550,13 +2550,18 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb); + int rc, retries = 0; - if (pTcon->unix_ext) - return cifs_setattr_unix(direntry, attrs); - - return cifs_setattr_nounix(direntry, attrs); + do { + if (pTcon->unix_ext) + rc = cifs_setattr_unix(direntry, attrs); + else + rc = cifs_setattr_nounix(direntry, attrs); + retries++; + } while (is_retryable_error(rc) && retries < 2); /* BB: add cifs_setattr_legacy for really old servers */ + return rc; } #if 0 diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index cca57e177a18..beb3a61d0dd9 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -362,13 +362,16 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_info *ci) { /* - * The READ_ONCE() here pairs with the smp_store_release() in - * fscrypt_prepare_key(). (This only matters for the per-mode keys, - * which are shared by multiple inodes.) + * The two smp_load_acquire()'s here pair with the smp_store_release()'s + * in fscrypt_prepare_inline_crypt_key() and fscrypt_prepare_key(). + * I.e., in some cases (namely, if this prep_key is a per-mode + * encryption key) another task can publish blk_key or tfm concurrently, + * executing a RELEASE barrier. We need to use smp_load_acquire() here + * to safely ACQUIRE the memory the other task published. */ if (fscrypt_using_inline_encryption(ci)) - return READ_ONCE(prep_key->blk_key) != NULL; - return READ_ONCE(prep_key->tfm) != NULL; + return smp_load_acquire(&prep_key->blk_key) != NULL; + return smp_load_acquire(&prep_key->tfm) != NULL; } #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ @@ -415,7 +418,7 @@ static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_info *ci) { - return READ_ONCE(prep_key->tfm) != NULL; + return smp_load_acquire(&prep_key->tfm) != NULL; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 6504f053ae57..3b3a684c19d5 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -202,8 +202,10 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, } } /* - * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters - * for the per-mode keys, which are shared by multiple inodes.) + * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). + * I.e., here we publish ->blk_key with a RELEASE barrier so that + * concurrent tasks can ACQUIRE it. Note that this concurrency is only + * possible for per-mode keys, not for per-file keys. */ smp_store_release(&prep_key->blk_key, blk_key); return 0; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 6a198f9be10a..e894f1939a98 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -134,8 +134,10 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, if (IS_ERR(tfm)) return PTR_ERR(tfm); /* - * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters - * for the per-mode keys, which are shared by multiple inodes.) + * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). + * I.e., here we publish ->tfm with a RELEASE barrier so that + * concurrent tasks can ACQUIRE it. Note that this concurrency is only + * possible for per-mode keys, not for per-file keys. */ smp_store_release(&prep_key->tfm, tfm); return 0; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index d23ff162c78b..026dbb2fa36f 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -77,6 +77,20 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, struct super_block *sb = inode->i_sb; int ino_bits = 64, lblk_bits = 64; + /* + * IV_INO_LBLK_* exist only because of hardware limitations, and + * currently the only known use case for them involves AES-256-XTS. + * That's also all we test currently. For these reasons, for now only + * allow AES-256-XTS here. This can be relaxed later if a use case for + * IV_INO_LBLK_* with other encryption modes arises. + */ + if (policy->contents_encryption_mode != FSCRYPT_MODE_AES_256_XTS) { + fscrypt_warn(inode, + "Can't use %s policy with contents mode other than AES-256-XTS", + type); + return false; + } + /* * It's unsafe to include inode numbers in the IVs if the filesystem can * potentially renumber inodes, e.g. via filesystem shrinking. @@ -178,10 +192,15 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, 32, 32)) return false; + /* + * IV_INO_LBLK_32 hashes the inode number, so in principle it can + * support any ino_bits. However, currently the inode number is gotten + * from inode::i_ino which is 'unsigned long'. So for now the + * implementation limit is 32 bits. + */ if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) && - /* This uses hashed inode numbers, so ino_bits doesn't matter. */ !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_32", - INT_MAX, 32)) + 32, 32)) return false; if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 8c6ab6c95727..7f40343b39b0 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, var->var.VariableName[i] = '\0'; inode->i_private = var; + kmemleak_ignore(var); err = efivar_entry_add(var, &efivarfs_list); if (err) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 5b68e4294faa..834615f13f3e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -145,6 +145,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; + /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */ + strreplace(name, '/', '!'); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, is_removable); if (!inode) diff --git a/fs/exec.c b/fs/exec.c index e438bf91a54b..17d7f8a66167 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1028,10 +1028,23 @@ static int exec_mmap(struct mm_struct *mm) } } task_lock(tsk); + + local_irq_disable(); active_mm = tsk->active_mm; - tsk->mm = mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2508e0adf600..839dc6eb3f96 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2522,7 +2522,8 @@ void ext4_insert_dentry(struct inode *dir, struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) { + if (!ext4_has_feature_dir_index(inode->i_sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { /* ext4_iget() should have caught this... */ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 0fb8d9da4552..31cba3b83381 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1940,6 +1940,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { + ext4_write_unlock_xattr(inode, &no_expand); *has_inline = 0; ext4_journal_stop(handle); return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eb76f6436364..067e37bdc88d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5348,6 +5348,12 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + err = ext4_inode_blocks_set(handle, raw_inode, ei); + if (err) { + spin_unlock(&ei->i_raw_lock); + goto out_brelse; + } + raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); @@ -5381,11 +5387,6 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - err = ext4_inode_blocks_set(handle, raw_inode, ei); - if (err) { - spin_unlock(&ei->i_raw_lock); - goto out_brelse; - } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ef552d93708e..8098255c2801 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -861,8 +861,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); - if (unlikely(err)) + if (unlikely(err)) { ext4_std_error(sb, err); + goto errout; + } /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9b9c539e8897..9462b900f9a7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1784,8 +1784,8 @@ static const struct mount_opts { {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), MOPT_CLEAR | MOPT_Q}, - {Opt_usrjquota, 0, MOPT_Q}, - {Opt_grpjquota, 0, MOPT_Q}, + {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING}, + {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING}, {Opt_offusrjquota, 0, MOPT_Q}, {Opt_offgrpjquota, 0, MOPT_Q}, {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, @@ -4750,6 +4750,7 @@ cantfind_ext4: failed_mount8: ext4_unregister_sysfs(sb); + kobject_put(&sbi->s_kobj); failed_mount7: ext4_unregister_li_request(sb); failed_mount6: @@ -5914,6 +5915,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) return -EXDEV; + + /* Quota already enabled for this file? */ + if (IS_NOQUOTA(d_inode(path->dentry))) + return -EBUSY; + /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 236064930251..8270e924b977 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: + if (unlikely(blkno >= TOTAL_SEGS(sbi))) + goto out; /* get sit block addr */ fio.new_blkaddr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); @@ -1047,8 +1049,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) get_pages(sbi, is_dir ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return -EIO; + } spin_lock(&sbi->inode_lock[type]); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 959c3461da6a..aa363840f1b3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -380,16 +380,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) { - *res_page = NULL; de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } - if (npages == 0) { - *res_page = NULL; + if (npages == 0) goto out; - } max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { @@ -400,7 +399,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - *res_page = NULL; de = find_in_level(dir, level, fname, res_page); if (de || IS_ERR(*res_page)) break; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c51c9a6881e4..9861204da06f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -573,6 +574,7 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) req->out.numargs = args->out.numargs; memcpy(req->out.args, args->out.args, args->out.numargs * sizeof(struct fuse_arg)); + req->out.canonical_path = args->out.canonical_path; fuse_request_send(fc, req); ret = req->out.h.error; if (!ret && args->out.argvar) { @@ -853,15 +855,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; + get_page(oldpage); err = unlock_request(cs->req); if (err) - return err; + goto out_put_old; fuse_copy_finish(cs); err = pipe_buf_confirm(cs->pipe, buf); if (err) - return err; + goto out_put_old; BUG_ON(!cs->nr_segs); cs->currbuf = buf; @@ -901,7 +904,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); if (err) { unlock_page(newpage); - return err; + goto out_put_old; } get_page(newpage); @@ -920,14 +923,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (err) { unlock_page(newpage); put_page(newpage); - return err; + goto out_put_old; } unlock_page(oldpage); + /* Drop ref for ap->pages[] array */ put_page(oldpage); cs->len = 0; - return 0; + err = 0; +out_put_old: + /* Drop ref obtained in this function */ + put_page(oldpage); + return err; out_fallback_unlock: unlock_page(newpage); @@ -936,10 +944,10 @@ out_fallback: cs->offset = buf->offset; err = lock_request(cs->req); - if (err) - return err; + if (!err) + err = 1; - return 1; + goto out_put_old; } static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, @@ -951,14 +959,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, if (cs->nr_segs == cs->pipe->buffers) return -EIO; + get_page(page); err = unlock_request(cs->req); - if (err) + if (err) { + put_page(page); return err; + } fuse_copy_finish(cs); buf = cs->pipebufs; - get_page(page); buf->page = page; buf->offset = offset; buf->len = count; @@ -1924,6 +1934,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); + if (!err && req->in.h.opcode == FUSE_CANONICAL_PATH) { + char *path = (char *)req->out.args[0].value; + + path[req->out.args[0].size - 1] = 0; + req->out.h.error = kern_path(path, 0, req->out.canonical_path); + } + spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6244345a5745..90bd6c0d18cc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -281,10 +281,49 @@ static void fuse_dentry_release(struct dentry *dentry) kfree_rcu(fd, rcu); } +/* + * Get the canonical path. Since we must translate to a path, this must be done + * in the context of the userspace daemon, however, the userspace daemon cannot + * look up paths on its own. Instead, we handle the lookup as a special case + * inside of the write request. + */ +static void fuse_dentry_canonical_path(const struct path *path, + struct path *canonical_path) +{ + struct inode *inode = d_inode(path->dentry); + struct fuse_conn *fc = get_fuse_conn(inode); + FUSE_ARGS(args); + char *path_name; + int err; + + path_name = (char *)__get_free_page(GFP_KERNEL); + if (!path_name) + goto default_path; + + args.in.h.opcode = FUSE_CANONICAL_PATH; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 0; + args.out.numargs = 1; + args.out.args[0].size = PATH_MAX; + args.out.args[0].value = path_name; + args.out.argvar = 1; + args.out.canonical_path = canonical_path; + + err = fuse_simple_request(fc, &args); + free_page((unsigned long)path_name); + if (err > 0) + return; +default_path: + canonical_path->dentry = path->dentry; + canonical_path->mnt = path->mnt; + path_get(canonical_path); +} + const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; const struct dentry_operations fuse_root_dentry_operations = { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index dbfc35efbefb..123f87804f51 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -213,6 +213,9 @@ struct fuse_out { /** Array of arguments */ struct fuse_arg args[2]; + + /* Path used for completing d_canonical_path */ + struct path *canonical_path; }; /** FUSE page descriptor */ @@ -235,6 +238,9 @@ struct fuse_args { unsigned argvar:1; unsigned numargs; struct fuse_arg args[2]; + + /* Path used for completing d_canonical_path */ + struct path *canonical_path; } out; }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ccdd8c821abd..c20d71d86812 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -870,7 +870,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); out: return ret; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9448c8461e57..17001f4e9f84 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -161,15 +161,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - /* If format numbers match exactly, we're done. */ + if (sb->sb_fs_format != GFS2_FORMAT_FS || + sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); + return -EINVAL; + } - if (sb->sb_fs_format == GFS2_FORMAT_FS && - sb->sb_multihost_format == GFS2_FORMAT_MULTI) - return 0; + if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + (sb->sb_bsize & (sb->sb_bsize - 1))) { + pr_warn("Invalid superblock size\n"); + return -EINVAL; + } - fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); - - return -EINVAL; + return 0; } static void end_bio_io_page(struct bio *bio) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c94c4ac1ae78..de9b561b1c38 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -739,9 +739,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) } gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } @@ -1387,6 +1387,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + if (!blk_queue_discard(q)) return -EOPNOTSUPP; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a971862b186e..22cd68bd8c9b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -934,6 +934,7 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig index 1ffe31a9c0ff..8f7d5226266b 100644 --- a/fs/incfs/Kconfig +++ b/fs/incfs/Kconfig @@ -2,13 +2,9 @@ config INCREMENTAL_FS tristate "Incremental file system support" depends on BLOCK select DECOMPRESS_LZ4 - select CRC32 - select CRYPTO - select CRYPTO_RSA + select DECOMPRESS_ZSTD + select CRYPTO_ZSTD select CRYPTO_SHA256 - select X509_CERTIFICATE_PARSER - select ASYMMETRIC_KEY_TYPE - select ASYMMETRIC_PUBLIC_KEY_SUBTYPE help Incremental FS is a read-only virtual file system that facilitates execution of programs while their binaries are still being lazily downloaded over the diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile index 8d734bf91ecd..0c3f6d348bec 100644 --- a/fs/incfs/Makefile +++ b/fs/incfs/Makefile @@ -6,4 +6,5 @@ incrementalfs-y := \ format.o \ integrity.o \ main.o \ + pseudo_files.o \ vfs.o diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 074a733c7001..48ab7428d627 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -3,11 +3,13 @@ * Copyright 2019 Google LLC */ #include +#include #include #include #include #include #include +#include #include #include #include @@ -17,6 +19,8 @@ #include "format.h" #include "integrity.h" +static int incfs_scan_metadata_chain(struct data_file *df); + static void log_wake_up_all(struct work_struct *work) { struct delayed_work *dw = container_of(work, struct delayed_work, work); @@ -24,6 +28,19 @@ static void log_wake_up_all(struct work_struct *work) wake_up_all(&rl->ml_notif_wq); } +static void zstd_free_workspace(struct work_struct *work) +{ + struct delayed_work *dw = container_of(work, struct delayed_work, work); + struct mount_info *mi = + container_of(dw, struct mount_info, mi_zstd_cleanup_work); + + mutex_lock(&mi->mi_zstd_workspace_mutex); + kvfree(mi->mi_zstd_workspace); + mi->mi_zstd_workspace = NULL; + mi->mi_zstd_stream = NULL; + mutex_unlock(&mi->mi_zstd_workspace_mutex); +} + struct mount_info *incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path) @@ -40,12 +57,17 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mi->mi_owner = get_current_cred(); path_get(&mi->mi_backing_dir_path); mutex_init(&mi->mi_dir_struct_mutex); - mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); + init_waitqueue_head(&mi->mi_blocks_written_notif_wq); + atomic_set(&mi->mi_blocks_written, 0); INIT_DELAYED_WORK(&mi->mi_log.ml_wakeup_work, log_wake_up_all); spin_lock_init(&mi->mi_log.rl_lock); + spin_lock_init(&mi->pending_read_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); + spin_lock_init(&mi->mi_per_uid_read_timeouts_lock); + mutex_init(&mi->mi_zstd_workspace_mutex); + INIT_DELAYED_WORK(&mi->mi_zstd_cleanup_work, zstd_free_workspace); error = incfs_realloc_mount_info(mi, options); if (error) @@ -105,28 +127,106 @@ void incfs_free_mount_info(struct mount_info *mi) return; flush_delayed_work(&mi->mi_log.ml_wakeup_work); + flush_delayed_work(&mi->mi_zstd_cleanup_work); dput(mi->mi_index_dir); + dput(mi->mi_incomplete_dir); path_put(&mi->mi_backing_dir_path); mutex_destroy(&mi->mi_dir_struct_mutex); - mutex_destroy(&mi->mi_pending_reads_mutex); + mutex_destroy(&mi->mi_zstd_workspace_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); kfree(mi->log_xattr); kfree(mi->pending_read_xattr); + kfree(mi->mi_per_uid_read_timeouts); kfree(mi); } static void data_file_segment_init(struct data_file_segment *segment) { init_waitqueue_head(&segment->new_data_arrival_wq); - mutex_init(&segment->blockmap_mutex); + init_rwsem(&segment->rwsem); INIT_LIST_HEAD(&segment->reads_list_head); } -static void data_file_segment_destroy(struct data_file_segment *segment) +char *file_id_to_str(incfs_uuid_t id) { - mutex_destroy(&segment->blockmap_mutex); + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); + char *end; + + if (!result) + return NULL; + + end = bin2hex(result, id.bytes, sizeof(id.bytes)); + *end = 0; + return result; +} + +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name) +{ + struct inode *inode; + struct dentry *result = NULL; + + if (!parent) + return ERR_PTR(-EFAULT); + + inode = d_inode(parent); + inode_lock_nested(inode, I_MUTEX_PARENT); + result = lookup_one_len(name, parent, strlen(name)); + inode_unlock(inode); + + if (IS_ERR(result)) + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); + + return result; +} + +static struct data_file *handle_mapped_file(struct mount_info *mi, + struct data_file *df) +{ + char *file_id_str; + struct dentry *index_file_dentry; + struct path path; + struct file *bf; + struct data_file *result = NULL; + + file_id_str = file_id_to_str(df->df_id); + if (!file_id_str) + return ERR_PTR(-ENOENT); + + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, + file_id_str); + kfree(file_id_str); + if (!index_file_dentry) + return ERR_PTR(-ENOENT); + if (IS_ERR(index_file_dentry)) + return (struct data_file *)index_file_dentry; + if (!d_really_is_positive(index_file_dentry)) { + result = ERR_PTR(-ENOENT); + goto out; + } + + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = index_file_dentry + }; + + bf = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); + if (IS_ERR(bf)) { + result = (struct data_file *)bf; + goto out; + } + + result = incfs_open_data_file(mi, bf); + fput(bf); + if (IS_ERR(result)) + goto out; + + result->df_mapped_offset = df->df_metadata_off; + +out: + dput(index_file_dentry); + return result; } struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) @@ -159,12 +259,8 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) data_file_segment_init(&df->df_segments[i]); - error = mutex_lock_interruptible(&bfc->bc_mutex); - if (error) - goto out; error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id, &size, &df->df_header_flags); - mutex_unlock(&bfc->bc_mutex); if (error) goto out; @@ -173,6 +269,13 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) if (size > 0) df->df_data_block_count = get_blocks_count_for_size(size); + if (df->df_header_flags & INCFS_FILE_MAPPED) { + struct data_file *mapped_df = handle_mapped_file(mi, df); + + incfs_free_data_file(df); + return mapped_df; + } + md_records = incfs_scan_metadata_chain(df); if (md_records < 0) error = md_records; @@ -190,14 +293,34 @@ out: void incfs_free_data_file(struct data_file *df) { - int i; + u32 data_blocks_written, hash_blocks_written; if (!df) return; + data_blocks_written = atomic_read(&df->df_data_blocks_written); + hash_blocks_written = atomic_read(&df->df_hash_blocks_written); + + if (data_blocks_written != df->df_initial_data_blocks_written || + hash_blocks_written != df->df_initial_hash_blocks_written) { + struct backing_file_context *bfc = df->df_backing_file_context; + int error = -1; + + if (bfc && !mutex_lock_interruptible(&bfc->bc_mutex)) { + error = incfs_write_status_to_backing_file( + df->df_backing_file_context, + df->df_status_offset, + data_blocks_written, + hash_blocks_written); + mutex_unlock(&bfc->bc_mutex); + } + + if (error) + /* Nothing can be done, just warn */ + pr_warn("incfs: failed to write status to backing file\n"); + } + incfs_free_mtree(df->df_hash_tree); - for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) - data_file_segment_destroy(&df->df_segments[i]); incfs_free_bfc(df->df_backing_file_context); kfree(df); } @@ -251,16 +374,73 @@ void incfs_free_dir_file(struct dir_file *dir) kfree(dir); } -static ssize_t decompress(struct mem_range src, struct mem_range dst) +static ssize_t zstd_decompress_safe(struct mount_info *mi, + struct mem_range src, struct mem_range dst) { - int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len); + ssize_t result; + ZSTD_inBuffer inbuf = {.src = src.data, .size = src.len}; + ZSTD_outBuffer outbuf = {.dst = dst.data, .size = dst.len}; - if (result < 0) - return -EBADMSG; + result = mutex_lock_interruptible(&mi->mi_zstd_workspace_mutex); + if (result) + return result; + if (!mi->mi_zstd_stream) { + unsigned int workspace_size = ZSTD_DStreamWorkspaceBound( + INCFS_DATA_FILE_BLOCK_SIZE); + void *workspace = kvmalloc(workspace_size, GFP_NOFS); + ZSTD_DStream *stream; + + if (!workspace) { + result = -ENOMEM; + goto out; + } + + stream = ZSTD_initDStream(INCFS_DATA_FILE_BLOCK_SIZE, workspace, + workspace_size); + if (!stream) { + kvfree(workspace); + result = -EIO; + goto out; + } + + mi->mi_zstd_workspace = workspace; + mi->mi_zstd_stream = stream; + } + + result = ZSTD_decompressStream(mi->mi_zstd_stream, &outbuf, &inbuf) ? + -EBADMSG : outbuf.pos; + + mod_delayed_work(system_wq, &mi->mi_zstd_cleanup_work, + msecs_to_jiffies(5000)); + +out: + mutex_unlock(&mi->mi_zstd_workspace_mutex); return result; } +static ssize_t decompress(struct mount_info *mi, + struct mem_range src, struct mem_range dst, int alg) +{ + int result; + + switch (alg) { + case INCFS_BLOCK_COMPRESSED_LZ4: + result = LZ4_decompress_safe(src.data, dst.data, src.len, + dst.len); + if (result < 0) + return -EBADMSG; + return result; + + case INCFS_BLOCK_COMPRESSED_ZSTD: + return zstd_decompress_safe(mi, src, dst); + + default: + WARN_ON(true); + return -EOPNOTSUPP; + } +} + static void log_read_one_record(struct read_log *rl, struct read_log_state *rs) { union log_record *record = @@ -313,6 +493,7 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, s64 relative_us; union log_record record; size_t record_size; + uid_t uid = current_uid().val; /* * This may read the old value, but it's OK to delay the logging start @@ -334,12 +515,14 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, relative_us = now_us - head->base_record.absolute_ts_us; if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) || - relative_us >= 1ll << 32) { + relative_us >= 1ll << 32 || + uid != head->base_record.uid) { record.full_record = (struct full_record){ .type = FULL, .block_index = block_index, .file_id = *id, .absolute_ts_us = now_us, + .uid = uid, }; head->base_record.file_id = *id; record_size = sizeof(struct full_record); @@ -527,9 +710,7 @@ static void convert_data_file_block(struct incfs_blockmap_entry *bme, res_block->db_backing_file_data_offset |= le32_to_cpu(bme->me_data_offset_lo); res_block->db_stored_size = le16_to_cpu(bme->me_data_size); - res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? - COMPRESSION_LZ4 : - COMPRESSION_NONE; + res_block->db_comp_alg = flags & INCFS_BLOCK_COMPRESSED_MASK; } static int get_data_file_block(struct data_file *df, int index, @@ -579,36 +760,9 @@ static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, return 0; } -static int update_file_header_flags(struct data_file *df, u32 bits_to_reset, - u32 bits_to_set) -{ - int result; - u32 new_flags; - struct backing_file_context *bfc; - - if (!df) - return -EFAULT; - bfc = df->df_backing_file_context; - if (!bfc) - return -EFAULT; - - result = mutex_lock_interruptible(&bfc->bc_mutex); - if (result) - return result; - - new_flags = (df->df_header_flags & ~bits_to_reset) | bits_to_set; - if (new_flags != df->df_header_flags) { - df->df_header_flags = new_flags; - result = incfs_write_file_header_flags(bfc, new_flags); - } - - mutex_unlock(&bfc->bc_mutex); - - return result; -} - #define READ_BLOCKMAP_ENTRIES 512 int incfs_get_filled_blocks(struct data_file *df, + struct incfs_file_data *fd, struct incfs_get_filled_blocks_args *arg) { int error = 0; @@ -622,6 +776,8 @@ int incfs_get_filled_blocks(struct data_file *df, int i = READ_BLOCKMAP_ENTRIES - 1; int entries_read = 0; struct incfs_blockmap_entry *bme; + int data_blocks_filled = 0; + int hash_blocks_filled = 0; *size_out = 0; if (end_index > df->df_total_block_count) @@ -629,7 +785,8 @@ int incfs_get_filled_blocks(struct data_file *df, arg->total_blocks_out = df->df_total_block_count; arg->data_blocks_out = df->df_data_block_count; - if (df->df_header_flags & INCFS_FILE_COMPLETE) { + if (atomic_read(&df->df_data_blocks_written) == + df->df_data_block_count) { pr_debug("File marked full, fast get_filled_blocks"); if (arg->start_index > end_index) { arg->index_out = arg->start_index; @@ -682,6 +839,13 @@ int incfs_get_filled_blocks(struct data_file *df, convert_data_file_block(bme + i, &dfb); + if (is_data_block_present(&dfb)) { + if (arg->index_out >= df->df_data_block_count) + ++hash_blocks_filled; + else + ++data_blocks_filled; + } + if (is_data_block_present(&dfb) == in_range) continue; @@ -711,13 +875,28 @@ int incfs_get_filled_blocks(struct data_file *df, arg->index_out = range.begin; } - if (!error && in_range && arg->start_index == 0 && - end_index == df->df_total_block_count && - *size_out == sizeof(struct incfs_filled_range)) { - int result = - update_file_header_flags(df, 0, INCFS_FILE_COMPLETE); - /* Log failure only, since it's just a failed optimization */ - pr_debug("Marked file full with result %d", result); + if (arg->start_index == 0) { + fd->fd_get_block_pos = 0; + fd->fd_filled_data_blocks = 0; + fd->fd_filled_hash_blocks = 0; + } + + if (arg->start_index == fd->fd_get_block_pos) { + fd->fd_get_block_pos = arg->index_out + 1; + fd->fd_filled_data_blocks += data_blocks_filled; + fd->fd_filled_hash_blocks += hash_blocks_filled; + } + + if (fd->fd_get_block_pos == df->df_total_block_count + 1) { + if (fd->fd_filled_data_blocks > + atomic_read(&df->df_data_blocks_written)) + atomic_set(&df->df_data_blocks_written, + fd->fd_filled_data_blocks); + + if (fd->fd_filled_hash_blocks > + atomic_read(&df->df_hash_blocks_written)) + atomic_set(&df->df_hash_blocks_written, + fd->fd_filled_hash_blocks); } kfree(bme); @@ -755,20 +934,31 @@ static struct pending_read *add_pending_read(struct data_file *df, result->file_id = df->df_id; result->block_index = block_index; result->timestamp_us = ktime_to_us(ktime_get()); + result->uid = current_uid().val; - mutex_lock(&mi->mi_pending_reads_mutex); + spin_lock(&mi->pending_read_lock); result->serial_number = ++mi->mi_last_pending_read_number; mi->mi_pending_reads_count++; - list_add(&result->mi_reads_list, &mi->mi_reads_list_head); - list_add(&result->segment_reads_list, &segment->reads_list_head); - mutex_unlock(&mi->mi_pending_reads_mutex); + list_add_rcu(&result->mi_reads_list, &mi->mi_reads_list_head); + list_add_rcu(&result->segment_reads_list, &segment->reads_list_head); + + spin_unlock(&mi->pending_read_lock); wake_up_all(&mi->mi_pending_reads_notif_wq); return result; } +static void free_pending_read_entry(struct rcu_head *entry) +{ + struct pending_read *read; + + read = container_of(entry, struct pending_read, rcu); + + kfree(read); +} + /* Notifies a given data file that pending read is completed. */ static void remove_pending_read(struct data_file *df, struct pending_read *read) { @@ -782,14 +972,17 @@ static void remove_pending_read(struct data_file *df, struct pending_read *read) mi = df->df_mount_info; - mutex_lock(&mi->mi_pending_reads_mutex); - list_del(&read->mi_reads_list); - list_del(&read->segment_reads_list); + spin_lock(&mi->pending_read_lock); + + list_del_rcu(&read->mi_reads_list); + list_del_rcu(&read->segment_reads_list); mi->mi_pending_reads_count--; - mutex_unlock(&mi->mi_pending_reads_mutex); - kfree(read); + spin_unlock(&mi->pending_read_lock); + + /* Don't free. Wait for readers */ + call_rcu(&read->rcu, free_pending_read_entry); } static void notify_pending_reads(struct mount_info *mi, @@ -799,18 +992,22 @@ static void notify_pending_reads(struct mount_info *mi, struct pending_read *entry = NULL; /* Notify pending reads waiting for this block. */ - mutex_lock(&mi->mi_pending_reads_mutex); - list_for_each_entry(entry, &segment->reads_list_head, + rcu_read_lock(); + list_for_each_entry_rcu(entry, &segment->reads_list_head, segment_reads_list) { if (entry->block_index == index) set_read_done(entry); } - mutex_unlock(&mi->mi_pending_reads_mutex); + rcu_read_unlock(); wake_up_all(&segment->new_data_arrival_wq); + + atomic_inc(&mi->mi_blocks_written); + wake_up_all(&mi->mi_blocks_written_notif_wq); } static int wait_for_data_block(struct data_file *df, int block_index, - int timeout_ms, + int min_time_ms, int min_pending_time_ms, + int max_pending_time_ms, struct data_file_block *res_block) { struct data_file_block block = {}; @@ -819,6 +1016,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, struct mount_info *mi = NULL; int error = 0; int wait_res = 0; + u64 time; if (!df || !res_block) return -EFAULT; @@ -826,50 +1024,53 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (block_index < 0 || block_index >= df->df_data_block_count) return -EINVAL; - if (df->df_blockmap_off <= 0) + if (df->df_blockmap_off <= 0 || !df->df_mount_info) return -ENODATA; + mi = df->df_mount_info; segment = get_file_segment(df, block_index); - error = mutex_lock_interruptible(&segment->blockmap_mutex); + + error = down_read_killable(&segment->rwsem); if (error) return error; /* Look up the given block */ error = get_data_file_block(df, block_index, &block); - /* If it's not found, create a pending read */ - if (!error && !is_data_block_present(&block) && timeout_ms != 0) - read = add_pending_read(df, block_index); + up_read(&segment->rwsem); - mutex_unlock(&segment->blockmap_mutex); if (error) return error; /* If the block was found, just return it. No need to wait. */ if (is_data_block_present(&block)) { + if (min_time_ms) + error = msleep_interruptible(min_time_ms); *res_block = block; - return 0; + return error; + } else { + /* If it's not found, create a pending read */ + if (max_pending_time_ms != 0) { + read = add_pending_read(df, block_index); + if (!read) + return -ENOMEM; + } else { + log_block_read(mi, &df->df_id, block_index); + return -ETIME; + } } - mi = df->df_mount_info; - - if (timeout_ms == 0) { - log_block_read(mi, &df->df_id, block_index); - return -ETIME; - } - - if (!read) - return -ENOMEM; + if (min_pending_time_ms) + time = ktime_get_ns(); /* Wait for notifications about block's arrival */ wait_res = wait_event_interruptible_timeout(segment->new_data_arrival_wq, - (is_read_done(read)), - msecs_to_jiffies(timeout_ms)); + (is_read_done(read)), + msecs_to_jiffies(max_pending_time_ms)); /* Woke up, the pending read is no longer needed. */ remove_pending_read(df, read); - read = NULL; if (wait_res == 0) { /* Wait has timed out */ @@ -884,7 +1085,17 @@ static int wait_for_data_block(struct data_file *df, int block_index, return wait_res; } - error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (min_pending_time_ms) { + time = div_u64(ktime_get_ns() - time, 1000000); + if (min_pending_time_ms > time) { + error = msleep_interruptible( + min_pending_time_ms - time); + if (error) + return error; + } + } + + error = down_read_killable(&segment->rwsem); if (error) return error; @@ -906,13 +1117,14 @@ static int wait_for_data_block(struct data_file *df, int block_index, } } - mutex_unlock(&segment->blockmap_mutex); + up_read(&segment->rwsem); return error; } ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp) + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp) { loff_t pos; ssize_t result; @@ -922,7 +1134,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, struct data_file_block block = {}; struct data_file *df = get_incfs_data_file(f); - if (!dst.data || !df) + if (!dst.data || !df || !tmp.data) return -EFAULT; if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) @@ -931,7 +1143,8 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, mi = df->df_mount_info; bf = df->df_backing_file_context->bc_file; - result = wait_for_data_block(df, index, timeout_ms, &block); + result = wait_for_data_block(df, index, min_time_ms, + min_pending_time_ms, max_pending_time_ms, &block); if (result < 0) goto out; @@ -948,7 +1161,8 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, result = incfs_kread(bf, tmp.data, bytes_to_read, pos); if (result == bytes_to_read) { result = - decompress(range(tmp.data, bytes_to_read), dst); + decompress(mi, range(tmp.data, bytes_to_read), + dst, block.db_comp_alg); if (result < 0) { const char *name = bf->f_path.dentry->d_name.name; @@ -998,21 +1212,33 @@ int incfs_process_new_data_block(struct data_file *df, segment = get_file_segment(df, block->block_index); if (!segment) return -EFAULT; + if (block->compression == COMPRESSION_LZ4) flags |= INCFS_BLOCK_COMPRESSED_LZ4; + else if (block->compression == COMPRESSION_ZSTD) + flags |= INCFS_BLOCK_COMPRESSED_ZSTD; + else if (block->compression) + return -EINVAL; - error = mutex_lock_interruptible(&segment->blockmap_mutex); + error = down_read_killable(&segment->rwsem); if (error) return error; error = get_data_file_block(df, block->block_index, &existing_block); + + up_read(&segment->rwsem); + if (error) - goto unlock; + return error; if (is_data_block_present(&existing_block)) { /* Block is already present, nothing to do here */ - goto unlock; + return 0; } + error = down_write_killable(&segment->rwsem); + if (error) + return error; + error = mutex_lock_interruptible(&bfc->bc_mutex); if (!error) { error = incfs_write_data_block_to_backing_file( @@ -1020,11 +1246,13 @@ int incfs_process_new_data_block(struct data_file *df, df->df_blockmap_off, flags); mutex_unlock(&bfc->bc_mutex); } - if (!error) + if (!error) { notify_pending_reads(mi, segment, block->block_index); + atomic_inc(&df->df_data_blocks_written); + } + + up_write(&segment->rwsem); -unlock: - mutex_unlock(&segment->blockmap_mutex); if (error) pr_debug("incfs: %s %d error: %d\n", __func__, block->block_index, error); @@ -1101,6 +1329,9 @@ int incfs_process_new_hash_block(struct data_file *df, hash_area_base, df->df_blockmap_off, df->df_size); mutex_unlock(&bfc->bc_mutex); } + if (!error) + atomic_inc(&df->df_hash_blocks_written); + return error; } @@ -1123,25 +1354,6 @@ static int process_blockmap_md(struct incfs_blockmap *bm, return error; } -static int process_file_attr_md(struct incfs_file_attr *fa, - struct metadata_handler *handler) -{ - struct data_file *df = handler->context; - u16 attr_size = le16_to_cpu(fa->fa_size); - - if (!df) - return -EFAULT; - - if (attr_size > INCFS_MAX_FILE_ATTR_SIZE) - return -E2BIG; - - df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset); - df->n_attr.fa_value_size = attr_size; - df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc); - - return 0; -} - static int process_file_signature_md(struct incfs_file_signature *sg, struct metadata_handler *handler) { @@ -1217,7 +1429,27 @@ out: return error; } -int incfs_scan_metadata_chain(struct data_file *df) +static int process_status_md(struct incfs_status *is, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + + df->df_initial_data_blocks_written = + le32_to_cpu(is->is_data_blocks_written); + atomic_set(&df->df_data_blocks_written, + df->df_initial_data_blocks_written); + + df->df_initial_hash_blocks_written = + le32_to_cpu(is->is_hash_blocks_written); + atomic_set(&df->df_hash_blocks_written, + df->df_initial_hash_blocks_written); + + df->df_status_offset = handler->md_record_offset; + + return 0; +} + +static int incfs_scan_metadata_chain(struct data_file *df) { struct metadata_handler *handler = NULL; int result = 0; @@ -1234,20 +1466,12 @@ int incfs_scan_metadata_chain(struct data_file *df) if (!handler) return -ENOMEM; - /* No writing to the backing file while it's being scanned. */ - error = mutex_lock_interruptible(&bfc->bc_mutex); - if (error) - goto out; - - /* Reading superblock */ handler->md_record_offset = df->df_metadata_off; handler->context = df; handler->handle_blockmap = process_blockmap_md; - handler->handle_file_attr = process_file_attr_md; handler->handle_signature = process_file_signature_md; + handler->handle_status = process_status_md; - pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n", - handler->md_record_offset); while (handler->md_record_offset > 0) { error = incfs_read_next_metadata_record(bfc, handler); if (error) { @@ -1259,15 +1483,11 @@ int incfs_scan_metadata_chain(struct data_file *df) records_count++; } if (error) { - pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n", + pr_warn("incfs: Error %d after reading %d incfs-metadata records.\n", -error, records_count); result = error; - } else { - pr_debug("incfs: Finished reading %d incfs-metadata records.\n", - records_count); + } else result = records_count; - } - mutex_unlock(&bfc->bc_mutex); if (df->df_hash_tree) { int hash_block_count = get_blocks_count_for_size( @@ -1279,7 +1499,6 @@ int incfs_scan_metadata_chain(struct data_file *df) } else if (df->df_data_block_count != df->df_total_block_count) result = -EINVAL; -out: kfree(handler); return result; } @@ -1292,16 +1511,17 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) { bool result = false; - mutex_lock(&mi->mi_pending_reads_mutex); + spin_lock(&mi->pending_read_lock); result = (mi->mi_last_pending_read_number > last_number) && - (mi->mi_pending_reads_count > 0); - mutex_unlock(&mi->mi_pending_reads_mutex); + (mi->mi_pending_reads_count > 0); + spin_unlock(&mi->pending_read_lock); return result; } int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, - int reads_size) + struct incfs_pending_read_info2 *reads2, + int reads_size, int *new_max_sn) { int reported_reads = 0; struct pending_read *entry = NULL; @@ -1312,29 +1532,43 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, if (reads_size <= 0) return 0; - mutex_lock(&mi->mi_pending_reads_mutex); + if (!incfs_fresh_pending_reads_exist(mi, sn_lowerbound)) + return 0; - if (mi->mi_last_pending_read_number <= sn_lowerbound - || mi->mi_pending_reads_count == 0) - goto unlock; + rcu_read_lock(); - list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) { + list_for_each_entry_rcu(entry, &mi->mi_reads_list_head, mi_reads_list) { if (entry->serial_number <= sn_lowerbound) continue; - reads[reported_reads].file_id = entry->file_id; - reads[reported_reads].block_index = entry->block_index; - reads[reported_reads].serial_number = entry->serial_number; - reads[reported_reads].timestamp_us = entry->timestamp_us; - /* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */ + if (reads) { + reads[reported_reads].file_id = entry->file_id; + reads[reported_reads].block_index = entry->block_index; + reads[reported_reads].serial_number = + entry->serial_number; + reads[reported_reads].timestamp_us = + entry->timestamp_us; + } + + if (reads2) { + reads2[reported_reads].file_id = entry->file_id; + reads2[reported_reads].block_index = entry->block_index; + reads2[reported_reads].serial_number = + entry->serial_number; + reads2[reported_reads].timestamp_us = + entry->timestamp_us; + reads2[reported_reads].uid = entry->uid; + } + + if (entry->serial_number > *new_max_sn) + *new_max_sn = entry->serial_number; reported_reads++; if (reported_reads >= reads_size) break; } -unlock: - mutex_unlock(&mi->mi_pending_reads_mutex); + rcu_read_unlock(); return reported_reads; } @@ -1370,8 +1604,9 @@ int incfs_get_uncollected_logs_count(struct mount_info *mi, } int incfs_collect_logged_reads(struct mount_info *mi, - struct read_log_state *reader_state, + struct read_log_state *state, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size) { int dst_idx; @@ -1382,45 +1617,51 @@ int incfs_collect_logged_reads(struct mount_info *mi, head = &log->rl_head; tail = &log->rl_tail; - if (reader_state->generation_id != head->generation_id) { + if (state->generation_id != head->generation_id) { pr_debug("read ptr is wrong generation: %u/%u", - reader_state->generation_id, head->generation_id); + state->generation_id, head->generation_id); - *reader_state = (struct read_log_state){ + *state = (struct read_log_state){ .generation_id = head->generation_id, }; } - if (reader_state->current_record_no < tail->current_record_no) { + if (state->current_record_no < tail->current_record_no) { pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n", - (u32)reader_state->next_offset, - (u32)reader_state->current_pass_no, + (u32)state->next_offset, + (u32)state->current_pass_no, (u32)tail->next_offset, (u32)tail->current_pass_no); - *reader_state = *tail; + *state = *tail; } for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { - if (reader_state->current_record_no == head->current_record_no) + if (state->current_record_no == head->current_record_no) break; - log_read_one_record(log, reader_state); + log_read_one_record(log, state); - reads[dst_idx] = (struct incfs_pending_read_info){ - .file_id = reader_state->base_record.file_id, - .block_index = reader_state->base_record.block_index, - .serial_number = reader_state->current_record_no, - .timestamp_us = reader_state->base_record.absolute_ts_us - }; + if (reads) + reads[dst_idx] = (struct incfs_pending_read_info) { + .file_id = state->base_record.file_id, + .block_index = state->base_record.block_index, + .serial_number = state->current_record_no, + .timestamp_us = + state->base_record.absolute_ts_us, + }; + + if (reads2) + reads2[dst_idx] = (struct incfs_pending_read_info2) { + .file_id = state->base_record.file_id, + .block_index = state->base_record.block_index, + .serial_number = state->current_record_no, + .timestamp_us = + state->base_record.absolute_ts_us, + .uid = state->base_record.uid, + }; } spin_unlock(&log->rl_lock); return dst_idx; } -bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) -{ - if (lhs.len != rhs.len) - return false; - return memcmp(lhs.data, rhs.data, lhs.len) == 0; -} diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 2726867835a8..a63af708fa6d 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -10,9 +10,12 @@ #include #include #include +#include #include #include +#include #include +#include #include @@ -32,13 +35,14 @@ struct full_record { u32 block_index : 30; incfs_uuid_t file_id; u64 absolute_ts_us; + uid_t uid; } __packed; /* 28 bytes */ struct same_file_record { enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */ u32 block_index : 30; u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */ -} __packed; /* 12 bytes */ +} __packed; /* 8 bytes */ struct same_file_next_block { enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */ @@ -99,8 +103,7 @@ struct mount_options { unsigned int readahead_pages; unsigned int read_log_pages; unsigned int read_log_wakeup_count; - bool no_backing_file_cache; - bool no_backing_file_readahead; + bool report_uid; }; struct mount_info { @@ -110,6 +113,8 @@ struct mount_info { struct dentry *mi_index_dir; + struct dentry *mi_incomplete_dir; + const struct cred *mi_owner; struct mount_options mi_options; @@ -123,13 +128,13 @@ struct mount_info { wait_queue_head_t mi_pending_reads_notif_wq; /* - * Protects: + * Protects - RCU safe: * - reads_list_head * - mi_pending_reads_count * - mi_last_pending_read_number * - data_file_segment.reads_list_head */ - struct mutex mi_pending_reads_mutex; + spinlock_t pending_read_lock; /* List of active pending_read objects */ struct list_head mi_reads_list_head; @@ -151,6 +156,23 @@ struct mount_info { void *pending_read_xattr; size_t pending_read_xattr_size; + + /* A queue of waiters who want to be notified about blocks_written */ + wait_queue_head_t mi_blocks_written_notif_wq; + + /* Number of blocks written since mount */ + atomic_t mi_blocks_written; + + /* Per UID read timeouts */ + spinlock_t mi_per_uid_read_timeouts_lock; + struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; + int mi_per_uid_read_timeouts_size; + + /* zstd workspace */ + struct mutex mi_zstd_workspace_mutex; + void *mi_zstd_workspace; + ZSTD_DStream *mi_zstd_stream; + struct delayed_work mi_zstd_cleanup_work; }; struct data_file_block { @@ -172,17 +194,20 @@ struct pending_read { int serial_number; + uid_t uid; + struct list_head mi_reads_list; struct list_head segment_reads_list; + + struct rcu_head rcu; }; struct data_file_segment { wait_queue_head_t new_data_arrival_wq; /* Protects reads and writes from the blockmap */ - /* Good candidate for read/write mutex */ - struct mutex blockmap_mutex; + struct rw_semaphore rwsem; /* List of active pending_read objects belonging to this segment */ /* Protected by mount_info.pending_reads_mutex */ @@ -232,7 +257,23 @@ struct data_file { /* Total number of blocks, data + hash */ int df_total_block_count; - struct file_attr n_attr; + /* For mapped files, the offset into the actual file */ + loff_t df_mapped_offset; + + /* Number of data blocks written to file */ + atomic_t df_data_blocks_written; + + /* Number of data blocks in the status block */ + u32 df_initial_data_blocks_written; + + /* Number of hash blocks written to file */ + atomic_t df_hash_blocks_written; + + /* Number of hash blocks in the status block */ + u32 df_initial_hash_blocks_written; + + /* Offset to status metadata header */ + loff_t df_status_offset; struct mtree *df_hash_tree; @@ -259,6 +300,23 @@ struct dentry_info { struct path backing_path; }; +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + +struct incfs_file_data { + /* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */ + enum FILL_PERMISSION fd_fill_permission; + + /* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */ + int fd_get_block_pos; + + /* And how many filled blocks are there up to that point */ + int fd_filled_data_blocks; + int fd_filled_hash_blocks; +}; + struct mount_info *incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path); @@ -268,19 +326,21 @@ int incfs_realloc_mount_info(struct mount_info *mi, void incfs_free_mount_info(struct mount_info *mi); +char *file_id_to_str(incfs_uuid_t id); +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); void incfs_free_data_file(struct data_file *df); -int incfs_scan_metadata_chain(struct data_file *df); - struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); void incfs_free_dir_file(struct dir_file *dir); ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp); + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp); int incfs_get_filled_blocks(struct data_file *df, + struct incfs_file_data *fd, struct incfs_get_filled_blocks_args *arg); int incfs_read_file_signature(struct data_file *df, struct mem_range dst); @@ -300,11 +360,13 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); */ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, - int reads_size); + struct incfs_pending_read_info2 *reads2, + int reads_size, int *new_max_sn); int incfs_collect_logged_reads(struct mount_info *mi, struct read_log_state *start_state, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size); struct read_log_state incfs_get_log_state(struct mount_info *mi); int incfs_get_uncollected_logs_count(struct mount_info *mi, @@ -389,6 +451,4 @@ static inline int get_blocks_count_for_size(u64 size) return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; } -bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs); - #endif /* _INCFS_DATA_MGMT_H */ diff --git a/fs/incfs/format.c b/fs/incfs/format.c index c56e559b6893..52079b5a45e6 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -40,7 +40,7 @@ void incfs_free_bfc(struct backing_file_context *bfc) kfree(bfc); } -loff_t incfs_get_end_offset(struct file *f) +static loff_t incfs_get_end_offset(struct file *f) { /* * This function assumes that file size and the end-offset @@ -89,11 +89,42 @@ static int truncate_backing_file(struct backing_file_context *bfc, return result; } +static int write_to_bf(struct backing_file_context *bfc, const void *buf, + size_t count, loff_t pos) +{ + ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); + + if (res < 0) + return res; + if (res != count) + return -EIO; + return 0; +} + +static int append_zeros_no_fallocate(struct backing_file_context *bfc, + size_t file_size, size_t len) +{ + u8 buffer[256] = {}; + size_t i; + + for (i = 0; i < len; i += sizeof(buffer)) { + int to_write = len - i > sizeof(buffer) + ? sizeof(buffer) : len - i; + int err = write_to_bf(bfc, buffer, to_write, file_size + i); + + if (err) + return err; + } + + return 0; +} + /* Append a given number of zero bytes to the end of the backing file. */ static int append_zeros(struct backing_file_context *bfc, size_t len) { loff_t file_size = 0; loff_t new_last_byte_offset = 0; + int result; if (!bfc) return -EFAULT; @@ -110,39 +141,11 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) */ file_size = incfs_get_end_offset(bfc->bc_file); new_last_byte_offset = file_size + len - 1; - return vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); -} + result = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + if (result != -EOPNOTSUPP) + return result; -static int write_to_bf(struct backing_file_context *bfc, const void *buf, - size_t count, loff_t pos) -{ - ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); - - if (res < 0) - return res; - if (res != count) - return -EIO; - return 0; -} - -static u32 calc_md_crc(struct incfs_md_header *record) -{ - u32 result = 0; - __le32 saved_crc = record->h_record_crc; - __le64 saved_md_offset = record->h_next_md_offset; - size_t record_size = min_t(size_t, le16_to_cpu(record->h_record_size), - INCFS_MAX_METADATA_RECORD_SIZE); - - /* Zero fields which needs to be excluded from CRC calculation. */ - record->h_record_crc = 0; - record->h_next_md_offset = 0; - result = crc32(0, record, record_size); - - /* Restore excluded fields. */ - record->h_record_crc = saved_crc; - record->h_next_md_offset = saved_md_offset; - - return result; + return append_zeros_no_fallocate(bfc, file_size, len); } /* @@ -168,9 +171,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, record_size = le16_to_cpu(record->h_record_size); file_pos = incfs_get_end_offset(bfc->bc_file); - record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset); record->h_next_md_offset = 0; - record->h_record_crc = cpu_to_le32(calc_md_crc(record)); /* Write the metadata record to the end of the backing file */ record_offset = file_pos; @@ -204,16 +205,6 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, return result; } -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) -{ - if (!bfc) - return -EFAULT; - - return write_to_bf(bfc, &flags, sizeof(flags), - offsetof(struct incfs_file_header, - fh_file_header_flags)); -} - /* * Reserve 0-filled space for the blockmap body, and append * incfs_blockmap metadata record pointing to it. @@ -252,49 +243,6 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, return result; } -/* - * Write file attribute data and metadata record to the backing file. - */ -int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, - struct mem_range value, struct incfs_file_attr *attr) -{ - struct incfs_file_attr file_attr = {}; - int result = 0; - u32 crc = 0; - loff_t value_offset = 0; - - if (!bfc) - return -EFAULT; - - if (value.len > INCFS_MAX_FILE_ATTR_SIZE) - return -ENOSPC; - - LOCK_REQUIRED(bfc->bc_mutex); - - crc = crc32(0, value.data, value.len); - value_offset = incfs_get_end_offset(bfc->bc_file); - file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR; - file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr)); - file_attr.fa_header.h_next_md_offset = cpu_to_le64(0); - file_attr.fa_size = cpu_to_le16((u16)value.len); - file_attr.fa_offset = cpu_to_le64(value_offset); - file_attr.fa_crc = cpu_to_le32(crc); - - result = write_to_bf(bfc, value.data, value.len, value_offset); - if (result) - return result; - - result = append_md_to_backing_file(bfc, &file_attr.fa_header); - if (result) { - /* Error, rollback file changes */ - truncate_backing_file(bfc, value_offset); - } else if (attr) { - *attr = file_attr; - } - - return result; -} - int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size) { @@ -359,10 +307,62 @@ err: return result; } +static int write_new_status_to_backing_file(struct backing_file_context *bfc, + u32 data_blocks_written, + u32 hash_blocks_written) +{ + int result; + loff_t rollback_pos; + struct incfs_status is = { + .is_header = { + .h_md_entry_type = INCFS_MD_STATUS, + .h_record_size = cpu_to_le16(sizeof(is)), + }, + .is_data_blocks_written = cpu_to_le32(data_blocks_written), + .is_hash_blocks_written = cpu_to_le32(hash_blocks_written), + }; + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + rollback_pos = incfs_get_end_offset(bfc->bc_file); + result = append_md_to_backing_file(bfc, &is.is_header); + if (result) + truncate_backing_file(bfc, rollback_pos); + + return result; +} + +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, + loff_t status_offset, + u32 data_blocks_written, + u32 hash_blocks_written) +{ + struct incfs_status is; + int result; + + if (status_offset == 0) + return write_new_status_to_backing_file(bfc, + data_blocks_written, hash_blocks_written); + + result = incfs_kread(bfc->bc_file, &is, sizeof(is), status_offset); + if (result != sizeof(is)) + return -EIO; + + is.is_data_blocks_written = cpu_to_le32(data_blocks_written); + is.is_hash_blocks_written = cpu_to_le32(hash_blocks_written); + result = incfs_kwrite(bfc->bc_file, &is, sizeof(is), status_offset); + if (result != sizeof(is)) + return -EIO; + + return 0; +} + /* * Write a backing file header * It should always be called only on empty file. - * incfs_super_block.s_first_md_offset is 0 for now, but will be updated + * fh.fh_first_md_offset is 0 for now, but will be updated * once first metadata record is added. */ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, @@ -392,6 +392,38 @@ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, return write_to_bf(bfc, &fh, sizeof(fh), file_pos); } +/* + * Write a backing file header for a mapping file + * It should always be called only on empty file. + */ +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size, u64 offset) +{ + struct incfs_file_header fh = {}; + loff_t file_pos = 0; + + if (!bfc) + return -EFAULT; + + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); + fh.fh_header_size = cpu_to_le16(sizeof(fh)); + fh.fh_original_offset = cpu_to_le64(offset); + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); + + fh.fh_mapped_file_size = cpu_to_le64(file_size); + fh.fh_original_uuid = *uuid; + fh.fh_flags = cpu_to_le32(INCFS_FILE_MAPPED); + + LOCK_REQUIRED(bfc->bc_mutex); + + file_pos = incfs_get_end_offset(bfc->bc_file); + if (file_pos != 0) + return -EEXIST; + + return write_to_bf(bfc, &fh, sizeof(fh), file_pos); +} + /* Write a given data block and update file's blockmap to point it. */ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, @@ -467,34 +499,10 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); bm_entry.me_data_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); - bm_entry.me_flags = cpu_to_le16(INCFS_BLOCK_HASH); return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); } -/* Initialize a new image in a given backing file. */ -int incfs_make_empty_backing_file(struct backing_file_context *bfc, - incfs_uuid_t *uuid, u64 file_size) -{ - int result = 0; - - if (!bfc || !bfc->bc_file) - return -EFAULT; - - result = mutex_lock_interruptible(&bfc->bc_mutex); - if (result) - goto out; - - result = truncate_backing_file(bfc, 0); - if (result) - goto out; - - result = incfs_write_fh_to_backing_file(bfc, uuid, file_size); -out: - mutex_unlock(&bfc->bc_mutex); - return result; -} - int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, struct incfs_blockmap_entry *bm_entry) @@ -548,7 +556,6 @@ int incfs_read_file_header(struct backing_file_context *bfc, if (!bfc || !first_md_off) return -EFAULT; - LOCK_REQUIRED(bfc->bc_mutex); bytes_read = incfs_kread(bfc->bc_file, &fh, sizeof(fh), 0); if (bytes_read < 0) return bytes_read; @@ -575,7 +582,7 @@ int incfs_read_file_header(struct backing_file_context *bfc, if (file_size) *file_size = le64_to_cpu(fh.fh_file_size); if (flags) - *flags = le32_to_cpu(fh.fh_file_header_flags); + *flags = le32_to_cpu(fh.fh_flags); return 0; } @@ -590,15 +597,12 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, ssize_t bytes_read = 0; size_t md_record_size = 0; loff_t next_record = 0; - loff_t prev_record = 0; int res = 0; struct incfs_md_header *md_hdr = NULL; if (!bfc || !handler) return -EFAULT; - LOCK_REQUIRED(bfc->bc_mutex); - if (handler->md_record_offset == 0) return -EPERM; @@ -612,7 +616,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, md_hdr = &handler->md_buffer.md_header; next_record = le64_to_cpu(md_hdr->h_next_md_offset); - prev_record = le64_to_cpu(md_hdr->h_prev_md_offset); md_record_size = le16_to_cpu(md_hdr->h_record_size); if (md_record_size > max_md_size) { @@ -632,16 +635,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, return -EBADMSG; } - if (prev_record != handler->md_prev_record_offset) { - pr_warn("incfs: Metadata chain has been corrupted."); - return -EBADMSG; - } - - if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) { - pr_warn("incfs: Metadata CRC mismatch."); - return -EBADMSG; - } - switch (md_hdr->h_md_entry_type) { case INCFS_MD_NONE: break; @@ -651,15 +644,21 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, &handler->md_buffer.blockmap, handler); break; case INCFS_MD_FILE_ATTR: - if (handler->handle_file_attr) - res = handler->handle_file_attr( - &handler->md_buffer.file_attr, handler); + /* + * File attrs no longer supported, ignore section for + * compatibility + */ break; case INCFS_MD_SIGNATURE: if (handler->handle_signature) res = handler->handle_signature( &handler->md_buffer.signature, handler); break; + case INCFS_MD_STATUS: + if (handler->handle_status) + res = handler->handle_status( + &handler->md_buffer.status, handler); + break; default: res = -ENOTSUPP; break; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 1a83349bb2eb..87d10157dfe1 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -72,7 +72,7 @@ * * * +-------------------------------------------+ - * | incfs_super_block |]---+ + * | incfs_file_header |]---+ * +-------------------------------------------+ | * | metadata |<---+ * | incfs_file_signature |]---+ @@ -118,11 +118,12 @@ enum incfs_metadata_type { INCFS_MD_NONE = 0, INCFS_MD_BLOCK_MAP = 1, INCFS_MD_FILE_ATTR = 2, - INCFS_MD_SIGNATURE = 3 + INCFS_MD_SIGNATURE = 3, + INCFS_MD_STATUS = 4, }; enum incfs_file_header_flags { - INCFS_FILE_COMPLETE = 1 << 0, + INCFS_FILE_MAPPED = 1 << 1, }; /* Header included at the beginning of all metadata records on the disk. */ @@ -136,16 +137,16 @@ struct incfs_md_header { __le16 h_record_size; /* - * CRC32 of the metadata record. + * Was: CRC32 of the metadata record. * (e.g. inode, dir entry etc) not just this struct. */ - __le32 h_record_crc; + __le32 h_unused1; /* Offset of the next metadata entry if any */ __le64 h_next_md_offset; - /* Offset of the previous metadata entry if any */ - __le64 h_prev_md_offset; + /* Was: Offset of the previous metadata entry if any */ + __le64 h_unused2; } __packed; @@ -164,25 +165,41 @@ struct incfs_file_header { __le16 fh_data_block_size; /* File flags, from incfs_file_header_flags */ - __le32 fh_file_header_flags; + __le32 fh_flags; - /* Offset of the first metadata record */ - __le64 fh_first_md_offset; + union { + /* Standard incfs file */ + struct { + /* Offset of the first metadata record */ + __le64 fh_first_md_offset; - /* - * Put file specific information after this point - */ + /* Full size of the file's content */ + __le64 fh_file_size; - /* Full size of the file's content */ - __le64 fh_file_size; + /* File uuid */ + incfs_uuid_t fh_uuid; + }; - /* File uuid */ - incfs_uuid_t fh_uuid; + /* Mapped file - INCFS_FILE_MAPPED set in fh_flags */ + struct { + /* Offset in original file */ + __le64 fh_original_offset; + + /* Full size of the file's content */ + __le64 fh_mapped_file_size; + + /* Original file's uuid */ + incfs_uuid_t fh_original_uuid; + }; + }; } __packed; enum incfs_block_map_entry_flags { - INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), - INCFS_BLOCK_HASH = (1 << 1), + INCFS_BLOCK_COMPRESSED_LZ4 = 1, + INCFS_BLOCK_COMPRESSED_ZSTD = 2, + + /* Reserve 3 bits for compression alg */ + INCFS_BLOCK_COMPRESSED_MASK = 7, }; /* Block map entry pointing to an actual location of the data block. */ @@ -211,17 +228,6 @@ struct incfs_blockmap { __le32 m_block_count; } __packed; -/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */ -struct incfs_file_attr { - struct incfs_md_header fa_header; - - __le64 fa_offset; - - __le16 fa_size; - - __le32 fa_crc; -} __packed; - /* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ struct incfs_file_signature { struct incfs_md_header sg_header; @@ -243,6 +249,16 @@ struct incfs_df_signature { u64 hash_offset; }; +struct incfs_status { + struct incfs_md_header is_header; + + __le32 is_data_blocks_written; /* Number of data blocks written */ + + __le32 is_hash_blocks_written; /* Number of hash blocks written */ + + __le32 is_dummy[6]; /* Spare fields */ +} __packed; + /* State of the backing file. */ struct backing_file_context { /* Protects writes to bc_file */ @@ -266,22 +282,20 @@ struct metadata_handler { union { struct incfs_md_header md_header; struct incfs_blockmap blockmap; - struct incfs_file_attr file_attr; struct incfs_file_signature signature; + struct incfs_status status; } md_buffer; int (*handle_blockmap)(struct incfs_blockmap *bm, struct metadata_handler *handler); - int (*handle_file_attr)(struct incfs_file_attr *fa, - struct metadata_handler *handler); int (*handle_signature)(struct incfs_file_signature *sig, struct metadata_handler *handler); + int (*handle_status)(struct incfs_status *sig, + struct metadata_handler *handler); }; #define INCFS_MAX_METADATA_RECORD_SIZE \ FIELD_SIZEOF(struct metadata_handler, md_buffer) -loff_t incfs_get_end_offset(struct file *f); - /* Backing file context management */ struct backing_file_context *incfs_alloc_bfc(struct file *backing_file); @@ -294,6 +308,9 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size, u64 offset); + int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, loff_t bm_base_off, @@ -306,16 +323,13 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, loff_t bm_base_off, loff_t file_size); -int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, - struct mem_range value, struct incfs_file_attr *attr); - int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size); -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); - -int incfs_make_empty_backing_file(struct backing_file_context *bfc, - incfs_uuid_t *uuid, u64 file_size); +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, + loff_t status_offset, + u32 data_blocks_written, + u32 hash_blocks_written); /* Reading stuff */ int incfs_read_file_header(struct backing_file_context *bfc, diff --git a/fs/incfs/main.c b/fs/incfs/main.c index e65d0d895128..2b8161f6c83a 100644 --- a/fs/incfs/main.c +++ b/fs/incfs/main.c @@ -30,8 +30,17 @@ static ssize_t corefs_show(struct kobject *kobj, static struct kobj_attribute corefs_attr = __ATTR_RO(corefs); +static ssize_t report_uid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "supported\n"); +} + +static struct kobj_attribute report_uid_attr = __ATTR_RO(report_uid); + static struct attribute *attributes[] = { &corefs_attr.attr, + &report_uid_attr.attr, NULL, }; diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c new file mode 100644 index 000000000000..768abd4269cf --- /dev/null +++ b/fs/incfs/pseudo_files.c @@ -0,0 +1,1280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ + +#include +#include +#include +#include +#include + +#include + +#include "pseudo_files.h" + +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" +#include "vfs.h" + +#define INCFS_PENDING_READS_INODE 2 +#define INCFS_LOG_INODE 3 +#define INCFS_BLOCKS_WRITTEN_INODE 4 +#define READ_WRITE_FILE_MODE 0666 + +/******************************************************************************* + * .log pseudo file definition + ******************************************************************************/ +static const char log_file_name[] = INCFS_LOG_FILENAME; +static const struct mem_range log_file_name_range = { + .data = (u8 *)log_file_name, + .len = ARRAY_SIZE(log_file_name) - 1 +}; + +/* State of an open .log file, unique for each file descriptor. */ +struct log_file_state { + struct read_log_state state; +}; + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct log_file_state *log_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + int total_reads_collected = 0; + int rl_size; + ssize_t result = 0; + bool report_uid; + unsigned long page = 0; + struct incfs_pending_read_info *reads_buf = NULL; + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + ssize_t reads_to_collect; + ssize_t reads_per_page; + + if (!mi) + return -EFAULT; + + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + reads_per_page = PAGE_SIZE / record_size; + + rl_size = READ_ONCE(mi->mi_log.rl_size); + if (rl_size == 0) + return 0; + + page = __get_free_page(GFP_NOFS); + if (!page) + return -ENOMEM; + + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + + reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); + while (reads_to_collect > 0) { + struct read_log_state next_state; + int reads_collected; + + memcpy(&next_state, &log_state->state, sizeof(next_state)); + reads_collected = incfs_collect_logged_reads( + mi, &next_state, reads_buf, reads_buf2, + min_t(ssize_t, reads_to_collect, reads_per_page)); + if (reads_collected <= 0) { + result = total_reads_collected ? + total_reads_collected * record_size : + reads_collected; + goto out; + } + if (copy_to_user(buf, (void *) page, + reads_collected * record_size)) { + result = total_reads_collected ? + total_reads_collected * record_size : + -EFAULT; + goto out; + } + + memcpy(&log_state->state, &next_state, sizeof(next_state)); + total_reads_collected += reads_collected; + buf += reads_collected * record_size; + reads_to_collect -= reads_collected; + } + + result = total_reads_collected * record_size; + *ppos = 0; +out: + free_page(page); + return result; +} + +static __poll_t log_poll(struct file *file, poll_table *wait) +{ + struct log_file_state *log_state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + int count; + __poll_t ret = 0; + + poll_wait(file, &mi->mi_log.ml_notif_wq, wait); + count = incfs_get_uncollected_logs_count(mi, &log_state->state); + if (count >= mi->mi_options.read_log_wakeup_count) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int log_open(struct inode *inode, struct file *file) +{ + struct log_file_state *log_state = NULL; + struct mount_info *mi = get_mount_info(file_superblock(file)); + + log_state = kzalloc(sizeof(*log_state), GFP_NOFS); + if (!log_state) + return -ENOMEM; + + log_state->state = incfs_get_log_state(mi); + file->private_data = log_state; + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations incfs_log_file_ops = { + .read = log_read, + .poll = log_poll, + .open = log_open, + .release = log_release, + .llseek = noop_llseek, +}; + +/******************************************************************************* + * .pending_reads pseudo file definition + ******************************************************************************/ +static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; +static const struct mem_range pending_reads_file_name_range = { + .data = (u8 *)pending_reads_file_name, + .len = ARRAY_SIZE(pending_reads_file_name) - 1 +}; + +/* State of an open .pending_reads file, unique for each file descriptor. */ +struct pending_reads_state { + /* A serial number of the last pending read obtained from this file. */ + int last_pending_read_sn; +}; + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct pending_reads_state *pr_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + bool report_uid; + unsigned long page = 0; + struct incfs_pending_read_info *reads_buf = NULL; + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + size_t reads_to_collect; + int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); + int new_max_sn = last_known_read_sn; + int reads_collected = 0; + ssize_t result = 0; + + if (!mi) + return -EFAULT; + + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) + return 0; + + page = get_zeroed_page(GFP_NOFS); + if (!page) + return -ENOMEM; + + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / record_size, reads_to_collect); + + reads_collected = incfs_collect_pending_reads(mi, last_known_read_sn, + reads_buf, reads_buf2, reads_to_collect, + &new_max_sn); + + if (reads_collected < 0) { + result = reads_collected; + goto out; + } + + /* + * Just to make sure that we don't accidentally copy more data + * to reads buffer than userspace can handle. + */ + reads_collected = min_t(size_t, reads_collected, reads_to_collect); + result = reads_collected * record_size; + + /* Copy reads info to the userspace buffer */ + if (copy_to_user(buf, (void *)page, result)) { + result = -EFAULT; + goto out; + } + + WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); + *ppos = 0; + +out: + free_page(page); + return result; +} + +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) +{ + struct pending_reads_state *state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + __poll_t ret = 0; + + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); + if (incfs_fresh_pending_reads_exist(mi, + state->last_pending_read_sn)) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int pending_reads_open(struct inode *inode, struct file *file) +{ + struct pending_reads_state *state = NULL; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + + file->private_data = state; + return 0; +} + +static int pending_reads_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static long ioctl_permit_fill(struct file *f, void __user *arg) +{ + struct incfs_permit_fill __user *usr_permit_fill = arg; + struct incfs_permit_fill permit_fill; + long error = 0; + struct file *file = NULL; + struct incfs_file_data *fd; + + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) + return -EFAULT; + + file = fget(permit_fill.file_descriptor); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (file->f_op != &incfs_file_ops) { + error = -EPERM; + goto out; + } + + if (file->f_inode->i_sb != f->f_inode->i_sb) { + error = -EPERM; + goto out; + } + + fd = file->private_data; + + switch (fd->fd_fill_permission) { + case CANT_FILL: + fd->fd_fill_permission = CAN_FILL; + break; + + case CAN_FILL: + pr_debug("CAN_FILL already set"); + break; + + default: + pr_warn("Invalid file private data"); + error = -EFAULT; + goto out; + } + +out: + fput(file); + return error; +} + +static int chmod(struct dentry *dentry, umode_t mode) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + struct iattr newattrs; + int error; + +retry_deleg: + inode_lock(inode); + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(dentry, &newattrs, &delegated_inode); + inode_unlock(inode); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + return error; +} + +static bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) +{ + if (lhs.len != rhs.len) + return false; + return memcmp(lhs.data, rhs.data, lhs.len) == 0; +} + +static bool is_pseudo_filename(struct mem_range name) +{ + if (incfs_equal_ranges(pending_reads_file_name_range, name)) + return true; + if (incfs_equal_ranges(log_file_name_range, name)) + return true; + + return false; +} + +static int validate_name(char *file_name) +{ + struct mem_range name = range(file_name, strlen(file_name)); + int i = 0; + + if (name.len > INCFS_MAX_NAME_LEN) + return -ENAMETOOLONG; + + if (is_pseudo_filename(name)) + return -EINVAL; + + for (i = 0; i < name.len; i++) + if (name.data[i] == '/') + return -EINVAL; + + return 0; +} + +static int dir_relative_path_resolve( + struct mount_info *mi, + const char __user *relative_path, + struct path *result_path) +{ + struct path *base_path = &mi->mi_backing_dir_path; + int dir_fd = get_unused_fd_flags(0); + struct file *dir_f = NULL; + int error = 0; + + if (dir_fd < 0) + return dir_fd; + + dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + + if (IS_ERR(dir_f)) { + error = PTR_ERR(dir_f); + goto out; + } + fd_install(dir_fd, dir_f); + + if (!relative_path) { + /* No relative path given, just return the base dir. */ + *result_path = *base_path; + path_get(result_path); + goto out; + } + + error = user_path_at_empty(dir_fd, relative_path, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + +out: + ksys_close(dir_fd); + if (error) + pr_debug("incfs: %s %d\n", __func__, error); + return error; +} + +static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, + u64 size) +{ + u8 *result; + + if (!original) + return range(NULL, 0); + + if (size > INCFS_MAX_SIGNATURE_SIZE) + return range(ERR_PTR(-EFAULT), 0); + + result = kzalloc(size, GFP_NOFS | __GFP_COMP); + if (!result) + return range(ERR_PTR(-ENOMEM), 0); + + if (copy_from_user(result, original, size)) { + kfree(result); + return range(ERR_PTR(-EFAULT), 0); + } + + return range(result, size); +} + +static int init_new_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + u8 __user *user_signature_info, u64 signature_size) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + u32 block_count; + struct mem_range raw_signature = { NULL }; + struct mtree *hash_tree = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + fput(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_fh_to_backing_file(bfc, uuid, size); + if (error) + goto out; + + block_count = (u32)get_blocks_count_for_size(size); + + if (user_signature_info) { + raw_signature = incfs_copy_signature_info_from_user( + user_signature_info, signature_size); + + if (IS_ERR(raw_signature.data)) { + error = PTR_ERR(raw_signature.data); + raw_signature.data = NULL; + goto out; + } + + hash_tree = incfs_alloc_mtree(raw_signature, block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + + error = incfs_write_signature_to_backing_file( + bfc, raw_signature, hash_tree->hash_tree_area_size); + if (error) + goto out; + + block_count += get_blocks_count_for_size( + hash_tree->hash_tree_area_size); + } + + if (block_count) + error = incfs_write_blockmap_to_backing_file(bfc, block_count); + + if (error) + goto out; +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + incfs_free_mtree(hash_tree); + kfree(raw_signature.data); + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static long ioctl_create_file(struct mount_info *mi, + struct incfs_new_file_args __user *usr_args) +{ + struct incfs_new_file_args args; + char *file_id_str = NULL; + struct dentry *index_file_dentry = NULL; + struct dentry *named_file_dentry = NULL; + struct dentry *incomplete_file_dentry = NULL; + struct path parent_dir_path = {}; + struct inode *index_dir_inode = NULL; + __le64 size_attr_value = 0; + char *file_name = NULL; + char *attr_value = NULL; + int error = 0; + bool locked = false; + bool index_linked = false; + bool name_linked = false; + bool incomplete_linked = false; + + if (!mi || !mi->mi_index_dir || !mi->mi_incomplete_dir) { + error = -EFAULT; + goto out; + } + + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { + error = -EFAULT; + goto out; + } + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + file_id_str = file_id_to_str(args.file_id); + if (!file_id_str) { + error = -ENOMEM; + goto out; + } + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + goto out; + locked = true; + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + if (parent_dir_path.dentry == mi->mi_incomplete_dir) { + /* Can't create a file directly inside .incomplete */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!named_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(named_file_dentry)) { + error = PTR_ERR(named_file_dentry); + named_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(named_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + + /* Look up a dentry in the incomplete dir. It should be negative. */ + incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir, + file_id_str); + if (!incomplete_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(incomplete_file_dentry)) { + error = PTR_ERR(incomplete_file_dentry); + incomplete_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(incomplete_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + + /* Look up a dentry in the .index dir. It should be negative. */ + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (!index_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(index_file_dentry)) { + /* File with this ID already exists in index. */ + error = -EEXIST; + goto out; + } + + /* Creating a file in the .index dir. */ + index_dir_inode = d_inode(mi->mi_index_dir); + inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); + error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, + true); + inode_unlock(index_dir_inode); + + if (error) + goto out; + if (!d_really_is_positive(index_file_dentry)) { + error = -EINVAL; + goto out; + } + + error = chmod(index_file_dentry, args.mode | 0222); + if (error) { + pr_debug("incfs: chmod err: %d\n", error); + goto out; + } + + /* Save the file's ID as an xattr for easy fetching in future. */ + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, + file_id_str, strlen(file_id_str), XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto out; + } + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto out; + } + + /* Save the file's attribute as an xattr */ + if (args.file_attr_len && args.file_attr) { + if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto out; + } + + attr_value = kmalloc(args.file_attr_len, GFP_NOFS); + if (!attr_value) { + error = -ENOMEM; + goto out; + } + + if (copy_from_user(attr_value, + u64_to_user_ptr(args.file_attr), + args.file_attr_len) > 0) { + error = -EFAULT; + goto out; + } + + error = vfs_setxattr(index_file_dentry, + INCFS_XATTR_METADATA_NAME, + attr_value, args.file_attr_len, + XATTR_CREATE); + + if (error) + goto out; + } + + /* Initializing a newly created file. */ + error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, + range(attr_value, args.file_attr_len), + (u8 __user *)args.signature_info, + args.signature_size); + if (error) + goto out; + index_linked = true; + + /* Linking a file with its real name from the requested dir. */ + error = incfs_link(index_file_dentry, named_file_dentry); + if (error) + goto out; + name_linked = true; + + if (args.size) { + /* Linking a file with its incomplete entry */ + error = incfs_link(index_file_dentry, incomplete_file_dentry); + if (error) + goto out; + incomplete_linked = true; + } + +out: + if (error) { + pr_debug("incfs: %s err:%d\n", __func__, error); + if (index_linked) + incfs_unlink(index_file_dentry); + if (name_linked) + incfs_unlink(named_file_dentry); + if (incomplete_linked) + incfs_unlink(incomplete_file_dentry); + } + + kfree(file_id_str); + kfree(file_name); + kfree(attr_value); + dput(named_file_dentry); + dput(index_file_dentry); + dput(incomplete_file_dentry); + path_put(&parent_dir_path); + if (locked) + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static int init_new_mapped_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, u64 offset) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + fput(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_mapping_fh_to_backing_file(bfc, uuid, size, offset); + if (error) + goto out; + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static long ioctl_create_mapped_file(struct mount_info *mi, void __user *arg) +{ + struct incfs_create_mapped_file_args __user *args_usr_ptr = arg; + struct incfs_create_mapped_file_args args = {}; + char *file_name; + int error = 0; + struct path parent_dir_path = {}; + char *source_file_name = NULL; + struct dentry *source_file_dentry = NULL; + u64 source_file_size; + struct dentry *file_dentry = NULL; + struct inode *parent_inode; + __le64 size_attr_value; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + if (args.source_offset % INCFS_DATA_FILE_BLOCK_SIZE) { + error = -EINVAL; + goto out; + } + + /* Validate file mapping is in range */ + source_file_name = file_id_to_str(args.source_file_id); + if (!source_file_name) { + pr_warn("Failed to alloc source_file_name\n"); + error = -ENOMEM; + goto out; + } + + source_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, + source_file_name); + if (!source_file_dentry) { + pr_warn("Source file does not exist\n"); + error = -EINVAL; + goto out; + } + if (IS_ERR(source_file_dentry)) { + pr_warn("Error opening source file\n"); + error = PTR_ERR(source_file_dentry); + source_file_dentry = NULL; + goto out; + } + if (!d_really_is_positive(source_file_dentry)) { + pr_warn("Source file dentry negative\n"); + error = -EINVAL; + goto out; + } + + error = vfs_getxattr(source_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value)); + if (error < 0) + goto out; + + if (error != sizeof(size_attr_value)) { + pr_warn("Mapped file has no size attr\n"); + error = -EINVAL; + goto out; + } + + source_file_size = le64_to_cpu(size_attr_value); + if (args.source_offset + args.size > source_file_size) { + pr_warn("Mapped file out of range\n"); + error = -EINVAL; + goto out; + } + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(file_dentry)) { + error = PTR_ERR(file_dentry); + file_dentry = NULL; + goto out; + } + if (d_really_is_positive(file_dentry)) { + error = -EEXIST; + goto out; + } + + parent_inode = d_inode(parent_dir_path.dentry); + inode_lock_nested(parent_inode, I_MUTEX_PARENT); + error = vfs_create(parent_inode, file_dentry, args.mode | 0222, true); + inode_unlock(parent_inode); + if (error) + goto out; + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_file; + } + + error = init_new_mapped_file(mi, file_dentry, &args.source_file_id, + args.size, args.source_offset); + if (error) + goto delete_file; + + goto out; + +delete_file: + incfs_unlink(file_dentry); + +out: + dput(file_dentry); + dput(source_file_dentry); + path_put(&parent_dir_path); + kfree(file_name); + kfree(source_file_name); + return error; +} + +static long ioctl_get_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_get_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_get_read_timeouts_args args = {}; + int error = 0; + struct incfs_per_uid_read_timeouts *buffer; + int size; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + if (args.timeouts_array_size_out > INCFS_DATA_FILE_BLOCK_SIZE) + return -EINVAL; + + buffer = kzalloc(args.timeouts_array_size_out, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + size = mi->mi_per_uid_read_timeouts_size; + if (args.timeouts_array_size < size) + error = -E2BIG; + else if (size) + memcpy(buffer, mi->mi_per_uid_read_timeouts, size); + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + + args.timeouts_array_size_out = size; + if (!error && size) + if (copy_to_user(u64_to_user_ptr(args.timeouts_array), buffer, + size)) + error = -EFAULT; + + if (!error || error == -E2BIG) + if (copy_to_user(args_usr_ptr, &args, sizeof(args)) > 0) + error = -EFAULT; + + kfree(buffer); + return error; +} + +static long ioctl_set_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_set_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_set_read_timeouts_args args = {}; + int error = 0; + int size; + struct incfs_per_uid_read_timeouts *buffer = NULL, *tmp; + int i; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + size = args.timeouts_array_size; + if (size) { + if (size > INCFS_DATA_FILE_BLOCK_SIZE || + size % sizeof(*buffer) != 0) + return -EINVAL; + + buffer = kzalloc(size, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + if (copy_from_user(buffer, u64_to_user_ptr(args.timeouts_array), + size)) { + error = -EINVAL; + goto out; + } + + for (i = 0; i < size / sizeof(*buffer); ++i) { + struct incfs_per_uid_read_timeouts *t = &buffer[i]; + + if (t->min_pending_time_ms > t->max_pending_time_ms) { + error = -EINVAL; + goto out; + } + } + } + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + mi->mi_per_uid_read_timeouts_size = size; + tmp = mi->mi_per_uid_read_timeouts; + mi->mi_per_uid_read_timeouts = buffer; + buffer = tmp; + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + +out: + kfree(buffer); + return error; +} + +static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, + unsigned long arg) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + + switch (req) { + case INCFS_IOC_CREATE_FILE: + return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_PERMIT_FILL: + return ioctl_permit_fill(f, (void __user *)arg); + case INCFS_IOC_CREATE_MAPPED_FILE: + return ioctl_create_mapped_file(mi, (void __user *)arg); + case INCFS_IOC_GET_READ_TIMEOUTS: + return ioctl_get_read_timeouts(mi, (void __user *)arg); + case INCFS_IOC_SET_READ_TIMEOUTS: + return ioctl_set_read_timeouts(mi, (void __user *)arg); + default: + return -EINVAL; + } +} + +static const struct file_operations incfs_pending_read_file_ops = { + .read = pending_reads_read, + .poll = pending_reads_poll, + .open = pending_reads_open, + .release = pending_reads_release, + .llseek = noop_llseek, + .unlocked_ioctl = pending_reads_dispatch_ioctl, + .compat_ioctl = pending_reads_dispatch_ioctl +}; + +/******************************************************************************* + * .blocks_written pseudo file definition + ******************************************************************************/ +static const char blocks_written_file_name[] = INCFS_BLOCKS_WRITTEN_FILENAME; +static const struct mem_range blocks_written_file_name_range = { + .data = (u8 *)blocks_written_file_name, + .len = ARRAY_SIZE(blocks_written_file_name) - 1 +}; + +/* State of an open .blocks_written file, unique for each file descriptor. */ +struct blocks_written_file_state { + unsigned long blocks_written; +}; + +static ssize_t blocks_written_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct blocks_written_file_state *state = f->private_data; + unsigned long blocks_written; + char string[21]; + int result = 0; + + if (!mi) + return -EFAULT; + + blocks_written = atomic_read(&mi->mi_blocks_written); + if (state->blocks_written == blocks_written) + return 0; + + result = snprintf(string, sizeof(string), "%lu", blocks_written); + if (result > len) + result = len; + if (copy_to_user(buf, string, result)) + return -EFAULT; + + state->blocks_written = blocks_written; + return result; +} + +static __poll_t blocks_written_poll(struct file *f, poll_table *wait) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct blocks_written_file_state *state = f->private_data; + unsigned long blocks_written; + + if (!mi) + return 0; + + poll_wait(f, &mi->mi_blocks_written_notif_wq, wait); + blocks_written = atomic_read(&mi->mi_blocks_written); + if (state->blocks_written == blocks_written) + return 0; + + return EPOLLIN | EPOLLRDNORM; +} + +static int blocks_written_open(struct inode *inode, struct file *file) +{ + struct blocks_written_file_state *state = + kzalloc(sizeof(*state), GFP_NOFS); + + if (!state) + return -ENOMEM; + + state->blocks_written = -1; + file->private_data = state; + return 0; +} + +static int blocks_written_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations incfs_blocks_written_file_ops = { + .read = blocks_written_read, + .poll = blocks_written_poll, + .open = blocks_written_open, + .release = blocks_written_release, + .llseek = noop_llseek, +}; + +/******************************************************************************* + * Generic inode lookup functionality + ******************************************************************************/ +static bool get_pseudo_inode(int ino, struct inode *inode) +{ + inode->i_ctime = (struct timespec64){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = ino; + inode->i_private = NULL; + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + inode->i_op = &incfs_file_inode_ops; + + switch (ino) { + case INCFS_PENDING_READS_INODE: + inode->i_fop = &incfs_pending_read_file_ops; + return true; + + case INCFS_LOG_INODE: + inode->i_fop = &incfs_log_file_ops; + return true; + + case INCFS_BLOCKS_WRITTEN_INODE: + inode->i_fop = &incfs_blocks_written_file_ops; + return true; + + default: + return false; + } +} + +struct inode_search { + unsigned long ino; +}; + +static int inode_test(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + + return inode->i_ino == search->ino; +} + +static int inode_set(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + + if (get_pseudo_inode(search->ino, inode)) + return 0; + + /* Unknown inode requested. */ + return -EINVAL; +} + +static struct inode *fetch_inode(struct super_block *sb, unsigned long ino) +{ + struct inode_search search = { + .ino = ino + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry) +{ + struct mem_range name_range = + range((u8 *)dentry->d_name.name, dentry->d_name.len); + unsigned long ino; + struct inode *inode; + + if (incfs_equal_ranges(pending_reads_file_name_range, name_range)) + ino = INCFS_PENDING_READS_INODE; + else if (incfs_equal_ranges(log_file_name_range, name_range)) + ino = INCFS_LOG_INODE; + else if (incfs_equal_ranges(blocks_written_file_name_range, name_range)) + ino = INCFS_BLOCKS_WRITTEN_INODE; + else + return -ENOENT; + + inode = fetch_inode(sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_add(dentry, inode); + return 0; +} + +int emit_pseudo_files(struct dir_context *ctx) +{ + if (ctx->pos == 0) { + if (!dir_emit(ctx, pending_reads_file_name, + ARRAY_SIZE(pending_reads_file_name) - 1, + INCFS_PENDING_READS_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + + if (ctx->pos == 1) { + if (!dir_emit(ctx, log_file_name, + ARRAY_SIZE(log_file_name) - 1, + INCFS_LOG_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + + if (ctx->pos == 2) { + if (!dir_emit(ctx, blocks_written_file_name, + ARRAY_SIZE(blocks_written_file_name) - 1, + INCFS_BLOCKS_WRITTEN_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + + return 0; +} diff --git a/fs/incfs/pseudo_files.h b/fs/incfs/pseudo_files.h new file mode 100644 index 000000000000..358bcabfe49a --- /dev/null +++ b/fs/incfs/pseudo_files.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020 Google LLC + */ + +#ifndef _INCFS_PSEUDO_FILES_H +#define _INCFS_PSEUDO_FILES_H + +#define PSEUDO_FILE_COUNT 3 +#define INCFS_START_INO_RANGE 10 + +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry); +int emit_pseudo_files(struct dir_context *ctx); + +#endif diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index a13330eabe7d..40192863eb4e 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -4,32 +4,21 @@ */ #include -#include -#include #include #include #include #include #include -#include #include -#include -#include #include #include "vfs.h" + #include "data_mgmt.h" #include "format.h" -#include "integrity.h" #include "internal.h" - -#define INCFS_PENDING_READS_INODE 2 -#define INCFS_LOG_INODE 3 -#define INCFS_START_INO_RANGE 10 -#define READ_FILE_MODE 0444 -#define READ_EXEC_FILE_MODE 0555 -#define READ_WRITE_FILE_MODE 0666 +#include "pseudo_files.h" static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); @@ -52,18 +41,6 @@ static int file_release(struct inode *inode, struct file *file); static int read_single_page(struct file *f, struct page *page); static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); -static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos); -static __poll_t pending_reads_poll(struct file *file, poll_table *wait); -static int pending_reads_open(struct inode *inode, struct file *file); -static int pending_reads_release(struct inode *, struct file *); - -static ssize_t log_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos); -static __poll_t log_poll(struct file *file, poll_table *wait); -static int log_open(struct inode *inode, struct file *file); -static int log_release(struct inode *, struct file *); - static struct inode *alloc_inode(struct super_block *sb); static void free_inode(struct inode *inode); static void evict_inode(struct inode *inode); @@ -109,8 +86,6 @@ static const struct file_operations incfs_dir_fops = { .iterate = iterate_incfs_dir, .open = file_open, .release = file_release, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl }; static const struct dentry_operations incfs_dentry_ops = { @@ -123,7 +98,7 @@ static const struct address_space_operations incfs_address_space_ops = { /* .readpages = readpages */ }; -static const struct file_operations incfs_file_ops = { +const struct file_operations incfs_file_ops = { .open = file_open, .release = file_release, .read_iter = generic_file_read_iter, @@ -134,32 +109,7 @@ static const struct file_operations incfs_file_ops = { .compat_ioctl = dispatch_ioctl }; -enum FILL_PERMISSION { - CANT_FILL = 0, - CAN_FILL = 1, -}; - -static const struct file_operations incfs_pending_read_file_ops = { - .read = pending_reads_read, - .poll = pending_reads_poll, - .open = pending_reads_open, - .release = pending_reads_release, - .llseek = noop_llseek, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl -}; - -static const struct file_operations incfs_log_file_ops = { - .read = log_read, - .poll = log_poll, - .open = log_open, - .release = log_release, - .llseek = noop_llseek, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl -}; - -static const struct inode_operations incfs_file_inode_ops = { +const struct inode_operations incfs_file_inode_ops = { .setattr = incfs_setattr, .getattr = simple_getattr, .listxattr = incfs_listxattr @@ -191,17 +141,6 @@ static const struct xattr_handler *incfs_xattr_ops[] = { NULL, }; -/* State of an open .pending_reads file, unique for each file descriptor. */ -struct pending_reads_state { - /* A serial number of the last pending read obtained from this file. */ - int last_pending_read_sn; -}; - -/* State of an open .log file, unique for each file descriptor. */ -struct log_file_state { - struct read_log_state state; -}; - struct inode_search { unsigned long ino; @@ -213,32 +152,18 @@ struct inode_search { enum parse_parameter { Opt_read_timeout, Opt_readahead_pages, - Opt_no_backing_file_cache, - Opt_no_backing_file_readahead, Opt_rlog_pages, Opt_rlog_wakeup_cnt, + Opt_report_uid, Opt_err }; -static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; -static struct mem_range pending_reads_file_name_range = { - .data = (u8 *)pending_reads_file_name, - .len = ARRAY_SIZE(pending_reads_file_name) - 1 -}; - -static const char log_file_name[] = INCFS_LOG_FILENAME; -static struct mem_range log_file_name_range = { - .data = (u8 *)log_file_name, - .len = ARRAY_SIZE(log_file_name) - 1 -}; - static const match_table_t option_tokens = { { Opt_read_timeout, "read_timeout_ms=%u" }, { Opt_readahead_pages, "readahead=%u" }, - { Opt_no_backing_file_cache, "no_bf_cache=%u" }, - { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, { Opt_rlog_pages, "rlog_pages=%u" }, { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, + { Opt_report_uid, "report_uid" }, { Opt_err, NULL } }; @@ -251,12 +176,13 @@ static int parse_options(struct mount_options *opts, char *str) if (opts == NULL) return -EFAULT; - opts->read_timeout_ms = 1000; /* Default: 1s */ - opts->readahead_pages = 10; - opts->read_log_pages = 2; - opts->read_log_wakeup_count = 10; - opts->no_backing_file_cache = false; - opts->no_backing_file_readahead = false; + *opts = (struct mount_options) { + .read_timeout_ms = 1000, /* Default: 1s */ + .readahead_pages = 10, + .read_log_pages = 2, + .read_log_wakeup_count = 10, + }; + if (str == NULL || *str == 0) return 0; @@ -279,16 +205,6 @@ static int parse_options(struct mount_options *opts, char *str) return -EINVAL; opts->readahead_pages = value; break; - case Opt_no_backing_file_cache: - if (match_int(&args[0], &value)) - return -EINVAL; - opts->no_backing_file_cache = (value != 0); - break; - case Opt_no_backing_file_readahead: - if (match_int(&args[0], &value)) - return -EINVAL; - opts->no_backing_file_readahead = (value != 0); - break; case Opt_rlog_pages: if (match_int(&args[0], &value)) return -EINVAL; @@ -299,6 +215,9 @@ static int parse_options(struct mount_options *opts, char *str) return -EINVAL; opts->read_log_wakeup_count = value; break; + case Opt_report_uid: + opts->report_uid = true; + break; default: return -EINVAL; } @@ -307,21 +226,6 @@ static int parse_options(struct mount_options *opts, char *str) return 0; } -static struct super_block *file_superblock(struct file *f) -{ - struct inode *inode = file_inode(f); - - return inode->i_sb; -} - -static struct mount_info *get_mount_info(struct super_block *sb) -{ - struct mount_info *result = sb->s_fs_info; - - WARN_ON(!result); - return result; -} - /* Read file size from the attribute. Quicker than reading the header */ static u64 read_size_attr(struct dentry *backing_dentry) { @@ -341,96 +245,53 @@ static int inode_test(struct inode *inode, void *opaque) { struct inode_search *search = opaque; struct inode_info *node = get_incfs_node(inode); + struct inode *backing_inode = d_inode(search->backing_dentry); if (!node) return 0; - if (search->backing_dentry) { - struct inode *backing_inode = d_inode(search->backing_dentry); - - return (node->n_backing_inode == backing_inode) && - inode->i_ino == search->ino; - } else - return inode->i_ino == search->ino; + return node->n_backing_inode == backing_inode && + inode->i_ino == search->ino; } static int inode_set(struct inode *inode, void *opaque) { struct inode_search *search = opaque; struct inode_info *node = get_incfs_node(inode); + struct dentry *backing_dentry = search->backing_dentry; + struct inode *backing_inode = d_inode(backing_dentry); - if (search->backing_dentry) { - /* It's a regular inode that has corresponding backing inode */ - struct dentry *backing_dentry = search->backing_dentry; - struct inode *backing_inode = d_inode(backing_dentry); - - fsstack_copy_attr_all(inode, backing_inode); - if (S_ISREG(inode->i_mode)) { - u64 size = search->size; - - inode->i_size = size; - inode->i_blocks = get_blocks_count_for_size(size); - inode->i_mapping->a_ops = &incfs_address_space_ops; - inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_file_ops; - inode->i_mode &= ~0222; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_size = 0; - inode->i_blocks = 1; - inode->i_mapping->a_ops = &incfs_address_space_ops; - inode->i_op = &incfs_dir_inode_ops; - inode->i_fop = &incfs_dir_fops; - } else { - pr_warn_once("incfs: Unexpected inode type\n"); - return -EBADF; - } - - ihold(backing_inode); - node->n_backing_inode = backing_inode; - node->n_mount_info = get_mount_info(inode->i_sb); - inode->i_ctime = backing_inode->i_ctime; - inode->i_mtime = backing_inode->i_mtime; - inode->i_atime = backing_inode->i_atime; - inode->i_ino = backing_inode->i_ino; - if (backing_inode->i_ino < INCFS_START_INO_RANGE) { - pr_warn("incfs: ino conflict with backing FS %ld\n", - backing_inode->i_ino); - } - - return 0; - } else if (search->ino == INCFS_PENDING_READS_INODE) { - /* It's an inode for .pending_reads pseudo file. */ - - inode->i_ctime = (struct timespec64){}; - inode->i_mtime = inode->i_ctime; - inode->i_atime = inode->i_ctime; - inode->i_size = 0; - inode->i_ino = INCFS_PENDING_READS_INODE; - inode->i_private = NULL; - - inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + fsstack_copy_attr_all(inode, backing_inode); + if (S_ISREG(inode->i_mode)) { + u64 size = search->size; + inode->i_size = size; + inode->i_blocks = get_blocks_count_for_size(size); + inode->i_mapping->a_ops = &incfs_address_space_ops; inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_pending_read_file_ops; - - } else if (search->ino == INCFS_LOG_INODE) { - /* It's an inode for .log pseudo file. */ - - inode->i_ctime = (struct timespec64){}; - inode->i_mtime = inode->i_ctime; - inode->i_atime = inode->i_ctime; + inode->i_fop = &incfs_file_ops; + inode->i_mode &= ~0222; + } else if (S_ISDIR(inode->i_mode)) { inode->i_size = 0; - inode->i_ino = INCFS_LOG_INODE; - inode->i_private = NULL; - - inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); - - inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_log_file_ops; - + inode->i_blocks = 1; + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_dir_inode_ops; + inode->i_fop = &incfs_dir_fops; } else { - /* Unknown inode requested. */ - return -EINVAL; + pr_warn_once("incfs: Unexpected inode type\n"); + return -EBADF; + } + + ihold(backing_inode); + node->n_backing_inode = backing_inode; + node->n_mount_info = get_mount_info(inode->i_sb); + inode->i_ctime = backing_inode->i_ctime; + inode->i_mtime = backing_inode->i_mtime; + inode->i_atime = backing_inode->i_atime; + inode->i_ino = backing_inode->i_ino; + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { + pr_warn("incfs: ino conflict with backing FS %ld\n", + backing_inode->i_ino); } return 0; @@ -457,219 +318,6 @@ static struct inode *fetch_regular_inode(struct super_block *sb, return inode; } -static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos) -{ - struct pending_reads_state *pr_state = f->private_data; - struct mount_info *mi = get_mount_info(file_superblock(f)); - struct incfs_pending_read_info *reads_buf = NULL; - size_t reads_to_collect = len / sizeof(*reads_buf); - int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); - int new_max_sn = last_known_read_sn; - int reads_collected = 0; - ssize_t result = 0; - int i = 0; - - if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) - return 0; - - reads_buf = (struct incfs_pending_read_info *)get_zeroed_page(GFP_NOFS); - if (!reads_buf) - return -ENOMEM; - - reads_to_collect = - min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); - - reads_collected = incfs_collect_pending_reads( - mi, last_known_read_sn, reads_buf, reads_to_collect); - if (reads_collected < 0) { - result = reads_collected; - goto out; - } - - for (i = 0; i < reads_collected; i++) - if (reads_buf[i].serial_number > new_max_sn) - new_max_sn = reads_buf[i].serial_number; - - /* - * Just to make sure that we don't accidentally copy more data - * to reads buffer than userspace can handle. - */ - reads_collected = min_t(size_t, reads_collected, reads_to_collect); - result = reads_collected * sizeof(*reads_buf); - - /* Copy reads info to the userspace buffer */ - if (copy_to_user(buf, reads_buf, result)) { - result = -EFAULT; - goto out; - } - - WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); - *ppos = 0; -out: - if (reads_buf) - free_page((unsigned long)reads_buf); - return result; -} - - -static __poll_t pending_reads_poll(struct file *file, poll_table *wait) -{ - struct pending_reads_state *state = file->private_data; - struct mount_info *mi = get_mount_info(file_superblock(file)); - __poll_t ret = 0; - - poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); - if (incfs_fresh_pending_reads_exist(mi, - state->last_pending_read_sn)) - ret = EPOLLIN | EPOLLRDNORM; - - return ret; -} - -static int pending_reads_open(struct inode *inode, struct file *file) -{ - struct pending_reads_state *state = NULL; - - state = kzalloc(sizeof(*state), GFP_NOFS); - if (!state) - return -ENOMEM; - - file->private_data = state; - return 0; -} - -static int pending_reads_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - return 0; -} - -static struct inode *fetch_pending_reads_inode(struct super_block *sb) -{ - struct inode_search search = { - .ino = INCFS_PENDING_READS_INODE - }; - struct inode *inode = iget5_locked(sb, search.ino, inode_test, - inode_set, &search); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - return inode; -} - -static int log_open(struct inode *inode, struct file *file) -{ - struct log_file_state *log_state = NULL; - struct mount_info *mi = get_mount_info(file_superblock(file)); - - log_state = kzalloc(sizeof(*log_state), GFP_NOFS); - if (!log_state) - return -ENOMEM; - - log_state->state = incfs_get_log_state(mi); - file->private_data = log_state; - return 0; -} - -static int log_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - return 0; -} - -static ssize_t log_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos) -{ - struct log_file_state *log_state = f->private_data; - struct mount_info *mi = get_mount_info(file_superblock(f)); - int total_reads_collected = 0; - int rl_size; - ssize_t result = 0; - struct incfs_pending_read_info *reads_buf; - ssize_t reads_to_collect = len / sizeof(*reads_buf); - ssize_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); - - rl_size = READ_ONCE(mi->mi_log.rl_size); - if (rl_size == 0) - return 0; - - reads_buf = (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); - if (!reads_buf) - return -ENOMEM; - - reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); - while (reads_to_collect > 0) { - struct read_log_state next_state = READ_ONCE(log_state->state); - int reads_collected = incfs_collect_logged_reads( - mi, &next_state, reads_buf, - min_t(ssize_t, reads_to_collect, reads_per_page)); - if (reads_collected <= 0) { - result = total_reads_collected ? - total_reads_collected * - sizeof(*reads_buf) : - reads_collected; - goto out; - } - if (copy_to_user(buf, reads_buf, - reads_collected * sizeof(*reads_buf))) { - result = total_reads_collected ? - total_reads_collected * - sizeof(*reads_buf) : - -EFAULT; - goto out; - } - - WRITE_ONCE(log_state->state, next_state); - total_reads_collected += reads_collected; - buf += reads_collected * sizeof(*reads_buf); - reads_to_collect -= reads_collected; - } - - result = total_reads_collected * sizeof(*reads_buf); - *ppos = 0; -out: - if (reads_buf) - free_page((unsigned long)reads_buf); - return result; -} - -static __poll_t log_poll(struct file *file, poll_table *wait) -{ - struct log_file_state *log_state = file->private_data; - struct mount_info *mi = get_mount_info(file_superblock(file)); - int count; - __poll_t ret = 0; - - poll_wait(file, &mi->mi_log.ml_notif_wq, wait); - count = incfs_get_uncollected_logs_count(mi, &log_state->state); - if (count >= mi->mi_options.read_log_wakeup_count) - ret = EPOLLIN | EPOLLRDNORM; - - return ret; -} - -static struct inode *fetch_log_inode(struct super_block *sb) -{ - struct inode_search search = { - .ino = INCFS_LOG_INODE - }; - struct inode *inode = iget5_locked(sb, search.ino, inode_test, - inode_set, &search); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - return inode; -} - static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) { struct dir_file *dir = get_incfs_dir_file(file); @@ -685,29 +333,15 @@ static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) root = dir->backing_dir->f_inode == d_inode(mi->mi_backing_dir_path.dentry); - if (root && ctx->pos == 0) { - if (!dir_emit(ctx, pending_reads_file_name, - ARRAY_SIZE(pending_reads_file_name) - 1, - INCFS_PENDING_READS_INODE, DT_REG)) { - error = -EINVAL; + if (root) { + error = emit_pseudo_files(ctx); + if (error) goto out; - } - ctx->pos++; } - if (root && ctx->pos == 1) { - if (!dir_emit(ctx, log_file_name, - ARRAY_SIZE(log_file_name) - 1, - INCFS_LOG_INODE, DT_REG)) { - error = -EINVAL; - goto out; - } - ctx->pos++; - } - - ctx->pos -= 2; + ctx->pos -= PSEUDO_FILE_COUNT; error = iterate_dir(dir->backing_dir, ctx); - ctx->pos += 2; + ctx->pos += PSEUDO_FILE_COUNT; file->f_pos = dir->backing_dir->f_pos; out: if (error) @@ -734,29 +368,9 @@ static int incfs_init_dentry(struct dentry *dentry, struct path *path) return 0; } -static struct dentry *incfs_lookup_dentry(struct dentry *parent, - const char *name) +static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, + const char *name) { - struct inode *inode; - struct dentry *result = NULL; - - if (!parent) - return ERR_PTR(-EFAULT); - - inode = d_inode(parent); - inode_lock_nested(inode, I_MUTEX_PARENT); - result = lookup_one_len(name, parent, strlen(name)); - inode_unlock(inode); - - if (IS_ERR(result)) - pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); - - return result; -} - -static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) -{ - static const char name[] = ".index"; struct dentry *index_dentry; struct inode *backing_inode = d_inode(backing_dir); int err = 0; @@ -779,7 +393,8 @@ static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) if (err) return ERR_PTR(err); - if (!d_really_is_positive(index_dentry)) { + if (!d_really_is_positive(index_dentry) || + unlikely(d_unhashed(index_dentry))) { dput(index_dentry); return ERR_PTR(-EINVAL); } @@ -787,6 +402,39 @@ static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) return index_dentry; } +static int read_single_page_timeouts(struct data_file *df, struct file *f, + int block_index, struct mem_range range, + struct mem_range tmp) +{ + struct mount_info *mi = df->df_mount_info; + u32 min_time_ms = 0; + u32 min_pending_time_ms = 0; + u32 max_pending_time_ms = U32_MAX; + int uid = current_uid().val; + int i; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + for (i = 0; i < mi->mi_per_uid_read_timeouts_size / + sizeof(*mi->mi_per_uid_read_timeouts); ++i) { + struct incfs_per_uid_read_timeouts *t = + &mi->mi_per_uid_read_timeouts[i]; + + if(t->uid == uid) { + min_time_ms = t->min_time_ms; + min_pending_time_ms = t->min_pending_time_ms; + max_pending_time_ms = t->max_pending_time_ms; + break; + } + } + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + if (max_pending_time_ms == U32_MAX) + max_pending_time_ms = mi->mi_options.read_timeout_ms; + + return incfs_read_data_file_block(range, f, block_index, + min_time_ms, min_pending_time_ms, max_pending_time_ms, + tmp); +} + static int read_single_page(struct file *f, struct page *page) { loff_t offset = 0; @@ -795,28 +443,34 @@ static int read_single_page(struct file *f, struct page *page) ssize_t read_result = 0; struct data_file *df = get_incfs_data_file(f); int result = 0; - void *page_start = kmap(page); + void *page_start; int block_index; - int timeout_ms; - if (!df) + if (!df) { + SetPageError(page); + unlock_page(page); return -EBADF; + } + page_start = kmap(page); offset = page_offset(page); - block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; + block_index = (offset + df->df_mapped_offset) / + INCFS_DATA_FILE_BLOCK_SIZE; size = df->df_size; - timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE }; - tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); + if (!tmp.data) { + read_result = -ENOMEM; + goto err; + } bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); - read_result = incfs_read_data_file_block( - range(page_start, bytes_to_read), f, block_index, - timeout_ms, tmp); + + read_result = read_single_page_timeouts(df, f, block_index, + range(page_start, bytes_to_read), tmp); free_pages((unsigned long)tmp.data, get_order(tmp.len)); } else { @@ -824,6 +478,7 @@ static int read_single_page(struct file *f, struct page *page) read_result = 0; } +err: if (read_result < 0) result = read_result; else if (read_result < PAGE_SIZE) @@ -840,137 +495,7 @@ static int read_single_page(struct file *f, struct page *page) return result; } -static char *file_id_to_str(incfs_uuid_t id) -{ - char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); - char *end; - - if (!result) - return NULL; - - end = bin2hex(result, id.bytes, sizeof(id.bytes)); - *end = 0; - return result; -} - -static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, - u64 size) -{ - u8 *result; - - if (!original) - return range(NULL, 0); - - if (size > INCFS_MAX_SIGNATURE_SIZE) - return range(ERR_PTR(-EFAULT), 0); - - result = kzalloc(size, GFP_NOFS | __GFP_COMP); - if (!result) - return range(ERR_PTR(-ENOMEM), 0); - - if (copy_from_user(result, original, size)) { - kfree(result); - return range(ERR_PTR(-EFAULT), 0); - } - - return range(result, size); -} - -static int init_new_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, struct mem_range attr, - u8 __user *user_signature_info, u64 signature_size) -{ - struct path path = {}; - struct file *new_file; - int error = 0; - struct backing_file_context *bfc = NULL; - u32 block_count; - struct mem_range raw_signature = { NULL }; - struct mtree *hash_tree = NULL; - - if (!mi || !dentry || !uuid) - return -EFAULT; - - /* Resize newly created file to its true size. */ - path = (struct path) { - .mnt = mi->mi_backing_dir_path.mnt, - .dentry = dentry - }; - new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, - mi->mi_owner); - - if (IS_ERR(new_file)) { - error = PTR_ERR(new_file); - goto out; - } - - bfc = incfs_alloc_bfc(new_file); - fput(new_file); - if (IS_ERR(bfc)) { - error = PTR_ERR(bfc); - bfc = NULL; - goto out; - } - - mutex_lock(&bfc->bc_mutex); - error = incfs_write_fh_to_backing_file(bfc, uuid, size); - if (error) - goto out; - - if (attr.data && attr.len) { - error = incfs_write_file_attr_to_backing_file(bfc, - attr, NULL); - if (error) - goto out; - } - - block_count = (u32)get_blocks_count_for_size(size); - - if (user_signature_info) { - raw_signature = incfs_copy_signature_info_from_user( - user_signature_info, signature_size); - - if (IS_ERR(raw_signature.data)) { - error = PTR_ERR(raw_signature.data); - raw_signature.data = NULL; - goto out; - } - - hash_tree = incfs_alloc_mtree(raw_signature, block_count); - if (IS_ERR(hash_tree)) { - error = PTR_ERR(hash_tree); - hash_tree = NULL; - goto out; - } - - error = incfs_write_signature_to_backing_file( - bfc, raw_signature, hash_tree->hash_tree_area_size); - if (error) - goto out; - - block_count += get_blocks_count_for_size( - hash_tree->hash_tree_area_size); - } - - if (block_count) - error = incfs_write_blockmap_to_backing_file(bfc, block_count); - - if (error) - goto out; -out: - if (bfc) { - mutex_unlock(&bfc->bc_mutex); - incfs_free_bfc(bfc); - } - incfs_free_mtree(hash_tree); - kfree(raw_signature.data); - - if (error) - pr_debug("incfs: %s error: %d\n", __func__, error); - return error; -} - -static int incfs_link(struct dentry *what, struct dentry *where) +int incfs_link(struct dentry *what, struct dentry *where) { struct dentry *parent_dentry = dget_parent(where); struct inode *pinode = d_inode(parent_dentry); @@ -984,7 +509,7 @@ static int incfs_link(struct dentry *what, struct dentry *where) return error; } -static int incfs_unlink(struct dentry *dentry) +int incfs_unlink(struct dentry *dentry) { struct dentry *parent_dentry = dget_parent(dentry); struct inode *pinode = d_inode(parent_dentry); @@ -1012,274 +537,36 @@ static int incfs_rmdir(struct dentry *dentry) return error; } -static int dir_relative_path_resolve( - struct mount_info *mi, - const char __user *relative_path, - struct path *result_path) +static void maybe_delete_incomplete_file(struct data_file *df) { - struct path *base_path = &mi->mi_backing_dir_path; - int dir_fd = get_unused_fd_flags(0); - struct file *dir_f = NULL; - int error = 0; + char *file_id_str; + struct dentry *incomplete_file_dentry; - if (dir_fd < 0) - return dir_fd; + if (atomic_read(&df->df_data_blocks_written) < df->df_data_block_count) + return; - dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + /* This is best effort - there is no useful action to take on failure */ + file_id_str = file_id_to_str(df->df_id); + if (!file_id_str) + return; - if (IS_ERR(dir_f)) { - error = PTR_ERR(dir_f); - goto out; - } - fd_install(dir_fd, dir_f); - - if (!relative_path) { - /* No relative path given, just return the base dir. */ - *result_path = *base_path; - path_get(result_path); + incomplete_file_dentry = incfs_lookup_dentry( + df->df_mount_info->mi_incomplete_dir, + file_id_str); + if (!incomplete_file_dentry || IS_ERR(incomplete_file_dentry)) { + incomplete_file_dentry = NULL; goto out; } - error = user_path_at_empty(dir_fd, relative_path, - LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + if (!d_really_is_positive(incomplete_file_dentry)) + goto out; + + vfs_fsync(df->df_backing_file_context->bc_file, 0); + incfs_unlink(incomplete_file_dentry); out: - ksys_close(dir_fd); - if (error) - pr_debug("incfs: %s %d\n", __func__, error); - return error; -} - -static int validate_name(char *file_name) -{ - struct mem_range name = range(file_name, strlen(file_name)); - int i = 0; - - if (name.len > INCFS_MAX_NAME_LEN) - return -ENAMETOOLONG; - - if (incfs_equal_ranges(pending_reads_file_name_range, name)) - return -EINVAL; - - for (i = 0; i < name.len; i++) - if (name.data[i] == '/') - return -EINVAL; - - return 0; -} - -static int chmod(struct dentry *dentry, umode_t mode) -{ - struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; - struct iattr newattrs; - int error; - -retry_deleg: - inode_lock(inode); - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(dentry, &newattrs, &delegated_inode); - inode_unlock(inode); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } - return error; -} - -static long ioctl_create_file(struct mount_info *mi, - struct incfs_new_file_args __user *usr_args) -{ - struct incfs_new_file_args args; - char *file_id_str = NULL; - struct dentry *index_file_dentry = NULL; - struct dentry *named_file_dentry = NULL; - struct path parent_dir_path = {}; - struct inode *index_dir_inode = NULL; - __le64 size_attr_value = 0; - char *file_name = NULL; - char *attr_value = NULL; - int error = 0; - bool locked = false; - - if (!mi || !mi->mi_index_dir) { - error = -EFAULT; - goto out; - } - - if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { - error = -EFAULT; - goto out; - } - - file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); - if (IS_ERR(file_name)) { - error = PTR_ERR(file_name); - file_name = NULL; - goto out; - } - - error = validate_name(file_name); - if (error) - goto out; - - file_id_str = file_id_to_str(args.file_id); - if (!file_id_str) { - error = -ENOMEM; - goto out; - } - - error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); - if (error) - goto out; - locked = true; - - /* Find a directory to put the file into. */ - error = dir_relative_path_resolve(mi, - u64_to_user_ptr(args.directory_path), - &parent_dir_path); - if (error) - goto out; - - if (parent_dir_path.dentry == mi->mi_index_dir) { - /* Can't create a file directly inside .index */ - error = -EBUSY; - goto out; - } - - /* Look up a dentry in the parent dir. It should be negative. */ - named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, - file_name); - if (!named_file_dentry) { - error = -EFAULT; - goto out; - } - if (IS_ERR(named_file_dentry)) { - error = PTR_ERR(named_file_dentry); - named_file_dentry = NULL; - goto out; - } - if (d_really_is_positive(named_file_dentry)) { - /* File with this path already exists. */ - error = -EEXIST; - goto out; - } - /* Look up a dentry in the .index dir. It should be negative. */ - index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); - if (!index_file_dentry) { - error = -EFAULT; - goto out; - } - if (IS_ERR(index_file_dentry)) { - error = PTR_ERR(index_file_dentry); - index_file_dentry = NULL; - goto out; - } - if (d_really_is_positive(index_file_dentry)) { - /* File with this ID already exists in index. */ - error = -EEXIST; - goto out; - } - - /* Creating a file in the .index dir. */ - index_dir_inode = d_inode(mi->mi_index_dir); - inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); - error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, - true); - inode_unlock(index_dir_inode); - - if (error) - goto out; - if (!d_really_is_positive(index_file_dentry)) { - error = -EINVAL; - goto out; - } - - error = chmod(index_file_dentry, args.mode | 0222); - if (error) { - pr_debug("incfs: chmod err: %d\n", error); - goto delete_index_file; - } - - /* Save the file's ID as an xattr for easy fetching in future. */ - error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, - file_id_str, strlen(file_id_str), XATTR_CREATE); - if (error) { - pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; - } - - /* Save the file's size as an xattr for easy fetching in future. */ - size_attr_value = cpu_to_le64(args.size); - error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, - (char *)&size_attr_value, sizeof(size_attr_value), - XATTR_CREATE); - if (error) { - pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; - } - - /* Save the file's attribute as an xattr */ - if (args.file_attr_len && args.file_attr) { - if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { - error = -E2BIG; - goto delete_index_file; - } - - attr_value = kmalloc(args.file_attr_len, GFP_NOFS); - if (!attr_value) { - error = -ENOMEM; - goto delete_index_file; - } - - if (copy_from_user(attr_value, - u64_to_user_ptr(args.file_attr), - args.file_attr_len) > 0) { - error = -EFAULT; - goto delete_index_file; - } - - error = vfs_setxattr(index_file_dentry, - INCFS_XATTR_METADATA_NAME, - attr_value, args.file_attr_len, - XATTR_CREATE); - - if (error) - goto delete_index_file; - } - - /* Initializing a newly created file. */ - error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, - range(attr_value, args.file_attr_len), - (u8 __user *)args.signature_info, - args.signature_size); - if (error) - goto delete_index_file; - - /* Linking a file with it's real name from the requested dir. */ - error = incfs_link(index_file_dentry, named_file_dentry); - - if (!error) - goto out; - -delete_index_file: - incfs_unlink(index_file_dentry); - -out: - if (error) - pr_debug("incfs: %s err:%d\n", __func__, error); - + dput(incomplete_file_dentry); kfree(file_id_str); - kfree(file_name); - kfree(attr_value); - dput(named_file_dentry); - dput(index_file_dentry); - path_put(&parent_dir_path); - if (locked) - mutex_unlock(&mi->mi_dir_struct_mutex); - return error; } static long ioctl_fill_blocks(struct file *f, void __user *arg) @@ -1288,6 +575,7 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) struct incfs_fill_blocks fill_blocks; struct incfs_fill_block __user *usr_fill_block_array; struct data_file *df = get_incfs_data_file(f); + struct incfs_file_data *fd = f->private_data; const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; u8 *data_buf = NULL; ssize_t error = 0; @@ -1296,7 +584,7 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (!df) return -EBADF; - if ((uintptr_t)f->private_data != CAN_FILL) + if (!fd || fd->fd_fill_permission != CAN_FILL) return -EPERM; if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) @@ -1342,6 +630,8 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (data_buf) free_pages((unsigned long)data_buf, get_order(data_buf_size)); + maybe_delete_incomplete_file(df); + /* * Only report the error if no records were processed, otherwise * just return how many were processed successfully. @@ -1352,53 +642,6 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) return i; } -static long ioctl_permit_fill(struct file *f, void __user *arg) -{ - struct incfs_permit_fill __user *usr_permit_fill = arg; - struct incfs_permit_fill permit_fill; - long error = 0; - struct file *file = NULL; - - if (f->f_op != &incfs_pending_read_file_ops) - return -EPERM; - - if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) - return -EFAULT; - - file = fget(permit_fill.file_descriptor); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (file->f_op != &incfs_file_ops) { - error = -EPERM; - goto out; - } - - if (file->f_inode->i_sb != f->f_inode->i_sb) { - error = -EPERM; - goto out; - } - - switch ((uintptr_t)file->private_data) { - case CANT_FILL: - file->private_data = (void *)CAN_FILL; - break; - - case CAN_FILL: - pr_debug("CAN_FILL already set"); - break; - - default: - pr_warn("Invalid file private data"); - error = -EFAULT; - goto out; - } - -out: - fput(file); - return error; -} - static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1452,18 +695,19 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) struct incfs_get_filled_blocks_args __user *args_usr_ptr = arg; struct incfs_get_filled_blocks_args args = {}; struct data_file *df = get_incfs_data_file(f); + struct incfs_file_data *fd = f->private_data; int error; - if (!df) + if (!df || !fd) return -EINVAL; - if ((uintptr_t)f->private_data != CAN_FILL) + if (fd->fd_fill_permission != CAN_FILL) return -EPERM; if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; - error = incfs_get_filled_blocks(df, &args); + error = incfs_get_filled_blocks(df, fd, &args); if (copy_to_user(args_usr_ptr, &args, sizeof(args))) return -EFAULT; @@ -1471,21 +715,38 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) return error; } +static long ioctl_get_block_count(struct file *f, void __user *arg) +{ + struct incfs_get_block_count_args __user *args_usr_ptr = arg; + struct incfs_get_block_count_args args = {}; + struct data_file *df = get_incfs_data_file(f); + + if (!df) + return -EINVAL; + + args.total_data_blocks_out = df->df_data_block_count; + args.filled_data_blocks_out = atomic_read(&df->df_data_blocks_written); + args.total_hash_blocks_out = df->df_total_block_count - + df->df_data_block_count; + args.filled_hash_blocks_out = atomic_read(&df->df_hash_blocks_written); + + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + return -EFAULT; + + return 0; +} + static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { - struct mount_info *mi = get_mount_info(file_superblock(f)); - switch (req) { - case INCFS_IOC_CREATE_FILE: - return ioctl_create_file(mi, (void __user *)arg); case INCFS_IOC_FILL_BLOCKS: return ioctl_fill_blocks(f, (void __user *)arg); - case INCFS_IOC_PERMIT_FILL: - return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); case INCFS_IOC_GET_FILLED_BLOCKS: return ioctl_get_filled_blocks(f, (void __user *)arg); + case INCFS_IOC_GET_BLOCK_COUNT: + return ioctl_get_block_count(f, (void __user *)arg); default: return -EINVAL; } @@ -1499,40 +760,18 @@ static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, struct dentry *backing_dentry = NULL; struct path dir_backing_path = {}; struct inode_info *dir_info = get_incfs_node(dir_inode); - struct mem_range name_range = - range((u8 *)dentry->d_name.name, dentry->d_name.len); int err = 0; + if (!mi || !dir_info || !dir_info->n_backing_inode) + return ERR_PTR(-EBADF); + if (d_inode(mi->mi_backing_dir_path.dentry) == dir_info->n_backing_inode) { /* We do lookup in the FS root. Show pseudo files. */ - - if (incfs_equal_ranges(pending_reads_file_name_range, - name_range)) { - struct inode *inode = fetch_pending_reads_inode( - dir_inode->i_sb); - - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - - d_add(dentry, inode); + err = dir_lookup_pseudo_files(dir_inode->i_sb, dentry); + if (err != -ENOENT) goto out; - } - - if (incfs_equal_ranges(log_file_name_range, name_range)) { - struct inode *inode = fetch_log_inode( - dir_inode->i_sb); - - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - - d_add(dentry, inode); - goto out; - } + err = 0; } dir_dentry = dget_parent(dentry); @@ -1618,7 +857,7 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!backing_dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_dentry->d_parent == mi->mi_index_dir) { @@ -1627,13 +866,19 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } + if (backing_dentry->d_parent == mi->mi_incomplete_dir) { + /* Can't create a subdir inside .incomplete */ + err = -EBUSY; + goto out; + } inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode | 0222); inode_unlock(dir_node->n_backing_inode); if (!err) { struct inode *inode = NULL; - if (d_really_is_negative(backing_dentry)) { + if (d_really_is_negative(backing_dentry) || + unlikely(d_unhashed(backing_dentry))) { err = -EINVAL; goto out; } @@ -1650,23 +895,34 @@ out: if (d_really_is_negative(dentry)) d_drop(dentry); path_put(&backing_path); + +path_err: mutex_unlock(&mi->mi_dir_struct_mutex); if (err) pr_debug("incfs: %s err:%d\n", __func__, err); return err; } -/* Delete file referenced by backing_dentry and also its hardlink from .index */ -static int final_file_delete(struct mount_info *mi, - struct dentry *backing_dentry) +/* + * Delete file referenced by backing_dentry and if appropriate its hardlink + * from .index and .incomplete + */ +static int file_delete(struct mount_info *mi, + struct dentry *backing_dentry, + int nlink) { struct dentry *index_file_dentry = NULL; + struct dentry *incomplete_file_dentry = NULL; /* 2 chars per byte of file ID + 1 char for \0 */ char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; ssize_t uuid_size = 0; int error = 0; WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); + + if (nlink > 3) + goto just_unlink; + uuid_size = vfs_getxattr(backing_dentry, INCFS_XATTR_ID_NAME, file_id_str, 2 * sizeof(incfs_uuid_t)); if (uuid_size < 0) { @@ -1682,17 +938,46 @@ static int final_file_delete(struct mount_info *mi, index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); if (IS_ERR(index_file_dentry)) { error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; goto out; } - error = incfs_unlink(backing_dentry); - if (error) + if (d_really_is_positive(index_file_dentry) && nlink > 0) + nlink--; + + if (nlink > 2) + goto just_unlink; + + incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir, + file_id_str); + if (IS_ERR(incomplete_file_dentry)) { + error = PTR_ERR(incomplete_file_dentry); + incomplete_file_dentry = NULL; goto out; + } + + if (d_really_is_positive(incomplete_file_dentry) && nlink > 0) + nlink--; + + if (nlink > 1) + goto just_unlink; if (d_really_is_positive(index_file_dentry)) error = incfs_unlink(index_file_dentry); + if (error) + goto out; + + if (d_really_is_positive(incomplete_file_dentry)) + error = incfs_unlink(incomplete_file_dentry); + if (error) + goto out; + +just_unlink: + error = incfs_unlink(backing_dentry); + out: dput(index_file_dentry); + dput(incomplete_file_dentry); if (error) pr_debug("incfs: delete_file_from_index err:%d\n", error); return error; @@ -1705,6 +990,9 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) struct kstat stat; int err = 0; + if (!mi) + return -EBADF; + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (err) return err; @@ -1712,7 +1000,7 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) get_incfs_backing_path(dentry, &backing_path); if (!backing_path.dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_path.dentry->d_parent == mi->mi_index_dir) { @@ -1721,25 +1009,23 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) goto out; } + if (backing_path.dentry->d_parent == mi->mi_incomplete_dir) { + /* Direct unlink from .incomplete are not allowed. */ + err = -EBUSY; + goto out; + } + err = vfs_getattr(&backing_path, &stat, STATX_NLINK, AT_STATX_SYNC_AS_STAT); if (err) goto out; - if (stat.nlink == 2) { - /* - * This is the last named link to this file. The only one left - * is in .index. Remove them both now. - */ - err = final_file_delete(mi, backing_path.dentry); - } else { - /* There are other links to this file. Remove just this one. */ - err = incfs_unlink(backing_path.dentry); - } + err = file_delete(mi, backing_path.dentry, stat.nlink); d_drop(dentry); out: path_put(&backing_path); +path_err: if (err) pr_debug("incfs: %s err:%d\n", __func__, err); mutex_unlock(&mi->mi_dir_struct_mutex); @@ -1754,6 +1040,9 @@ static int dir_link(struct dentry *old_dentry, struct inode *dir, struct path backing_new_path = {}; int error = 0; + if (!mi) + return -EBADF; + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (error) return error; @@ -1767,6 +1056,12 @@ static int dir_link(struct dentry *old_dentry, struct inode *dir, goto out; } + if (backing_new_path.dentry->d_parent == mi->mi_incomplete_dir) { + /* Can't link to .incomplete */ + error = -EBUSY; + goto out; + } + error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); if (!error) { struct inode *inode = NULL; @@ -1800,6 +1095,9 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) struct path backing_path = {}; int err = 0; + if (!mi) + return -EBADF; + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (err) return err; @@ -1807,7 +1105,7 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) get_incfs_backing_path(dentry, &backing_path); if (!backing_path.dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_path.dentry == mi->mi_index_dir) { @@ -1816,11 +1114,19 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) goto out; } + if (backing_path.dentry == mi->mi_incomplete_dir) { + /* Can't delete .incomplete */ + err = -EBUSY; + goto out; + } + err = incfs_rmdir(backing_path.dentry); if (!err) d_drop(dentry); out: path_put(&backing_path); + +path_err: if (err) pr_debug("incfs: %s err:%d\n", __func__, err); mutex_unlock(&mi->mi_dir_struct_mutex); @@ -1844,6 +1150,14 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, return error; backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; + + if (!backing_old_dentry || backing_old_dentry == mi->mi_index_dir || + backing_old_dentry == mi->mi_incomplete_dir) { + /* Renaming .index or .incomplete not allowed */ + error = -EBUSY; + goto exit; + } + backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; dget(backing_old_dentry); dget(backing_new_dentry); @@ -1852,8 +1166,9 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, backing_new_dir_dentry = dget_parent(backing_new_dentry); target_inode = d_inode(new_dentry); - if (backing_old_dir_dentry == mi->mi_index_dir) { - /* Direct moves from .index are not allowed. */ + if (backing_old_dir_dentry == mi->mi_index_dir || + backing_old_dir_dentry == mi->mi_incomplete_dir) { + /* Direct moves from .index or .incomplete are not allowed. */ error = -EBUSY; goto out; } @@ -1890,6 +1205,7 @@ out: dput(backing_new_dentry); dput(backing_old_dentry); +exit: mutex_unlock(&mi->mi_dir_struct_mutex); if (error) pr_debug("incfs: %s err:%d\n", __func__, error); @@ -1903,10 +1219,19 @@ static int file_open(struct inode *inode, struct file *file) struct file *backing_file = NULL; struct path backing_path = {}; int err = 0; + int flags = O_NOATIME | O_LARGEFILE | + (S_ISDIR(inode->i_mode) ? O_RDONLY : O_RDWR); + + WARN_ON(file->private_data); + + if (!mi) + return -EBADF; get_incfs_backing_path(file->f_path.dentry, &backing_path); - backing_file = dentry_open( - &backing_path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); + if (!backing_path.dentry) + return -EBADF; + + backing_file = dentry_open(&backing_path, flags, mi->mi_owner); path_put(&backing_path); if (IS_ERR(backing_file)) { @@ -1916,8 +1241,20 @@ static int file_open(struct inode *inode, struct file *file) } if (S_ISREG(inode->i_mode)) { + struct incfs_file_data *fd = kzalloc(sizeof(*fd), GFP_NOFS); + + if (!fd) { + err = -ENOMEM; + goto out; + } + + *fd = (struct incfs_file_data) { + .fd_fill_permission = CANT_FILL, + }; + file->private_data = fd; + err = make_inode_ready_for_data_ops(mi, inode, backing_file); - file->private_data = (void *)CANT_FILL; + } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = NULL; @@ -1930,9 +1267,17 @@ static int file_open(struct inode *inode, struct file *file) err = -EBADF; out: - if (err) - pr_debug("incfs: %s name:%s err: %d\n", __func__, - file->f_path.dentry->d_name.name, err); + if (err) { + pr_debug("name:%s err: %d\n", + file->f_path.dentry->d_name.name, err); + if (S_ISREG(inode->i_mode)) + kfree(file->private_data); + else if (S_ISDIR(inode->i_mode)) + incfs_free_dir_file(file->private_data); + + file->private_data = NULL; + } + if (backing_file) fput(backing_file); return err; @@ -1941,9 +1286,8 @@ out: static int file_release(struct inode *inode, struct file *file) { if (S_ISREG(inode->i_mode)) { - /* Do nothing. - * data_file is released only by inode eviction. - */ + kfree(file->private_data); + file->private_data = NULL; } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = get_incfs_dir_file(file); @@ -2162,10 +1506,13 @@ static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, const char *dev_name, void *data) { + static const char index_name[] = ".index"; + static const char incomplete_name[] = ".incomplete"; struct mount_options options = {}; struct mount_info *mi = NULL; struct path backing_dir_path = {}; - struct dentry *index_dir; + struct dentry *index_dir = NULL; + struct dentry *incomplete_dir = NULL; struct super_block *src_fs_sb = NULL; struct inode *root_inode = NULL; struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); @@ -2218,15 +1565,28 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, goto err; } - index_dir = open_or_create_index_dir(backing_dir_path.dentry); + index_dir = open_or_create_special_dir(backing_dir_path.dentry, + index_name); if (IS_ERR_OR_NULL(index_dir)) { error = PTR_ERR(index_dir); pr_err("incfs: Can't find or create .index dir in %s\n", dev_name); + /* No need to null index_dir since we don't put it */ goto err; } mi->mi_index_dir = index_dir; + incomplete_dir = open_or_create_special_dir(backing_dir_path.dentry, + incomplete_name); + if (IS_ERR_OR_NULL(incomplete_dir)) { + error = PTR_ERR(incomplete_dir); + pr_err("incfs: Can't find or create .incomplete dir in %s\n", + dev_name); + /* No need to null incomplete_dir since we don't put it */ + goto err; + } + mi->mi_incomplete_dir = incomplete_dir; + sb->s_fs_info = mi; root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); if (IS_ERR(root_inode)) { @@ -2267,6 +1627,11 @@ static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) if (err) return err; + if (options.report_uid != mi->mi_options.report_uid) { + pr_err("incfs: Can't change report_uid mount option on remount\n"); + return -EOPNOTSUPP; + } + err = incfs_realloc_mount_info(mi, &options); if (err) return err; @@ -2295,9 +1660,7 @@ static int show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",rlog_wakeup_cnt=%u", mi->mi_options.read_log_wakeup_count); } - if (mi->mi_options.no_backing_file_cache) - seq_puts(m, ",no_bf_cache"); - if (mi->mi_options.no_backing_file_readahead) - seq_puts(m, ",no_bf_readahead"); + if (mi->mi_options.report_uid) + seq_puts(m, ",report_uid"); return 0; } diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h index eaa490e19072..79fdf243733d 100644 --- a/fs/incfs/vfs.h +++ b/fs/incfs/vfs.h @@ -6,8 +6,28 @@ #ifndef _INCFS_VFS_H #define _INCFS_VFS_H +extern const struct file_operations incfs_file_ops; +extern const struct inode_operations incfs_file_inode_ops; + void incfs_kill_sb(struct super_block *sb); struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, const char *dev_name, void *data); +int incfs_link(struct dentry *what, struct dentry *where); +int incfs_unlink(struct dentry *dentry); + +static inline struct mount_info *get_mount_info(struct super_block *sb) +{ + struct mount_info *result = sb->s_fs_info; + + WARN_ON(!result); + return result; +} + +static inline struct super_block *file_superblock(struct file *f) +{ + struct inode *inode = file_inode(f); + + return inode->i_sb; +} #endif diff --git a/fs/libfs.c b/fs/libfs.c index 417566e87e09..3bc8205b2aac 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -870,7 +870,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; - u64 val; + unsigned long long val; size_t size; ssize_t ret; @@ -888,7 +888,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtoll(attr->set_buf, NULL, 0); + ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5686be67be8..d57d453aecc2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2a2ff3f22a4..fe7b42c277ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7600,9 +7600,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -7997,7 +7999,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0d20fd161225..01f7ce1ae127 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -118,6 +118,13 @@ done: return nfsd_return_attrs(nfserr, resp); } +/* Obsolete, replaced by MNTPROC_MNT. */ +static __be32 +nfsd_proc_root(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Look up a path name component * Note: the dentry in the resp->fh may be negative if the file @@ -201,6 +208,13 @@ nfsd_proc_read(struct svc_rqst *rqstp) return fh_getattr(&resp->fh, &resp->stat); } +/* Reserved */ +static __be32 +nfsd_proc_writecache(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Write data to a file * N.B. After this call resp->fh needs an fh_put @@ -615,6 +629,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT, }, [NFSPROC_ROOT] = { + .pc_func = nfsd_proc_root, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), @@ -652,6 +667,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, }, [NFSPROC_WRITECACHE] = { + .pc_func = nfsd_proc_writecache, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index fcfc9087be7e..9530a2dfabc3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; + struct path alteredpath; + struct path *canonical_path = &path; struct fd f; int ret; unsigned flags = 0; @@ -747,13 +749,23 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (ret) goto fput_and_out; + /* support stacked filesystems */ + if (path.dentry && path.dentry->d_op) { + if (path.dentry->d_op->d_canonical_path) { + path.dentry->d_op->d_canonical_path(&path, + &alteredpath); + canonical_path = &alteredpath; + path_put(&path); + } + } + /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + inode = canonical_path->dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - path_put(&path); + path_put(canonical_path); fput_and_out: fdput(f); return ret; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2658d91c1f7b..09bc2cf5f61c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1747,6 +1747,7 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); diff --git a/fs/proc/self.c b/fs/proc/self.c index cc6d4253399d..7922edf70ce1 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry, pid_t tgid = task_tgid_nr_ns(current, ns); char *name; + /* + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this. + */ + if (current->flags & PF_KTHREAD) + return ERR_PTR(-EOPNOTSUPP); + if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ diff --git a/fs/super.c b/fs/super.c index bc62a37e7122..e7d3edaae9cc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1380,36 +1380,11 @@ EXPORT_SYMBOL(__sb_end_write); */ int __sb_start_write(struct super_block *sb, int level, bool wait) { - bool force_trylock = false; - int ret = 1; + if (!wait) + return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); -#ifdef CONFIG_LOCKDEP - /* - * We want lockdep to tell us about possible deadlocks with freezing - * but it's it bit tricky to properly instrument it. Getting a freeze - * protection works as getting a read lock but there are subtle - * problems. XFS for example gets freeze protection on internal level - * twice in some cases, which is OK only because we already hold a - * freeze protection also on higher level. Due to these cases we have - * to use wait == F (trylock mode) which must not fail. - */ - if (wait) { - int i; - - for (i = 0; i < level - 1; i++) - if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { - force_trylock = true; - break; - } - } -#endif - if (wait && !force_trylock) - percpu_down_read(sb->s_writers.rw_sem + level-1); - else - ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - WARN_ON(force_trylock && !ret); - return ret; + percpu_down_read(sb->s_writers.rw_sem + level-1); + return 1; } EXPORT_SYMBOL(__sb_start_write); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 564e330d05b1..24bbecd4752b 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1129,6 +1129,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) err = PTR_ERR(dent); if (err == -ENOENT) break; + kfree(pdent); return err; } diff --git a/fs/udf/super.c b/fs/udf/super.c index caeb01ca039b..c7f6243f318b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1685,7 +1685,8 @@ static noinline int udf_process_sequence( "Pointers (max %u supported)\n", UDF_MAX_TD_NESTING); brelse(bh); - return -EIO; + ret = -EIO; + goto out; } vdp = (struct volDescPtr *)bh->b_data; @@ -1705,7 +1706,8 @@ static noinline int udf_process_sequence( curr = get_volume_descriptor_record(ident, bh, &data); if (IS_ERR(curr)) { brelse(bh); - return PTR_ERR(curr); + ret = PTR_ERR(curr); + goto out; } /* Descriptor we don't care about? */ if (!curr) @@ -1727,28 +1729,31 @@ static noinline int udf_process_sequence( */ if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); - return -EAGAIN; + ret = -EAGAIN; + goto out; } ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) - return ret; + goto out; if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, data.vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset); if (ret < 0) - return ret; + goto out; } /* Now handle prevailing Partition Descriptors */ for (i = 0; i < data.num_part_descs; i++) { ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); if (ret < 0) - return ret; + goto out; } - - return 0; + ret = 0; +out: + kfree(data.part_descs_loc); + return ret; } /* diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 1eb7933dac83..b3a9043b0c9e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2213,6 +2213,7 @@ xfs_defer_agfl_block( new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; new->xefi_oinfo = *oinfo; + new->xefi_skip_discard = false; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f35e1801f1c9..fc9950a505e6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4920,20 +4920,25 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; xfs_filblks_t len; xfs_extlen_t mod; - bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, &mod); ASSERT(mod == 0); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; + if (!(bflags & XFS_BMAPI_REMAP)) { + xfs_fsblock_t bno; + + bno = div_u64_rem(del->br_startblock, + mp->m_sb.sb_rextsize, &mod); + ASSERT(mod == 0); + + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + } + do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 488dc8860fd7..50242ba3cdb7 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -52,9 +52,9 @@ struct xfs_extent_free_item { xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + bool xefi_skip_discard; struct list_head xefi_list; struct xfs_owner_info xefi_oinfo; /* extent owner */ - bool xefi_skip_discard; }; #define XFS_BMAP_MAX_NMAP 4 diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 245af452840e..ab3e72e702f0 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -1387,7 +1387,7 @@ xfs_rmap_convert_shared( * record for our insertion point. This will also give us the record for * start block contiguity tests. */ - error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index f84a58e523bc..cf005e18d618 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -120,6 +120,8 @@ xchk_bmap_get_rmap( if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; + if (irec->br_state == XFS_EXT_UNWRITTEN) + rflags |= XFS_RMAP_UNWRITTEN; /* * CoW staging extents are owned (on disk) by the refcountbt, so @@ -223,13 +225,13 @@ xchk_bmap_xref_rmap( * which doesn't track unwritten state. */ if (owner != XFS_RMAP_OWN_COW && - irec->br_state == XFS_EXT_UNWRITTEN && - !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + !!(irec->br_state == XFS_EXT_UNWRITTEN) != + !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (info->whichfork == XFS_ATTR_FORK && - !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + if (!!(info->whichfork == XFS_ATTR_FORK) != + !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 4ae959f7ad2c..c924fe3cdad6 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -450,32 +450,41 @@ xchk_btree_check_minrecs( int level, struct xfs_btree_block *block) { - unsigned int numrecs; - int ok_level; - - numrecs = be16_to_cpu(block->bb_numrecs); + struct xfs_btree_cur *cur = bs->cur; + unsigned int root_level = cur->bc_nlevels - 1; + unsigned int numrecs = be16_to_cpu(block->bb_numrecs); /* More records than minrecs means the block is ok. */ - if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level)) + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return; /* - * Certain btree blocks /can/ have fewer than minrecs records. Any - * level greater than or equal to the level of the highest dedicated - * btree block are allowed to violate this constraint. - * - * For a btree rooted in a block, the btree root can have fewer than - * minrecs records. If the btree is rooted in an inode and does not - * store records in the root, the direct children of the root and the - * root itself can have fewer than minrecs records. + * For btrees rooted in the inode, it's possible that the root block + * contents spilled into a regular ondisk block because there wasn't + * enough space in the inode root. The number of records in that + * child block might be less than the standard minrecs, but that's ok + * provided that there's only one direct child of the root. */ - ok_level = bs->cur->bc_nlevels - 1; - if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - ok_level--; - if (level >= ok_level) - return; + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 2) { + struct xfs_btree_block *root_block; + struct xfs_buf *root_bp; + int root_maxrecs; - xchk_btree_set_corrupt(bs->sc, bs->cur, level); + root_block = xfs_btree_get_block(cur, root_level, &root_bp); + root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); + if (be16_to_cpu(root_block->bb_numrecs) != 1 || + numrecs <= root_maxrecs) + xchk_btree_set_corrupt(bs->sc, cur, level); + return; + } + + /* + * Otherwise, only the root level is allowed to have fewer than minrecs + * records or keyptrs. + */ + if (level < root_level) + xchk_btree_set_corrupt(bs->sc, cur, level); } /* diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index e386c9b0b4ab..8d45d60832db 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -131,8 +131,7 @@ xchk_inode_flags( goto bad; /* rt flags require rt device */ - if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && - !mp->m_rtdev_targp) + if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) goto bad; /* new rt bitmap flag only valid for rbmino */ diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index e8c82b026083..76e4f16a9fab 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -180,7 +180,6 @@ xchk_refcountbt_process_rmap_fragments( */ INIT_LIST_HEAD(&worklist); rbno = NULLAGBLOCK; - nr = 1; /* Make sure the fragments actually /are/ in agbno order. */ bno = 0; @@ -194,15 +193,14 @@ xchk_refcountbt_process_rmap_fragments( * Find all the rmaps that start at or before the refc extent, * and put them on the worklist. */ + nr = 0; list_for_each_entry_safe(frag, n, &refchk->fragments, list) { - if (frag->rm.rm_startblock > refchk->bno) - goto done; + if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) + break; bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; if (bno < rbno) rbno = bno; list_move_tail(&frag->list, &worklist); - if (nr == target_nr) - break; nr++; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e427ad097e2e..948ac1290121 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -895,6 +895,16 @@ xfs_setattr_size( error = iomap_zero_range(inode, oldsize, newsize - oldsize, &did_zeroing, &xfs_iomap_ops); } else { + /* + * iomap won't detect a dirty page over an unwritten block (or a + * cow block over a hole) and subsequently skips zeroing the + * newly post-EOF portion of the page. Flush the new EOF to + * convert the block before the pagecache truncate. + */ + error = filemap_write_and_wait_range(inode->i_mapping, newsize, + newsize); + if (error) + return error; error = iomap_truncate_page(inode, newsize, &did_zeroing, &xfs_iomap_ops); } diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index f44c3599527d..1c9bced3e860 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -141,7 +141,7 @@ xfs_fs_map_blocks( goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) - return error; + goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 08da48b66235..280965fc9bbd 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -998,10 +998,13 @@ xfs_growfs_rt( xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); /* - * Update the bitmap inode's size. + * Update the bitmap inode's size ondisk and incore. We need + * to update the incore size so that inode inactivation won't + * punch what it thinks are "posteof" blocks. */ mp->m_rbmip->i_d.di_size = nsbp->sb_rbmblocks * nsbp->sb_blocksize; + i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); /* * Get the summary inode into the transaction. @@ -1009,9 +1012,12 @@ xfs_growfs_rt( xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* - * Update the summary inode's size. + * Update the summary inode's size. We need to update the + * incore size so that inode inactivation won't punch what it + * thinks are "posteof" blocks. */ mp->m_rsumip->i_d.di_size = nmp->m_rsumsize; + i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); /* * Copy summary data from old to new sizes. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 15fd0277ffa6..5901b0005929 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1115,10 +1115,6 @@ static inline bool arch_has_pfn_modify_check(void) #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1361efced43c..cacd61042e47 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -69,10 +69,10 @@ #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define TEXT_CFI_MAIN .text.[0-9a-zA-Z_]*.cfi -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* -#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b3379a97245c..a34694e675c9 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1c6bb25a8501..ee2d177dc0e8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -151,7 +151,8 @@ struct dentry_operations { int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *); - ANDROID_KABI_RESERVE(1); + ANDROID_KABI_USE(1, void (*d_canonical_path)(const struct path *, + struct path *)); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index fd1ce10553bf..f3eb2b2e05f4 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -211,11 +211,20 @@ static inline void fsnotify_open(struct file *file) { const struct path *path = &file->f_path; struct inode *inode = file_inode(file); + struct path lower_path; __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) { + path->dentry->d_op->d_canonical_path(path, &lower_path); + fsnotify_parent(&lower_path, NULL, mask); + fsnotify(lower_path.dentry->d_inode, mask, &lower_path, + FSNOTIFY_EVENT_PATH, NULL, 0); + path_put(&lower_path); + } + fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 774f7d3b8f6a..369221fd5518 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { diff --git a/include/linux/mm.h b/include/linux/mm.h index 16a9b1c7a45b..b8edce0a421a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2616,6 +2616,15 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 122f3439e1af..c65d7a3be3c6 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr) if (!(dsr & DSR_AVAILABLE)) printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) + if ((prog_status & 0x03) == 0x03) printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " "half with 41h command\n"); else if (prog_status & 0x02) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 72cb19c3db6a..9460a5635c90 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -300,7 +300,7 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - list_del(&skb->list); + skb_list_del_init(skb); if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1) list_add_tail(&skb->list, &sublist); } diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..45ff1330b339 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..47a2de582f57 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -36,7 +36,7 @@ struct nf_ipv6_ops { }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 1dda31825ec4..71283739ffd2 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,6 +32,7 @@ #endif /* CONFIG_SPARSEMEM */ +#ifndef BUILD_VDSO32_64 /* * page->flags layout: * @@ -76,20 +77,22 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_TAG_WIDTH 8 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ > BITS_PER_LONG - NR_PAGEFLAGS -#error "KASAN: not enough bits in page flags for tag" -#endif -#else -#define KASAN_TAG_WIDTH 0 +#error "Not enough bits in page flags" #endif /* @@ -104,4 +107,5 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif +#endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/prandom.h b/include/linux/prandom.h index aa16e6468f91..cc1e71334e53 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + struct rnd_state { __u32 s1, s2, s3, s4; }; -DECLARE_PER_CPU(struct rnd_state, net_rand_state); - u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index ea8505204fdf..9f3c17506674 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -32,7 +32,6 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; #include #include -#include #include #include #include diff --git a/include/linux/time64.h b/include/linux/time64.h index 0333e106b5cf..6e27f3876a29 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -130,6 +130,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { + /* Prevent multiplication overflow */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7acb953298a7..84ff2844df2a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -57,6 +57,7 @@ struct tk_read_base { * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -84,6 +85,9 @@ struct tk_read_base { * * wall_to_monotonic is no longer the boot time, getboottime must be * used instead. + * + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to + * accelerate the VDSO update for CLOCK_BOOTTIME. */ struct timekeeper { struct tk_read_base tkr_mono; @@ -99,6 +103,7 @@ struct timekeeper { u8 cs_was_changed_seq; ktime_t next_leap_ktime; u64 raw_sec; + struct timespec64 monotonic_to_boot; /* The following members are for timekeeping internal use */ u64 cycle_interval; diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index f2162e0fe531..bdf4c88d2aa0 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -451,6 +451,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ #define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ #define PD_T_NEWSRC 250 /* Maximum of 275ms */ +#define PD_T_SWAP_SRC_START 20 /* Minimum of 20ms */ #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 461e8a7661b7..d28df7adf948 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -196,6 +196,7 @@ struct dsa_port { unsigned int index; const char *name; const struct dsa_port *cpu_dp; + const char *mac; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6f05af7eff0d..d5f62fb24cea 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -491,9 +491,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -539,7 +541,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 35d7c887a5b9..ce42654425ab 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -39,7 +39,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ - ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ + ND_OPT_PREF64 = 38, /* RFC8781 */ __ND_OPT_MAX }; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c9bd935f4fd1..1ee0f30ae190 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -145,6 +145,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d143e277cdaf..71ca8c4dc290 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1193,8 +1193,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2170e58a2a97..24af4edfc98c 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -383,6 +383,7 @@ enum fuse_opcode { FUSE_READDIRPLUS = 44, FUSE_RENAME2 = 45, FUSE_LSEEK = 46, + FUSE_CANONICAL_PATH= 2016, /* CUSE specific operations */ CUSE_INIT = 4096, diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 13c3d5173e14..625db40356f2 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -30,6 +30,7 @@ #define INCFS_PENDING_READS_FILENAME ".pending_reads" #define INCFS_LOG_FILENAME ".log" +#define INCFS_BLOCKS_WRITTEN_FILENAME ".blocks_written" #define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id") #define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size") #define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") @@ -42,7 +43,10 @@ /* ===== ioctl requests on the command dir ===== */ -/* Create a new file */ +/* + * Create a new file + * May only be called on .pending_reads file + */ #define INCFS_IOC_CREATE_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) @@ -92,9 +96,55 @@ #define INCFS_IOC_GET_FILLED_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) +/* + * Creates a new mapped file + * May only be called on .pending_reads file + */ +#define INCFS_IOC_CREATE_MAPPED_FILE \ + _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) + +/* + * Get number of blocks, total and filled + * May only be called on .pending_reads file + */ +#define INCFS_IOC_GET_BLOCK_COUNT \ + _IOR(INCFS_IOCTL_BASE_CODE, 36, struct incfs_get_block_count_args) + +/* + * Get per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_GET_READ_TIMEOUTS \ + _IOR(INCFS_IOCTL_BASE_CODE, 37, struct incfs_get_read_timeouts_args) + +/* + * Set per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_SET_READ_TIMEOUTS \ + _IOW(INCFS_IOCTL_BASE_CODE, 38, struct incfs_set_read_timeouts_args) + +/* ===== sysfs feature flags ===== */ +/* + * Each flag is represented by a file in /sys/fs/incremental-fs/features + * If the file exists the feature is supported + * Also the file contents will be the line "supported" + */ + +/* + * Basic flag stating that the core incfs file system is available + */ +#define INCFS_FEATURE_FLAG_COREFS "corefs" + +/* + * report_uid mount option is supported + */ +#define INCFS_FEATURE_FLAG_REPORT_UID "report_uid" + enum incfs_compression_alg { COMPRESSION_NONE = 0, - COMPRESSION_LZ4 = 1 + COMPRESSION_LZ4 = 1, + COMPRESSION_ZSTD = 2, }; enum incfs_block_flags { @@ -109,6 +159,8 @@ typedef struct { /* * Description of a pending read. A pending read - a read call by * a userspace program for which the filesystem currently doesn't have data. + * + * Reads from .pending_reads and .log return an array of these structure */ struct incfs_pending_read_info { /* Id of a file that is being read from. */ @@ -124,6 +176,32 @@ struct incfs_pending_read_info { __u32 serial_number; }; +/* + * Description of a pending read. A pending read - a read call by + * a userspace program for which the filesystem currently doesn't have data. + * + * This version of incfs_pending_read_info is used whenever the file system is + * mounted with the report_uid flag + */ +struct incfs_pending_read_info2 { + /* Id of a file that is being read from. */ + incfs_uuid_t file_id; + + /* A number of microseconds since system boot to the read. */ + __aligned_u64 timestamp_us; + + /* Index of a file block that is being read. */ + __u32 block_index; + + /* A serial number of this pending read. */ + __u32 serial_number; + + /* The UID of the reading process */ + __u32 uid; + + __u32 reserved; +}; + /* * Description of a data or hash block to add to a data file. */ @@ -331,4 +409,131 @@ struct incfs_get_filled_blocks_args { __u32 index_out; }; +/* + * Create a new mapped file + * Argument for INCFS_IOC_CREATE_MAPPED_FILE + */ +struct incfs_create_mapped_file_args { + /* + * Total size of the new file. + */ + __aligned_u64 size; + + /* + * File mode. Permissions and dir flag. + */ + __u16 mode; + + __u16 reserved1; + + __u32 reserved2; + + /* + * A pointer to a null-terminated relative path to the incfs mount + * point + * Max length: PATH_MAX + * + * Equivalent to: char *directory_path; + */ + __aligned_u64 directory_path; + + /* + * A pointer to a null-terminated file name. + * Max length: PATH_MAX + * + * Equivalent to: char *file_name; + */ + __aligned_u64 file_name; + + /* Id of source file to map. */ + incfs_uuid_t source_file_id; + + /* + * Offset in source file to start mapping. Must be a multiple of + * INCFS_DATA_FILE_BLOCK_SIZE + */ + __aligned_u64 source_offset; +}; + +/* + * Get information about the blocks in this file + * Argument for INCFS_IOC_GET_BLOCK_COUNT + */ +struct incfs_get_block_count_args { + /* Total number of data blocks in the file */ + __u32 total_data_blocks_out; + + /* Number of filled data blocks in the file */ + __u32 filled_data_blocks_out; + + /* Total number of hash blocks in the file */ + __u32 total_hash_blocks_out; + + /* Number of filled hash blocks in the file */ + __u32 filled_hash_blocks_out; +}; + +/* Description of timeouts for one UID */ +struct incfs_per_uid_read_timeouts { + /* UID to apply these timeouts to */ + __u32 uid; + + /* + * Min time to read any block. Note that this doesn't apply to reads + * which are satisfied from the page cache. + */ + __u32 min_time_ms; + + /* + * Min time to satisfy a pending read. Must be >= min_time_ms. Any + * pending read which is filled before this time will be delayed so + * that the total read time >= this value. + */ + __u32 min_pending_time_ms; + + /* + * Max time to satisfy a pending read before the read times out. + * If set to U32_MAX, defaults to mount options read_timeout_ms= + * Must be >= min_pending_time_ms + */ + __u32 max_pending_time_ms; +}; + +/* + * Get the read timeouts array + * Argument for INCFS_IOC_GET_READ_TIMEOUTS + */ +struct incfs_get_read_timeouts_args { + /* + * A pointer to a buffer to fill with the current timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above buffer in bytes */ + __u32 timeouts_array_size; + + /* Size used in bytes, or size needed if -ENOMEM returned */ + __u32 timeouts_array_size_out; +}; + +/* + * Set the read timeouts array + * Arguments for INCFS_IOC_SET_READ_TIMEOUTS + */ +struct incfs_set_read_timeouts_args { + /* + * A pointer to an array containing the new timeouts + * This will replace any existing timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above array in bytes. Must be < 256 */ + __u32 timeouts_array_size; +}; + + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..54a78529c8b3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -136,6 +136,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -143,6 +145,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 43153888b567..f5c73be7e84d 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -364,9 +364,9 @@ enum v4l2_hsv_encoding { enum v4l2_quantization { /* - * The default for R'G'B' quantization is always full range, except - * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG: this is full range. + * The default for R'G'B' quantization is always full range. + * For Y'CbCr the quantization is always limited range, except + * for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -375,14 +375,13 @@ enum v4l2_quantization { /* * Determine how QUANTIZATION_DEFAULT should map to a proper quantization. - * This depends on whether the image is RGB or not, the colorspace and the - * Y'CbCr encoding. + * This depends on whether the image is RGB or not, the colorspace. + * The Y'CbCr encoding is not used anymore, but is still there for backwards + * compatibility. */ #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ - (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ - V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ - V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE) /* * Deprecated names for opRGB colorspace (IEC 61966-2-5) diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index a2c006a364e0..24f3371ad826 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -74,7 +74,11 @@ #include /* for "struct sockaddr" et al */ #include /* for IFNAMSIZ and co... */ -#include /* for offsetof */ +#ifdef __KERNEL__ +# include /* for offsetof */ +#else +# include /* for offsetof */ +#endif /***************************** VERSION *****************************/ /* diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 925c2acab6a6..96442d540116 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -2,8 +2,6 @@ #ifndef __VDSO_DATAPAGE_H #define __VDSO_DATAPAGE_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include @@ -116,6 +114,4 @@ extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden") #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* __VDSO_DATAPAGE_H */ diff --git a/include/xen/events.h b/include/xen/events.h index 1650d39decae..d8255ed2052c 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -14,11 +14,16 @@ unsigned xen_evtchn_nr_channels(void); -int bind_evtchn_to_irq(unsigned int evtchn); -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, @@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, const char *devname, void *dev_id); int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port); + evtchn_port_t remote_port); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); /* * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, */ void unbind_from_irqhandler(unsigned int irq, void *dev_id); +/* + * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi + * functions above. + */ +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags); +/* Signal an event was spurious, i.e. there was no action resulting from it. */ +#define XEN_EOI_FLAG_SPURIOUS 0x00000001 + #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN diff --git a/init/Kconfig b/init/Kconfig index 30e9ada9dfd1..6341935d30fe 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -594,7 +594,8 @@ config IKHEADERS config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - range 12 25 + range 12 25 if !H8300 + range 12 19 if H8300 default 17 depends on PRINTK help diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e6bad6b3f604..68154bffea45 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6156,6 +6156,48 @@ struct cgroup *cgroup_get_from_fd(int fd) } EXPORT_SYMBOL_GPL(cgroup_get_from_fd); +static u64 power_of_ten(int power) +{ + u64 v = 1; + while (power--) + v *= 10; + return v; +} + +/** + * cgroup_parse_float - parse a floating number + * @input: input string + * @dec_shift: number of decimal digits to shift + * @v: output + * + * Parse a decimal floating point number in @input and store the result in + * @v with decimal point right shifted @dec_shift times. For example, if + * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. + * Returns 0 on success, -errno otherwise. + * + * There's nothing cgroup specific about this function except that it's + * currently the only user. + */ +int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) +{ + s64 whole, frac = 0; + int fstart = 0, fend = 0, flen; + + if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) + return -EINVAL; + if (frac < 0) + return -EINVAL; + + flen = fend > fstart ? fend - fstart : 0; + if (flen < dec_shift) + frac *= power_of_ten(dec_shift - flen); + else + frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); + + *v = whole * power_of_ten(dec_shift) + frac; + return 0; +} + /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. @@ -6332,46 +6374,4 @@ static int __init cgroup_sysfs_init(void) } subsys_initcall(cgroup_sysfs_init); -static u64 power_of_ten(int power) -{ - u64 v = 1; - while (power--) - v *= 10; - return v; -} - -/** - * cgroup_parse_float - parse a floating number - * @input: input string - * @dec_shift: number of decimal digits to shift - * @v: output - * - * Parse a decimal floating point number in @input and store the result in - * @v with decimal point right shifted @dec_shift times. For example, if - * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. - * Returns 0 on success, -errno otherwise. - * - * There's nothing cgroup specific about this function except that it's - * currently the only user. - */ -int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) -{ - s64 whole, frac = 0; - int fstart = 0, fend = 0, flen; - - if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) - return -EINVAL; - if (frac < 0) - return -EINVAL; - - flen = fend > fstart ? fend - fstart : 0; - if (flen < dec_shift) - frac *= power_of_ten(dec_shift - flen); - else - frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); - - *v = whole * power_of_ten(dec_shift) + frac; - return 0; -} - #endif /* CONFIG_SYSFS */ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index fbb1bfdd2fa5..8c76141c99c8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -95,14 +95,6 @@ int dbg_switch_cpu; /* Use kdb or gdbserver mode */ int dbg_kdb_mode = 1; -static int __init opt_kgdb_con(char *str) -{ - kgdb_use_con = 1; - return 0; -} - -early_param("kgdbcon", opt_kgdb_con); - module_param(kgdb_use_con, int, 0644); module_param(kgdbreboot, int, 0644); @@ -820,6 +812,20 @@ static struct console kgdbcons = { .index = -1, }; +static int __init opt_kgdb_con(char *str) +{ + kgdb_use_con = 1; + + if (kgdb_io_module_registered && !kgdb_con_registered) { + register_console(&kgdbcons); + kgdb_con_registered = 1; + } + + return 0; +} + +early_param("kgdbcon", opt_kgdb_con); + #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_dbg(int key) { diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2a8c41f12d45..6f7d4e977c5c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -239,6 +239,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; if (verbose) swiotlb_print_info(); @@ -270,9 +271,11 @@ swiotlb_init(int verbose) if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; - if (io_tlb_start) + if (io_tlb_start) { memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + io_tlb_start = 0; + } pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -376,6 +379,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; swiotlb_print_info(); diff --git a/kernel/events/core.c b/kernel/events/core.c index b229db1588c3..9c1a26fb3a36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5492,11 +5492,11 @@ static void perf_pmu_output_stop(struct perf_event *event); static void perf_mmap_close(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; - struct ring_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); + bool detach_rest = false; if (event->pmu->event_unmapped) event->pmu->event_unmapped(event, vma->vm_mm); @@ -5527,7 +5527,8 @@ static void perf_mmap_close(struct vm_area_struct *vma) mutex_unlock(&event->mmap_mutex); } - atomic_dec(&rb->mmap_count); + if (atomic_dec_and_test(&rb->mmap_count)) + detach_rest = true; if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) goto out_put; @@ -5536,7 +5537,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) mutex_unlock(&event->mmap_mutex); /* If there's still other mmap()s of this buffer, we're done. */ - if (atomic_read(&rb->mmap_count)) + if (!detach_rest) goto out_put; /* @@ -9068,6 +9069,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { int fpos = token == IF_SRC_FILE ? 2 : 1; + kfree(filename); filename = match_strdup(&args[fpos]); if (!filename) { ret = -ENOMEM; @@ -9114,16 +9116,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, */ ret = -EOPNOTSUPP; if (!event->ctx->task) - goto fail_free_name; + goto fail; /* look up the path and grab its inode */ ret = kern_path(filename, LOOKUP_FOLLOW, &filter->path); if (ret) - goto fail_free_name; - - kfree(filename); - filename = NULL; + goto fail; ret = -EINVAL; if (!filter->path.dentry || @@ -9143,13 +9142,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (state != IF_STATE_ACTION) goto fail; + kfree(filename); kfree(orig); return 0; -fail_free_name: - kfree(filename); fail: + kfree(filename); free_filters_list(filters); kfree(orig); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 6dc725a7e7bc..8fc0ddc38cb6 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -209,7 +209,7 @@ static inline int get_recursion_context(int *recursion) rctx = 3; else if (in_irq()) rctx = 2; - else if (in_softirq()) + else if (in_serving_softirq()) rctx = 1; else rctx = 0; diff --git a/kernel/exit.c b/kernel/exit.c index 906637701f17..e7af71a12f65 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -517,7 +517,10 @@ static void exit_mm(void) up_read(&mm->mmap_sem); self.task = current; - self.next = xchg(&core_state->dumper.next, &self); + if (self.task->flags & PF_SIGNALED) + self.next = xchg(&core_state->dumper.next, &self); + else + self.task = NULL; /* * Implies mb(), the result of xchg() must be visible * to core_state->dumper. diff --git a/kernel/fail_function.c b/kernel/fail_function.c index bc80a4e268c0..a52151a2291f 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -261,7 +261,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, if (copy_from_user(buf, buffer, count)) { ret = -EFAULT; - goto out; + goto out_free; } buf[count] = '\0'; sym = strstrip(buf); @@ -315,8 +315,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, ret = count; } out: - kfree(buf); mutex_unlock(&fei_lock); +out_free: + kfree(buf); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index 186c9bbb88f2..a2f2ad3ee9f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1685,11 +1685,11 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) /* * Poll support for process exit notification. */ -static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) +static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct task_struct *task; struct pid *pid = file->private_data; - int poll_flags = 0; + __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); @@ -1701,7 +1701,7 @@ static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) * group, then poll(2) should block, similar to the wait(2) family. */ if (!task || (task->exit_state && thread_group_empty(task))) - poll_flags = POLLIN | POLLRDNORM; + poll_flags = EPOLLIN | EPOLLRDNORM; rcu_read_unlock(); return poll_flags; @@ -1829,8 +1829,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1841,16 +1839,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* @@ -2106,14 +2094,9 @@ static __latent_entropy struct task_struct *copy_process( /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { - p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { - if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = (clone_flags & CSIGNAL); p->group_leader = p; p->tgid = p->pid; } @@ -2158,9 +2141,14 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; + if (clone_flags & CLONE_THREAD) + p->exit_signal = -1; + else + p->exit_signal = current->group_leader->exit_signal; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; + p->exit_signal = (clone_flags & CSIGNAL); } klp_copy_process(p); diff --git a/kernel/futex.c b/kernel/futex.c index 920d853a8e9e..334dc4cae780 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -856,8 +856,9 @@ static void put_pi_state(struct futex_pi_state *pi_state) */ if (pi_state->owner) { struct task_struct *owner; + unsigned long flags; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); owner = pi_state->owner; if (owner) { raw_spin_lock(&owner->pi_lock); @@ -865,7 +866,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock(&owner->pi_lock); } rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } if (current->pi_state_cache) { @@ -1517,8 +1518,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - if (unlikely(should_fail_futex(true))) + if (unlikely(should_fail_futex(true))) { ret = -EFAULT; + goto out_unlock; + } ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); if (!ret && (curval != uval)) { @@ -2415,10 +2418,22 @@ retry: } /* - * Since we just failed the trylock; there must be an owner. + * The trylock just failed, so either there is an owner or + * there is a higher priority waiter than this one. */ newowner = rt_mutex_owner(&pi_state->pi_mutex); - BUG_ON(!newowner); + /* + * If the higher priority waiter has not yet taken over the + * rtmutex then newowner is NULL. We can't return here with + * that state because it's inconsistent vs. the user space + * state. So drop the locks and try again. It's a valid + * situation and not any different from the other retry + * conditions. + */ + if (unlikely(!newowner)) { + err = -EAGAIN; + goto handle_err; + } } else { WARN_ON_ONCE(argowner != current); if (oldowner == current) { diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..d532bf0c5a67 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -80,6 +80,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + select IRQ_DOMAIN_HIERARCHY # Generic MSI interrupt support config GENERIC_MSI_IRQ diff --git a/kernel/kthread.c b/kernel/kthread.c index 4b3644b85760..b7e2254e99a6 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -864,7 +864,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) /* Move the work from worker->delayed_work_list. */ WARN_ON_ONCE(list_empty(&work->node)); list_del_init(&work->node); - kthread_insert_work(worker, work, &worker->work_list); + if (!work->canceling) + kthread_insert_work(worker, work, &worker->work_list); spin_unlock(&worker->lock); } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f2635fc751d9..2505d25f89a1 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1145,24 +1145,24 @@ void free_basic_memory_bitmaps(void) void clear_free_pages(void) { -#ifdef CONFIG_PAGE_POISONING_ZERO struct memory_bitmap *bm = free_pages_map; unsigned long pfn; if (WARN_ON(!(free_pages_map))) return; - memory_bm_position_reset(bm); - pfn = memory_bm_next_pfn(bm); - while (pfn != BM_END_OF_MAP) { - if (pfn_valid(pfn)) - clear_highpage(pfn_to_page(pfn)); - + if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { + memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (pfn_valid(pfn)) + clear_highpage(pfn_to_page(pfn)); + + pfn = memory_bm_next_pfn(bm); + } + memory_bm_position_reset(bm); + pr_info("free pages cleared after restore\n"); } - memory_bm_position_reset(bm); - pr_info("free pages cleared after restore\n"); -#endif /* PAGE_POISONING_ZERO */ } /** diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b93eb4eaf7ac..ecdb7402072f 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -258,17 +258,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } -static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, - unsigned int mode) +static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { - int ret; - if (mode & PTRACE_MODE_NOAUDIT) - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); - else - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE); - - return ret == 0; + return ns_capable_noaudit(ns, CAP_SYS_PTRACE); + return ns_capable(ns, CAP_SYS_PTRACE); } /* Returns 0 on success, -errno on denial. */ @@ -320,7 +314,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(cred, tcred->user_ns, mode)) + if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -339,7 +333,7 @@ ok: mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(cred, mm->user_ns, mode))) + !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); diff --git a/kernel/reboot.c b/kernel/reboot.c index 25079b0fcfec..e9ee544d8ace 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -568,22 +568,22 @@ static int __init reboot_setup(char *str) break; case 's': - { - int rc; - - if (isdigit(*(str+1))) { - rc = kstrtoint(str+1, 0, &reboot_cpu); - if (rc) - return rc; - } else if (str[1] == 'm' && str[2] == 'p' && - isdigit(*(str+3))) { - rc = kstrtoint(str+3, 0, &reboot_cpu); - if (rc) - return rc; - } else + if (isdigit(*(str+1))) + reboot_cpu = simple_strtoul(str+1, NULL, 0); + else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) + reboot_cpu = simple_strtoul(str+3, NULL, 0); + else *mode = REBOOT_SOFT; + if (reboot_cpu >= num_possible_cpus()) { + pr_err("Ignoring the CPU number in reboot= option. " + "CPU %d exceeds possible cpu number %d\n", + reboot_cpu, num_possible_cpus()); + reboot_cpu = 0; + break; + } break; - } + case 'g': *mode = REBOOT_GPIO; break; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 748810a291ea..77ba4fd1f443 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5279,6 +5279,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int task_new = !(flags & ENQUEUE_WAKEUP); #ifdef CONFIG_ROCKCHIP_SCHED_PERFORMANCE_BIAS if (sysctl_sched_performance_bias) @@ -5365,7 +5366,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * into account, but that is not straightforward to implement, * and the following generally works well enough in practice. */ - if (flags & ENQUEUE_WAKEUP) + if (!task_new) update_overutilized_status(rq); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 775cd2c68956..b88bde4bafbb 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1995,7 +1995,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att rcu_read_unlock(); if (has_asym) - static_branch_enable_cpuslocked(&sched_asym_cpucapacity); + static_branch_inc_cpuslocked(&sched_asym_cpucapacity); ret = 0; error: @@ -2086,8 +2086,12 @@ int sched_init_domains(const struct cpumask *cpu_map) */ static void detach_destroy_domains(const struct cpumask *cpu_map) { + unsigned int cpu = cpumask_any(cpu_map); int i; + if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) + static_branch_dec_cpuslocked(&sched_asym_cpucapacity); + rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 56e69203b658..b9bd5a6b5138 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -383,8 +383,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) * behavior of privileged children. */ if (!task_no_new_privs(current) && - security_capable(current_cred(), current_user_ns(), - CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) + !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ diff --git a/kernel/signal.c b/kernel/signal.c index 1c5140a75cee..9ff2c26161f9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -389,16 +389,17 @@ static bool task_participate_group_stop(struct task_struct *task) void task_join_group_stop(struct task_struct *task) { + unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; + struct signal_struct *sig = current->signal; + + if (sig->group_stop_count) { + sig->group_stop_count++; + mask |= JOBCTL_STOP_CONSUME; + } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) + return; + /* Have the new thread join an on-going signal group stop */ - unsigned long jobctl = current->jobctl; - if (jobctl & JOBCTL_STOP_PENDING) { - struct signal_struct *sig = current->signal; - unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; - unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; - if (task_set_jobctl_pending(task, signr | gstop)) { - sig->group_stop_count++; - } - } + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } /* diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 9a65713c8309..2e2b335ef101 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -154,10 +154,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, u64 oval, nval, ointerval, ninterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; - /* - * Use the to_ktime conversion because that clamps the maximum - * value to KTIME_MAX and avoid multiplication overflows. - */ nval = ktime_to_ns(timeval_to_ktime(value->it_value)); ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval)); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a02e0f6b287c..0a3cc37e4b83 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -520,6 +521,7 @@ void tick_unfreeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); } else { + touch_softlockup_watchdog(); tick_resume_local(); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c66fd11d94bc..aa798afcb089 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -150,6 +150,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { tk->offs_boot = ktime_add(tk->offs_boot, delta); + /* + * Timespec representation for VDSO update to avoid 64bit division + * on every update. + */ + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); } /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 61e41ea3a96e..a6e88d9bb931 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1655,13 +1655,6 @@ void update_process_times(int user_tick) scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(p); - - /* The current CPU might make use of net randoms without receiving IRQs - * to renew them often enough. Let's update the net_rand_state from a - * non-constant value that's not affine to the number of calls to make - * sure it's updated when there's some activity (we don't care in idle). - */ - this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); } /** diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 565ab06917a7..5ee0f7709410 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata, struct timekeeper *tk) { struct vdso_timestamp *vdso_ts; - u64 nsec; + u64 nsec, sec; vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; @@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata, } vdso_ts->nsec = nsec; - /* CLOCK_MONOTONIC_RAW */ - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; - vdso_ts->sec = tk->raw_sec; - vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* Copy MONOTONIC time for BOOTTIME */ + sec = vdso_ts->sec; + /* Add the boot offset */ + sec += tk->monotonic_to_boot.tv_sec; + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift; /* CLOCK_BOOTTIME */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec + - ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift); + vdso_ts->sec = sec; + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); vdso_ts->sec++; } vdso_ts->nsec = nsec; + /* CLOCK_MONOTONIC_RAW */ + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts->sec = tk->raw_sec; + vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* CLOCK_TAI */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; @@ -104,11 +108,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; nsec = nsec + tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec = nsec - NSEC_PER_SEC; - vdso_ts->sec++; - } - vdso_ts->nsec = nsec; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); update_vdso_data(vdata, tk); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1442f6152abc..645048bb1e86 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -521,10 +521,18 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->msg_data) goto err; - ret = -ENOENT; - - dir = debugfs_lookup(buts->name, blk_debugfs_root); - if (!dir) +#ifdef CONFIG_BLK_DEBUG_FS + /* + * When tracing whole make_request drivers (multiqueue) block devices, + * reuse the existing debugfs directory created by the block layer on + * init. For request-based block devices, all partitions block devices, + * and scsi-generic block devices we create a temporary new debugfs + * directory that will be removed once the trace ends. + */ + if (q->mq_ops && bdev && bdev == bdev->bd_contains) + dir = q->debugfs_dir; + else +#endif bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); if (!dir) goto err; @@ -583,8 +591,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = 0; err: - if (dir && !bt->dir) - dput(dir); if (ret) blk_trace_free(bt); return ret; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 564d22691dd7..87ce9736043d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -444,14 +444,16 @@ struct rb_event_info { /* * Used for which event context the event is in. - * NMI = 0 - * IRQ = 1 - * SOFTIRQ = 2 - * NORMAL = 3 + * TRANSITION = 0 + * NMI = 1 + * IRQ = 2 + * SOFTIRQ = 3 + * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { + RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, @@ -1692,18 +1694,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; - int cpu, err = 0; + int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) - return size; + return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) - return size; + return 0; nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); @@ -1843,7 +1845,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } mutex_unlock(&buffer->mutex); - return size; + return 0; out_err: for_each_buffer_cpu(buffer, cpu) { @@ -2620,10 +2622,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * bit 1 = NMI context + * bit 2 = IRQ context + * bit 3 = SoftIRQ context + * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ @@ -2646,6 +2648,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. + * + * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit + * is set when a recursion is detected at the current context, and if + * the TRANSITION bit is already set, it will fail the recursion. + * This is needed because there's a lag between the changing of + * interrupt context and updating the preempt count. In this case, + * a false positive will be found. To handle this, one extra recursion + * is allowed, and this is done by the TRANSITION bit. If the TRANSITION + * bit is already set, then it is considered a recursion and the function + * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. + * + * On the trace_recursive_unlock(), the TRANSITION bit will be the first + * to be cleared. Even if it wasn't the context that set it. That is, + * if an interrupt comes in while NORMAL bit is set and the ring buffer + * is called before preempt_count() is updated, since the check will + * be on the NORMAL bit, the TRANSITION bit will then be set. If an + * NMI then comes in, it will set the NMI bit, but when the NMI code + * does the trace_recursive_unlock() it will clear the TRANSTION bit + * and leave the NMI bit set. But this is fine, because the interrupt + * code that set the TRANSITION bit will then clear the NMI bit when it + * calls trace_recursive_unlock(). If another NMI comes in, it will + * set the TRANSITION bit and continue. + * + * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline int @@ -2661,8 +2687,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = pc & NMI_MASK ? RB_CTX_NMI : pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; - if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) - return 1; + if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { + /* + * It is possible that this was called by transitioning + * between interrupt context, and preempt_count() has not + * been updated yet. In this case, use the TRANSITION bit. + */ + bit = RB_CTX_TRANSITION; + if (val & (1 << (bit + cpu_buffer->nest))) + return 1; + } val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; @@ -2677,8 +2711,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->current_context - (1 << cpu_buffer->nest); } -/* The recursive locking above uses 4 bits */ -#define NESTED_BITS 4 +/* The recursive locking above uses 5 bits */ +#define NESTED_BITS 5 /** * ring_buffer_nest_start - Allow to trace while nested diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6bf617ff0369..c3cc6aaa6f79 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2819,7 +2819,7 @@ static char *get_trace_buf(void) /* Interrupts must see nesting incremented before we use the buffer */ barrier(); - return &buffer->buffer[buffer->nesting][0]; + return &buffer->buffer[buffer->nesting - 1][0]; } static void put_trace_buf(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ee0c6a313ed1..01475411fd1b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -534,6 +534,12 @@ enum { TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, + + /* + * When transitioning between context, the preempt_count() may + * not be correct. Allow for a single recursion to cover this case. + */ + TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -588,14 +594,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) return 0; bit = trace_get_context_bit() + start; - if (unlikely(val & (1 << bit))) - return -1; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = TRACE_TRANSITION_BIT; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit + 1; + } + + /* Normal check passed, clear the transition to allow it again */ + trace_recursion_clear(TRACE_TRANSITION_BIT); val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit; + return bit + 1; } static __always_inline void trace_clear_recursion(int bit) @@ -605,6 +624,7 @@ static __always_inline void trace_clear_recursion(int bit) if (!bit) return; + bit--; bit = 1 << bit; val &= ~bit; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 11e9daa4a568..330ba2b081ed 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -492,8 +492,13 @@ trace_selftest_function_recursion(void) unregister_ftrace_function(&test_rec_probe); ret = -1; - if (trace_selftest_recursion_cnt != 1) { - pr_cont("*callback not called once (%d)* ", + /* + * Recursion allows for transitions between context, + * and may call the callback twice. + */ + if (trace_selftest_recursion_cnt != 1 && + trace_selftest_recursion_cnt != 2) { + pr_cont("*callback not called once (or twice) (%d)* ", trace_selftest_recursion_cnt); goto out; } diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 9defbcac7621..2a049c66dc0e 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -105,7 +105,6 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST - default !(CLANG_VERSION < 90000) depends on KASAN help The LLVM stack address sanitizer has a know problem that @@ -114,11 +113,11 @@ config KASAN_STACK_ENABLE Disabling asan-stack makes it safe to run kernels build with clang-8 with KASAN enabled, though it loses some of the functionality. - This feature is always disabled when compile-testing with clang-8 - or earlier to avoid cluttering the output in stack overflow - warnings, but clang-8 users can still enable it for builds without - CONFIG_COMPILE_TEST. On gcc and later clang versions it is - assumed to always be safe to use and enabled by default. + This feature is always disabled when compile-testing with clang + to avoid cluttering the output in stack overflow warnings, + but clang users can still enable it for builds without + CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe + to use and enabled by default. config KASAN_STACK int diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c index 0e2deac97da0..e02f9df24d1e 100644 --- a/lib/fonts/font_10x18.c +++ b/lib/fonts/font_10x18.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 9216 -static struct font_data fontdata_10x18 = { +static const struct font_data fontdata_10x18 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 0000000000 */ diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c index 87da8acd07db..6e3c4b7691c8 100644 --- a/lib/fonts/font_6x10.c +++ b/lib/fonts/font_6x10.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 2560 -static struct font_data fontdata_6x10 = { +static const struct font_data fontdata_6x10 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c index 5e975dfa10a5..2d22a24e816f 100644 --- a/lib/fonts/font_6x11.c +++ b/lib/fonts/font_6x11.c @@ -9,7 +9,7 @@ #define FONTDATAMAX (11*256) -static struct font_data fontdata_6x11 = { +static const struct font_data fontdata_6x11 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c index 86d298f38505..9cc7ae2e03f7 100644 --- a/lib/fonts/font_7x14.c +++ b/lib/fonts/font_7x14.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 3584 -static struct font_data fontdata_7x14 = { +static const struct font_data fontdata_7x14 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 0000000 */ diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c index 37cedd36ca5e..bab25dc59e8d 100644 --- a/lib/fonts/font_8x16.c +++ b/lib/fonts/font_8x16.c @@ -10,7 +10,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_8x16 = { +static const struct font_data fontdata_8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c index 8ab695538395..109d0572368f 100644 --- a/lib/fonts/font_8x8.c +++ b/lib/fonts/font_8x8.c @@ -9,7 +9,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_8x8 = { +static const struct font_data fontdata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c index 069b3e80c434..fb395f0d4031 100644 --- a/lib/fonts/font_acorn_8x8.c +++ b/lib/fonts/font_acorn_8x8.c @@ -5,7 +5,7 @@ #define FONTDATAMAX 2048 -static struct font_data acorndata_8x8 = { +static const struct font_data acorndata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ^@ */ /* 01 */ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e, /* ^A */ diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c index 1449876c6a27..592774a90917 100644 --- a/lib/fonts/font_mini_4x6.c +++ b/lib/fonts/font_mini_4x6.c @@ -43,7 +43,7 @@ __END__; #define FONTDATAMAX 1536 -static struct font_data fontdata_mini_4x6 = { +static const struct font_data fontdata_mini_4x6 = { { 0, 0, FONTDATAMAX, 0 }, { /*{*/ /* Char 0: ' ' */ diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c index 32d65551e7ed..a6f95ebce950 100644 --- a/lib/fonts/font_pearl_8x8.c +++ b/lib/fonts/font_pearl_8x8.c @@ -14,7 +14,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_pearl8x8 = { +static const struct font_data fontdata_pearl8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c index 641a6b4dca42..a5b65bd49604 100644 --- a/lib/fonts/font_sun12x22.c +++ b/lib/fonts/font_sun12x22.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 11264 -static struct font_data fontdata_sun12x22 = { +static const struct font_data fontdata_sun12x22 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 000000000000 */ diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c index 193fe6d988e0..e577e76a6a7c 100644 --- a/lib/fonts/font_sun8x16.c +++ b/lib/fonts/font_sun8x16.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_sun8x16 = { +static const struct font_data fontdata_sun8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0x81,0xa5,0x81,0x81,0xbd,0x99,0x81,0x81,0x7e,0x00,0x00,0x00,0x00, diff --git a/lib/random32.c b/lib/random32.c index b6f3325e38e4..9085b1172015 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -40,16 +40,6 @@ #include #include -#ifdef CONFIG_RANDOM32_SELFTEST -static void __init prandom_state_selftest(void); -#else -static inline void prandom_state_selftest(void) -{ -} -#endif - -DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; - /** * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. @@ -69,25 +59,6 @@ u32 prandom_u32_state(struct rnd_state *state) } EXPORT_SYMBOL(prandom_u32_state); -/** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - u32 res; - - res = prandom_u32_state(state); - put_cpu_var(net_rand_state); - - return res; -} -EXPORT_SYMBOL(prandom_u32); - /** * prandom_bytes_state - get the requested number of pseudo-random bytes * @@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) } EXPORT_SYMBOL(prandom_bytes_state); -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - - prandom_bytes_state(state, buf, bytes); - put_cpu_var(net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - static void prandom_warmup(struct rnd_state *state) { /* Calling RNG ten times to satisfy recurrence condition */ @@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) -{ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); -} - -/** - * prandom_seed - add entropy to pseudo random number generator - * @seed: seed value - * - * Add some additional seeding to the prandom pool. - */ -void prandom_seed(u32 entropy) -{ - int i; - /* - * No locking on the CPUs, but then somewhat random results are, well, - * expected. - */ - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - - state->s1 = __seed(state->s1 ^ entropy, 2U); - prandom_warmup(state); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * to start the prandom_u32() engine. - */ -static int __init prandom_init(void) -{ - int i; - - prandom_state_selftest(); - - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - - prandom_seed_early(state, weak_seed, true); - prandom_warmup(state); - } - - return 0; -} -core_initcall(prandom_init); - -static void __prandom_timer(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, __prandom_timer); - -static void __prandom_timer(struct timer_list *unused) -{ - u32 entropy; - unsigned long expires; - - get_random_bytes(&entropy, sizeof(entropy)); - prandom_seed(entropy); - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + prandom_u32_max(40); - seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); - - add_timer(&seed_timer); -} - -static void __init __prandom_start_seed_timer(void) -{ - seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); - add_timer(&seed_timer); -} - void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; @@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) } EXPORT_SYMBOL(prandom_seed_full_state); -/* - * Generate better values after random number generator - * is fully initialized. - */ -static void __prandom_reseed(bool late) -{ - unsigned long flags; - static bool latch = false; - static DEFINE_SPINLOCK(lock); - - /* Asking for random bytes might result in bytes getting - * moved into the nonblocking pool and thus marking it - * as initialized. In this case we would double back into - * this function and attempt to do a late reseed. - * Ignore the pointless attempt to reseed again if we're - * already waiting for bytes when the nonblocking pool - * got initialized. - */ - - /* only allow initial seeding (late == false) once */ - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (latch && !late) - goto out; - - latch = true; - prandom_seed_full_state(&net_rand_state); -out: - spin_unlock_irqrestore(&lock, flags); -} - -void prandom_reseed_late(void) -{ - __prandom_reseed(true); -} - -static int __init prandom_reseed(void) -{ - __prandom_reseed(false); - __prandom_start_seed_timer(); - return 0; -} -late_initcall(prandom_reseed); - #ifdef CONFIG_RANDOM32_SELFTEST static struct prandom_test1 { u32 seed; @@ -421,7 +243,28 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static void __init prandom_state_selftest(void) +static u32 __extract_hwseed(void) +{ + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +} + +static int __init prandom_state_selftest(void) { int i, j, errors = 0, runs = 0; bool error = false; @@ -461,5 +304,266 @@ static void __init prandom_state_selftest(void) pr_warn("prandom: %d/%d self tests failed\n", errors, runs); else pr_info("prandom: %d self tests passed\n", runs); + return 0; } +core_initcall(prandom_state_selftest); #endif + +/* + * The prandom_u32() implementation is now completely separate from the + * prandom_state() functions, which are retained (for now) for compatibility. + * + * Because of (ab)use in the networking code for choosing random TCP/UDP port + * numbers, which open DoS possibilities if guessable, we want something + * stronger than a standard PRNG. But the performance requirements of + * the network code do not allow robust crypto for this application. + * + * So this is a homebrew Junior Spaceman implementation, based on the + * lowest-latency trustworthy crypto primitive available, SipHash. + * (The authors of SipHash have not been consulted about this abuse of + * their work.) + * + * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to + * one word of output. This abbreviated version uses 2 rounds per word + * of output. + */ + +struct siprand_state { + unsigned long v0; + unsigned long v1; + unsigned long v2; + unsigned long v3; +}; + +static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; + +/* + * This is the core CPRNG function. As "pseudorandom", this is not used + * for truly valuable things, just intended to be a PITA to guess. + * For maximum speed, we do just two SipHash rounds per word. This is + * the same rate as 4 rounds per 64 bits that SipHash normally uses, + * so hopefully it's reasonably secure. + * + * There are two changes from the official SipHash finalization: + * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; + * they are there only to make the output rounds distinct from the input + * rounds, and this application has no input rounds. + * - Rather than returning v0^v1^v2^v3, return v1+v3. + * If you look at the SipHash round, the last operation on v3 is + * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. + * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but + * it still cancels out half of the bits in v2 for no benefit.) + * Second, since the last combining operation was xor, continue the + * pattern of alternating xor/add for a tiny bit of extra non-linearity. + */ +static inline u32 siprand_u32(struct siprand_state *s) +{ + unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; + return v1 + v3; +} + + +/** + * prandom_u32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 prandom_u32(void) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u32 res = siprand_u32(state); + + put_cpu_ptr(&net_rand_state); + return res; +} +EXPORT_SYMBOL(prandom_u32); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, size_t bytes) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u8 *ptr = buf; + + while (bytes >= sizeof(u32)) { + put_unaligned(siprand_u32(state), (u32 *)ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); + } + + if (bytes > 0) { + u32 rem = siprand_u32(state); + + do { + *ptr++ = (u8)rem; + rem >>= BITS_PER_BYTE; + } while (--bytes > 0); + } + put_cpu_ptr(&net_rand_state); +} +EXPORT_SYMBOL(prandom_bytes); + +/** + * prandom_seed - add entropy to pseudo random number generator + * @entropy: entropy value + * + * Add some additional seed material to the prandom pool. + * The "entropy" is actually our IP address (the only caller is + * the network code), not for unpredictability, but to ensure that + * different machines are initialized differently. + */ +void prandom_seed(u32 entropy) +{ + int i; + + add_device_randomness(&entropy, sizeof(entropy)); + + for_each_possible_cpu(i) { + struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); + unsigned long v0 = state->v0, v1 = state->v1; + unsigned long v2 = state->v2, v3 = state->v3; + + do { + v3 ^= entropy; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= entropy; + } while (unlikely(!v0 || !v1 || !v2 || !v3)); + + WRITE_ONCE(state->v0, v0); + WRITE_ONCE(state->v1, v1); + WRITE_ONCE(state->v2, v2); + WRITE_ONCE(state->v3, v3); + } +} +EXPORT_SYMBOL(prandom_seed); + +/* + * Generate some initially weak seeding values to allow + * the prandom_u32() engine to be started. + */ +static int __init prandom_init_early(void) +{ + int i; + unsigned long v0, v1, v2, v3; + + if (!arch_get_random_long(&v0)) + v0 = jiffies; + if (!arch_get_random_long(&v1)) + v1 = random_get_entropy(); + v2 = v0 ^ PRND_K0; + v3 = v1 ^ PRND_K1; + + for_each_possible_cpu(i) { + struct siprand_state *state; + + v3 ^= i; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= i; + + state = per_cpu_ptr(&net_rand_state, i); + state->v0 = v0; state->v1 = v1; + state->v2 = v2; state->v3 = v3; + } + + return 0; +} +core_initcall(prandom_init_early); + + +/* Stronger reseeding when available, and periodically thereafter. */ +static void prandom_reseed(struct timer_list *unused); + +static DEFINE_TIMER(seed_timer, prandom_reseed); + +static void prandom_reseed(struct timer_list *unused) +{ + unsigned long expires; + int i; + + /* + * Reinitialize each CPU's PRNG with 128 bits of key. + * No locking on the CPUs, but then somewhat random results are, + * well, expected. + */ + for_each_possible_cpu(i) { + struct siprand_state *state; + unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; + unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; +#if BITS_PER_LONG == 32 + int j; + + /* + * On 32-bit machines, hash in two extra words to + * approximate 128-bit key length. Not that the hash + * has that much security, but this prevents a trivial + * 64-bit brute force. + */ + for (j = 0; j < 2; j++) { + unsigned long m = get_random_long(); + + v3 ^= m; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= m; + } +#endif + /* + * Probably impossible in practice, but there is a + * theoretical risk that a race between this reseeding + * and the target CPU writing its state back could + * create the all-zero SipHash fixed point. + * + * To ensure that never happens, ensure the state + * we write contains no zero words. + */ + state = per_cpu_ptr(&net_rand_state, i); + WRITE_ONCE(state->v0, v0 ? v0 : -1ul); + WRITE_ONCE(state->v1, v1 ? v1 : -1ul); + WRITE_ONCE(state->v2, v2 ? v2 : -1ul); + WRITE_ONCE(state->v3, v3 ? v3 : -1ul); + } + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); + mod_timer(&seed_timer, expires); +} + +/* + * The random ready callback can be called from almost any interrupt. + * To avoid worrying about whether it's safe to delay that interrupt + * long enough to seed all CPUs, just schedule an immediate timer event. + */ +static void prandom_timer_start(struct random_ready_callback *unused) +{ + mod_timer(&seed_timer, jiffies); +} + +/* + * Start periodic full reseeding as soon as strong + * random numbers are available. + */ +static int __init prandom_init_late(void) +{ + static struct random_ready_callback random_ready = { + .func = prandom_timer_start + }; + int ret = add_random_ready_callback(&random_ready); + + if (ret == -EALREADY) { + prandom_timer_start(&random_ready); + ret = 0; + } + return ret; +} +late_initcall(prandom_init_late); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 60e7eca2f4be..3b859201f84c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -506,7 +506,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; } diff --git a/mm/filemap.c b/mm/filemap.c index 16645266fabb..9bf5437449c6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2547,7 +2547,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ - if (vmf->vma->vm_flags & VM_RAND_READ) + if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; if (ra->mmap_miss > 0) ra->mmap_miss--; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6aef386c1726..b73e70254a5a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -694,7 +694,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) transparent_hugepage_use_zero_page()) { pgtable_t pgtable; struct page *zero_page; - bool set; vm_fault_t ret; pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) @@ -707,25 +706,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ret = 0; - set = false; if (pmd_none(*vmf->pmd)) { ret = check_stable_address_space(vma->vm_mm); if (ret) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); } else { set_huge_zero_page(pgtable, vma->vm_mm, vma, haddr, vmf->pmd, zero_page); spin_unlock(vmf->ptl); - set = true; } - } else + } else { spin_unlock(vmf->ptl); - if (!set) pte_free(vma->vm_mm, pgtable); + } return ret; } gfp = alloc_hugepage_direct_gfpmask(vma); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 69ee47b614e3..9b03bfa4326a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long flags = qp->flags; int ret; bool has_unmovable = false; - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); @@ -510,7 +510,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; @@ -542,7 +542,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, } else break; } - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); cond_resched(); if (has_unmovable) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef270896b6fd..22b9efb128b3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1072,8 +1072,11 @@ static void kernel_init_free_pages(struct page *page, int numpages) { int i; + /* s390's use of memset() could override KASAN redzones. */ + kasan_disable_current(); for (i = 0; i < numpages; i++) clear_highpage(page + i); + kasan_enable_current(); } static __always_inline bool free_pages_prepare(struct page *page, @@ -4713,6 +4716,11 @@ refill: if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; + if (unlikely(nc->pfmemalloc)) { + free_the_page(page, compound_order(page)); + goto refill; + } + #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) /* if size can vary use size else just use PAGE_SIZE */ size = nc->size; diff --git a/mm/slab.c b/mm/slab.c index fa53bb09495d..4e2be129e568 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2379,7 +2379,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index b6dcb40fa8a7..9268f808afc0 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1038,7 +1038,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (addr == NULL) + if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 853773e45f79..837f67c9fad3 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -205,6 +205,7 @@ static const struct file_operations batadv_log_fops = { .read = batadv_log_read, .poll = batadv_log_poll, .llseek = no_llseek, + .owner = THIS_MODULE, }; /** diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 9ce661e2590d..a350c05b7ff5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -215,6 +215,7 @@ static void br_get_stats64(struct net_device *dev, sum.rx_packets += tmp.rx_packets; } + netdev_stats_to_stats64(stats, &dev->stats); stats->tx_bytes = sum.tx_bytes; stats->tx_packets = sum.tx_packets; stats->rx_bytes = sum.rx_bytes; diff --git a/net/can/af_can.c b/net/can/af_can.c index 04132b0b5d36..b3edb8092124 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -722,16 +722,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, @@ -739,16 +748,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } /* diff --git a/net/can/proc.c b/net/can/proc.c index 70fea17bb04c..a3071f43acd7 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -467,6 +467,9 @@ void can_init_proc(struct net *net) */ void can_remove_proc(struct net *net) { + if (!net->can.proc_dir) + return; + if (net->can.pde_version) remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); @@ -494,6 +497,5 @@ void can_remove_proc(struct net *net) if (net->can.pde_rcvlist_sff) remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (net->can.proc_dir) - remove_proc_entry("can", net->proc_net); + remove_proc_entry("can", net->proc_net); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f7d7f32ac673..21bd37ec5511 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3037,6 +3037,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); diff --git a/net/core/devlink.c b/net/core/devlink.c index a77e3777c8dd..6ad095264896 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1113,7 +1113,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, pool_index, &cur, &max); if (err && err != -EOPNOTSUPP) - return err; + goto sb_occ_get_failure; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; @@ -1126,8 +1126,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, return 0; nla_put_failure: + err = -EMSGSIZE; +sb_occ_get_failure: genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + return err; } static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 023ce0fbb496..41e32a958d08 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -638,15 +639,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; + struct net_device *ndev = NULL, *dev = NULL; + struct net *net = current->nsproxy->net_ns; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) { - struct net *net = current->nsproxy->net_ns; + if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); - } + if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -654,6 +655,19 @@ int netpoll_setup(struct netpoll *np) } dev_hold(ndev); + /* bring up DSA management network devices up first */ + for_each_netdev(net, dev) { + if (!netdev_uses_dsa(dev)) + continue; + + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + np_err(np, "%s failed to open %s\n", + np->dev_name, dev->name); + goto put; + } + } + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b036c55f5cb1..7c10bc4dacd3 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -261,6 +261,7 @@ static int dsa_port_setup(struct dsa_port *dp) int err = 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); + dp->mac = of_get_mac_address(dp->dn); if (dp->type != DSA_PORT_TYPE_UNUSED) err = devlink_port_register(ds->devlink, &dp->devlink_port, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8ee28b6016d8..d03c67e761df 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1313,7 +1313,10 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->features = master->vlan_features | NETIF_F_HW_TC; slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; - eth_hw_addr_inherit(slave_dev, master); + if (port->mac && is_valid_ether_addr(port->mac)) + ether_addr_copy(slave_dev->dev_addr, port->mac); + else + eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ad037fed0ae0..069d96f66bb7 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -392,8 +392,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..3d670d5aea34 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,17 +17,19 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - const struct sock *sk = skb_to_full_sk(skb); - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; + __u8 flags; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; + sk = sk_to_full_sk(sk); + flags = sk ? inet_sk_flowi_flags(sk) : 0; + if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) @@ -91,8 +93,8 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 690b17ef6a44..d64b1ef43c10 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -54,7 +54,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..0829f46ddfdd 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -65,7 +65,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 0fb3776869b5..593d89c05066 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -329,7 +329,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 5cd06ba3535d..4996db1f64a1 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -129,7 +129,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; niph = ip_hdr(nskb); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 7d82934c46f4..61003768e52b 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -50,7 +50,7 @@ static unsigned int nf_route_table_hook(void *priv, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f66b2e6d97a7..1a06850ef3cc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -296,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; @@ -391,8 +391,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4ce3397e6fcf..98e8ee8bb759 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -495,6 +495,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, return true; if (tcp_rmem_pressure(sk)) return true; + if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) + return true; } if (sk->sk_prot->stream_memory_read) return sk->sk_prot->stream_memory_read(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 93f176336297..b70c9365e131 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -917,7 +917,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || + (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c19870d56186..686833dfaa7f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4704,7 +4704,8 @@ void tcp_data_ready(struct sock *sk) int avail = tp->rcv_nxt - tp->copied_seq; if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE)) + !sock_flag(sk, SOCK_DONE) && + tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) return; sk->sk_data_ready(sk); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 78c974391567..2b68bd7c83c4 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -600,7 +600,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) + err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN); + if (err) goto out_free; ip6h->priority = 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0b1f3e927b..5679fa3f696a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -17,10 +17,10 @@ #include #include -int ip6_route_me_harder(struct net *net, struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk = sk_to_full_sk(skb->sk); + struct sock *sk = sk_to_full_sk(sk_partial); unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -81,7 +81,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, skb); + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b0524b18c4fb..acba3757ff60 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index ca6d38698b1a..2b6a3b27f670 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -352,7 +352,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index da3f1f8cb325..afe79cb46e63 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bfed7508ba19..98c108baf35e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1087,7 +1087,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1377,7 +1376,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a377be8a9fb4..ec61b67a92be 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; @@ -246,7 +246,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; + + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 23a1002ed86d..ad3e515f91f0 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1571,7 +1571,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) break; } - if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) && + sk->sk_state == IUCV_CONNECTED) { if (iucv->transport == AF_IUCV_TRANS_IUCV) { txmsg.class = 0; txmsg.tag = 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 8cc0e5a3b467..a700cfa533b7 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -381,8 +381,12 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); struct bpf_prog *prog = psock->bpf_prog; + int res; - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static int kcm_read_sock_done(struct strparser *strp, int err) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 07fb219327d6..1f3c420c01c7 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -276,7 +276,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, success = !!(info->flags & IEEE80211_TX_STAT_ACK); for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - if (ar[i].idx < 0) + if (ar[i].idx < 0 || !ar[i].count) break; ndx = rix_to_ndx(mi, ar[i].idx); @@ -289,12 +289,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->r[ndx].stats.success += success; } - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) - mi->sample_packets++; - - if (mi->sample_deferred > 0) - mi->sample_deferred--; - if (time_after(jiffies, mi->last_stats_update + (mp->update_interval * HZ) / 1000)) minstrel_update_stats(mp, mi); @@ -373,7 +367,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; delta = (mi->total_packets * sampling_ratio / 100) - - (mi->sample_packets + mi->sample_deferred / 2); + mi->sample_packets; /* delta < 0: no sampling required */ prev_sample = mi->prev_sample; @@ -382,7 +376,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; if (mi->total_packets >= 10000) { - mi->sample_deferred = 0; mi->sample_packets = 0; mi->total_packets = 0; } else if (delta > mi->n_rates * 2) { @@ -407,19 +400,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * rate sampling method should be used. * Respect such rates that are not sampled for 20 interations. */ - if (mrr_capable && - msr->perfect_tx_time > mr->perfect_tx_time && - msr->stats.sample_skipped < 20) { - /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark - * packets that have the sampling rate deferred to the - * second MRR stage. Increase the sample counter only - * if the deferred sample rate was actually used. - * Use the sample_deferred counter to make sure that - * the sampling is not done in large bursts */ - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - rate++; - mi->sample_deferred++; - } else { + if (msr->perfect_tx_time < mr->perfect_tx_time || + msr->stats.sample_skipped >= 20) { if (!msr->sample_limit) return; @@ -439,6 +421,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate->idx = mi->r[ndx].rix; rate->count = minstrel_get_retry_count(&mi->r[ndx], info); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index be6c3f35f48b..d60413adb215 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -98,7 +98,6 @@ struct minstrel_sta_info { u8 max_prob_rate; unsigned int total_packets; unsigned int sample_packets; - int sample_deferred; unsigned int sample_row; unsigned int sample_column; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 9968b8a976f1..c0fae954082b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + /* + * If we had used sta_info_pre_move_state() then we might not + * have gone through the state transitions down again, so do + * it here now (and warn if it's inserted). + * + * This will clear state such as fast TX/RX that may have been + * allocated during state transitions. + */ + while (sta->sta_state > IEEE80211_STA_NONE) { + int ret; + + WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); + + ret = sta_info_move_state(sta, sta->sta_state - 1); + if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) + break; + } + if (sta->rate_ctrl) rate_control_free_sta(sta); @@ -616,7 +634,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); - __cleanup_single_sta(sta); + cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); @@ -635,19 +653,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) err = sta_info_insert_check(sta); if (err) { + sta_info_free(local, sta); mutex_unlock(&local->sta_mtx); rcu_read_lock(); - goto out_free; + return err; } - err = sta_info_insert_finish(sta); - if (err) - goto out_free; - - return 0; - out_free: - sta_info_free(local, sta); - return err; + return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3160ffd93a15..98d048630ad2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1908,19 +1908,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* device xmit handlers */ +enum ieee80211_encrypt { + ENCRYPT_NO, + ENCRYPT_MGMT, + ENCRYPT_DATA, +}; + static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int head_need, bool may_encrypt) + int head_need, + enum ieee80211_encrypt encrypt) { struct ieee80211_local *local = sdata->local; - struct ieee80211_hdr *hdr; bool enc_tailroom; int tail_need = 0; - hdr = (struct ieee80211_hdr *) skb->data; - enc_tailroom = may_encrypt && - (sdata->crypto_tx_tailroom_needed_cnt || - ieee80211_is_mgmt(hdr->frame_control)); + enc_tailroom = encrypt == ENCRYPT_MGMT || + (encrypt == ENCRYPT_DATA && + sdata->crypto_tx_tailroom_needed_cnt); if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; @@ -1952,23 +1957,29 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; int headroom; - bool may_encrypt; + enum ieee80211_encrypt encrypt; - may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); + if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT) + encrypt = ENCRYPT_NO; + else if (ieee80211_is_mgmt(hdr->frame_control)) + encrypt = ENCRYPT_MGMT; + else + encrypt = ENCRYPT_DATA; headroom = local->tx_headroom; - if (may_encrypt) + if (encrypt != ENCRYPT_NO) headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); - if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) { ieee80211_free_txskb(&local->hw, skb); return; } + /* reload after potential resize */ hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; @@ -2751,7 +2762,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) { ieee80211_free_txskb(&local->hw, skb); skb = NULL; return ERR_PTR(-ENOMEM); @@ -3414,7 +3425,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (unlikely(ieee80211_skb_resize(sdata, skb, max_t(int, extra_head + hw_headroom - skb_headroom(skb), 0), - false))) { + ENCRYPT_NO))) { kfree_skb(skb); return true; } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 091284760d21..f65afa7e7d28 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1484,9 +1484,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); - /* Set up generic netlink interface */ - ncsi_init_netlink(dev); - return nd; } EXPORT_SYMBOL_GPL(ncsi_register_dev); @@ -1566,8 +1563,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd) #endif spin_unlock_irqrestore(&ncsi_dev_lock, flags); - ncsi_unregister_netlink(nd->dev); - kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 45f33d6dedf7..a2f4280e2889 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -397,24 +397,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .n_ops = ARRAY_SIZE(ncsi_ops), }; -int ncsi_init_netlink(struct net_device *dev) +static int __init ncsi_init_netlink(void) { - int rc; - - rc = genl_register_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to register netlink family\n"); - - return rc; -} - -int ncsi_unregister_netlink(struct net_device *dev) -{ - int rc; - - rc = genl_unregister_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to unregister netlink family\n"); - - return rc; + return genl_register_family(&ncsi_genl_family); } +subsys_initcall(ncsi_init_netlink); diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h index 91a5c256f8c4..6c55a6775157 100644 --- a/net/ncsi/ncsi-netlink.h +++ b/net/ncsi/ncsi-netlink.h @@ -14,7 +14,4 @@ #include "internal.h" -int ncsi_init_netlink(struct net_device *dev); -int ncsi_unregister_netlink(struct net_device *dev); - #endif /* __NCSI_NETLINK_H__ */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 36ebc40a4313..0427e66bc478 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -488,13 +488,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) { struct ip_set_counter *counter = ext_counter(data, set); + ip_set_update_counter(counter, ext, flags); + if (flags & IPSET_FLAG_MATCH_COUNTERS && !(ip_set_match_counter(ip_set_get_packets(counter), mext->packets, mext->packets_op) && ip_set_match_counter(ip_set_get_bytes(counter), mext->bytes, mext->bytes_op))) return false; - ip_set_update_counter(counter, ext, flags); } if (SET_WITH_SKBINFO(set)) ip_set_get_skbinfo(ext_skbinfo(data, set), diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d5e4329579e2..acaeeaf81441 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -725,12 +725,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(ipvs->net, skb) != 0) + ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1; return 0; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c92894c3e40a..0067f472367b 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1179,12 +1179,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 iter_bkt; - u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; + u32 skip_addr4 = cb->args[2]; + u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) + u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif @@ -1195,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; - iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { + iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || @@ -1203,7 +1204,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < cb->args[2]) + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1216,10 +1217,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr4 = 0; + skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < cb->args[3]) + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1232,8 +1235,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr6 = 0; + skip_addr6 = 0; #endif /* IPv6 */ } + iter_chain = 0; + skip_chain = 0; } unlabel_staticlist_return: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bd96fd261dba..4e15913e7519 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1116,10 +1116,13 @@ static void dev_deactivate_queue(struct net_device *dev, void *_qdisc_default) { struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc); + struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 014a28d8dd4f..02d8d3fd84a5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma, /* default uniform distribution */ if (dist == NULL) - return ((rnd % (2 * sigma)) + mu) - sigma; + return ((rnd % (2 * (u32)sigma)) + mu) - sigma; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; @@ -787,6 +787,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) q->slot_config.max_packets = INT_MAX; if (q->slot_config.max_bytes == 0) q->slot_config.max_bytes = INT_MAX; + + /* capping dist_jitter to the range acceptable by tabledist() */ + q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); + q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; if (q->slot_config.min_delay | q->slot_config.max_delay | @@ -1011,6 +1015,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_NETEM_SLOT]) get_slot(q, tb[TCA_NETEM_SLOT]); + /* capping jitter to the range acceptable by tabledist() */ + q->jitter = min_t(s64, abs(q->jitter), INT_MAX); + return ret; get_table_failure: diff --git a/net/sctp/input.c b/net/sctp/input.c index f64d882c8698..3dd900e42b85 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -461,7 +461,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, else { if (!mod_timer(&t->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(t); } } else { struct net *net = sock_net(sk); @@ -470,7 +470,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, "encountered!\n", __func__); if (del_timer(&t->proto_unreach_timer)) - sctp_association_put(asoc); + sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 567517e44811..2a94240eac36 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -434,7 +434,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(transport); goto out_unlock; } @@ -450,7 +450,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) out_unlock: bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(transport); } /* Handle the timeout of the RE-CONFIG timer. */ @@ -1615,12 +1615,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u16); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.u32); + subtype, chunk, cmd->obj.u16); break; case SCTP_CMD_INIT_COUNTER_INC: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c0d55ed62d2e..78302e547b2b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -148,7 +148,7 @@ void sctp_transport_free(struct sctp_transport *transport) /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) - sctp_association_put(transport->asoc); + sctp_transport_put(transport); sctp_transport_put(transport); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 49bb9fc0aa06..ce0f067d0faf 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -93,6 +93,11 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { tipc_net_stop(net); + + /* Make sure the tipc_net_finalize_work stopped + * before releasing the resources. + */ + flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0b8446cd541c..f04843ca8216 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -140,12 +140,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (skb_cloned(frag)) - frag = skb_copy(frag, GFP_ATOMIC); + *buf = NULL; + frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; head = *headbuf = frag; - *buf = NULL; TIPC_SKB_CB(head)->tail = NULL; if (skb_is_nonlinear(head)) { skb_walk_frags(head, tail) { diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index ec9a7137d267..1c4733153d74 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -671,12 +671,18 @@ static int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index c88dc8ee3144..02374459c417 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -629,7 +629,7 @@ struct sock *__vsock_create(struct net *net, vsk->owner = get_cred(psk->owner); vsk->connect_timeout = psk->connect_timeout; } else { - vsk->trusted = capable(CAP_NET_ADMIN); + vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 935aebf15010..c7825b951f72 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3374,7 +3374,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) power_rule = ®_rule->power_rule; if (reg_rule->flags & NL80211_RRF_AUTO_BW) - snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO", freq_range->max_bandwidth_khz, reg_get_max_bandwidth(rd, reg_rule)); else diff --git a/net/wireless/scan.c b/net/wireless/scan.c index aa60d99242d2..c81d3250047e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1555,7 +1555,12 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, return; new_ie_len -= trans_ssid[1]; mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen); - if (!mbssid) + /* + * It's not valid to have the MBSSID element before SSID + * ignore if that happens - the code below assumes it is + * after (while copying things inbetween). + */ + if (!mbssid || mbssid < trans_ssid) return; new_ie_len -= mbssid[1]; rcu_read_lock(); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index bd1cbbfe5924..b3db0b0a52f5 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -824,7 +824,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) { + if (rc && x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; @@ -1049,6 +1049,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->lci = lci; makex25->dest_addr = dest_addr; makex25->source_addr = source_addr; + x25_neigh_hold(nb); makex25->neighbour = nb; makex25->facilities = facilities; makex25->dte_facilities= dte_facilities; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 263f7adf7865..d7399552c81c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1825,6 +1825,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + __be32 newspi = 0; u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); @@ -1843,21 +1844,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) xfrm_state_put(x0); goto unlock; } - x->id.spi = minspi; + newspi = minspi; } else { u32 spi = 0; for (h = 0; h < high-low+1; h++) { spi = low + prandom_u32()%(high-low+1); x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { - x->id.spi = htonl(spi); + newspi = htonl(spi); break; } xfrm_state_put(x0); } } - if (x->id.spi) { + if (newspi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); + x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2199,17 +2201,20 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (IS_ERR(data)) return PTR_ERR(data); - if (in_compat_syscall()) { - struct xfrm_translator *xtr = xfrm_get_translator(); + /* Use the 64-bit / untranslated format on Android, even for compat */ + if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) { + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) - return -EOPNOTSUPP; + if (!xtr) + return -EOPNOTSUPP; - err = xtr->xlate_user_policy_sockptr(&data, optlen); - xfrm_put_translator(xtr); - if (err) { - kfree(data); - return err; + err = xtr->xlate_user_policy_sockptr(&data, optlen); + xfrm_put_translator(xtr); + if (err) { + kfree(data); + return err; + } } } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8cdc63254c63..fe7ff38f2374 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2700,19 +2700,22 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (in_compat_syscall()) { - struct xfrm_translator *xtr = xfrm_get_translator(); + /* Use the 64-bit / untranslated format on Android, even for compat */ + if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) { + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) - return -EOPNOTSUPP; + if (!xtr) + return -EOPNOTSUPP; - nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, - link->nla_pol, extack); - xfrm_put_translator(xtr); - if (IS_ERR(nlh64)) - return PTR_ERR(nlh64); - if (nlh64) - nlh = nlh64; + nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, + link->nla_pol, extack); + xfrm_put_translator(xtr); + if (IS_ERR(nlh64)) + return PTR_ERR(nlh64); + if (nlh64) + nlh = nlh64; + } } if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c830750d725b..3f566c124a28 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -290,7 +290,7 @@ ksym_dep_filter = \ $(CPP) $(call flags_nodeps,a_flags) -D__KSYM_DEPS__ $< ;; \ boot*|build*|cpp_its_S|*cpp_lds_S|dtc|host*|vdso*) : ;; \ *) echo "Don't know how to preprocess $(1)" >&2; false ;; \ - esac | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' + esac | grep -F '=== __KSYM_' | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ diff --git a/scripts/clang-android.sh b/scripts/clang-android.sh deleted file mode 100755 index 9186c4f48576..000000000000 --- a/scripts/clang-android.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -$* -dM -E - &1 | grep -q __ANDROID__ && echo "y" diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 081013eb0d5e..c2e8455b3aeb 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -65,7 +65,7 @@ scm_version() # Check for git and a git repo. if test -z "$(git rev-parse --show-cdup 2>/dev/null)" && - head=`git rev-parse --verify --short HEAD 2>/dev/null`; then + head=$(git rev-parse --verify HEAD 2>/dev/null); then if [ -n "$android_release" ] && [ -n "$kmi_generation" ]; then printf '%s' "-$android_release-$kmi_generation" @@ -83,11 +83,22 @@ scm_version() fi # If we are past a tagged commit (like # "v2.6.30-rc5-302-g72357d5"), we pretty print it. - if atag="`git describe 2>/dev/null`"; then - echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' + # + # Ensure the abbreviated sha1 has exactly 12 + # hex characters, to make the output + # independent of git version, local + # core.abbrev settings and/or total number of + # objects in the current repository - passing + # --abbrev=12 ensures a minimum of 12, and the + # awk substr() then picks the 'g' and first 12 + # hex chars. + if atag="$(git describe --abbrev=12 2>/dev/null)"; then + echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}' - # If we don't have a tag at all we print -g{commitish}. + # If we don't have a tag at all we print -g{commitish}, + # again using exactly 12 hex chars. else + head="$(echo $head | cut -c1-12)" printf '%s%s' -g $head fi fi diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh index 97a2c844a95e..45e8aa360b45 100755 --- a/scripts/tools-support-relr.sh +++ b/scripts/tools-support-relr.sh @@ -4,13 +4,13 @@ tmp_file=$(mktemp) trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT -cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1 +cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1 void *p = &p; END -"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file +$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file # Despite printing an error message, GNU nm still exits with exit code 0 if it # sees a relr section. So we need to check that nothing is printed to stderr. -test -z "$("$NM" $tmp_file 2>&1 >/dev/null)" +test -z "$($NM $tmp_file 2>&1 >/dev/null)" -"$OBJCOPY" -O binary $tmp_file $tmp_file.bin +$OBJCOPY -O binary $tmp_file $tmp_file.bin diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index e11d860fdce4..651c0127c00d 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -186,6 +186,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, break; case EVM_IMA_XATTR_DIGSIG: case EVM_XATTR_PORTABLE_DIGSIG: + /* accept xattr with non-empty signature field */ + if (xattr_len <= sizeof(struct signature_v2_hdr)) { + evm_status = INTEGRITY_FAIL; + goto out; + } + hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index 0a4b89d48297..cb05ae28ce00 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -161,8 +161,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (!new) { + ret = -ENOMEM; goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 18ba0c2328fb..5e05f5b902d7 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -245,9 +245,13 @@ int mls_context_to_sid(struct policydb *pol, char *rangep[2]; if (!pol->mls_enabled) { - if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') - return 0; - return -EINVAL; + /* + * With no MLS, only return -EINVAL if there is a MLS field + * and it did not come from an xattr. + */ + if (oldc && def_sid == SECSID_NULL) + return -EINVAL; + return 0; } /* diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 2279dc77d96b..c9c48fc2da7e 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1276,6 +1276,12 @@ int security_sidtab_hash_stats(struct selinux_state *state, char *page) { int rc; + if (!state->initialized) { + pr_err("SELinux: %s: called before initial load_policy\n", + __func__); + return -EINVAL; + } + read_lock(&state->ss->policy_rwlock); rc = sidtab_hash_stats(state->ss->sidtab, page); read_unlock(&state->ss->policy_rwlock); diff --git a/sound/core/control.c b/sound/core/control.c index d1312f14d78f..9cec0e8c9d53 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1388,7 +1388,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, unlock: up_write(&card->controls_rwsem); - return 0; + return err; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index 36a08ba51ec7..58674a711132 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -124,7 +124,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) t = (struct snd_efw_transaction *)data; length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length); - spin_lock_irq(&efw->lock); + spin_lock(&efw->lock); if (efw->push_ptr < efw->pull_ptr) capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr); @@ -191,7 +191,7 @@ handle_resp_for_user(struct fw_card *card, int generation, int source, copy_resp_to_buf(efw, data, length, rcode); end: - spin_unlock_irq(&instances_lock); + spin_unlock(&instances_lock); } static void diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 84b44cdae28a..b96abebcfd1a 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -156,6 +156,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_bus *bus, return NULL; if (bus->idx != bus_idx) return NULL; + if (addr < 0 || addr > 31) + return NULL; list_for_each_entry(hlink, &bus->hlink_list, list) { for (i = 0; i < HDA_MAX_CODECS; i++) { diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 708efb9b4387..f86b9b0a0607 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1955,20 +1955,23 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, int pinctl; int err = 0; + mutex_lock(&spec->pcm_lock); if (hinfo->nid) { pcm_idx = hinfo_to_pcm_index(codec, hinfo); - if (snd_BUG_ON(pcm_idx < 0)) - return -EINVAL; + if (snd_BUG_ON(pcm_idx < 0)) { + err = -EINVAL; + goto unlock; + } cvt_idx = cvt_nid_to_cvt_index(codec, hinfo->nid); - if (snd_BUG_ON(cvt_idx < 0)) - return -EINVAL; + if (snd_BUG_ON(cvt_idx < 0)) { + err = -EINVAL; + goto unlock; + } per_cvt = get_cvt(spec, cvt_idx); - snd_BUG_ON(!per_cvt->assigned); per_cvt->assigned = 0; hinfo->nid = 0; - mutex_lock(&spec->pcm_lock); snd_hda_spdif_ctls_unassign(codec, pcm_idx); clear_bit(pcm_idx, &spec->pcm_in_use); pin_idx = hinfo_to_pin_index(codec, hinfo); @@ -1996,10 +1999,11 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, per_pin->setup = false; per_pin->channels = 0; mutex_unlock(&per_pin->lock); - unlock: - mutex_unlock(&spec->pcm_lock); } +unlock: + mutex_unlock(&spec->pcm_lock); + return err; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 382a8d179eb0..935ca7989cfd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2498,13 +2498,23 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x950A, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e3, "Clevo P955[ER]T", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e4, "Clevo P955ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e5, "Clevo P955EE6", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e6, "Clevo P950R[CDF]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), @@ -7114,11 +7124,49 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1403, "Clevo N140CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8535, "Clevo NH50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8536, "Clevo NH79D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c index 71776bfe0485..5a956f8bfa9e 100644 --- a/sound/pci/mixart/mixart_core.c +++ b/sound/pci/mixart/mixart_core.c @@ -83,7 +83,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, unsigned int i; #endif - mutex_lock(&mgr->msg_lock); err = 0; /* copy message descriptor from miXart to driver */ @@ -132,8 +131,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, writel_be(headptr, MIXART_MEM(mgr, MSG_OUTBOUND_FREE_HEAD)); _clean_exit: - mutex_unlock(&mgr->msg_lock); - return err; } @@ -271,7 +268,9 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int resp.data = resp_data; resp.size = max_resp_size; + mutex_lock(&mgr->msg_lock); err = get_msg(mgr, &resp, msg_frame); + mutex_unlock(&mgr->msg_lock); if( request->message_id != resp.message_id ) dev_err(&mgr->pci->dev, "RESPONSE ERROR!\n"); diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 2f2967247789..1d06e2b7bb63 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -81,8 +81,10 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) else dma_ch = 0; - if (dma_ch < 0) + if (dma_ch < 0) { + kfree(data); return dma_ch; + } drvdata->substream[dma_ch] = substream; @@ -103,6 +105,7 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); if (ret < 0) { + kfree(data); dev_err(soc_runtime->dev, "setting constraints failed: %d\n", ret); return -EINVAL; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index ab1eaf27d442..21ac9956240d 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -398,6 +398,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, switch (subs->stream->chip->usb_id) { case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */ case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */ + case USB_ID(0x22f0, 0x0006): /* Allen&Heath Qu-16 */ ep = 0x81; ifnum = 3; goto add_sync_ep_from_ifnum; @@ -407,6 +408,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ifnum = 2; goto add_sync_ep_from_ifnum; case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */ + case USB_ID(0x0499, 0x172a): /* Yamaha MODX */ ep = 0x86; ifnum = 2; goto add_sync_ep_from_ifnum; @@ -414,6 +416,10 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; + case USB_ID(0x1686, 0xf029): /* Zoom UAC-2 */ + ep = 0x82; + ifnum = 2; + goto add_sync_ep_from_ifnum; case USB_ID(0x1397, 0x0001): /* Behringer UFX1604 */ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */ ep = 0x81; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 278f9040e463..eabe52b2b4ae 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1338,13 +1338,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); - /* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX - * needs a tiny delay here, otherwise requests like get/set - * frequency return as failed despite actually succeeding. + /* Zoom R16/24, many Logitech(at least H650e/H570e/BCC950), + * Jabra 550a, Kingston HyperX needs a tiny delay here, + * otherwise requests like get/set frequency return + * as failed despite actually succeeding. */ if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || - chip->usb_id == USB_ID(0x046d, 0x0a46) || - chip->usb_id == USB_ID(0x046d, 0x0a56) || + USB_ID_VENDOR(chip->usb_id) == 0x046d || /* Logitech */ chip->usb_id == USB_ID(0x0b0e, 0x0349) || chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) @@ -1466,6 +1466,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x278b: /* Rotel? */ case 0x292b: /* Gustard/Ess based devices */ case 0x2ab6: /* T+A devices */ + case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ case 0xc502: /* HiBy devices */ if (fp->dsd_raw) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf4cd924aed5..13944978ada5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1191,8 +1191,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 3f98dcfbc177..0b1ba8e7d18a 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -100,11 +100,6 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; - if (!insn_sec->sym) { - WARN("missing symbol for section %s", insn_sec->name); - return -1; - } - /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); @@ -117,8 +112,32 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, } memset(rela, 0, sizeof(*rela)); - rela->sym = insn_sec->sym; - rela->addend = insn_off; + if (insn_sec->sym) { + rela->sym = insn_sec->sym; + rela->addend = insn_off; + } else { + /* + * The Clang assembler doesn't produce section symbols, so we + * have to reference the function symbol instead: + */ + rela->sym = find_symbol_containing(insn_sec, insn_off); + if (!rela->sym) { + /* + * Hack alert. This happens when we need to reference + * the NOP pad insn immediately after the function. + */ + rela->sym = find_symbol_containing(insn_sec, + insn_off - 1); + } + if (!rela->sym) { + WARN("missing symbol for insn at offset 0x%lx\n", + insn_off); + return -1; + } + + rela->addend = insn_off - rela->sym->offset; + } + rela->type = R_X86_64_PC32; rela->offset = idx * sizeof(int); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6e0189df2b3b..0cb7f7b731fb 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -620,7 +620,7 @@ static int report_lock_release_event(struct perf_evsel *evsel, case SEQ_STATE_READ_ACQUIRED: seq->read_count--; BUG_ON(seq->read_count < 0); - if (!seq->read_count) { + if (seq->read_count) { ls->nr_release++; goto end; } diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ad33b99f5d21..7b5e15cc6b71 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -87,9 +87,6 @@ struct cs_etm_queue { struct cs_etm_packet *packet; }; -/* RB tree for quick conversion between traceID and metadata pointers */ -static struct intlist *traceid_list; - static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid, u64 time_); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index c7ef97b198c7..37f8d48179ca 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -53,6 +53,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 29e75c051d04..230e94bf7775 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -332,6 +332,7 @@ bool die_is_func_def(Dwarf_Die *dw_die) int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) { Dwarf_Addr base, end; + Dwarf_Attribute attr; if (!addr) return -EINVAL; @@ -339,6 +340,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) if (dwarf_entrypc(dw_die, addr) == 0) return 0; + /* + * Since the dwarf_ranges() will return 0 if there is no + * DW_AT_ranges attribute, we should check it first. + */ + if (!dwarf_attr(dw_die, DW_AT_ranges, &attr)) + return -ENOENT; + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index aa9c7df120ca..9c22729e2ad6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -912,11 +912,13 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; - if (symbol_conf.kptr_restrict) - return -1; if (map == NULL) return -1; + kmap = map__kmap(map); + if (!kmap->ref_reloc_sym) + return -1; + /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older @@ -939,7 +941,6 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 23e367063446..71aeaf6f45cc 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len) len--; - for (i = 0; i < len; i++) { + for (i = 0; i < len && p[i]; i++) { if (!isprint(p[i]) && !isspace(p[i])) return 0; } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9569cc06e0a7..2814251df06b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1493,7 +1493,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1508,20 +1507,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f016d1b330e5..6a2037b52098 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -488,6 +488,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index dc58254a2b69..8c01b2cfdb1a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -22,7 +22,7 @@ static inline void *zalloc(size_t size) return calloc(1, size); } -#define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +#define zfree(ptr) ({ free((void *)*ptr); *ptr = NULL; }) struct dirent; struct nsinfo; diff --git a/tools/testing/selftests/filesystems/incfs/.gitignore b/tools/testing/selftests/filesystems/incfs/.gitignore index 4cba9c219a92..f0e3cd99d4ac 100644 --- a/tools/testing/selftests/filesystems/incfs/.gitignore +++ b/tools/testing/selftests/filesystems/incfs/.gitignore @@ -1 +1,3 @@ -incfs_test \ No newline at end of file +incfs_test +incfs_stress +incfs_perf diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 5b2e627ce883..f3798029247a 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -CFLAGS += -I../.. -I../../../../.. - -LDLIBS := -llz4 -lcrypto -EXTRA_SOURCES := utils.c -TEST_GEN_PROGS := incfs_test - -$(TEST_GEN_PROGS): $(EXTRA_SOURCES) +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. +LDLIBS := -llz4 -lzstd -lcrypto -lpthread +TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf include ../../lib.mk + +# Put after include ../../lib.mk since that changes $(TEST_GEN_PROGS) +# Otherwise you get multiple targets, this becomes the default, and it's a mess +EXTRA_SOURCES := utils.c +$(TEST_GEN_PROGS) : $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/filesystems/incfs/incfs_perf.c b/tools/testing/selftests/filesystems/incfs/incfs_perf.c new file mode 100644 index 000000000000..ed36bbd774e3 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_perf.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define err_msg(...) \ + do { \ + fprintf(stderr, "%s: (%d) ", TAG, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " (%s)\n", strerror(errno)); \ + } while (false) + +#define TAG "incfs_perf" + +struct options { + int blocks; /* -b number of diff block sizes */ + bool no_cleanup; /* -c don't clean up after */ + const char *test_dir; /* -d working directory */ + const char *file_types; /* -f sScCvV */ + bool no_native; /* -n don't test native files */ + bool no_random; /* -r don't do random reads*/ + bool no_linear; /* -R random reads only */ + size_t size; /* -s file size as power of 2 */ + int tries; /* -t times to run test*/ +}; + +enum flags { + SHUFFLE = 1, + COMPRESS = 2, + VERIFY = 4, + LAST_FLAG = 8, +}; + +void print_help(void) +{ + puts( + "incfs_perf. Performance test tool for incfs\n" + "\tTests read performance of incfs by creating files of various types\n" + "\tflushing caches and then reading them back.\n" + "\tEach file is read with different block sizes and average\n" + "\tthroughput in megabytes/second and memory usage are reported for\n" + "\teach block size\n" + "\tNative files are tested for comparison\n" + "\tNative files are created in native folder, incfs files are created\n" + "\tin src folder which is mounted on dst folder\n" + "\n" + "\t-bn (default 8) number of different block sizes, starting at 4096\n" + "\t and doubling\n" + "\t-c don't Clean up - leave files and mount point\n" + "\t-d dir create directories in dir\n" + "\t-fs|Sc|Cv|V restrict which files are created.\n" + "\t s blocks not shuffled, S blocks shuffled\n" + "\t c blocks not compress, C blocks compressed\n" + "\t v files not verified, V files verified\n" + "\t If a letter is omitted, both options are tested\n" + "\t If no letter are given, incfs is not tested\n" + "\t-n Don't test native files\n" + "\t-r No random reads (sequential only)\n" + "\t-R Random reads only (no sequential)\n" + "\t-sn (default 30)File size as power of 2\n" + "\t-tn (default 5) Number of tries per file. Results are averaged\n" + ); +} + +int parse_options(int argc, char *const *argv, struct options *options) +{ + signed char c; + + /* Set defaults here */ + *options = (struct options){ + .blocks = 8, + .test_dir = ".", + .tries = 5, + .size = 30, + }; + + /* Load options from command line here */ + while ((c = getopt(argc, argv, "b:cd:f::hnrRs:t:")) != -1) { + switch (c) { + case 'b': + options->blocks = strtol(optarg, NULL, 10); + break; + + case 'c': + options->no_cleanup = true; + break; + + case 'd': + options->test_dir = optarg; + break; + + case 'f': + if (optarg) + options->file_types = optarg; + else + options->file_types = "sS"; + break; + + case 'h': + print_help(); + exit(0); + + case 'n': + options->no_native = true; + break; + + case 'r': + options->no_random = true; + break; + + case 'R': + options->no_linear = true; + break; + + case 's': + options->size = strtol(optarg, NULL, 10); + break; + + case 't': + options->tries = strtol(optarg, NULL, 10); + break; + + default: + print_help(); + return -EINVAL; + } + } + + options->size = 1L << options->size; + + return 0; +} + +void shuffle(size_t *buffer, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++i) { + size_t j = random() * (size - i - 1) / RAND_MAX; + size_t temp = buffer[i]; + + buffer[i] = buffer[j]; + buffer[j] = temp; + } +} + +int get_free_memory(void) +{ + FILE *meminfo = fopen("/proc/meminfo", "re"); + char field[256]; + char value[256] = {}; + + if (!meminfo) + return -ENOENT; + + while (fscanf(meminfo, "%[^:]: %s kB\n", field, value) == 2) { + if (!strcmp(field, "MemFree")) + break; + *value = 0; + } + + fclose(meminfo); + + if (!*value) + return -ENOENT; + + return strtol(value, NULL, 10); +} + +int write_data(int cmd_fd, int dir_fd, const char *name, size_t size, int flags) +{ + int fd = openat(dir_fd, name, O_RDWR | O_CLOEXEC); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + size_t *blocks = malloc(sizeof(size_t) * block_count); + int error = 0; + size_t i; + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + uint8_t compressed_data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + struct incfs_fill_block fill_block = { + .compression = COMPRESSION_NONE, + .data_len = sizeof(data), + .data = ptr_to_u64(data), + }; + + if (!blocks) { + err_msg("Out of memory"); + error = -errno; + goto out; + } + + if (fd == -1) { + err_msg("Could not open file for writing %s", name); + error = -errno; + goto out; + } + + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + for (i = 0; i < block_count; ++i) + blocks[i] = i; + + if (flags & SHUFFLE) + shuffle(blocks, block_count); + + if (flags & COMPRESS) { + size_t comp_size = LZ4_compress_default( + (char *)data, (char *)compressed_data, sizeof(data), + ARRAY_SIZE(compressed_data)); + + if (comp_size <= 0) { + error = -EBADMSG; + goto out; + } + fill_block.compression = COMPRESSION_LZ4; + fill_block.data = ptr_to_u64(compressed_data); + fill_block.data_len = comp_size; + } + + for (i = 0; i < block_count; ++i) { + struct incfs_fill_blocks fill_blocks = { + .count = 1, + .fill_blocks = ptr_to_u64(&fill_block), + }; + + fill_block.block_index = blocks[i]; + int written = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + + if (written != 1) { + error = -errno; + err_msg("Failed to write block %lu in file %s", i, + name); + break; + } + } + +out: + free(blocks); + close(fd); + sync(); + return error; +} + +int measure_read_throughput_internal(const char *tag, int dir, const char *name, + const struct options *options, bool random) +{ + int block; + + if (random) + printf("%32s(random)", tag); + else + printf("%40s", tag); + + for (block = 0; block < options->blocks; ++block) { + size_t buffer_size; + char *buffer; + int try; + double time = 0; + double throughput; + int memory = 0; + + buffer_size = 1 << (block + 12); + buffer = malloc(buffer_size); + + for (try = 0; try < options->tries; ++try) { + int err; + struct timespec start_time, end_time; + off_t i; + int fd; + size_t offsets_size = options->size / buffer_size; + size_t *offsets = + malloc(offsets_size * sizeof(*offsets)); + int start_memory, end_memory; + + if (!offsets) { + err_msg("Not enough memory"); + return -ENOMEM; + } + + for (i = 0; i < offsets_size; ++i) + offsets[i] = i * buffer_size; + + if (random) + shuffle(offsets, offsets_size); + + err = drop_caches(); + if (err) { + err_msg("Failed to drop caches"); + return err; + } + + start_memory = get_free_memory(); + if (start_memory < 0) { + err_msg("Failed to get start memory"); + return start_memory; + } + + fd = openat(dir, name, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + err_msg("Failed to open file"); + return err; + } + + err = clock_gettime(CLOCK_MONOTONIC, &start_time); + if (err) { + err_msg("Failed to get start time"); + return err; + } + + for (i = 0; i < offsets_size; ++i) + if (pread(fd, buffer, buffer_size, + offsets[i]) != buffer_size) { + err_msg("Failed to read file"); + err = -errno; + goto fail; + } + + err = clock_gettime(CLOCK_MONOTONIC, &end_time); + if (err) { + err_msg("Failed to get start time"); + goto fail; + } + + end_memory = get_free_memory(); + if (end_memory < 0) { + err_msg("Failed to get end memory"); + return end_memory; + } + + time += end_time.tv_sec - start_time.tv_sec; + time += (end_time.tv_nsec - start_time.tv_nsec) / 1e9; + + close(fd); + fd = -1; + memory += start_memory - end_memory; + +fail: + free(offsets); + close(fd); + if (err) + return err; + } + + throughput = options->size * options->tries / time; + printf("%10.3e %10d", throughput, memory / options->tries); + free(buffer); + } + + printf("\n"); + return 0; +} + +int measure_read_throughput(const char *tag, int dir, const char *name, + const struct options *options) +{ + int err = 0; + + if (!options->no_linear) + err = measure_read_throughput_internal(tag, dir, name, options, + false); + + if (!err && !options->no_random) + err = measure_read_throughput_internal(tag, dir, name, options, + true); + return err; +} + +int test_native_file(int dir, const struct options *options) +{ + const char *name = "file"; + int fd; + char buffer[4096] = {}; + off_t i; + int err; + + fd = openat(dir, name, O_CREAT | O_WRONLY | O_CLOEXEC, 0600); + if (fd == -1) { + err_msg("Could not open native file"); + return -errno; + } + + for (i = 0; i < options->size; i += sizeof(buffer)) + if (pwrite(fd, buffer, sizeof(buffer), i) != sizeof(buffer)) { + err_msg("Failed to write file"); + err = -errno; + goto fail; + } + + close(fd); + sync(); + fd = -1; + + err = measure_read_throughput("native", dir, name, options); + +fail: + close(fd); + return err; +} + +struct hash_block { + char data[INCFS_DATA_FILE_BLOCK_SIZE]; +}; + +static struct hash_block *build_mtree(size_t size, char *root_hash, + int *mtree_block_count) +{ + char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + int block_count = 0; + int hash_block_count = 0; + int total_tree_block_count = 0; + int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; + int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; + int levels_count = 0; + int i, level; + struct hash_block *mtree; + + if (size == 0) + return 0; + + block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + hash_block_count = block_count; + for (i = 0; hash_block_count > 1; i++) { + hash_block_count = (hash_block_count + hash_per_block - 1) / + hash_per_block; + tree_lvl_count[i] = hash_block_count; + total_tree_block_count += hash_block_count; + } + levels_count = i; + + for (i = 0; i < levels_count; i++) { + int prev_lvl_base = (i == 0) ? total_tree_block_count : + tree_lvl_index[i - 1]; + + tree_lvl_index[i] = prev_lvl_base - tree_lvl_count[i]; + } + + *mtree_block_count = total_tree_block_count; + mtree = calloc(total_tree_block_count, sizeof(*mtree)); + /* Build level 0 hashes. */ + for (i = 0; i < block_count; i++) { + int block_index = tree_lvl_index[0] + i / hash_per_block; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = mtree[block_index].data + block_off; + + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + + /* Build higher levels of hash tree. */ + for (level = 1; level < levels_count; level++) { + int prev_lvl_base = tree_lvl_index[level - 1]; + int prev_lvl_count = tree_lvl_count[level - 1]; + + for (i = 0; i < prev_lvl_count; i++) { + int block_index = + i / hash_per_block + tree_lvl_index[level]; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = mtree[block_index].data + block_off; + + sha256(mtree[i + prev_lvl_base].data, + INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + } + + /* Calculate root hash from the top block */ + sha256(mtree[0].data, INCFS_DATA_FILE_BLOCK_SIZE, root_hash); + + return mtree; +} + +static int load_hash_tree(int cmd_fd, int dir, const char *name, + struct hash_block *mtree, int mtree_block_count) +{ + int err; + int i; + int fd; + struct incfs_fill_block *fill_block_array = + calloc(mtree_block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = mtree_block_count, + .fill_blocks = ptr_to_u64(fill_block_array), + }; + struct incfs_permit_fill permit_fill; + + if (!fill_block_array) + return -ENOMEM; + + for (i = 0; i < fill_blocks.count; i++) { + fill_block_array[i] = (struct incfs_fill_block){ + .block_index = i, + .data_len = INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(mtree[i].data), + .flags = INCFS_BLOCK_FLAGS_HASH + }; + } + + fd = openat(dir, name, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = errno; + goto failure; + } + + permit_fill.file_descriptor = fd; + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + err = -errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); + if (err < fill_blocks.count) + err = errno; + else + err = 0; + +failure: + free(fill_block_array); + return err; +} + +int test_incfs_file(int dst_dir, const struct options *options, int flags) +{ + int cmd_file = openat(dst_dir, INCFS_PENDING_READS_FILENAME, + O_RDONLY | O_CLOEXEC); + int err; + char name[4]; + incfs_uuid_t id; + char tag[256]; + + snprintf(name, sizeof(name), "%c%c%c", + flags & SHUFFLE ? 'S' : 's', + flags & COMPRESS ? 'C' : 'c', + flags & VERIFY ? 'V' : 'v'); + + if (cmd_file == -1) { + err_msg("Could not open command file"); + return -errno; + } + + if (flags & VERIFY) { + char root_hash[INCFS_MAX_HASH_SIZE]; + int mtree_block_count; + struct hash_block *mtree = build_mtree(options->size, root_hash, + &mtree_block_count); + + if (!mtree) { + err_msg("Failed to build hash tree"); + err = -ENOMEM; + goto fail; + } + + err = crypto_emit_file(cmd_file, NULL, name, &id, options->size, + root_hash, "add_data"); + + if (!err) + err = load_hash_tree(cmd_file, dst_dir, name, mtree, + mtree_block_count); + + free(mtree); + } else + err = emit_file(cmd_file, NULL, name, &id, options->size, NULL); + + if (err) { + err_msg("Failed to create file %s", name); + goto fail; + } + + if (write_data(cmd_file, dst_dir, name, options->size, flags)) + goto fail; + + snprintf(tag, sizeof(tag), "incfs%s%s%s", + flags & SHUFFLE ? "(shuffle)" : "", + flags & COMPRESS ? "(compress)" : "", + flags & VERIFY ? "(verify)" : ""); + + err = measure_read_throughput(tag, dst_dir, name, options); + +fail: + close(cmd_file); + return err; +} + +bool skip(struct options const *options, int flag, char c) +{ + if (!options->file_types) + return false; + + if (flag && strchr(options->file_types, tolower(c))) + return true; + + if (!flag && strchr(options->file_types, toupper(c))) + return true; + + return false; +} + +int main(int argc, char *const *argv) +{ + struct options options; + int err; + const char *native_dir = "native"; + const char *src_dir = "src"; + const char *dst_dir = "dst"; + int native_dir_fd = -1; + int src_dir_fd = -1; + int dst_dir_fd = -1; + int block; + int flags; + + err = parse_options(argc, argv, &options); + if (err) + return err; + + err = chdir(options.test_dir); + if (err) { + err_msg("Failed to change to %s", options.test_dir); + return -errno; + } + + /* Clean up any interrupted previous runs */ + while (!umount(dst_dir)) + ; + + err = remove_dir(native_dir) || remove_dir(src_dir) || + remove_dir(dst_dir); + if (err) + return err; + + err = mkdir(native_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(src_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(dst_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mount_fs_opt(dst_dir, src_dir, "readahead=0,rlog_pages=0", 0); + if (err) { + err_msg("Failed to mount incfs"); + goto cleanup; + } + + native_dir_fd = open(native_dir, O_RDONLY | O_CLOEXEC); + src_dir_fd = open(src_dir, O_RDONLY | O_CLOEXEC); + dst_dir_fd = open(dst_dir, O_RDONLY | O_CLOEXEC); + if (native_dir_fd == -1 || src_dir_fd == -1 || dst_dir_fd == -1) { + err_msg("Failed to open native, src or dst dir"); + err = -errno; + goto cleanup; + } + + printf("%40s", ""); + for (block = 0; block < options.blocks; ++block) + printf("%21d", 1 << (block + 12)); + printf("\n"); + + if (!err && !options.no_native) + err = test_native_file(native_dir_fd, &options); + + for (flags = 0; flags < LAST_FLAG && !err; ++flags) { + if (skip(&options, flags & SHUFFLE, 's') || + skip(&options, flags & COMPRESS, 'c') || + skip(&options, flags & VERIFY, 'v')) + continue; + err = test_incfs_file(dst_dir_fd, &options, flags); + } + +cleanup: + close(native_dir_fd); + close(src_dir_fd); + close(dst_dir_fd); + if (!options.no_cleanup) { + umount(dst_dir); + remove_dir(native_dir); + remove_dir(dst_dir); + remove_dir(src_dir); + } + + return err; +} diff --git a/tools/testing/selftests/filesystems/incfs/incfs_stress.c b/tools/testing/selftests/filesystems/incfs/incfs_stress.c new file mode 100644 index 000000000000..a1d491755832 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_stress.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define err_msg(...) \ + do { \ + fprintf(stderr, "%s: (%d) ", TAG, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " (%s)\n", strerror(errno)); \ + } while (false) + +#define TAG "incfs_stress" + +struct options { + bool no_cleanup; /* -c */ + const char *test_dir; /* -d */ + unsigned int rng_seed; /* -g */ + int num_reads; /* -n */ + int readers; /* -r */ + int size; /* -s */ + int timeout; /* -t */ +}; + +struct read_data { + const char *filename; + int dir_fd; + size_t filesize; + int num_reads; + unsigned int rng_seed; +}; + +int cancel_threads; + +int parse_options(int argc, char *const *argv, struct options *options) +{ + signed char c; + + /* Set defaults here */ + *options = (struct options){ + .test_dir = ".", + .num_reads = 1000, + .readers = 10, + .size = 10, + }; + + /* Load options from command line here */ + while ((c = getopt(argc, argv, "cd:g:n:r:s:t:")) != -1) { + switch (c) { + case 'c': + options->no_cleanup = true; + break; + + case 'd': + options->test_dir = optarg; + break; + + case 'g': + options->rng_seed = strtol(optarg, NULL, 10); + break; + + case 'n': + options->num_reads = strtol(optarg, NULL, 10); + break; + + case 'r': + options->readers = strtol(optarg, NULL, 10); + break; + + case 's': + options->size = strtol(optarg, NULL, 10); + break; + + case 't': + options->timeout = strtol(optarg, NULL, 10); + break; + } + } + + return 0; +} + +void *reader(void *data) +{ + struct read_data *read_data = (struct read_data *)data; + int i; + int fd = -1; + void *buffer = malloc(read_data->filesize); + + if (!buffer) { + err_msg("Failed to alloc read buffer"); + goto out; + } + + fd = openat(read_data->dir_fd, read_data->filename, + O_RDONLY | O_CLOEXEC); + if (fd == -1) { + err_msg("Failed to open file"); + goto out; + } + + for (i = 0; i < read_data->num_reads && !cancel_threads; ++i) { + off_t offset = rnd(read_data->filesize, &read_data->rng_seed); + size_t count = + rnd(read_data->filesize - offset, &read_data->rng_seed); + ssize_t err = pread(fd, buffer, count, offset); + + if (err != count) + err_msg("failed to read with value %lu", err); + } + +out: + close(fd); + free(read_data); + free(buffer); + return NULL; +} + +int write_data(int cmd_fd, int dir_fd, const char *name, size_t size) +{ + int fd = openat(dir_fd, name, O_RDWR | O_CLOEXEC); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int error = 0; + int i; + int block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + + if (fd == -1) { + err_msg("Could not open file for writing %s", name); + return -errno; + } + + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + for (i = 0; i < block_count; ++i) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + size_t block_size = + size > i * INCFS_DATA_FILE_BLOCK_SIZE ? + INCFS_DATA_FILE_BLOCK_SIZE : + size - (i * INCFS_DATA_FILE_BLOCK_SIZE); + struct incfs_fill_block fill_block = { + .compression = COMPRESSION_NONE, + .block_index = i, + .data_len = block_size, + .data = ptr_to_u64(data), + }; + struct incfs_fill_blocks fill_blocks = { + .count = 1, + .fill_blocks = ptr_to_u64(&fill_block), + }; + int written = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + + if (written != 1) { + error = -errno; + err_msg("Failed to write block %d in file %s", i, name); + break; + } + } +out: + close(fd); + return error; +} + +int test_files(int src_dir, int dst_dir, struct options const *options) +{ + unsigned int seed = options->rng_seed; + int cmd_file = openat(dst_dir, INCFS_PENDING_READS_FILENAME, + O_RDONLY | O_CLOEXEC); + int err; + const char *name = "001"; + incfs_uuid_t id; + size_t size; + int i; + pthread_t *threads = NULL; + + size = 1 << (rnd(options->size, &seed) + 12); + size += rnd(size, &seed); + + if (cmd_file == -1) { + err_msg("Could not open command file"); + return -errno; + } + + err = emit_file(cmd_file, NULL, name, &id, size, NULL); + if (err) { + err_msg("Failed to create file %s", name); + return err; + } + + threads = malloc(sizeof(pthread_t) * options->readers); + if (!threads) { + err_msg("Could not allocate memory for threads"); + return -ENOMEM; + } + + for (i = 0; i < options->readers; ++i) { + struct read_data *read_data = malloc(sizeof(*read_data)); + + if (!read_data) { + err_msg("Failed to allocate read_data"); + err = -ENOMEM; + break; + } + + *read_data = (struct read_data){ + .filename = name, + .dir_fd = dst_dir, + .filesize = size, + .num_reads = options->num_reads, + .rng_seed = seed, + }; + + rnd(0, &seed); + + err = pthread_create(threads + i, 0, reader, read_data); + if (err) { + err_msg("Failed to create thread"); + free(read_data); + break; + } + } + + if (err) + cancel_threads = 1; + else + err = write_data(cmd_file, dst_dir, name, size); + + for (; i > 0; --i) { + if (pthread_join(threads[i - 1], NULL)) { + err_msg("FATAL: failed to join thread"); + exit(-errno); + } + } + + free(threads); + close(cmd_file); + return err; +} + +int main(int argc, char *const *argv) +{ + struct options options; + int err; + const char *src_dir = "src"; + const char *dst_dir = "dst"; + int src_dir_fd = -1; + int dst_dir_fd = -1; + + err = parse_options(argc, argv, &options); + if (err) + return err; + + err = chdir(options.test_dir); + if (err) { + err_msg("Failed to change to %s", options.test_dir); + return -errno; + } + + err = remove_dir(src_dir) || remove_dir(dst_dir); + if (err) + return err; + + err = mkdir(src_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(dst_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mount_fs(dst_dir, src_dir, options.timeout); + if (err) { + err_msg("Failed to mount incfs"); + goto cleanup; + } + + src_dir_fd = open(src_dir, O_RDONLY | O_CLOEXEC); + dst_dir_fd = open(dst_dir, O_RDONLY | O_CLOEXEC); + if (src_dir_fd == -1 || dst_dir_fd == -1) { + err_msg("Failed to open src or dst dir"); + err = -errno; + goto cleanup; + } + + err = test_files(src_dir_fd, dst_dir_fd, &options); + +cleanup: + close(src_dir_fd); + close(dst_dir_fd); + if (!options.no_cleanup) { + umount(dst_dir); + remove_dir(dst_dir); + remove_dir(src_dir); + } + + return err; +} diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 6809399eac97..de9fccf6b608 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -7,12 +7,15 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include #include #include @@ -29,10 +32,46 @@ #define TEST_FAILURE 1 #define TEST_SUCCESS 0 -#define INCFS_MAX_MTREE_LEVELS 8 #define INCFS_ROOT_INODE 0 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) +#else +#error Big endian not supported! +#endif + +struct { + int file; + int test; + bool verbose; +} options; + +#define TESTCOND(condition) \ + do { \ + if (!(condition)) { \ + ksft_print_msg("%s failed %d\n", \ + __func__, __LINE__); \ + goto out; \ + } else if (options.verbose) \ + ksft_print_msg("%s succeeded %d\n", \ + __func__, __LINE__); \ + } while (false) + +#define TEST(statement, condition) \ + do { \ + statement; \ + TESTCOND(condition); \ + } while (false) + +#define TESTEQUAL(statement, res) \ + TESTCOND((statement) == (res)) + +#define TESTNE(statement, res) \ + TESTCOND((statement) != (res)) + struct hash_block { char data[INCFS_DATA_FILE_BLOCK_SIZE]; }; @@ -103,6 +142,13 @@ struct test_files_set get_test_files_set(void) .size = 900 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, { .index = 10, .name = "file_big", .size = 500 * 1024 * 1024 } }; + + if (options.file) + return (struct test_files_set) { + .files = files + options.file - 1, + .files_count = 1, + }; + return (struct test_files_set){ .files = files, .files_count = ARRAY_SIZE(files) }; } @@ -206,6 +252,17 @@ static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) return strdup(path); } +static char *get_incomplete_filename(const char *mnt_dir, incfs_uuid_t id) +{ + char path[FILENAME_MAX]; + char str_id[1 + 2 * sizeof(id)]; + + bin2hex(str_id, id.bytes, sizeof(id.bytes)); + snprintf(path, ARRAY_SIZE(path), "%s/.incomplete/%s", mnt_dir, str_id); + + return strdup(path); +} + int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) { char *path = get_index_filename(mnt_dir, id); @@ -228,8 +285,7 @@ int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) goto out; } - if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || - errno != EPERM) { + if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1) { print_error( "Successfully called PERMIT_FILL on non pending_read file"); return -errno; @@ -248,7 +304,7 @@ out: return fd; } -int get_file_attr(char *mnt_dir, incfs_uuid_t id, char *value, int size) +int get_file_attr(const char *mnt_dir, incfs_uuid_t id, char *value, int size) { char *path = get_index_filename(mnt_dir, id); int res; @@ -266,7 +322,13 @@ static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); } -static int emit_test_blocks(char *mnt_dir, struct test_file *file, +ssize_t ZSTD_compress_default(char *data, char *comp_data, size_t data_size, + size_t comp_size) +{ + return ZSTD_compress(comp_data, comp_size, data, data_size, 1); +} + +static int emit_test_blocks(const char *mnt_dir, struct test_file *file, int blocks[], int count) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; @@ -290,7 +352,8 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, for (i = 0; i < block_count; i++) { int block_index = blocks[i]; - bool compress = (file->index + block_index) % 2 == 0; + bool compress_zstd = (file->index + block_index) % 4 == 2; + bool compress_lz4 = (file->index + block_index) % 4 == 0; int seed = get_file_block_seed(file->index, block_index); off_t block_offset = ((off_t)block_index) * INCFS_DATA_FILE_BLOCK_SIZE; @@ -307,10 +370,10 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, block_size = file->size - block_offset; rnd_buf(data, block_size, seed); - if (compress) { - size_t comp_size = LZ4_compress_default( - (char *)data, (char *)comp_data, block_size, - ARRAY_SIZE(comp_data)); + if (compress_lz4) { + size_t comp_size = LZ4_compress_default((char *)data, + (char *)comp_data, block_size, + ARRAY_SIZE(comp_data)); if (comp_size <= 0) { error = -EBADMSG; @@ -323,6 +386,22 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, memcpy(current_data, comp_data, comp_size); block_size = comp_size; block_buf[i].compression = COMPRESSION_LZ4; + } else if (compress_zstd) { + size_t comp_size = ZSTD_compress(comp_data, + ARRAY_SIZE(comp_data), data, block_size, + 1); + + if (comp_size <= 0) { + error = -EBADMSG; + break; + } + if (current_data + comp_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, comp_data, comp_size); + block_size = comp_size; + block_buf[i].compression = COMPRESSION_ZSTD; } else { if (current_data + block_size > data_end) { error = -ENOMEM; @@ -376,7 +455,7 @@ out: return (error < 0) ? error : blocks_written; } -static int emit_test_block(char *mnt_dir, struct test_file *file, +static int emit_test_block(const char *mnt_dir, struct test_file *file, int block_index) { int res = emit_test_blocks(mnt_dir, file, &block_index, 1); @@ -406,7 +485,7 @@ static void shuffle(int array[], int count, unsigned int seed) } } -static int emit_test_file_data(char *mount_dir, struct test_file *file) +static int emit_test_file_data(const char *mount_dir, struct test_file *file) { int i; int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; @@ -439,7 +518,7 @@ out: return result; } -static loff_t read_whole_file(char *filename) +static loff_t read_whole_file(const char *filename) { int fd = -1; loff_t result; @@ -505,7 +584,7 @@ cleanup: return result; } -static char *create_backing_dir(char *mount_dir) +static char *create_backing_dir(const char *mount_dir) { struct stat st; char backing_dir_name[255]; @@ -541,7 +620,7 @@ static char *create_backing_dir(char *mount_dir) return strdup(backing_dir_name); } -static int validate_test_file_content_with_seed(char *mount_dir, +static int validate_test_file_content_with_seed(const char *mount_dir, struct test_file *file, unsigned int shuffle_seed) { @@ -603,12 +682,13 @@ failure: return error; } -static int validate_test_file_content(char *mount_dir, struct test_file *file) +static int validate_test_file_content(const char *mount_dir, + struct test_file *file) { return validate_test_file_content_with_seed(mount_dir, file, 0); } -static int data_producer(char *mount_dir, struct test_files_set *test_set) +static int data_producer(const char *mount_dir, struct test_files_set *test_set) { int ret = 0; int timeout_ms = 1000; @@ -656,6 +736,55 @@ static int data_producer(char *mount_dir, struct test_files_set *test_set) return ret; } +static int data_producer2(const char *mount_dir, + struct test_files_set *test_set) +{ + int ret = 0; + int timeout_ms = 1000; + struct incfs_pending_read_info2 prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int fd = open_commands_file(mount_dir); + + if (fd < 0) + return -errno; + + while ((ret = wait_for_pending_reads2(fd, timeout_ms, prs, prs_size)) > + 0) { + int read_count = ret; + int i; + + for (i = 0; i < read_count; i++) { + int j = 0; + struct test_file *file = NULL; + + for (j = 0; j < test_set->files_count; j++) { + bool same = same_id(&(test_set->files[j].id), + &(prs[i].file_id)); + + if (same) { + file = &test_set->files[j]; + break; + } + } + if (!file) { + ksft_print_msg( + "Unknown file in pending reads.\n"); + break; + } + + ret = emit_test_block(mount_dir, file, + prs[i].block_index); + if (ret < 0) { + ksft_print_msg("Emitting test data error: %s\n", + strerror(-ret)); + break; + } + } + } + close(fd); + return ret; +} + static int build_mtree(struct test_file *file) { char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; @@ -803,7 +932,7 @@ failure: return err; } -static int cant_touch_index_test(char *mount_dir) +static int cant_touch_index_test(const char *mount_dir) { char *file_name = "test_file"; int file_size = 123; @@ -837,6 +966,12 @@ static int cant_touch_index_test(char *mount_dir) goto failure; } + err = rmdir(index_path); + if (err == 0 || errno != EBUSY) { + print_error(".index directory should not be removed\n"); + goto failure; + } + err = emit_file(cmd_fd, ".index", file_name, &file_id, file_size, NULL); if (err != -EBUSY) { @@ -871,6 +1006,12 @@ static int cant_touch_index_test(char *mount_dir) goto failure; } + err = rename(index_path, dst_name); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't rename .index directory\n"); + goto failure; + } + close(cmd_fd); free(subdir); free(index_path); @@ -893,7 +1034,8 @@ failure: return TEST_FAILURE; } -static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) +static bool iterate_directory(const char *dir_to_iterate, bool root, + int file_count) { struct expected_name { const char *name; @@ -902,7 +1044,9 @@ static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) } names[] = { {INCFS_LOG_FILENAME, true, false}, {INCFS_PENDING_READS_FILENAME, true, false}, + {INCFS_BLOCKS_WRITTEN_FILENAME, true, false}, {".index", true, false}, + {".incomplete", true, false}, {"..", false, false}, {".", false, false}, }; @@ -998,7 +1142,7 @@ failure: return pass; } -static int basic_file_ops_test(char *mount_dir) +static int basic_file_ops_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1164,7 +1308,7 @@ failure: return TEST_FAILURE; } -static int dynamic_files_and_data_test(char *mount_dir) +static int dynamic_files_and_data_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1270,7 +1414,7 @@ failure: return TEST_FAILURE; } -static int concurrent_reads_and_writes_test(char *mount_dir) +static int concurrent_reads_and_writes_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1392,7 +1536,7 @@ failure: return TEST_FAILURE; } -static int work_after_remount_test(char *mount_dir) +static int work_after_remount_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1542,7 +1686,7 @@ failure: return TEST_FAILURE; } -static int attribute_test(char *mount_dir) +static int attribute_test(const char *mount_dir) { char file_attr[] = "metadata123123"; char attr_buf[INCFS_MAX_FILE_ATTR_SIZE] = {}; @@ -1625,7 +1769,7 @@ failure: return TEST_FAILURE; } -static int child_procs_waiting_for_data_test(char *mount_dir) +static int child_procs_waiting_for_data_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1717,7 +1861,7 @@ failure: return TEST_FAILURE; } -static int multiple_providers_test(char *mount_dir) +static int multiple_providers_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1733,7 +1877,8 @@ static int multiple_providers_test(char *mount_dir) goto failure; /* Mount FS and release the backing file. (10s wait time) */ - if (mount_fs(mount_dir, backing_dir, 10000) != 0) + if (mount_fs_opt(mount_dir, backing_dir, + "read_timeout_ms=10000,report_uid", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -1760,7 +1905,7 @@ static int multiple_providers_test(char *mount_dir) * pending reads. */ - ret = data_producer(mount_dir, &test); + ret = data_producer2(mount_dir, &test); exit(-ret); } else if (producer_pid > 0) { producer_pids[i] = producer_pid; @@ -1812,7 +1957,7 @@ failure: return TEST_FAILURE; } -static int hash_tree_test(char *mount_dir) +static int hash_tree_test(const char *mount_dir) { char *backing_dir; struct test_files_set test = get_test_files_set(); @@ -1934,301 +2079,234 @@ failure: enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; -static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, - enum expected_log expected_log) +static int validate_logs(const char *mount_dir, int log_fd, + struct test_file *file, + enum expected_log expected_log, + bool report_uid, bool expect_data) { + int result = TEST_FAILURE; uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; struct incfs_pending_read_info prs[2048] = {}; + struct incfs_pending_read_info2 prs2[2048] = {}; + struct incfs_pending_read_info *previous_record = NULL; int prs_size = ARRAY_SIZE(prs); - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - int expected_read_block_cnt; - int res; - int read_count; - int i, j; - char *filename = concat_file_name(mount_dir, file->name); - int fd; + int block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int expected_read_count, read_count, block_index, read_index; + char *filename = NULL; + int fd = -1; - fd = open(filename, O_RDONLY | O_CLOEXEC); - free(filename); - if (fd <= 0) - return TEST_FAILURE; + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); - if (block_cnt > prs_size) - block_cnt = prs_size; - expected_read_block_cnt = block_cnt; + if (block_count > prs_size) + block_count = prs_size; + expected_read_count = block_count; - for (i = 0; i < block_cnt; i++) { - res = pread(fd, data, sizeof(data), - INCFS_DATA_FILE_BLOCK_SIZE * i); + for (block_index = 0; block_index < block_count; block_index++) { + int result = pread(fd, data, sizeof(data), + INCFS_DATA_FILE_BLOCK_SIZE * block_index); /* Make some read logs of type SAME_FILE_NEXT_BLOCK */ - if (i % 10 == 0) + if (block_index % 100 == 10) usleep(20000); /* Skip some blocks to make logs of type SAME_FILE */ - if (i % 10 == 5) { - ++i; - --expected_read_block_cnt; + if (block_index % 10 == 5) { + ++block_index; + --expected_read_count; } - if (res <= 0) - goto failure; + if (expect_data) + TESTCOND(result > 0); + + if (!expect_data) + TESTEQUAL(result, -1); } - read_count = wait_for_pending_reads( - log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size); - if (expected_log == NO_LOG) { - if (read_count == 0) - goto success; - if (read_count < 0) - ksft_print_msg("Error reading logged reads %s.\n", - strerror(-read_count)); - else - ksft_print_msg("Somehow read empty logs.\n"); - goto failure; + if (report_uid) + read_count = wait_for_pending_reads2(log_fd, + expected_log == NO_LOG ? 10 : 0, + prs2, prs_size); + else + read_count = wait_for_pending_reads(log_fd, + expected_log == NO_LOG ? 10 : 0, + prs, prs_size); + + if (expected_log == NO_LOG) + TESTEQUAL(read_count, 0); + + if (expected_log == PARTIAL_LOG) + TESTCOND(read_count > 0 && + read_count <= expected_read_count); + + if (expected_log == FULL_LOG) + TESTEQUAL(read_count, expected_read_count); + + /* If read less than expected, advance block_index appropriately */ + for (block_index = 0, read_index = 0; + read_index < expected_read_count - read_count; + block_index++, read_index++) + if (block_index % 10 == 5) + ++block_index; + + for (read_index = 0; read_index < read_count; + block_index++, read_index++) { + struct incfs_pending_read_info *record = report_uid ? + (struct incfs_pending_read_info *) &prs2[read_index] : + &prs[read_index]; + + TESTCOND(same_id(&record->file_id, &file->id)); + TESTEQUAL(record->block_index, block_index); + TESTNE(record->timestamp_us, 0); + if (previous_record) + TESTEQUAL(record->serial_number, + previous_record->serial_number + 1); + + previous_record = record; + if (block_index % 10 == 5) + ++block_index; } - if (read_count < 0) { - ksft_print_msg("Error reading logged reads %s.\n", - strerror(-read_count)); - goto failure; - } - - i = 0; - if (expected_log == PARTIAL_LOG) { - if (read_count == 0) { - ksft_print_msg("No logs %s.\n", file->name); - goto failure; - } - - for (i = 0, j = 0; j < expected_read_block_cnt - read_count; - i++, j++) - if (i % 10 == 5) - ++i; - - } else if (read_count != expected_read_block_cnt) { - ksft_print_msg("Bad log read count %s %d %d.\n", file->name, - read_count, expected_read_block_cnt); - goto failure; - } - - for (j = 0; j < read_count; i++, j++) { - struct incfs_pending_read_info *read = &prs[j]; - - if (!same_id(&read->file_id, &file->id)) { - ksft_print_msg("Bad log read ino %s\n", file->name); - goto failure; - } - - if (read->block_index != i) { - ksft_print_msg("Bad log read ino %s %d %d.\n", - file->name, read->block_index, i); - goto failure; - } - - if (j != 0) { - unsigned long psn = prs[j - 1].serial_number; - - if (read->serial_number != psn + 1) { - ksft_print_msg("Bad log read sn %s %d %d.\n", - file->name, read->serial_number, - psn); - goto failure; - } - } - - if (read->timestamp_us == 0) { - ksft_print_msg("Bad log read timestamp %s.\n", - file->name); - goto failure; - } - - if (i % 10 == 5) - ++i; - } - -success: + result = TEST_SUCCESS; +out: close(fd); - return TEST_SUCCESS; - -failure: - close(fd); - return TEST_FAILURE; + free(filename); + return result; } -static int read_log_test(char *mount_dir) +static int read_log_test(const char *mount_dir) { + int result = TEST_FAILURE; struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; int i = 0; - int cmd_fd = -1, log_fd = -1, drop_caches = -1; - char *backing_dir; + int cmd_fd = -1, log_fd = -1; + char *backing_dir = NULL; - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); - - /* Write data. */ + /* Create files */ + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,report_uid,read_timeout_ms=0", + false), 0); + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, NULL)) - goto failure; - - if (emit_test_file_data(mount_dir, file)) - goto failure; + TESTEQUAL(emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL), 0); } - - /* Validate data */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) - goto failure; - } - - /* Unmount and mount again, to see that logs work after remount. */ close(cmd_fd); - close(log_fd); cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; + /* Validate logs */ + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, true, false), 0); - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) - goto failure; - } - - /* - * Unmount and mount again with no read log to make sure poll - * doesn't crash - */ - close(cmd_fd); + /* Unmount and mount again without report_uid */ close(log_fd); - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,read_timeout_ms=0", false), 0); - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0", - false) != 0) - goto failure; + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, false), 0); - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); + /* No read log to make sure poll doesn't crash */ + close(log_fd); + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=0,read_timeout_ms=0", + false), 0); - /* Validate data again - note should fail this time */ + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + NO_LOG, false, false), 0); + + /* Remount and check that logs start working again */ + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=1,read_timeout_ms=0", + true), 0); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + PARTIAL_LOG, false, false), 0); + + /* Remount and check that logs continue working */ + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=4,read_timeout_ms=0", + true), 0); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, false), 0); + + /* Check logs work with data */ for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, NO_LOG)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=4", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) - goto failure; + TESTEQUAL(emit_test_file_data(mount_dir, &test.files[i]), 0); + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, true), 0); } /* Final unmount */ close(log_fd); - free(backing_dir); - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); - return TEST_SUCCESS; - -failure: + result = TEST_SUCCESS; +out: close(cmd_fd); close(log_fd); free(backing_dir); umount(mount_dir); - return TEST_FAILURE; + return result; } -static int emit_partial_test_file_data(char *mount_dir, struct test_file *file) +static int emit_partial_test_file_data(const char *mount_dir, + struct test_file *file) { int i, j; int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; int *block_indexes = NULL; int result = 0; int blocks_written = 0; + int bw_fd = -1; + char buffer[20]; + struct pollfd pollfd; + long blocks_written_total, blocks_written_new_total; if (file->size == 0) return 0; + bw_fd = open_blocks_written_file(mount_dir); + if (bw_fd == -1) + return -errno; + + result = read(bw_fd, buffer, sizeof(buffer)); + if (result <= 0) { + result = -EIO; + goto out; + } + + buffer[result] = 0; + blocks_written_total = atol(buffer); + result = 0; + + pollfd = (struct pollfd) { + .fd = bw_fd, + .events = POLLIN, + }; + + result = poll(&pollfd, 1, 0); + if (result) { + result = -EIO; + goto out; + } + /* Emit 2 blocks, skip 2 blocks etc*/ block_indexes = calloc(block_cnt, sizeof(*block_indexes)); for (i = 0, j = 0; i < block_cnt; ++i) @@ -2248,9 +2326,29 @@ static int emit_partial_test_file_data(char *mount_dir, struct test_file *file) result = -EIO; goto out; } + + result = poll(&pollfd, 1, 0); + if (result != 1 || pollfd.revents != POLLIN) { + result = -EIO; + goto out; + } + + result = read(bw_fd, buffer, sizeof(buffer)); + buffer[result] = 0; + blocks_written_new_total = atol(buffer); + + if (blocks_written_new_total - blocks_written_total + != blocks_written) { + result = -EIO; + goto out; + } + + blocks_written_total = blocks_written_new_total; + result = 0; } out: free(block_indexes); + close(bw_fd); return result; } @@ -2384,7 +2482,7 @@ out: return error; } -static int get_blocks_test(char *mount_dir) +static int get_blocks_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2441,7 +2539,8 @@ failure: return TEST_FAILURE; } -static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) +static int emit_partial_test_file_hash(const char *mount_dir, + struct test_file *file) { int err; int fd; @@ -2455,9 +2554,6 @@ static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) if (file->size <= 4096 / 32 * 4096) return 0; - if (fill_blocks.count == 0) - return 0; - if (!fill_block_array) return -ENOMEM; fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); @@ -2559,7 +2655,7 @@ out: return error; } -static int get_hash_blocks_test(char *mount_dir) +static int get_hash_blocks_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2609,7 +2705,7 @@ failure: return TEST_FAILURE; } -static int large_file(char *mount_dir) +static int large_file_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2624,7 +2720,7 @@ static int large_file(char *mount_dir) .fill_blocks = ptr_to_u64(block_buf), }; incfs_uuid_t id; - int fd; + int fd = -1; backing_dir = create_backing_dir(mount_dir); if (!backing_dir) @@ -2665,6 +2761,713 @@ static int large_file(char *mount_dir) failure: close(fd); close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static int validate_mapped_file(const char *orig_name, const char *name, + size_t size, size_t offset) +{ + struct stat st; + int orig_fd = -1, fd = -1; + size_t block; + int result = TEST_FAILURE; + + if (stat(name, &st)) { + ksft_print_msg("Failed to stat %s with error %s\n", + name, strerror(errno)); + goto failure; + } + + if (size != st.st_size) { + ksft_print_msg("Mismatched file sizes for file %s - expected %llu, got %llu\n", + name, size, st.st_size); + goto failure; + } + + fd = open(name, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + ksft_print_msg("Failed to open %s with error %s\n", name, + strerror(errno)); + goto failure; + } + + orig_fd = open(orig_name, O_RDONLY | O_CLOEXEC); + if (orig_fd == -1) { + ksft_print_msg("Failed to open %s with error %s\n", orig_name, + strerror(errno)); + goto failure; + } + + for (block = 0; block < size; block += INCFS_DATA_FILE_BLOCK_SIZE) { + uint8_t orig_data[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + ssize_t orig_read, mapped_read; + + orig_read = pread(orig_fd, orig_data, + INCFS_DATA_FILE_BLOCK_SIZE, block + offset); + mapped_read = pread(fd, data, INCFS_DATA_FILE_BLOCK_SIZE, + block); + + if (orig_read < mapped_read || + mapped_read != min(size - block, + INCFS_DATA_FILE_BLOCK_SIZE)) { + ksft_print_msg("Failed to read enough data: %llu %llu %llu %lld %lld\n", + block, size, offset, orig_read, + mapped_read); + goto failure; + } + + if (memcmp(orig_data, data, mapped_read)) { + ksft_print_msg("Data doesn't match: %llu %llu %llu %lld %lld\n", + block, size, offset, orig_read, + mapped_read); + goto failure; + } + } + + result = TEST_SUCCESS; + +failure: + close(orig_fd); + close(fd); + return result; +} + +static int mapped_file_test(const char *mount_dir) +{ + char *backing_dir; + int result = TEST_FAILURE; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + for (i = 0; i < file_num; ++i) { + struct test_file *file = &test.files[i]; + size_t blocks = file->size / INCFS_DATA_FILE_BLOCK_SIZE; + size_t mapped_offset = blocks / 4 * + INCFS_DATA_FILE_BLOCK_SIZE; + size_t mapped_size = file->size / 4 * 3 - mapped_offset; + struct incfs_create_mapped_file_args mfa; + char mapped_file_name[FILENAME_MAX]; + char orig_file_path[PATH_MAX]; + char mapped_file_path[PATH_MAX]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, + NULL) < 0) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + if (snprintf(mapped_file_name, ARRAY_SIZE(mapped_file_name), + "%s.mapped", file->name) < 0) + goto failure; + + mfa = (struct incfs_create_mapped_file_args) { + .size = mapped_size, + .mode = 0664, + .file_name = ptr_to_u64(mapped_file_name), + .source_file_id = file->id, + .source_offset = mapped_offset, + }; + + result = ioctl(cmd_fd, INCFS_IOC_CREATE_MAPPED_FILE, &mfa); + if (result) { + ksft_print_msg( + "Failed to create mapped file with error %d\n", + result); + goto failure; + } + + result = snprintf(orig_file_path, + ARRAY_SIZE(orig_file_path), "%s/%s", + mount_dir, file->name); + + if (result < 0 || result >= ARRAY_SIZE(mapped_file_path)) { + result = TEST_FAILURE; + goto failure; + } + + result = snprintf(mapped_file_path, + ARRAY_SIZE(mapped_file_path), "%s/%s", + mount_dir, mapped_file_name); + + if (result < 0 || result >= ARRAY_SIZE(mapped_file_path)) { + result = TEST_FAILURE; + goto failure; + } + + result = validate_mapped_file(orig_file_path, mapped_file_path, + mapped_size, mapped_offset); + if (result) + goto failure; + } + +failure: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static const char v1_file[] = { + /* Header */ + /* 0x00: Magic number */ + 0x49, 0x4e, 0x43, 0x46, 0x53, 0x00, 0x00, 0x00, + /* 0x08: Version */ + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x10: Header size */ + 0x38, 0x00, + /* 0x12: Block size */ + 0x00, 0x10, + /* 0x14: Flags */ + 0x00, 0x00, 0x00, 0x00, + /* 0x18: First md offset */ + 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x20: File size */ + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x28: UUID */ + 0x8c, 0x7d, 0xd9, 0x22, 0xad, 0x47, 0x49, 0x4f, + 0xc0, 0x2c, 0x38, 0x8e, 0x12, 0xc0, 0x0e, 0xac, + + /* 0x38: Attribute */ + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x31, 0x32, 0x33, 0x31, 0x32, 0x33, + + /* Attribute md record */ + /* 0x46: Type */ + 0x02, + /* 0x47: Size */ + 0x25, 0x00, + /* 0x49: CRC */ + 0x9a, 0xef, 0xef, 0x72, + /* 0x4d: Next md offset */ + 0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x55: Prev md offset */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x5d: fa_offset */ + 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x65: fa_size */ + 0x0e, 0x00, + /* 0x67: fa_crc */ + 0xfb, 0x5e, 0x72, 0x89, + + /* Blockmap table */ + /* 0x6b: First 10-byte entry */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* Blockmap md record */ + /* 0x75: Type */ + 0x01, + /* 0x76: Size */ + 0x23, 0x00, + /* 0x78: CRC */ + 0x74, 0x45, 0xd3, 0xb9, + /* 0x7c: Next md offset */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x84: Prev md offset */ + 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x8c: blockmap offset */ + 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x94: blockmap count */ + 0x01, 0x00, 0x00, 0x00, +}; + +static int compatibility_test(const char *mount_dir) +{ + static const char *name = "file"; + int result = TEST_FAILURE; + char *backing_dir = NULL; + char *filename = NULL; + int fd = -1; + uint64_t size = 0x0c; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TEST(filename = concat_file_name(backing_dir, name), filename); + TEST(fd = open(filename, O_CREAT | O_WRONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(write(fd, v1_file, sizeof(v1_file)), sizeof(v1_file)); + TESTEQUAL(fsetxattr(fd, INCFS_XATTR_SIZE_NAME, &size, sizeof(size), 0), + 0); + TESTEQUAL(mount_fs(mount_dir, backing_dir, 50), 0); + free(filename); + TEST(filename = concat_file_name(mount_dir, name), filename); + close(fd); + TEST(fd = open(filename, O_RDONLY), fd != -1); + + result = TEST_SUCCESS; +out: + close(fd); + umount(mount_dir); + free(backing_dir); + free(filename); + return result; +} + +static int zero_blocks_written_count(int fd, uint32_t data_blocks_written, + uint32_t hash_blocks_written) +{ + int test_result = TEST_FAILURE; + uint64_t offset; + uint8_t type; + uint32_t bw; + + /* Get first md record */ + TESTEQUAL(pread(fd, &offset, sizeof(offset), 24), sizeof(offset)); + + /* Find status md record */ + for (;;) { + TESTNE(offset, 0); + TESTEQUAL(pread(fd, &type, sizeof(type), le64_to_cpu(offset)), + sizeof(type)); + if (type == 4) + break; + TESTEQUAL(pread(fd, &offset, sizeof(offset), + le64_to_cpu(offset) + 7), + sizeof(offset)); + } + + /* Read blocks_written */ + offset = le64_to_cpu(offset); + TESTEQUAL(pread(fd, &bw, sizeof(bw), offset + 23), sizeof(bw)); + TESTEQUAL(le32_to_cpu(bw), data_blocks_written); + TESTEQUAL(pread(fd, &bw, sizeof(bw), offset + 27), sizeof(bw)); + TESTEQUAL(le32_to_cpu(bw), hash_blocks_written); + + /* Write out zero */ + bw = 0; + TESTEQUAL(pwrite(fd, &bw, sizeof(bw), offset + 23), sizeof(bw)); + TESTEQUAL(pwrite(fd, &bw, sizeof(bw), offset + 27), sizeof(bw)); + + test_result = TEST_SUCCESS; +out: + return test_result; +} + +static int validate_block_count(const char *mount_dir, const char *backing_dir, + struct test_file *file, + int total_data_blocks, int filled_data_blocks, + int total_hash_blocks, int filled_hash_blocks) +{ + char *filename = NULL; + char *backing_filename = NULL; + int fd = -1; + struct incfs_get_block_count_args bca = {}; + int test_result = TEST_FAILURE; + struct incfs_filled_range ranges[128]; + struct incfs_get_filled_blocks_args fba = { + .range_buffer = ptr_to_u64(ranges), + .range_buffer_size = sizeof(ranges), + }; + int cmd_fd = -1; + struct incfs_permit_fill permit_fill; + + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(backing_filename = concat_file_name(backing_dir, file->name), + backing_filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + + close(fd); + TESTEQUAL(umount(mount_dir), 0); + TEST(fd = open(backing_filename, O_RDWR | O_CLOEXEC), fd != -1); + TESTEQUAL(zero_blocks_written_count(fd, filled_data_blocks, + filled_hash_blocks), + TEST_SUCCESS); + close(fd); + fd = -1; + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + permit_fill.file_descriptor = fd; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill), 0); + do { + ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + fba.start_index = fba.index_out + 1; + } while (fba.index_out < fba.total_blocks_out); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + + test_result = TEST_SUCCESS; +out: + close(cmd_fd); + close(fd); + free(filename); + free(backing_filename); + return test_result; +} + + + +static int validate_data_block_count(const char *mount_dir, + const char *backing_dir, + struct test_file *file) +{ + const int total_data_blocks = 1 + (file->size - 1) / + INCFS_DATA_FILE_BLOCK_SIZE; + const int filled_data_blocks = (total_data_blocks + 1) / 2; + + int test_result = TEST_FAILURE; + char *filename = NULL; + char *incomplete_filename = NULL; + struct stat stat_buf_incomplete, stat_buf_file; + int fd = -1; + struct incfs_get_block_count_args bca = {}; + int i; + + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(incomplete_filename = get_incomplete_filename(mount_dir, file->id), + incomplete_filename); + + TESTEQUAL(stat(filename, &stat_buf_file), 0); + TESTEQUAL(stat(incomplete_filename, &stat_buf_incomplete), 0); + TESTEQUAL(stat_buf_file.st_ino, stat_buf_incomplete.st_ino); + TESTEQUAL(stat_buf_file.st_nlink, 3); + + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, 0); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + + for (i = 0; i < total_data_blocks; i += 2) + TESTEQUAL(emit_test_block(mount_dir, file, i), 0); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); + TESTEQUAL(bca.total_hash_blocks_out, 0); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + close(fd); + fd = -1; + + TESTEQUAL(validate_block_count(mount_dir, backing_dir, file, + total_data_blocks, filled_data_blocks, + 0, 0), + 0); + + for (i = 1; i < total_data_blocks; i += 2) + TESTEQUAL(emit_test_block(mount_dir, file, i), 0); + + TESTEQUAL(stat(incomplete_filename, &stat_buf_incomplete), -1); + TESTEQUAL(errno, ENOENT); + + test_result = TEST_SUCCESS; +out: + close(fd); + free(incomplete_filename); + free(filename); + return test_result; +} + +static int data_block_count_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + char *backing_dir; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); + + for (i = 0; i < test.files_count; ++i) { + struct test_file *file = &test.files[i]; + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + TESTEQUAL(emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL), + 0); + close(cmd_fd); + cmd_fd = -1; + + TESTEQUAL(validate_data_block_count(mount_dir, backing_dir, + file), + 0); + } + + result = TEST_SUCCESS; +out: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static int validate_hash_block_count(const char *mount_dir, + const char *backing_dir, + struct test_file *file) +{ + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + const int total_data_blocks = 1 + (file->size - 1) / + INCFS_DATA_FILE_BLOCK_SIZE; + + int result = TEST_FAILURE; + int hash_layer = total_data_blocks; + int total_hash_blocks = 0; + int filled_hash_blocks; + char *filename = NULL; + int fd = -1; + struct incfs_get_block_count_args bca = {}; + + while (hash_layer > 1) { + hash_layer = (hash_layer + hash_per_block - 1) / hash_per_block; + total_hash_blocks += hash_layer; + } + filled_hash_blocks = total_hash_blocks > 1 ? 1 : 0; + + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + + TESTEQUAL(emit_partial_test_file_hash(mount_dir, file), 0); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + close(fd); + fd = -1; + + if (filled_hash_blocks) + TESTEQUAL(validate_block_count(mount_dir, backing_dir, file, + total_data_blocks, 0, + total_hash_blocks, filled_hash_blocks), + 0); + + result = TEST_SUCCESS; +out: + close(fd); + free(filename); + return result; +} + +static int hash_block_count_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + char *backing_dir; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); + + for (i = 0; i < test.files_count; i++) { + struct test_file *file = &test.files[i]; + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + TESTEQUAL(crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.add_data), + 0); + close(cmd_fd); + cmd_fd = -1; + + TESTEQUAL(validate_hash_block_count(mount_dir, backing_dir, + &test.files[i]), + 0); + } + + result = TEST_SUCCESS; +out: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static int is_close(struct timespec *start, int expected_ms) +{ + const int allowed_variance = 100; + int result = TEST_FAILURE; + struct timespec finish; + int diff; + + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &finish), 0); + diff = (finish.tv_sec - start->tv_sec) * 1000 + + (finish.tv_nsec - start->tv_nsec) / 1000000; + + TESTCOND(diff >= expected_ms - allowed_variance); + TESTCOND(diff <= expected_ms + allowed_variance); + result = TEST_SUCCESS; +out: + return result; +} + +static int per_uid_read_timeouts_test(const char *mount_dir) +{ + struct test_file file = { + .name = "file", + .size = 16 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + int result = TEST_FAILURE; + char *backing_dir = NULL; + int pid; + int cmd_fd = -1; + char *filename = NULL; + int fd = -1; + struct timespec start; + char buffer[4096]; + struct incfs_per_uid_read_timeouts purt_get[1]; + struct incfs_get_read_timeouts_args grt = { + ptr_to_u64(purt_get), + sizeof(purt_get) + }; + struct incfs_per_uid_read_timeouts purt_set[] = { + { + .uid = 0, + .min_time_ms = 1000, + .min_pending_time_ms = 2000, + .max_pending_time_ms = 3000, + }, + }; + struct incfs_set_read_timeouts_args srt = { + ptr_to_u64(purt_set), + sizeof(purt_set) + }; + int status; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "read_timeout_ms=1000,readahead=0", false), 0); + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + TESTEQUAL(emit_file(cmd_fd, NULL, file.name, &file.id, file.size, + NULL), 0); + + TEST(filename = concat_file_name(mount_dir, file.name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC), 0); + + /* Default mount options read failure is 1000 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + /* Set it to 3000 */ + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 3000), 0); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), -1); + TESTEQUAL(errno, E2BIG); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + grt.timeouts_array_size = sizeof(purt_get); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + TESTEQUAL(purt_get[0].uid, purt_set[0].uid); + TESTEQUAL(purt_get[0].min_time_ms, purt_set[0].min_time_ms); + TESTEQUAL(purt_get[0].min_pending_time_ms, + purt_set[0].min_pending_time_ms); + TESTEQUAL(purt_get[0].max_pending_time_ms, + purt_set[0].max_pending_time_ms); + + /* Still 1000 in UID 2 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + exit(0); + } + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Set it to default */ + purt_set[0].max_pending_time_ms = UINT32_MAX; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min read time */ + TESTEQUAL(emit_test_block(mount_dir, &file, 0), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), sizeof(buffer)); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min pending time */ + purt_set[0].uid = 2; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), sizeof(buffer)), + sizeof(buffer)); + exit(0); + } + sleep(1); + TESTEQUAL(emit_test_block(mount_dir, &file, 1), 0); + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 2000), 0); + + /* Clear timeouts */ + srt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + result = TEST_SUCCESS; +out: + close(fd); + + if (pid == 0) + exit(result); + + free(filename); + close(cmd_fd); + umount(backing_dir); + free(backing_dir); return result; } @@ -2691,13 +3494,54 @@ static char *setup_mount_dir() return mount_dir; } +int parse_options(int argc, char *const *argv) +{ + signed char c; + + while ((c = getopt(argc, argv, "f:t:v")) != -1) + switch (c) { + case 'f': + options.file = strtol(optarg, NULL, 10); + break; + + case 't': + options.test = strtol(optarg, NULL, 10); + break; + + case 'v': + options.verbose = true; + break; + + default: + return -EINVAL; + } + + return 0; +} + +struct test_case { + int (*pfunc)(const char *dir); + const char *name; +}; + +void run_one_test(const char *mount_dir, struct test_case *test_case) +{ + ksft_print_msg("Running %s\n", test_case->name); + if (test_case->pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", test_case->name); + else + ksft_test_result_fail("%s\n", test_case->name); +} + int main(int argc, char *argv[]) { char *mount_dir = NULL; - int fails = 0; int i; int fd, count; + if (parse_options(argc, argv)) + ksft_exit_fail_msg("Bad options\n"); + // Seed randomness pool for testing on QEMU // NOTE - this abuses the concept of randomness - do *not* ever do this // on a machine for production use - the device will think it has good @@ -2721,10 +3565,7 @@ int main(int argc, char *argv[]) { \ test, #test \ } - struct { - int (*pfunc)(char *dir); - const char *name; - } cases[] = { + struct test_case cases[] = { MAKE_TEST(basic_file_ops_test), MAKE_TEST(cant_touch_index_test), MAKE_TEST(dynamic_files_and_data_test), @@ -2737,29 +3578,25 @@ int main(int argc, char *argv[]) MAKE_TEST(read_log_test), MAKE_TEST(get_blocks_test), MAKE_TEST(get_hash_blocks_test), - MAKE_TEST(large_file), + MAKE_TEST(large_file_test), + MAKE_TEST(mapped_file_test), + MAKE_TEST(compatibility_test), + MAKE_TEST(data_block_count_test), + MAKE_TEST(hash_block_count_test), + MAKE_TEST(per_uid_read_timeouts_test), }; #undef MAKE_TEST - /* Bring back for kernel 5.x */ - /* ksft_set_plan(ARRAY_SIZE(cases)); */ + if (options.test) { + if (options.test <= 0 || options.test > ARRAY_SIZE(cases)) + ksft_exit_fail_msg("Invalid test\n"); - for (i = 0; i < ARRAY_SIZE(cases); ++i) { - ksft_print_msg("Running %s\n", cases[i].name); - if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) - ksft_test_result_pass("%s\n", cases[i].name); - else { - ksft_test_result_fail("%s\n", cases[i].name); - fails++; - } - } + run_one_test(mount_dir, &cases[options.test - 1]); + } else + for (i = 0; i < ARRAY_SIZE(cases); ++i) + run_one_test(mount_dir, &cases[i]); umount2(mount_dir, MNT_FORCE); rmdir(mount_dir); - - if (fails > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); - return 0; + return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail(); } diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index e194f63ba922..e60b0d36f49d 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -25,6 +25,45 @@ #define __S_IFREG S_IFREG #endif +unsigned int rnd(unsigned int max, unsigned int *seed) +{ + return rand_r(seed) * ((uint64_t)max + 1) / RAND_MAX; +} + +int remove_dir(const char *dir) +{ + int err = rmdir(dir); + + if (err && errno == ENOTEMPTY) { + err = delete_dir_tree(dir); + if (err) + return err; + return 0; + } + + if (err && errno != ENOENT) + return -errno; + + return 0; +} + +int drop_caches(void) +{ + int drop_caches = + open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); + int i; + + if (drop_caches == -1) + return -errno; + i = write(drop_caches, "3", 1); + close(drop_caches); + + if (i != 1) + return -errno; + + return 0; +} + int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms) { @@ -195,14 +234,28 @@ int open_commands_file(const char *mount_dir) int open_log_file(const char *mount_dir) { - char cmd_file[255]; - int cmd_fd; + char file[255]; + int fd; - snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); - cmd_fd = open(cmd_file, O_RDWR | O_CLOEXEC); - if (cmd_fd < 0) + snprintf(file, ARRAY_SIZE(file), "%s/.log", mount_dir); + fd = open(file, O_RDWR | O_CLOEXEC); + if (fd < 0) perror("Can't open log file"); - return cmd_fd; + return fd; +} + +int open_blocks_written_file(const char *mount_dir) +{ + char file[255]; + int fd; + + snprintf(file, ARRAY_SIZE(file), + "%s/%s", mount_dir, INCFS_BLOCKS_WRITTEN_FILENAME); + fd = open(file, O_RDONLY | O_CLOEXEC); + + if (fd < 0) + perror("Can't open blocks_written file"); + return fd; } int wait_for_pending_reads(int fd, int timeout_ms, @@ -233,7 +286,35 @@ int wait_for_pending_reads(int fd, int timeout_ms, return read_res / sizeof(*prs); } -char *concat_file_name(const char *dir, char *file) +int wait_for_pending_reads2(int fd, int timeout_ms, + struct incfs_pending_read_info2 *prs, int prs_count) +{ + ssize_t read_res = 0; + + if (timeout_ms > 0) { + int poll_res = 0; + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN + }; + + poll_res = poll(&pollfd, 1, timeout_ms); + if (poll_res < 0) + return -errno; + if (poll_res == 0) + return 0; + if (!(pollfd.revents | POLLIN)) + return 0; + } + + read_res = read(fd, prs, prs_count * sizeof(*prs)); + if (read_res < 0) + return -errno; + + return read_res / sizeof(*prs); +} + +char *concat_file_name(const char *dir, const char *file) { char full_name[FILENAME_MAX] = ""; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 9af63e4e922c..f5ed8dc5f0ff 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -18,6 +18,13 @@ #endif #define SHA256_DIGEST_SIZE 32 +#define INCFS_MAX_MTREE_LEVELS 8 + +unsigned int rnd(unsigned int max, unsigned int *seed); + +int remove_dir(const char *dir); + +int drop_caches(void); int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms); @@ -45,10 +52,15 @@ int open_commands_file(const char *mount_dir); int open_log_file(const char *mount_dir); +int open_blocks_written_file(const char *mount_dir); + int wait_for_pending_reads(int fd, int timeout_ms, struct incfs_pending_read_info *prs, int prs_count); -char *concat_file_name(const char *dir, char *file); +int wait_for_pending_reads2(int fd, int timeout_ms, + struct incfs_pending_read_info2 *prs, int prs_count); + +char *concat_file_name(const char *dir, const char *file); void sha256(const char *data, size_t dsize, char *hash); diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h index 42c3596815b8..a7667a613bbc 100644 --- a/tools/testing/selftests/kvm/include/x86.h +++ b/tools/testing/selftests/kvm/include/x86.h @@ -59,7 +59,7 @@ enum x86_register { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c index 4d35eba73dc9..800fe36064f9 100644 --- a/tools/testing/selftests/kvm/lib/x86.c +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -449,11 +449,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index fcff7047000d..8edaafc2b92f 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 5ab5f4810e43..7b9018fad092 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include #include diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..cbb38a0d1b25 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -226,6 +226,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, return extract_bytes(value, addr & 7, len); } +static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + int target_vcpu_id = vcpu->vcpu_id; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + /* reporting of the Last bit is not supported for userspace */ + return extract_bytes(value, addr & 7, len); +} + static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -532,8 +549,9 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_TYPER, - vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, + vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_WAKER, vgic_mmio_read_raz, vgic_mmio_write_wi, 4,