Merge branch 'android14-5.15' into branch 'android14-5.15-lts'

Catch up with changes made in android14-5.15, including new symbols to
track the ABI.  Changes included here are the following:

* d83231efe4 ANDROID: 16K: Handle pad VMA splits and merges
* 19d6e7eb47 ANDROID: 16K: madvise_vma_pad_pages: Remove filemap_fault check
* ae44e8dac8 ANDROID: 16K: Only madvise padding from dynamic linker context
* ae67f18944 ANDROID: Enable CONFIG_LAZY_RCU in x86 gki_defconfig
* d38091b4ff ANDROID: Enable CONFIG_LAZY_RCU in arm64 gki_defconfig
* 37b02c190c FROMLIST: rcu: Provide a boot time parameter to control lazy RCU
* 4adb60810c ANDROID: rcu: Add a minimum time for marking boot as completed
* 16ea06fe44 UPSTREAM: rcu/kvfree: Move need_offload_krc() out of krcp->lock
* 5d1a3986c2 UPSTREAM: rcu/kfree: Fix kfree_rcu_shrink_count() return value
* 88587c1838 UPSTREAM: rcu/kvfree: Update KFREE_DRAIN_JIFFIES interval
* 5b47d8411d UPSTREAM: rcu/kvfree: Remove useless monitor_todo flag
* 84828604c7 UPSTREAM: scsi/scsi_error: Use call_rcu_hurry() instead of call_rcu()
* a4124a21b1 ANDROID: rxrpc: Use call_rcu_hurry() instead of call_rcu()
* 930bdc0924 UPSTREAM: net: devinet: Reduce refcount before grace period
* 706e751b33 UPSTREAM: rcu: Disable laziness if lazy-tracking says so
* 8568593719 UPSTREAM: rcu: Track laziness during boot and suspend
* f12c162eac UPSTREAM: net: Use call_rcu_hurry() for dst_release()
* ff22b562f0 UPSTREAM: percpu-refcount: Use call_rcu_hurry() for atomic switch
* a4cc1aa22d UPSTREAM: rcu/sync: Use call_rcu_hurry() instead of call_rcu
* 222a4cd66c UPSTREAM: rcu: Refactor code a bit in rcu_nocb_do_flush_bypass()
* f4abe7bb5f BACKPORT: rcu: Shrinker for lazy rcu
* e0297c38a5 BACKPORT: rcu: Make call_rcu() lazy to save power
* 276d33f21a UPSTREAM: rcu: Fix late wakeup when flush of bypass cblist happens
* 24e6758060 BACKPORT: rcu: Fix missing nocb gp wake on rcu_barrier()
* fb310d468a UPSTREAM: netfilter: nft_set_pipapo: do not free live element
* 444a497469 ANDROID: GKI: Update lenovo symbol list
* 978f805a2d ANDROID: GKI: Export css_task_iter_start()
* 0ae4f32634 FROMGIT: coresight: etm4x: Fix access to resource selector registers
* 8ba1802287 BACKPORT: FROMGIT: coresight: etm4x: Safe access for TRCQCLTR
* 6a08c9fb9d FROMGIT: coresight: etm4x: Do not save/restore Data trace control registers
* a02278f990 FROMGIT: coresight: etm4x: Do not hardcode IOMEM access for register restore
* e8e652b8c8 UPSTREAM: af_unix: Fix garbage collector racing against connect()
* 65e0a92c6d UPSTREAM: af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
* 5725caa296 FROMLIST: scsi: ufs: Check for completion from the timeout handler
* 8563ce5895 BACKPORT: FROMLIST: scsi: ufs: Make the polling code report which command has been completed
* 0fcd7a1c7c BACKPORT: FROMLIST: scsi: ufs: Make ufshcd_poll() complain about unsupported arguments
* aa07d6b28d ANDROID: scsi: ufs: Unexport ufshcd_mcq_poll_cqe_nolock()
* 25ebc09178 ANDROID: mm: fix incorrect unlock mmap_lock for speculative swap fault
* 264477e0d8 ANDROID: Update the ABI symbol list
* 084d22016c ANDROID: 16K: Separate padding from ELF LOAD segment mappings
* 37ea0e8485 ANDROID: 16K: Exclude ELF padding for fault around range
* e7bff50b22 ANDROID: 16K: Use MADV_DONTNEED to save VMA padding pages.
* 38cccb9154 ANDROID: 16K: Introduce ELF padding representation for VMAs
* 9274c308d8 ANDROID: 16K: Introduce /sys/kernel/mm/pgsize_miration/enabled
* ceb8c595f8 UPSTREAM: netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path
* ea419cda5c UPSTREAM: netfilter: nf_tables: release batch on table validation from abort path
* 6b883cdac2 UPSTREAM: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout
* f395ea0980 ANDROID: GKI: update mtktv symbol
* a5d03f57d6 UPSTREAM: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain
* 0cf6fdfb0a UPSTREAM: HID: playstation: support updated DualSense rumble mode.
* e3da19b218 UPSTREAM: HID: playstation: stop DualSense output work on remove.
* 62085a0e6d UPSTREAM: HID: playstation: convert to use dev_groups
* adce8aae67 UPSTREAM: HID: playstation: fix return from dualsense_player_led_set_brightness()
* c996cb50e2 UPSTREAM: HID: playstation: expose DualSense player LEDs through LED class.
* f011142fea UPSTREAM: leds: add new LED_FUNCTION_PLAYER for player LEDs for game controllers.
* 19cbe31642 UPSTREAM: HID: playstation: expose DualSense lightbar through a multi-color LED.
* 3507c287a6 UPSTREAM: mm: update mark_victim tracepoints fields
* cd4da4b748 Revert "FROMGIT: mm: update mark_victim tracepoints fields"
* 948f42ca2b UPSTREAM: netfilter: nft_set_pipapo: release elements in clone only from destroy path
* 6a45518094 ANDROID: GKI: Update symbol list for Amlogic
* 3de9177e81 ANDROID: GKI: Update symbol list for lenovo
* 668dfb812d FROMLIST: binder: check offset alignment in binder_get_object()
* 3b3c1c80e8 ANDROID: GKI: Update the ABI symbol list
* f600c62d25 ANDROID: GKI: Update symbol list for Amlogic
* d154026d33 ANDROID: GKI: Update the ABI symbol list
* 5f12c91ab0 Merge tag 'android14-5.15.148_r00' into android14-5.15
* ec86765bae ANDROID: KVM: arm64: Fix TLB invalidation when coalescing into a block
* 5854f4c2af ANDROID: KVM: arm64: Fix missing trace event for nVHE dyn HVCs
* 865e6d9df1 UPSTREAM: netfilter: nf_tables: disallow timeout for anonymous sets
* 537e133918 UPSTREAM: arm64: Apply dynamic shadow call stack patching in two passes
* 96305e30e9 ANDROID: userfaultfd: abort uffdio ops if mmap_lock is contended
* 3673533a09 ANDROID: userfaultfd: add MMAP_TRYLOCK mode for COPY/ZEROPAGE
* 3fd32dc171 ANDROID: fix isolate_migratepages_range return value
* 483395b445 Revert "ANDROID: Add CONFIG_BLK_DEV_NULL_BLK=m to gki_defconfig"
* 7b301c7079 ANDROID: fips140 - fix integrity check by unapplying dynamic SCS
* b1f8c25026 ANDROID: fips140 - add option for debugging the integrity check
* 1225d7ed6c ANDROID: fuse-bpf: Fix readdir for getdents
* 37b83a89de BACKPORT: f2fs: split initial and dynamic conditions for extent_cache
* ac4797cea5 UPSTREAM: usb: typec: altmodes/displayport: create sysfs nodes as driver's default device attribute group
* 5aed5c3435 ANDROID: uid_sys_stat: fix data-error of cputime and io
* c3b70e94f1 UPSTREAM: usb: typec: class: fix typec_altmode_put_partner to put plugs
* 282bfc6c30 UPSTREAM: Revert "usb: typec: class: fix typec_altmode_put_partner to put plugs"
* 2390d58862 ANDROID: GKI: Update the ABI symbol list
* 0d0784d6b2 ANDROID: Update ABI for userfaultfd_ctx
* ee9964b308 ANDROID: userfaultfd: allow SPF for UFFD_FEATURE_SIGBUS on private+anon
* 9cef46f39e ANDROID: remove LTO check from build.config.gki.aarch64.fips140
* b74b4cbe62 Revert "interconnect: Fix locking for runpm vs reclaim"
* f115661832 Revert "interconnect: Teach lockdep about icc_bw_lock order"
* d96725ec1a BACKPORT: FROMGIT: PM: runtime: add tracepoint for runtime_status changes
* 4403e2517a UPSTREAM: netfilter: nft_set_rbtree: skip end interval element from gc
* 288abb8b19 ANDROID: PCI: dwc: Wait for the link only if it has been started
* ff1e211db6 ANDROID: null_blk: Support configuring the maximum segment size
* 0ffd03e67d ANDROID: scsi_debug: Support configuring the maximum segment size
* 3ef8e9009c ANDROID: block: Make sub_page_limit_queues available in debugfs
* bed88e7c4f ANDROID: block: Add support for filesystem requests and small segments
* e99e7de8a6 ANDROID: block: Support submitting passthrough requests with small segments
* 3f6018f1b6 ANDROID: block: Support configuring limits below the page size
* 025c278e84 ANDROID: block: Prepare for supporting sub-page limits
* f56ddffe05 ANDROID: block: Use pr_info() instead of printk(KERN_INFO ...)

Change-Id: I6834aac2be94f461b9f59baa696d5d130fc295d9
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>

@@ -4637,6 +4637,11 @@
			rcu_node tree with an eye towards determining
			why a new grace period has not yet started.

	rcutree.enable_rcu_lazy= [KNL]
			To save power, batch RCU callbacks and flush after
			delay, memory pressure or callback list growing too
			big.

	rcuscale.gp_async= [KNL]
			Measure performance of asynchronous
			grace-period primitives such as call_rcu().
@@ -4881,6 +4886,21 @@
	rcutorture.verbose= [KNL]
			Enable additional printk() statements.

	rcupdate.android_rcu_boot_end_delay= [KNL]
			Minimum time in milliseconds from the start of boot
			that must elapse before the boot sequence can be marked
			complete from RCU's perspective, after which RCU's
			behavior becomes more relaxed. The default value is also
			configurable via CONFIG_RCU_BOOT_END_DELAY.
			Userspace can also mark the boot as completed
			sooner by writing the time in milliseconds, say once
			userspace considers the system as booted, to:
			/sys/module/rcupdate/parameters/android_rcu_boot_end_delay
			Or even just writing a value of 0 to this sysfs node.
			The sysfs node can also be used to extend the delay
			to be larger than the default, assuming the marking
			of boot complete has not yet occurred.

	rcupdate.rcu_cpu_stall_ftrace_dump= [KNL]
			Dump ftrace buffer after reporting RCU CPU
			stall warning.
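
A user-space sketch of the early-completion path described above, i.e. writing 0 (or a smaller delay in milliseconds) to the module parameter once userspace considers the system booted; error handling kept minimal:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch: mark boot as complete for RCU by writing "0" to the
 * parameter documented above. Requires root; path as given in the text. */
int main(void)
{
	const char *p = "/sys/module/rcupdate/parameters/android_rcu_boot_end_delay";
	int fd = open(p, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}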


@@ -16,6 +16,20 @@ but then try the legacy ones, too.
Notice there's a list of functions in include/dt-bindings/leds/common.h .
* Gamepads and joysticks
Game controllers may feature LEDs to indicate a player number. This is commonly
used on game consoles in which multiple controllers can be connected to a system.
The "player LEDs" are then programmed with a pattern to indicate a particular
player. For example, a game controller with 4 LEDs may be programmed with "x---"
to indicate player 1, "-x--" to indicate player 2 etcetera where "x" means on.
Input drivers can utilize the LED class to expose the individual player LEDs
of a game controller using the function "player".
Note: tracking and management of Player IDs is the responsibility of user space,
though drivers may pick a default value.
Good: "input*:*:player-{1,2,3,4,5}"
* Keyboards
Good: "input*:*:capslock"
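
The hid-playstation changes later in this commit register player LEDs exactly as the gamepad section above describes; a minimal sketch of the naming convention for a hypothetical input driver (the "input0" prefix and the set callback are illustrative):

#include <dt-bindings/leds/common.h>	/* LED_FUNCTION_PLAYER1 is "player-1" */
#include <linux/leds.h>

static int player1_set(struct led_classdev *cdev, enum led_brightness b)
{
	/* write the player-LED bitmask to the hardware here */
	return 0;
}

static struct led_classdev player1_led = {
	.name = "input0:white:" LED_FUNCTION_PLAYER1,
	.max_brightness = 1,
	.brightness_set_blocking = player1_set,
};

/* in probe(): devm_led_classdev_register(parent_dev, &player1_led); */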

(one file's diff omitted: too large to display)


@@ -28,6 +28,7 @@
arm64_use_ng_mappings
__arm_smccc_hvc
__arm_smccc_smc
async_schedule_node
atomic_notifier_call_chain
atomic_notifier_chain_register
atomic_notifier_chain_unregister
@@ -35,6 +36,9 @@
backlight_device_unregister
balance_dirty_pages_ratelimited
bcmp
bdi_alloc
bdi_put
bdi_register
bio_add_page
bio_alloc_bioset
bio_associate_blkg
@@ -82,8 +86,8 @@
bpf_master_redirect_enabled_key
bpf_prog_put
bpf_stats_enabled_key
-bpf_trace_run10
bpf_trace_run1
+bpf_trace_run10
bpf_trace_run2
bpf_trace_run3
bpf_trace_run4
@@ -91,6 +95,7 @@
bpf_trace_run6
bpf_trace_run7
bpf_trace_run8
bpf_trace_run9
bpf_warn_invalid_xdp_action
__bread_gfp
__brelse
@@ -194,6 +199,8 @@
cpu_all_bits
cpu_bit_bitmap
cpufreq_boost_enabled
cpufreq_cpu_get
cpufreq_cpu_put
cpufreq_generic_attr
cpufreq_generic_frequency_table_verify
cpufreq_generic_suspend
@@ -256,6 +263,7 @@
crypto_skcipher_encrypt
crypto_skcipher_setkey
crypto_unregister_ahash
crypto_unregister_ahashes
crypto_unregister_shash
crypto_unregister_skcipher
__crypto_xor
@@ -265,6 +273,7 @@
_ctype
current_time
current_umask
deactivate_locked_super
debugfs_attr_read
debugfs_attr_write
debugfs_create_blob
@@ -585,6 +594,7 @@
drm_connector_cleanup
drm_connector_init
drm_connector_list_iter_begin
drm_connector_list_iter_end
drm_connector_list_iter_next
drm_connector_set_vrr_capable_property
drm_connector_unregister
@@ -695,6 +705,7 @@
drm_state_dump
drm_universal_plane_init
drm_vblank_init
drm_wait_one_vblank
drm_writeback_connector_init
drm_writeback_queue_job
drm_writeback_signal_completion
@@ -728,9 +739,9 @@
extcon_get_extcon_dev
extcon_get_state
extcon_register_notifier
-extcon_unregister_notifier
extcon_set_state
extcon_set_state_sync
+extcon_unregister_notifier
fasync_helper
fault_in_iov_iter_readable
__fdget
@@ -755,6 +766,7 @@
find_vm_area
find_vpid
finish_wait
fixed_size_llseek
flow_block_cb_setup_simple
flow_rule_match_basic
flow_rule_match_ipv4_addrs
@@ -808,6 +820,7 @@
generic_handle_irq
generic_permission
generic_read_dir
generic_shutdown_super
generic_write_checks
generic_write_end
genlmsg_multicast_allns
@@ -819,13 +832,13 @@
genphy_handle_interrupt_no_ack
genphy_read_abilities
genphy_read_mmd_unsupported
-genphy_write_mmd_unsupported
genphy_read_status
genphy_restart_aneg
genphy_resume
genphy_soft_reset
genphy_suspend
genphy_update_link
+genphy_write_mmd_unsupported
gen_pool_add_owner
gen_pool_alloc_algo_owner
gen_pool_avail
@@ -873,6 +886,7 @@
gpiod_direction_output_raw
gpiod_get
gpiod_get_index
gpiod_get_index_optional
gpiod_get_optional
gpiod_get_raw_value
gpiod_get_raw_value_cansleep
@@ -1034,8 +1048,8 @@
iwe_stream_add_event
iwe_stream_add_point
iwe_stream_add_value
-jiffies_64
jiffies
+jiffies_64
jiffies_to_msecs
jiffies_to_usecs
kasan_flag_enabled
@@ -1144,12 +1158,18 @@
__lock_page
lockref_get
logfc
log_post_read_mmio
log_post_write_mmio
log_read_mmio
log_write_mmio
lookup_bdev
loops_per_jiffy
mac_pton
make_bad_inode
mangle_path
mark_buffer_dirty
__mark_inode_dirty
match_string
mbox_chan_received_data
mbox_chan_txdone
mbox_controller_register
@@ -1161,11 +1181,11 @@
mdiobus_alloc_size
mdiobus_free
mdiobus_get_phy
-mdiobus_read
__mdiobus_read
+mdiobus_read
mdiobus_unregister
-mdiobus_write
__mdiobus_write
+mdiobus_write
mdio_device_create
mdio_device_free
media_create_pad_link
@@ -1184,8 +1204,8 @@
memparse
memremap
mem_section
-memset64
memset
+memset64
__memset_io
memstart_addr
memunmap
@@ -1204,8 +1224,8 @@
mmc_cqe_request_done
mmc_detect_change
mmc_free_host
-mmc_gpio_get_cd
mmc_gpiod_request_cd
+mmc_gpio_get_cd
mmc_of_parse
mmc_regulator_get_supply
mmc_regulator_set_ocr
@@ -1223,6 +1243,7 @@
mod_delayed_work_on
mod_node_page_state
mod_timer
__module_get
module_layout
module_put
mpage_readahead
@@ -1245,6 +1266,7 @@
napi_gro_receive
__napi_schedule
napi_schedule_prep
__ndelay
netdev_alert
__netdev_alloc_skb
netdev_err
@@ -1285,8 +1307,8 @@
nla_find
nla_memcpy
__nla_parse
-nla_put_64bit
nla_put
+nla_put_64bit
nla_put_nohdr
nla_reserve
nla_strscpy
@@ -1301,6 +1323,8 @@
ns_to_timespec64
__num_online_cpus
nvmem_cell_read
nvmem_register
nvmem_unregister
of_address_to_resource
of_alias_get_id
of_clk_add_provider
@@ -1309,6 +1333,7 @@
of_clk_hw_onecell_get
of_clk_set_defaults
of_clk_src_onecell_get
of_count_phandle_with_args
of_device_get_match_data
of_device_is_available
of_device_is_compatible
@@ -1348,6 +1373,8 @@
of_match_node
__of_mdiobus_register
of_mdio_find_bus
of_n_addr_cells
of_n_size_cells
of_parse_phandle
of_parse_phandle_with_args
of_phy_is_fixed_link
@@ -1359,9 +1386,11 @@
of_property_read_string_helper
of_property_read_u32_index
of_property_read_u64
of_property_read_u64_index
of_property_read_variable_u16_array
of_property_read_variable_u32_array
of_property_read_variable_u8_array
of_prop_next_string
of_prop_next_u32
of_pwm_xlate_with_flags
of_reserved_mem_device_init_by_idx
@@ -1441,6 +1470,7 @@
pci_unlock_rescan_remove
pci_write_config_byte
pci_write_config_dword
pcpu_nr_pages
PDE_DATA
__per_cpu_offset
perf_trace_buf_alloc
@@ -1478,8 +1508,8 @@
phylink_start
phylink_stop
phylink_suspend
-phy_modify
__phy_modify
+phy_modify
phy_modify_changed
phy_modify_paged
phy_modify_paged_changed
@@ -1563,6 +1593,7 @@
__printk_ratelimit
proc_create
proc_create_data
proc_create_single_data
proc_dointvec
proc_get_parent_data
proc_mkdir
@@ -1803,10 +1834,12 @@
sg_alloc_table_from_pages_segment
sg_copy_from_buffer
sg_copy_to_buffer
sget_fc
sg_free_table
sg_init_one
sg_init_table
sg_nents
sg_nents_for_len
sg_next
__sg_page_iter_next
__sg_page_iter_start
@@ -1814,6 +1847,8 @@
sg_pcopy_to_buffer
show_class_attr_string
show_regs
si_mem_available
si_meminfo
simple_attr_open
simple_attr_read
simple_attr_release
@@ -1993,12 +2028,14 @@
sysctl_sched_latency
sysfs_create_bin_file
sysfs_create_file_ns
sysfs_create_files
sysfs_create_group
sysfs_create_link
sysfs_emit
__sysfs_match_string
sysfs_remove_bin_file
sysfs_remove_file_ns
sysfs_remove_files
sysfs_remove_group
sysfs_remove_link
sysfs_streq
@@ -2028,6 +2065,7 @@
time64_to_tm
timespec64_to_jiffies
_totalram_pages
touch_softlockup_watchdog
trace_clock_local
trace_event_buffer_commit
trace_event_buffer_reserve
@@ -2047,9 +2085,12 @@
__traceiter_android_rvh_place_entity
__traceiter_android_rvh_replace_next_task_fair
__traceiter_android_rvh_schedule
__traceiter_android_rvh_select_task_rq_fair
__traceiter_android_rvh_select_task_rq_rt
__traceiter_android_rvh_set_sugov_update
__traceiter_android_rvh_tick_entry
__traceiter_android_vh_alloc_pages_entry
__traceiter_android_vh_alloc_pages_slowpath
__traceiter_android_vh_calc_alloc_flags
__traceiter_android_vh_cma_alloc_bypass
__traceiter_android_vh_cma_drain_all_pages_bypass
@@ -2097,9 +2138,12 @@
__tracepoint_android_rvh_place_entity
__tracepoint_android_rvh_replace_next_task_fair
__tracepoint_android_rvh_schedule
__tracepoint_android_rvh_select_task_rq_fair
__tracepoint_android_rvh_select_task_rq_rt
__tracepoint_android_rvh_set_sugov_update
__tracepoint_android_rvh_tick_entry
__tracepoint_android_vh_alloc_pages_entry
__tracepoint_android_vh_alloc_pages_slowpath
__tracepoint_android_vh_calc_alloc_flags
__tracepoint_android_vh_cma_alloc_bypass
__tracepoint_android_vh_cma_drain_all_pages_bypass
@@ -2209,16 +2253,16 @@
usb_autopm_get_interface
usb_autopm_put_interface
usb_control_msg
-usb_create_hcd
__usb_create_hcd
+usb_create_hcd
usb_debug_root
usb_decode_ctrl
usb_del_gadget
usb_del_gadget_udc
usb_deregister
usb_deregister_dev
-usb_disabled
usb_disable_autosuspend
+usb_disabled
usb_driver_claim_interface
usb_driver_release_interface
usb_ep_set_maxpacket_limit
@@ -2239,9 +2283,9 @@
usb_get_maximum_ssp_rate
usb_get_role_switch_default_mode
usb_hcd_check_unlink_urb
-usb_hc_died
usb_hcd_end_port_resume
usb_hcd_giveback_urb
+usb_hc_died
usb_hcd_irq
usb_hcd_is_primary_hcd
usb_hcd_link_urb_to_ep
@@ -2364,6 +2408,7 @@
__video_register_device
video_unregister_device
vmalloc
vmalloc_nr_pages
vmalloc_to_page
vmalloc_to_pfn
vmalloc_user
@@ -2371,6 +2416,7 @@
vm_event_states
vmf_insert_pfn_prot
vm_insert_page
vm_node_stat
vm_unmap_aliases
vm_zone_stat
vprintk


@@ -205,6 +205,9 @@
cpu_topology
crc32_le
css_next_child
css_task_iter_end
css_task_iter_next
css_task_iter_start
csum_partial
_ctype
debugfs_attr_read
@@ -257,6 +260,7 @@
_dev_info
__dev_kfree_skb_any
devm_add_action
devm_bitmap_zalloc
devm_clk_get
devm_free_irq
devm_fwnode_gpiod_get_index
@@ -1330,6 +1334,8 @@
__tracepoint_android_vh_use_amu_fie
__tracepoint_binder_transaction_received
__tracepoint_cpu_frequency_limits
tracing_off
tracing_on
try_module_get
__ubsan_handle_cfi_check_fail_abort
__udelay


@@ -192,6 +192,7 @@
copy_page
_copy_to_iter
cpu_bit_bitmap
cpufreq_boost_enabled
cpufreq_cpu_get_raw
cpufreq_dbs_governor_exit
cpufreq_dbs_governor_init
@@ -199,6 +200,8 @@
cpufreq_dbs_governor_start
cpufreq_dbs_governor_stop
__cpufreq_driver_target
cpufreq_freq_attr_scaling_available_freqs
cpufreq_freq_attr_scaling_boost_freqs
cpufreq_generic_attr
cpufreq_generic_frequency_table_verify
cpufreq_register_driver
@@ -352,6 +355,7 @@
device_register
device_remove_file
device_rename
device_set_wakeup_capable
device_set_wakeup_enable
device_unregister
_dev_info
@@ -403,8 +407,10 @@
devm_phy_optional_get
devm_pinctrl_get
devm_pinctrl_put
devm_platform_ioremap_resource
devm_pwm_get
__devm_regmap_init_i2c
__devm_regmap_init_mmio_clk
devm_regulator_bulk_get
devm_regulator_get
devm_regulator_register
@@ -523,6 +529,7 @@
d_obtain_alias
do_exit
do_trace_netlink_extack
do_wait_intr
down
downgrade_write
down_interruptible
@@ -563,6 +570,7 @@
drm_atomic_helper_wait_for_fences
drm_atomic_state_default_clear
__drm_atomic_state_free
drm_bridge_add
drm_compat_ioctl
drm_connector_attach_encoder
drm_connector_cleanup
@@ -582,6 +590,11 @@
drm_dev_alloc
drm_dev_register
drm_display_mode_from_videomode
drm_dp_aux_init
drm_dp_channel_eq_ok
drm_dp_clock_recovery_ok
drm_dp_dpcd_read
drm_dp_dpcd_write
drm_encoder_cleanup
drm_encoder_init
__drm_err
@@ -994,11 +1007,14 @@
kmsg_dump_register
kmsg_dump_rewind
kmsg_dump_unregister
kobject_add
kobject_create_and_add
kobject_del
kobject_init
kobject_init_and_add
kobject_put
kobject_uevent
kobj_sysfs_ops
krealloc
kstrdup
kstrndup
@@ -1013,13 +1029,18 @@
kstrtoul_from_user
kstrtoull
kthread_bind
kthread_cancel_work_sync
kthread_create_on_node
kthread_flush_worker
__kthread_init_worker
kthread_park
kthread_parkme
kthread_queue_work
kthread_should_park
kthread_should_stop
kthread_stop
kthread_unpark
kthread_worker_fn
ktime_get
ktime_get_coarse_ts64
ktime_get_coarse_with_offset
@@ -1208,6 +1229,7 @@
of_address_to_resource
of_clk_add_provider
of_clk_get
of_clk_get_by_name
of_clk_get_from_provider
of_clk_src_onecell_get
of_count_phandle_with_args
@@ -1382,6 +1404,7 @@
pm_runtime_force_resume
pm_runtime_force_suspend
__pm_runtime_idle
pm_runtime_no_callbacks
__pm_runtime_resume
pm_runtime_set_autosuspend_delay
__pm_runtime_set_status
@@ -1488,6 +1511,7 @@
__register_blkdev
__register_chrdev
register_chrdev_region
register_die_notifier
register_filesystem
register_inet6addr_notifier
register_inetaddr_notifier
@@ -1569,6 +1593,7 @@
rpmsg_create_ept
rpmsg_find_device
rpmsg_register_device
rpmsg_register_device_override
rpmsg_release_channel
rpmsg_send
rpmsg_sendto
@@ -1759,6 +1784,7 @@
snd_ctl_notify
snd_devm_card_new
snd_ecards_limit
snd_hwdep_new
snd_info_get_line
snd_interval_refine
snd_pcm_format_big_endian
@@ -1871,6 +1897,7 @@
strncat
strncmp
strncpy
strndup_user
strnlen
strnstr
strpbrk
@@ -2015,6 +2042,7 @@
tty_termios_baud_rate
tty_termios_copy_hw
tty_termios_encode_baud_rate
tty_termios_hw_change
tty_unregister_device
tty_unregister_driver
tty_unregister_ldisc
@@ -2041,6 +2069,7 @@
unregister_blkdev
__unregister_chrdev
unregister_chrdev_region
unregister_die_notifier
unregister_filesystem
unregister_inet6addr_notifier
unregister_inetaddr_notifier
@@ -2131,6 +2160,13 @@
usb_role_switch_get_drvdata
usb_role_switch_register
usb_role_switch_unregister
usb_serial_deregister_drivers
usb_serial_generic_close
usb_serial_generic_get_icount
usb_serial_generic_open
usb_serial_generic_throttle
usb_serial_generic_unthrottle
usb_serial_register_drivers
usb_set_interface
usb_show_dynids
usb_speed_string
@@ -2281,6 +2317,7 @@
wait_woken
__wake_up
wake_up_bit
__wake_up_locked
wake_up_process
wakeup_source_add
wakeup_source_create


@@ -1106,6 +1106,7 @@
io_schedule_timeout
iounmap
iov_iter_bvec
iov_iter_kvec
ip_send_check
iput
__irq_alloc_descs
@@ -1837,6 +1838,7 @@
seq_putc
seq_puts
seq_read
seq_read_iter
seq_release
seq_release_private
seq_write


@@ -733,6 +733,7 @@
dump_stack
__dynamic_dev_dbg
__dynamic_pr_debug
emergency_restart
enable_irq
enable_percpu_irq
ether_setup
@@ -844,6 +845,10 @@
get_user_pages
get_zeroed_page
gic_nonsecure_priorities
gic_resume
gic_v3_cpu_init
gic_v3_dist_init
gic_v3_dist_wait_for_rwp
gov_attr_set_init
gov_attr_set_put
governor_sysfs_ops
@@ -1088,6 +1093,7 @@
kernel_getsockname
kernel_kobj
kernel_recvmsg
kernel_restart
kernel_sendmsg
kernfs_find_and_get_ns
kernfs_notify
@@ -1127,6 +1133,7 @@
kobject_uevent_env
kobj_sysfs_ops
krealloc
kset_create_and_add
ksize
ksoftirqd
kstat
@@ -1220,6 +1227,7 @@
mipi_dsi_create_packet
mipi_dsi_dcs_set_display_brightness
mipi_dsi_dcs_set_tear_off
mipi_dsi_dcs_write
mipi_dsi_host_register
mipi_dsi_host_unregister
misc_deregister
@@ -1267,6 +1275,7 @@
mod_node_page_state
mod_timer
mod_timer_pending
__module_get
module_layout
module_put
__msecs_to_jiffies
@@ -1333,6 +1342,7 @@
ns_capable
nsecs_to_jiffies
ns_to_timespec64
__num_online_cpus
nvmem_cell_get
nvmem_cell_put
nvmem_cell_read
@@ -1734,6 +1744,7 @@
rpmsg_get_signals
rpmsg_poll
rpmsg_register_device
rpmsg_register_device_override
rpmsg_rx_done
rpmsg_send
rpmsg_set_signals
@@ -1939,6 +1950,7 @@
snd_soc_dapm_put_enum_double
snd_soc_dapm_put_volsw
snd_soc_dapm_sync
snd_soc_get_dai_name
snd_soc_get_enum_double
snd_soc_get_pcm_runtime
snd_soc_get_volsw
@@ -2067,6 +2079,7 @@
sysfs_remove_link
sysfs_remove_link_from_group
sysfs_streq
sysfs_update_group
sysrq_mask
system_32bit_el0_cpumask
system_freezable_wq
@@ -2167,12 +2180,16 @@
__traceiter_android_vh_binder_wakeup_ilocked
__traceiter_android_vh_cpu_idle_enter
__traceiter_android_vh_cpu_idle_exit
__traceiter_android_vh_cpuidle_psci_enter
__traceiter_android_vh_cpuidle_psci_exit
__traceiter_android_vh_disable_thermal_cooling_stats
__traceiter_android_vh_ftrace_dump_buffer
__traceiter_android_vh_ftrace_format_check
__traceiter_android_vh_ftrace_oops_enter
__traceiter_android_vh_ftrace_oops_exit
__traceiter_android_vh_ftrace_size_check
__traceiter_android_vh_gic_resume
__traceiter_android_vh_gic_v3_suspend
__traceiter_android_vh_ipi_stop
__traceiter_android_vh_jiffies_update
__traceiter_android_vh_mmc_sdio_pm_flag_set
@@ -2242,12 +2259,16 @@
__tracepoint_android_vh_binder_wakeup_ilocked
__tracepoint_android_vh_cpu_idle_enter
__tracepoint_android_vh_cpu_idle_exit
__tracepoint_android_vh_cpuidle_psci_enter
__tracepoint_android_vh_cpuidle_psci_exit
__tracepoint_android_vh_disable_thermal_cooling_stats
__tracepoint_android_vh_ftrace_dump_buffer
__tracepoint_android_vh_ftrace_format_check
__tracepoint_android_vh_ftrace_oops_enter
__tracepoint_android_vh_ftrace_oops_exit
__tracepoint_android_vh_ftrace_size_check
__tracepoint_android_vh_gic_resume
__tracepoint_android_vh_gic_v3_suspend
__tracepoint_android_vh_ipi_stop
__tracepoint_android_vh_jiffies_update
__tracepoint_android_vh_mmc_sdio_pm_flag_set
@@ -2441,6 +2462,9 @@
wakeup_source_unregister
__wake_up_sync
__warn_printk
watchdog_init_timeout
watchdog_register_device
watchdog_unregister_device
wireless_nlevent_flush
wireless_send_event
woken_wake_function


@@ -15,6 +15,8 @@ CONFIG_RCU_EXPERT=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_BOOST=y
CONFIG_RCU_NOCB_CPU=y
+CONFIG_RCU_LAZY=y
+CONFIG_RCU_LAZY_DEFAULT_OFF=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_IKHEADERS=m
@@ -316,7 +318,6 @@ CONFIG_ARM_SCPI_PROTOCOL=y
# CONFIG_ARM_SCPI_POWER_DOMAIN is not set
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
CONFIG_GNSS=y
-CONFIG_BLK_DEV_NULL_BLK=m
CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16


@@ -130,7 +130,8 @@ struct eh_frame {
static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
					bool fde_has_augmentation_data,
-					int code_alignment_factor)
+					int code_alignment_factor,
+					bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
@@ -184,7 +185,8 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
	case DW_CFA_negate_ra_state:
-		scs_patch_loc(loc - 4);
+		if (!dry_run)
+			scs_patch_loc(loc - 4);
break;
case 0x40 ... 0x7f:
@@ -235,9 +237,12 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
} else {
			ret = scs_handle_fde_frame(frame,
						   fde_has_augmentation_data,
-						   code_alignment_factor);
+						   code_alignment_factor,
+						   true);
			if (ret)
				return ret;
+			scs_handle_fde_frame(frame, fde_has_augmentation_data,
+					     code_alignment_factor, false);
}
p += sizeof(frame->size) + frame->size;
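
The shape of this change is the classic validate-then-commit idiom: a dry-run pass that can fail with no side effects, then an apply pass that cannot. A toy user-space model of the idiom (all names illustrative, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

/* handle() mirrors scs_handle_fde_frame(): it may reject its input,
 * but only mutates when dry_run is false. */
static int handle(int *val, bool dry_run)
{
	if (*val < 0)
		return -1;		/* unsupported input */
	if (!dry_run)
		*val += 1;		/* the actual "patch" */
	return 0;
}

static int patch_all(int *vals, int n)
{
	for (int i = 0; i < n; i++)
		if (handle(&vals[i], true))
			return -1;	/* reject before modifying anything */
	for (int i = 0; i < n; i++)
		handle(&vals[i], false);	/* cannot fail past validation */
	return 0;
}

int main(void)
{
	int a[] = { 1, 2, 3 };

	printf("%d %d\n", patch_all(a, 3), a[0]);	/* prints: 0 2 */
	return 0;
}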


@@ -1334,7 +1334,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
hcall_t hfn;
	if (handle_host_dynamic_hcall(host_ctxt) == HCALL_HANDLED)
-		return;
+		goto end;
/*
* If pKVM has been initialised then reject any calls to the
@@ -1359,7 +1359,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
	cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
	hfn(host_ctxt);
+end:
	trace_host_hcall(id, 0);
	return;


@@ -885,7 +885,9 @@ static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level,
* of the page table page.
*/
	if (mm_ops->page_count(childp) == 1) {
-		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
+		kvm_clear_pte(ptep);
+		kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
+		mm_ops->put_page(ptep);
mm_ops->put_page(childp);
}
}


@@ -17,6 +17,8 @@ CONFIG_RCU_EXPERT=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_BOOST=y
CONFIG_RCU_NOCB_CPU=y
+CONFIG_RCU_LAZY=y
+CONFIG_RCU_LAZY_DEFAULT_OFF=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_IKHEADERS=m
@@ -294,7 +296,6 @@ CONFIG_FW_LOADER_USER_HELPER=y
# CONFIG_FW_CACHE is not set
CONFIG_GNSS=y
CONFIG_OF=y
-CONFIG_BLK_DEV_NULL_BLK=m
CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16


@@ -46,7 +46,11 @@
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#ifndef __GENKSYMS__
#include "blk-mq-debugfs.h"
#endif
#include "blk-mq-sched.h"
#include "blk-pm.h"
#ifndef __GENKSYMS__
@@ -1786,6 +1790,7 @@ int __init blk_dev_init(void)
sizeof(struct internal_request_queue), 0, SLAB_PANIC, NULL);
blk_debugfs_root = debugfs_create_dir("block", NULL);
blk_mq_debugfs_init();
return 0;
}


@@ -486,7 +486,7 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
unsigned int nr_segs = 0;
bio_for_each_bvec(bv, bio, iter)
-		nr_segs++;
+		nr_segs += blk_segments(&rq->q->limits, bv.bv_len);
if (!rq->bio) {
blk_rq_bio_prep(rq, bio, nr_segs);


@@ -265,7 +265,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
if (nsegs < max_segs &&
sectors + (bv.bv_len >> 9) <= max_sectors &&
bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
-			nsegs++;
+			/* single-page bvec optimization */
+			nsegs += blk_segments(&q->limits, bv.bv_len);
sectors += bv.bv_len >> 9;
} else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs,
max_sectors)) {
@@ -333,18 +334,17 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
break;
default:
		/*
-		 * All drivers must accept single-segments bios that are <=
-		 * PAGE_SIZE. This is a quick and dirty check that relies on
-		 * the fact that bi_io_vec[0] is always valid if a bio has data.
-		 * The check might lead to occasional false negatives when bios
-		 * are cloned, but compared to the performance impact of cloned
-		 * bios themselves the loop below doesn't matter anyway.
+		 * Check whether bio splitting should be performed. This check may
+		 * trigger the bio splitting code even if splitting is not necessary.
		 */
		if (!q->limits.chunk_sectors &&
		    (*bio)->bi_vcnt == 1 &&
+		    (!blk_queue_sub_page_limits(&q->limits) ||
+		     (*bio)->bi_io_vec->bv_len <= q->limits.max_segment_size) &&
		    ((*bio)->bi_io_vec[0].bv_len +
		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
-			*nr_segs = 1;
+			*nr_segs = blk_segments(&q->limits,
+						(*bio)->bi_io_vec[0].bv_len);
			break;
}
split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
@@ -519,7 +519,10 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
__blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
goto next_bvec;
-		if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
+		if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE &&
+		    (!blk_queue_sub_page_limits(&q->limits) ||
+		     bvec.bv_len <= q->limits.max_segment_size))
+			/* single-segment bvec optimization */
			nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
else
nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);


@@ -1013,3 +1013,12 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
debugfs_remove_recursive(hctx->sched_debugfs_dir);
hctx->sched_debugfs_dir = NULL;
}
DEFINE_DEBUGFS_ATTRIBUTE(blk_sub_page_limit_queues_fops,
blk_sub_page_limit_queues_get, NULL, "%llu\n");
void blk_mq_debugfs_init(void)
{
debugfs_create_file("sub_page_limit_queues", 0400, blk_debugfs_root,
NULL, &blk_sub_page_limit_queues_fops);
}
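
Since blk_dev_init() above creates the "block" debugfs directory (blk_debugfs_root), the new counter should surface as /sys/kernel/debug/block/sub_page_limit_queues once debugfs is mounted. A trivial user-space reader:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/block/sub_page_limit_queues", "r");
	unsigned long long n;

	if (f && fscanf(f, "%llu", &n) == 1)
		printf("queues with sub-page limits: %llu\n", n);
	if (f)
		fclose(f);
	return 0;
}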


@@ -15,6 +15,8 @@ struct blk_mq_debugfs_attr {
const struct seq_operations *seq_ops;
};
void blk_mq_debugfs_init(void);
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
@@ -36,6 +38,10 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q);
#else
static inline void blk_mq_debugfs_init(void)
{
}
static inline void blk_mq_debugfs_register(struct request_queue *q)
{
}


@@ -2,6 +2,9 @@
/*
* Functions related to setting various queue properties from drivers
*/
#define pr_fmt(fmt) "%s: " fmt, __func__
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -18,6 +21,11 @@
#include "blk.h"
#include "blk-wbt.h"
/* Protects blk_nr_sub_page_limit_queues and blk_sub_page_limits changes. */
static DEFINE_MUTEX(blk_sub_page_limit_lock);
static uint32_t blk_nr_sub_page_limit_queues;
DEFINE_STATIC_KEY_FALSE(blk_sub_page_limits);
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
@@ -57,6 +65,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->misaligned = 0;
lim->zoned = BLK_ZONED_NONE;
lim->zone_write_granularity = 0;
lim->sub_page_limits = false;
}
EXPORT_SYMBOL(blk_set_default_limits);
@@ -101,6 +110,58 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce)
}
EXPORT_SYMBOL(blk_queue_bounce_limit);
/* For debugfs. */
int blk_sub_page_limit_queues_get(void *data, u64 *val)
{
*val = READ_ONCE(blk_nr_sub_page_limit_queues);
return 0;
}
/**
* blk_enable_sub_page_limits - enable support for limits below the page size
* @lim: request queue limits for which to enable support of these features.
*
* Enable support for max_segment_size values smaller than PAGE_SIZE and for
* max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these
* features is not enabled all the time because of the runtime overhead of these
* features.
*/
static void blk_enable_sub_page_limits(struct queue_limits *lim)
{
if (lim->sub_page_limits)
return;
lim->sub_page_limits = true;
mutex_lock(&blk_sub_page_limit_lock);
if (++blk_nr_sub_page_limit_queues == 1)
static_branch_enable(&blk_sub_page_limits);
mutex_unlock(&blk_sub_page_limit_lock);
}
/**
 * blk_disable_sub_page_limits - disable support for limits below the page size
 * @lim: request queue limits for which to disable support of these features.
 *
 * Disable support for max_segment_size values smaller than PAGE_SIZE and for
 * max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these
 * features is not enabled all the time because of the runtime overhead of these
 * features.
 */
void blk_disable_sub_page_limits(struct queue_limits *lim)
{
if (!lim->sub_page_limits)
return;
lim->sub_page_limits = false;
mutex_lock(&blk_sub_page_limit_lock);
WARN_ON_ONCE(blk_nr_sub_page_limit_queues <= 0);
if (--blk_nr_sub_page_limit_queues == 0)
static_branch_disable(&blk_sub_page_limits);
mutex_unlock(&blk_sub_page_limit_lock);
}
/**
* blk_queue_max_hw_sectors - set max sectors for a request for this queue
* @q: the request queue for the device
@@ -123,12 +184,17 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
	struct queue_limits *limits = &q->limits;
+	unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT;
	unsigned int max_sectors;

-	if ((max_hw_sectors << 9) < PAGE_SIZE) {
-		max_hw_sectors = 1 << (PAGE_SHIFT - 9);
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_hw_sectors);
+	if (max_hw_sectors < min_max_hw_sectors) {
+		blk_enable_sub_page_limits(limits);
+		min_max_hw_sectors = 1;
+	}
+
+	if (max_hw_sectors < min_max_hw_sectors) {
+		max_hw_sectors = min_max_hw_sectors;
+		pr_info("set to minimum %u\n", max_hw_sectors);
	}
max_hw_sectors = round_down(max_hw_sectors,
@@ -243,8 +309,7 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments
{
	if (!max_segments) {
		max_segments = 1;
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_segments);
+		pr_info("set to minimum %u\n", max_segments);
}
q->limits.max_segments = max_segments;
@@ -278,10 +343,16 @@ EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments);
**/
void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
{
-	if (max_size < PAGE_SIZE) {
-		max_size = PAGE_SIZE;
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_size);
+	unsigned int min_max_segment_size = PAGE_SIZE;
+
+	if (max_size < min_max_segment_size) {
+		blk_enable_sub_page_limits(&q->limits);
+		min_max_segment_size = SECTOR_SIZE;
+	}
+
+	if (max_size < min_max_segment_size) {
+		max_size = min_max_segment_size;
+		pr_info("set to minimum %u\n", max_size);
	}
/* see blk_queue_virt_boundary() for the explanation */
@@ -701,8 +772,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
{
	if (mask < PAGE_SIZE - 1) {
		mask = PAGE_SIZE - 1;
-		printk(KERN_INFO "%s: set to minimum %lx\n",
-		       __func__, mask);
+		pr_info("set to minimum %lx\n", mask);
}
q->limits.seg_boundary_mask = mask;


@@ -811,6 +811,8 @@ static void blk_release_queue(struct kobject *kobj)
blk_queue_free_zone_bitmaps(q);
blk_disable_sub_page_limits(&q->limits);
if (queue_is_mq(q))
blk_mq_release(q);


@@ -16,6 +16,7 @@
#define BLK_MAX_TIMEOUT (5 * HZ)
extern struct dentry *blk_debugfs_root;
DECLARE_STATIC_KEY_FALSE(blk_sub_page_limits);
struct internal_request_queue {
struct request_queue q;
@@ -61,6 +62,15 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);
static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim)
{
return static_branch_unlikely(&blk_sub_page_limits) &&
lim->sub_page_limits;
}
int blk_sub_page_limit_queues_get(void *data, u64 *val);
void blk_disable_sub_page_limits(struct queue_limits *q);
void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
@@ -70,6 +80,24 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
/* Number of DMA segments required to transfer @bytes data. */
static inline unsigned int blk_segments(const struct queue_limits *limits,
unsigned int bytes)
{
if (!blk_queue_sub_page_limits(limits))
return 1;
{
const unsigned int mss = limits->max_segment_size;
if (bytes <= mss)
return 1;
if (is_power_of_2(mss))
return round_up(bytes, mss) >> ilog2(mss);
return (bytes + mss - 1) / mss;
}
}
static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
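
To make blk_segments()'s arithmetic concrete, a stand-alone rendering of its math (plain C, kernel helpers replaced by hand; the power-of-2 fast path is omitted and the "limits disabled" case always returns 1):

#include <assert.h>

/* bytes divided by max_segment_size, rounded up, as in blk_segments()
 * when sub-page limits are enabled for the queue. */
static unsigned int segments(unsigned int bytes, unsigned int mss)
{
	if (bytes <= mss)
		return 1;
	return (bytes + mss - 1) / mss;
}

int main(void)
{
	assert(segments(512, 512) == 1);
	assert(segments(513, 512) == 2);
	assert(segments(1536, 512) == 3);	/* one bvec -> three DMA segments */
	return 0;
}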


@@ -10,11 +10,6 @@ MAKE_GOALS="
modules
"
-if [ "${LTO}" = "none" ]; then
-  echo "The FIPS140 module needs LTO to be enabled."
-  exit 1
-fi
MODULES_ORDER=android/gki_aarch64_fips140_modules
KERNEL_DIR=common


@@ -58,6 +58,33 @@ config CRYPTO_FIPS140_MOD_EVAL_TESTING
errors and support for a userspace interface to some of the module's
services. This option should not be enabled in production builds.
config CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK
bool "Debug the integrity check in FIPS 140 module"
depends on CRYPTO_FIPS140_MOD
help
This option makes the FIPS 140 module provide debugfs files containing
the text and rodata that were used for the integrity check, i.e. the
runtime text and rodata with relocations and code patches unapplied.
This option also makes the module load even if the integrity check
fails so that these files can be used to debug the failure. (A
possible failure mode is that the kernel has added a new type of code
patching and the module needs to be updated to disable or unapply it.)
This option must not be enabled in production builds.
Example commands for debugging an integrity check failure:
adb root
adb shell mount debugfs -t debugfs /sys/kernel/debug
adb shell cp /sys/kernel/debug/fips140/{text,rodata} /data/local/tmp/
adb pull /data/local/tmp/text text.checked
adb pull /data/local/tmp/rodata rodata.checked
llvm-objcopy -O binary --only-section=.text fips140.ko text.orig
llvm-objcopy -O binary --only-section=.rodata fips140.ko rodata.orig
for f in {text,rodata}.{orig,checked}; do xxd -g1 $f > $f.xxd; done
vimdiff text.{orig,checked}.xxd
vimdiff rodata.{orig,checked}.xxd
config CRYPTO_ALGAPI
tristate
select CRYPTO_ALGAPI2


@@ -23,6 +23,7 @@
#undef __DISABLE_EXPORTS
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <crypto/aead.h>
#include <crypto/aes.h>
@@ -357,6 +358,94 @@ static void __init unapply_rodata_relocations(void *section, int section_size,
}
}
enum {
PACIASP = 0xd503233f,
AUTIASP = 0xd50323bf,
SCS_PUSH = 0xf800865e,
SCS_POP = 0xf85f8e5e,
};
/*
* To make the integrity check work with dynamic Shadow Call Stack (SCS),
* replace all instructions that push or pop from the SCS with the Pointer
* Authentication Code (PAC) instructions that were present originally.
*/
static void __init unapply_scs_patch(void *section, int section_size)
{
#if defined(CONFIG_ARM64) && defined(CONFIG_UNWIND_PATCH_PAC_INTO_SCS)
u32 *insns = section;
int i;
for (i = 0; i < section_size / sizeof(insns[0]); i++) {
if (insns[i] == SCS_PUSH)
insns[i] = PACIASP;
else if (insns[i] == SCS_POP)
insns[i] = AUTIASP;
}
#endif
}
#ifdef CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK
static struct {
const void *text;
int textsize;
const void *rodata;
int rodatasize;
} saved_integrity_check_info;
static ssize_t fips140_text_read(struct file *file, char __user *to,
size_t count, loff_t *ppos)
{
return simple_read_from_buffer(to, count, ppos,
saved_integrity_check_info.text,
saved_integrity_check_info.textsize);
}
static ssize_t fips140_rodata_read(struct file *file, char __user *to,
size_t count, loff_t *ppos)
{
return simple_read_from_buffer(to, count, ppos,
saved_integrity_check_info.rodata,
saved_integrity_check_info.rodatasize);
}
static const struct file_operations fips140_text_fops = {
.read = fips140_text_read,
};
static const struct file_operations fips140_rodata_fops = {
.read = fips140_rodata_read,
};
static void fips140_init_integrity_debug_files(const void *text, int textsize,
const void *rodata,
int rodatasize)
{
struct dentry *dir;
dir = debugfs_create_dir("fips140", NULL);
saved_integrity_check_info.text = kmemdup(text, textsize, GFP_KERNEL);
saved_integrity_check_info.textsize = textsize;
if (saved_integrity_check_info.text)
debugfs_create_file("text", 0400, dir, NULL,
&fips140_text_fops);
saved_integrity_check_info.rodata = kmemdup(rodata, rodatasize,
GFP_KERNEL);
saved_integrity_check_info.rodatasize = rodatasize;
if (saved_integrity_check_info.rodata)
debugfs_create_file("rodata", 0400, dir, NULL,
&fips140_rodata_fops);
}
#else /* CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */
static void fips140_init_integrity_debug_files(const void *text, int textsize,
const void *rodata,
int rodatasize)
{
}
#endif /* !CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */
extern struct {
u32 offset;
u32 count;
@@ -398,6 +487,11 @@ static bool __init check_fips140_module_hmac(void)
offset_to_ptr(&fips140_rela_rodata.offset),
fips140_rela_rodata.count);
unapply_scs_patch(textcopy, textsize);
fips140_init_integrity_debug_files(textcopy, textsize,
rodatacopy, rodatasize);
fips140_inject_integrity_failure(textcopy);
tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
@@ -538,10 +632,14 @@ fips140_init(void)
*/
	if (!check_fips140_module_hmac()) {
-		pr_crit("integrity check failed -- giving up!\n");
-		goto panic;
+		if (!IS_ENABLED(CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK)) {
+			pr_crit("integrity check failed -- giving up!\n");
+			goto panic;
+		}
+		pr_crit("ignoring integrity check failure due to debug mode\n");
+	} else {
+		pr_info("integrity check passed\n");
	}
-	pr_info("integrity check passed\n");
complete_all(&fips140_tests_done);


@@ -1925,8 +1925,10 @@ static size_t binder_get_object(struct binder_proc *proc,
size_t object_size = 0;
	read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
-	if (offset > buffer->data_size || read_size < sizeof(*hdr))
+	if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+	    !IS_ALIGNED(offset, sizeof(u32)))
		return 0;
if (u) {
if (copy_from_user(object, u + offset, read_size))
return 0;
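
The added condition is just a mask test; a user-space sketch of the guard's semantics (IS_ALIGNED expanded by hand, helper name hypothetical):

#include <stdint.h>
#include <stdio.h>

#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

/* Mirrors the rejected cases above: offset past the buffer, a short
 * read, or an offset that is not 4-byte aligned. */
static int object_offset_ok(size_t offset, size_t data_size,
			    size_t read_size, size_t hdr_size)
{
	return offset <= data_size && read_size >= hdr_size &&
	       IS_ALIGNED(offset, sizeof(uint32_t));
}

int main(void)
{
	printf("%d\n", object_offset_ok(6, 64, 8, 8));	/* 0: unaligned offset */
	printf("%d\n", object_offset_ok(8, 64, 8, 8));	/* 1: accepted */
	return 0;
}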


@@ -93,6 +93,7 @@ static void update_pm_runtime_accounting(struct device *dev)
static void __update_runtime_status(struct device *dev, enum rpm_status status)
{
update_pm_runtime_accounting(dev);
trace_rpm_status(dev, status);
dev->power.runtime_status = status;
}


@@ -156,6 +156,10 @@ static int g_max_sectors;
module_param_named(max_sectors, g_max_sectors, int, 0444);
MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
static unsigned int g_max_segment_size = BLK_MAX_SEGMENT_SIZE;
module_param_named(max_segment_size, g_max_segment_size, int, 0444);
MODULE_PARM_DESC(max_segment_size, "Maximum size of a segment in bytes");
static unsigned int nr_devices = 1;
module_param(nr_devices, uint, 0444);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
@@ -351,6 +355,7 @@ NULLB_DEVICE_ATTR(home_node, uint, NULL);
NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
NULLB_DEVICE_ATTR(blocksize, uint, NULL);
NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
NULLB_DEVICE_ATTR(max_segment_size, uint, NULL);
NULLB_DEVICE_ATTR(irqmode, uint, NULL);
NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
NULLB_DEVICE_ATTR(index, uint, NULL);
@@ -470,6 +475,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_queue_mode,
&nullb_device_attr_blocksize,
&nullb_device_attr_max_sectors,
&nullb_device_attr_max_segment_size,
&nullb_device_attr_irqmode,
&nullb_device_attr_hw_queue_depth,
&nullb_device_attr_index,
@@ -541,7 +547,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
	return snprintf(page, PAGE_SIZE,
-			"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
+			"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,max_segment_size,virt_boundary\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -597,6 +603,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->queue_mode = g_queue_mode;
dev->blocksize = g_bs;
dev->max_sectors = g_max_sectors;
dev->max_segment_size = g_max_segment_size;
dev->irqmode = g_irqmode;
dev->hw_queue_depth = g_hw_queue_depth;
dev->blocking = g_blocking;
@@ -1135,6 +1142,8 @@ static int null_transfer(struct nullb *nullb, struct page *page,
unsigned int valid_len = len;
int err = 0;
WARN_ONCE(len > dev->max_segment_size, "%u > %u\n", len,
dev->max_segment_size);
if (!is_write) {
if (dev->zoned)
valid_len = null_zone_valid_read_len(nullb,
@@ -1170,7 +1179,8 @@ static int null_handle_rq(struct nullb_cmd *cmd)
spin_lock_irq(&nullb->lock);
	rq_for_each_segment(bvec, rq, iter) {
-		len = bvec.bv_len;
+		len = min(bvec.bv_len, nullb->dev->max_segment_size);
+		bvec.bv_len = len;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
rq->cmd_flags & REQ_FUA);
@@ -1197,7 +1207,8 @@ static int null_handle_bio(struct nullb_cmd *cmd)
spin_lock_irq(&nullb->lock);
	bio_for_each_segment(bvec, bio, iter) {
-		len = bvec.bv_len;
+		len = min(bvec.bv_len, nullb->dev->max_segment_size);
+		bvec.bv_len = len;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), sector,
bio->bi_opf & REQ_FUA);


@@ -88,6 +88,7 @@ struct nullb_device {
unsigned int queue_mode; /* block interface */
unsigned int blocksize; /* block size */
unsigned int max_sectors; /* Max sectors per command */
unsigned int max_segment_size; /* Max size of a single DMA segment. */
unsigned int irqmode; /* IRQ completion handler */
unsigned int hw_queue_depth; /* queue depth */
unsigned int index; /* index of the disk, only valid with a disk */


@@ -11,6 +11,8 @@
#include <linux/hid.h>
#include <linux/idr.h>
#include <linux/input/mt.h>
#include <linux/leds.h>
#include <linux/led-class-multicolor.h>
#include <linux/module.h>
#include <asm/unaligned.h>
@@ -38,11 +40,13 @@ struct ps_device {
uint8_t battery_capacity;
int battery_status;
const char *input_dev_name; /* Name of primary input device. */
uint8_t mac_address[6]; /* Note: stored in little endian order. */
uint32_t hw_version;
uint32_t fw_version;
int (*parse_report)(struct ps_device *dev, struct hid_report *report, u8 *data, int size);
void (*remove)(struct ps_device *dev);
};
/* Calibration data for playstation motion sensors. */
@@ -53,6 +57,13 @@ struct ps_calibration_data {
int sens_denom;
};
struct ps_led_info {
const char *name;
const char *color;
enum led_brightness (*brightness_get)(struct led_classdev *cdev);
int (*brightness_set)(struct led_classdev *cdev, enum led_brightness);
};
/* Seed values for DualShock4 / DualSense CRC32 for different report types. */
#define PS_INPUT_CRC32_SEED 0xA1
#define PS_OUTPUT_CRC32_SEED 0xA2
@@ -97,6 +108,9 @@ struct ps_calibration_data {
#define DS_STATUS_CHARGING GENMASK(7, 4)
#define DS_STATUS_CHARGING_SHIFT 4
/* Feature version from DualSense Firmware Info report. */
#define DS_FEATURE_VERSION(major, minor) ((major & 0xff) << 8 | (minor & 0xff))
/*
* Status of a DualSense touch point contact.
* Contact IDs, with highest bit set are 'inactive'
@@ -115,6 +129,7 @@ struct ps_calibration_data {
#define DS_OUTPUT_VALID_FLAG1_RELEASE_LEDS BIT(3)
#define DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE BIT(4)
#define DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE BIT(1)
#define DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2 BIT(2)
#define DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE BIT(4)
#define DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT BIT(1)
@@ -132,6 +147,9 @@ struct dualsense {
struct input_dev *sensors;
struct input_dev *touchpad;
/* Update version is used as a feature/capability version. */
uint16_t update_version;
/* Calibration data for accelerometer and gyroscope. */
struct ps_calibration_data accel_calib_data[3];
struct ps_calibration_data gyro_calib_data[3];
@@ -142,11 +160,13 @@ struct dualsense {
uint32_t sensor_timestamp_us;
/* Compatible rumble state */
bool use_vibration_v2;
bool update_rumble;
uint8_t motor_left;
uint8_t motor_right;
/* RGB lightbar */
struct led_classdev_mc lightbar;
bool update_lightbar;
uint8_t lightbar_red;
uint8_t lightbar_green;
@@ -163,6 +183,7 @@ struct dualsense {
struct led_classdev player_leds[5];
struct work_struct output_worker;
bool output_worker_initialized;
void *output_report_dmabuf;
uint8_t output_seq; /* Sequence number for output report. */
};
@@ -288,6 +309,9 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = {
{0, 0},
};
static inline void dualsense_schedule_work(struct dualsense *ds);
static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue);
/*
* Add a new ps_device to ps_devices if it doesn't exist.
* Return error on duplicate device, which can happen if the same
@@ -525,6 +549,71 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu
return 0;
}
static int ps_led_register(struct ps_device *ps_dev, struct led_classdev *led,
const struct ps_led_info *led_info)
{
int ret;
led->name = devm_kasprintf(&ps_dev->hdev->dev, GFP_KERNEL,
"%s:%s:%s", ps_dev->input_dev_name, led_info->color, led_info->name);
if (!led->name)
return -ENOMEM;
led->brightness = 0;
led->max_brightness = 1;
led->flags = LED_CORE_SUSPENDRESUME;
led->brightness_get = led_info->brightness_get;
led->brightness_set_blocking = led_info->brightness_set;
ret = devm_led_classdev_register(&ps_dev->hdev->dev, led);
if (ret) {
hid_err(ps_dev->hdev, "Failed to register LED %s: %d\n", led_info->name, ret);
return ret;
}
return 0;
}
/* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. */
static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev,
int (*brightness_set)(struct led_classdev *, enum led_brightness))
{
struct hid_device *hdev = ps_dev->hdev;
struct mc_subled *mc_led_info;
struct led_classdev *led_cdev;
int ret;
mc_led_info = devm_kmalloc_array(&hdev->dev, 3, sizeof(*mc_led_info),
GFP_KERNEL | __GFP_ZERO);
if (!mc_led_info)
return -ENOMEM;
mc_led_info[0].color_index = LED_COLOR_ID_RED;
mc_led_info[1].color_index = LED_COLOR_ID_GREEN;
mc_led_info[2].color_index = LED_COLOR_ID_BLUE;
lightbar_mc_dev->subled_info = mc_led_info;
lightbar_mc_dev->num_colors = 3;
led_cdev = &lightbar_mc_dev->led_cdev;
led_cdev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s:rgb:indicator",
ps_dev->input_dev_name);
if (!led_cdev->name)
return -ENOMEM;
led_cdev->brightness = 255;
led_cdev->max_brightness = 255;
led_cdev->brightness_set_blocking = brightness_set;
ret = devm_led_classdev_multicolor_register(&hdev->dev, lightbar_mc_dev);
if (ret < 0) {
hid_err(hdev, "Cannot register multicolor LED device\n");
return ret;
}
return 0;
}
static struct input_dev *ps_sensors_create(struct hid_device *hdev, int accel_range, int accel_res,
int gyro_range, int gyro_res)
{
@@ -614,15 +703,12 @@ static ssize_t hardware_version_show(struct device *dev,
static DEVICE_ATTR_RO(hardware_version);
-static struct attribute *ps_device_attributes[] = {
+static struct attribute *ps_device_attrs[] = {
&dev_attr_firmware_version.attr,
&dev_attr_hardware_version.attr,
NULL
};
-static const struct attribute_group ps_device_attribute_group = {
-	.attrs = ps_device_attributes,
-};
+ATTRIBUTE_GROUPS(ps_device);
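
ATTRIBUTE_GROUPS() is the sysfs helper macro; per include/linux/sysfs.h it roughly expands to the attribute group plus the NULL-terminated groups array that the driver core's dev_groups expects:

/* Approximate expansion of ATTRIBUTE_GROUPS(ps_device): */
static const struct attribute_group ps_device_group = {
	.attrs = ps_device_attrs,
};
static const struct attribute_group *ps_device_groups[] = {
	&ps_device_group,
	NULL,
};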
static int dualsense_get_calibration_data(struct dualsense *ds)
{
@@ -746,6 +832,7 @@ err_free:
return ret;
}
static int dualsense_get_firmware_info(struct dualsense *ds)
{
uint8_t *buf;
@@ -765,6 +852,15 @@ static int dualsense_get_firmware_info(struct dualsense *ds)
ds->base.hw_version = get_unaligned_le32(&buf[24]);
ds->base.fw_version = get_unaligned_le32(&buf[28]);
/* Update version is some kind of feature version. It is distinct from
* the firmware version as there can be many different variations of a
* controller over time with the same physical shell, but with different
* PCBs and other internal changes. The update version (internal name) is
* used as a means to detect what features are available and change behavior.
* Note: the version is different between DualSense and DualSense Edge.
*/
ds->update_version = get_unaligned_le16(&buf[44]);
err_free:
kfree(buf);
return ret;
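
For reference, DS_FEATURE_VERSION() packs major.minor into one integer so the update version read above compares naturally; e.g. feature version 2.21 becomes (2 << 8) | 21 = 0x0215:

#include <assert.h>

#define DS_FEATURE_VERSION(major, minor) ((major & 0xff) << 8 | (minor & 0xff))

int main(void)
{
	assert(DS_FEATURE_VERSION(2, 21) == 0x0215);
	/* any newer feature version compares greater */
	assert(DS_FEATURE_VERSION(3, 0) > DS_FEATURE_VERSION(2, 21));
	return 0;
}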
@@ -793,6 +889,53 @@ err_free:
return ret;
}
static int dualsense_lightbar_set_brightness(struct led_classdev *cdev,
enum led_brightness brightness)
{
struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev);
struct dualsense *ds = container_of(mc_cdev, struct dualsense, lightbar);
uint8_t red, green, blue;
led_mc_calc_color_components(mc_cdev, brightness);
red = mc_cdev->subled_info[0].brightness;
green = mc_cdev->subled_info[1].brightness;
blue = mc_cdev->subled_info[2].brightness;
dualsense_set_lightbar(ds, red, green, blue);
return 0;
}
static enum led_brightness dualsense_player_led_get_brightness(struct led_classdev *led)
{
struct hid_device *hdev = to_hid_device(led->dev->parent);
struct dualsense *ds = hid_get_drvdata(hdev);
return !!(ds->player_leds_state & BIT(led - ds->player_leds));
}
static int dualsense_player_led_set_brightness(struct led_classdev *led, enum led_brightness value)
{
struct hid_device *hdev = to_hid_device(led->dev->parent);
struct dualsense *ds = hid_get_drvdata(hdev);
unsigned long flags;
unsigned int led_index;
spin_lock_irqsave(&ds->base.lock, flags);
led_index = led - ds->player_leds;
if (value == LED_OFF)
ds->player_leds_state &= ~BIT(led_index);
else
ds->player_leds_state |= BIT(led_index);
ds->update_player_leds = true;
spin_unlock_irqrestore(&ds->base.lock, flags);
dualsense_schedule_work(ds);
return 0;
}
static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp,
void *buf)
{
@@ -832,6 +975,16 @@ static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_
}
}
static inline void dualsense_schedule_work(struct dualsense *ds)
{
unsigned long flags;
spin_lock_irqsave(&ds->base.lock, flags);
if (ds->output_worker_initialized)
schedule_work(&ds->output_worker);
spin_unlock_irqrestore(&ds->base.lock, flags);
}
/*
* Helper function to send DualSense output reports. Applies a CRC at the end of a report
* for Bluetooth reports.
@@ -870,7 +1023,10 @@ static void dualsense_output_worker(struct work_struct *work)
if (ds->update_rumble) {
/* Select classic rumble style haptics and enable it. */
		common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT;
-		common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION;
+		if (ds->use_vibration_v2)
+			common->valid_flag2 |= DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2;
+		else
+			common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION;
common->motor_left = ds->motor_left;
common->motor_right = ds->motor_right;
ds->update_rumble = false;
@@ -992,7 +1148,7 @@ static int dualsense_parse_report(struct ps_device *ps_dev, struct hid_report *r
spin_unlock_irqrestore(&ps_dev->lock, flags);
/* Schedule updating of microphone state at hardware level. */
-	schedule_work(&ds->output_worker);
+	dualsense_schedule_work(ds);
}
ds->last_btn_mic_state = btn_mic_state;
@@ -1107,10 +1263,22 @@ static int dualsense_play_effect(struct input_dev *dev, void *data, struct ff_ef
ds->motor_right = effect->u.rumble.weak_magnitude / 256;
spin_unlock_irqrestore(&ds->base.lock, flags);
schedule_work(&ds->output_worker);
dualsense_schedule_work(ds);
return 0;
}
static void dualsense_remove(struct ps_device *ps_dev)
{
struct dualsense *ds = container_of(ps_dev, struct dualsense, base);
unsigned long flags;
spin_lock_irqsave(&ds->base.lock, flags);
ds->output_worker_initialized = false;
spin_unlock_irqrestore(&ds->base.lock, flags);
cancel_work_sync(&ds->output_worker);
}
static int dualsense_reset_leds(struct dualsense *ds)
{
struct dualsense_output_report report;
@@ -1138,12 +1306,16 @@ static int dualsense_reset_leds(struct dualsense *ds)
static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue)
{
unsigned long flags;
spin_lock_irqsave(&ds->base.lock, flags);
ds->update_lightbar = true;
ds->lightbar_red = red;
ds->lightbar_green = green;
ds->lightbar_blue = blue;
spin_unlock_irqrestore(&ds->base.lock, flags);
schedule_work(&ds->output_worker);
dualsense_schedule_work(ds);
}
static void dualsense_set_player_leds(struct dualsense *ds)
@@ -1166,7 +1338,7 @@ static void dualsense_set_player_leds(struct dualsense *ds)
ds->update_player_leds = true;
ds->player_leds_state = player_ids[player_id];
schedule_work(&ds->output_worker);
dualsense_schedule_work(ds);
}
static struct ps_device *dualsense_create(struct hid_device *hdev)
@@ -1174,7 +1346,20 @@ static struct ps_device *dualsense_create(struct hid_device *hdev)
struct dualsense *ds;
struct ps_device *ps_dev;
uint8_t max_output_report_size;
int ret;
int i, ret;
static const struct ps_led_info player_leds_info[] = {
{ LED_FUNCTION_PLAYER1, "white", dualsense_player_led_get_brightness,
dualsense_player_led_set_brightness },
{ LED_FUNCTION_PLAYER2, "white", dualsense_player_led_get_brightness,
dualsense_player_led_set_brightness },
{ LED_FUNCTION_PLAYER3, "white", dualsense_player_led_get_brightness,
dualsense_player_led_set_brightness },
{ LED_FUNCTION_PLAYER4, "white", dualsense_player_led_get_brightness,
dualsense_player_led_set_brightness },
{ LED_FUNCTION_PLAYER5, "white", dualsense_player_led_get_brightness,
dualsense_player_led_set_brightness }
};
ds = devm_kzalloc(&hdev->dev, sizeof(*ds), GFP_KERNEL);
if (!ds)
@@ -1192,7 +1377,9 @@ static struct ps_device *dualsense_create(struct hid_device *hdev)
ps_dev->battery_capacity = 100; /* initial value until parse_report. */
ps_dev->battery_status = POWER_SUPPLY_STATUS_UNKNOWN;
ps_dev->parse_report = dualsense_parse_report;
ps_dev->remove = dualsense_remove;
INIT_WORK(&ds->output_worker, dualsense_output_worker);
ds->output_worker_initialized = true;
hid_set_drvdata(hdev, ds);
max_output_report_size = sizeof(struct dualsense_output_report_bt);
@@ -1213,6 +1400,21 @@ static struct ps_device *dualsense_create(struct hid_device *hdev)
return ERR_PTR(ret);
}
/* The original DualSense firmware simulated classic controller rumble
 * through its new haptics hardware, which felt different from the classic
 * rumble users were used to. Newer firmware therefore introduced a changed
 * 'v2' behavior and made it the default on PlayStation and other platforms.
 * The original DualSense needs firmware at least as new as that bundled
 * with the PS5 software released in 2021; the DualSense Edge supports it
 * out of the box. Both devices still support the old mode, but it is
 * effectively unused.
 */
if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) {
/* Feature version 2.21 introduced new vibration method. */
ds->use_vibration_v2 = ds->update_version >= DS_FEATURE_VERSION(2, 21);
} else if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) {
ds->use_vibration_v2 = true;
}
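For reference, this gate compares packed 16-bit values. DS_FEATURE_VERSION() is defined earlier in the driver and is not shown in these hunks; assuming the conventional major-in-high-byte packing, the 2.21 check works out as:

/* Assumed packing (not visible in this diff): major in the high byte,
 * minor in the low byte, so DS_FEATURE_VERSION(2, 21) == 0x0215. */
#define DS_FEATURE_VERSION(major, minor)        (((major) << 8) | (minor))

/* ds->update_version was read as a little-endian u16 at offset 44 of the
 * firmware-info report; e.g. version 3.0 (0x0300) >= 2.21 (0x0215). */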
ret = ps_devices_list_add(ps_dev);
if (ret)
return ERR_PTR(ret);
@@ -1228,6 +1430,8 @@ static struct ps_device *dualsense_create(struct hid_device *hdev)
ret = PTR_ERR(ds->gamepad);
goto err;
}
/* Use gamepad input device name as primary device name for e.g. LEDs */
ps_dev->input_dev_name = dev_name(&ds->gamepad->dev);
ds->sensors = ps_sensors_create(hdev, DS_ACC_RANGE, DS_ACC_RES_PER_G,
DS_GYRO_RANGE, DS_GYRO_RES_PER_DEG_S);
@@ -1255,8 +1459,21 @@ static struct ps_device *dualsense_create(struct hid_device *hdev)
if (ret)
goto err;
ret = ps_lightbar_register(ps_dev, &ds->lightbar, dualsense_lightbar_set_brightness);
if (ret)
goto err;
/* Set default lightbar color. */
dualsense_set_lightbar(ds, 0, 0, 128); /* blue */
for (i = 0; i < ARRAY_SIZE(player_leds_info); i++) {
const struct ps_led_info *led_info = &player_leds_info[i];
ret = ps_led_register(ps_dev, &ds->player_leds[i], led_info);
if (ret < 0)
goto err;
}
ret = ps_device_set_player_id(ps_dev);
if (ret) {
hid_err(hdev, "Failed to assign player id for DualSense: %d\n", ret);
@@ -1324,12 +1541,6 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id)
}
}
ret = devm_device_add_group(&hdev->dev, &ps_device_attribute_group);
if (ret) {
hid_err(hdev, "Failed to register sysfs nodes.\n");
goto err_close;
}
return ret;
err_close:
@@ -1346,6 +1557,9 @@ static void ps_remove(struct hid_device *hdev)
ps_devices_list_remove(dev);
ps_device_release_player_id(dev);
if (dev->remove)
dev->remove(dev);
hid_hw_close(hdev);
hid_hw_stop(hdev);
}
@@ -1365,6 +1579,9 @@ static struct hid_driver ps_driver = {
.probe = ps_probe,
.remove = ps_remove,
.raw_event = ps_raw_event,
.driver = {
.dev_groups = ps_device_groups,
},
};
static int __init ps_init(void)


@@ -1113,6 +1113,8 @@ static void etm4_init_arch_data(void *info)
drvdata->nr_event = BMVAL(etmidr0, 10, 11);
/* QSUPP, bits[16:15] Q element support field */
drvdata->q_support = BMVAL(etmidr0, 15, 16);
if (drvdata->q_support)
drvdata->q_filt = !!(etmidr0 & TRCIDR0_QFILT);
/* TSSIZE, bits[28:24] Global timestamp size field */
drvdata->ts_size = BMVAL(etmidr0, 24, 28);
@@ -1634,16 +1636,14 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR);
state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR);
state->trcqctlr = etm4x_read32(csa, TRCQCTLR);
if (drvdata->q_filt)
state->trcqctlr = etm4x_read32(csa, TRCQCTLR);
state->trcvictlr = etm4x_read32(csa, TRCVICTLR);
state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR);
state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR);
if (drvdata->nr_pe_cmp)
state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR);
state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR);
state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR);
state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR);
for (i = 0; i < drvdata->nrseqstate - 1; i++)
state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
@@ -1660,7 +1660,8 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i));
}
for (i = 0; i < drvdata->nr_resource * 2; i++)
/* Resource selector pair 0 is reserved */
for (i = 2; i < drvdata->nr_resource * 2; i++)
state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i));
for (i = 0; i < drvdata->nr_ss_cmp; i++) {
@@ -1745,8 +1746,10 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
{
int i;
struct etmv4_save_state *state = drvdata->save_state;
struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
struct csdev_access *csa = &tmp_csa;
struct csdev_access *csa = &drvdata->csdev->access;
if (WARN_ON(!drvdata->csdev))
return;
etm4_cs_unlock(drvdata, csa);
etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
@@ -1765,16 +1768,14 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR);
etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR);
etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR);
if (drvdata->q_filt)
etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR);
etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR);
etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR);
etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR);
if (drvdata->nr_pe_cmp)
etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR);
etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR);
etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR);
etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR);
for (i = 0; i < drvdata->nrseqstate - 1; i++)
etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
@@ -1791,7 +1792,8 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i));
}
for (i = 0; i < drvdata->nr_resource * 2; i++)
/* Resource selector pair 0 is reserved */
for (i = 2; i < drvdata->nr_resource * 2; i++)
etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
for (i = 0; i < drvdata->nr_ss_cmp; i++) {


@@ -43,9 +43,6 @@
#define TRCVIIECTLR 0x084
#define TRCVISSCTLR 0x088
#define TRCVIPCSSCTLR 0x08C
#define TRCVDCTLR 0x0A0
#define TRCVDSACCTLR 0x0A4
#define TRCVDARCCTLR 0x0A8
/* Derived resources registers */
#define TRCSEQEVRn(n) (0x100 + (n * 4)) /* n = 0-2 */
#define TRCSEQRSTEVR 0x118
@@ -90,9 +87,6 @@
/* Address Comparator registers n = 0-15 */
#define TRCACVRn(n) (0x400 + (n * 8))
#define TRCACATRn(n) (0x480 + (n * 8))
/* Data Value Comparator Value registers, n = 0-7 */
#define TRCDVCVRn(n) (0x500 + (n * 16))
#define TRCDVCMRn(n) (0x580 + (n * 16))
/* ContextID/Virtual ContextID comparators, n = 0-7 */
#define TRCCIDCVRn(n) (0x600 + (n * 8))
#define TRCVMIDCVRn(n) (0x640 + (n * 8))
@@ -131,6 +125,8 @@
#define TRCRSR_TA BIT(12)
#define TRCIDR0_QFILT BIT(14)
/*
* System instructions to access ETM registers.
* See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
@@ -174,9 +170,6 @@
/* List of registers accessible via System instructions */
#define ETM4x_ONLY_SYSREG_LIST(op, val) \
CASE_##op((val), TRCPROCSELR) \
CASE_##op((val), TRCVDCTLR) \
CASE_##op((val), TRCVDSACCTLR) \
CASE_##op((val), TRCVDARCCTLR) \
CASE_##op((val), TRCOSLAR)
#define ETM_COMMON_SYSREG_LIST(op, val) \
@@ -324,22 +317,6 @@
CASE_##op((val), TRCACATRn(13)) \
CASE_##op((val), TRCACATRn(14)) \
CASE_##op((val), TRCACATRn(15)) \
CASE_##op((val), TRCDVCVRn(0)) \
CASE_##op((val), TRCDVCVRn(1)) \
CASE_##op((val), TRCDVCVRn(2)) \
CASE_##op((val), TRCDVCVRn(3)) \
CASE_##op((val), TRCDVCVRn(4)) \
CASE_##op((val), TRCDVCVRn(5)) \
CASE_##op((val), TRCDVCVRn(6)) \
CASE_##op((val), TRCDVCVRn(7)) \
CASE_##op((val), TRCDVCMRn(0)) \
CASE_##op((val), TRCDVCMRn(1)) \
CASE_##op((val), TRCDVCMRn(2)) \
CASE_##op((val), TRCDVCMRn(3)) \
CASE_##op((val), TRCDVCMRn(4)) \
CASE_##op((val), TRCDVCMRn(5)) \
CASE_##op((val), TRCDVCMRn(6)) \
CASE_##op((val), TRCDVCMRn(7)) \
CASE_##op((val), TRCCIDCVRn(0)) \
CASE_##op((val), TRCCIDCVRn(1)) \
CASE_##op((val), TRCCIDCVRn(2)) \
@@ -821,9 +798,6 @@ struct etmv4_save_state {
u32 trcviiectlr;
u32 trcvissctlr;
u32 trcvipcssctlr;
u32 trcvdctlr;
u32 trcvdsacctlr;
u32 trcvdarcctlr;
u32 trcseqevr[ETM_MAX_SEQ_STATES];
u32 trcseqrstevr;
@@ -895,6 +869,7 @@ struct etmv4_save_state {
* @os_unlock: True if access to management registers is allowed.
* @instrp0: Tracing of load and store instructions
* as P0 elements is supported.
* @q_filt: Q element filtering support, if Q elements are supported.
* @trcbb: Indicates if the trace unit supports branch broadcast tracing.
* @trccond: If the trace unit supports conditional
* instruction tracing.
@@ -957,6 +932,7 @@ struct etmv4_drvdata {
bool boot_enable;
bool os_unlock;
bool instrp0;
bool q_filt;
bool trcbb;
bool trccond;
bool retstack;


@@ -30,7 +30,6 @@ static LIST_HEAD(icc_providers);
static int providers_count;
static bool synced_state;
static DEFINE_MUTEX(icc_lock);
static DEFINE_MUTEX(icc_bw_lock);
static struct dentry *icc_debugfs_dir;
static void icc_summary_show_one(struct seq_file *s, struct icc_node *n)
@@ -637,7 +636,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
if (WARN_ON(IS_ERR(path) || !path->num_nodes))
return -EINVAL;
mutex_lock(&icc_bw_lock);
mutex_lock(&icc_lock);
old_avg = path->reqs[0].avg_bw;
old_peak = path->reqs[0].peak_bw;
@@ -669,7 +668,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
apply_constraints(path);
}
mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
trace_icc_set_bw_end(path, ret);
@@ -972,7 +971,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
return;
mutex_lock(&icc_lock);
mutex_lock(&icc_bw_lock);
node->provider = provider;
list_add_tail(&node->node_list, &provider->nodes);
@@ -998,7 +996,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
node->avg_bw = 0;
node->peak_bw = 0;
mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
}
EXPORT_SYMBOL_GPL(icc_node_add);
@@ -1126,7 +1123,6 @@ void icc_sync_state(struct device *dev)
return;
mutex_lock(&icc_lock);
mutex_lock(&icc_bw_lock);
synced_state = true;
list_for_each_entry(p, &icc_providers, provider_list) {
dev_dbg(p->dev, "interconnect provider is in synced state\n");
@@ -1139,21 +1135,13 @@ void icc_sync_state(struct device *dev)
}
}
}
mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
}
EXPORT_SYMBOL_GPL(icc_sync_state);
static int __init icc_init(void)
{
struct device_node *root;
/* Teach lockdep about lock ordering wrt. shrinker: */
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&icc_bw_lock);
fs_reclaim_release(GFP_KERNEL);
root = of_find_node_by_path("/");
struct device_node *root = of_find_node_by_path("/");
providers_count = of_count_icc_providers(root);
of_node_put(root);
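One detail of the removed icc_init() hunk deserves a note: the fs_reclaim calls existed purely to teach lockdep, once at boot, that icc_bw_lock could be taken under memory reclaim, so any reclaim-unsafe use would be flagged immediately instead of waiting for a rare real race. The technique in isolation (a sketch; some_lock is a placeholder):

/* Enter a pretend reclaim context, record that some_lock may be taken
 * there, then leave. Lockdep will from now on report any path that can
 * wait on reclaim while holding some_lock. */
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&some_lock);
fs_reclaim_release(GFP_KERNEL);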


@@ -51,12 +51,9 @@ struct io_stats {
#define UID_STATE_FOREGROUND 0
#define UID_STATE_BACKGROUND 1
#define UID_STATE_BUCKET_SIZE 2
#define UID_STATE_TOTAL_CURR 2
#define UID_STATE_TOTAL_LAST 3
#define UID_STATE_DEAD_TASKS 4
#define UID_STATE_SIZE 5
#define UID_STATE_TOTAL_LAST 2
#define UID_STATE_DEAD_TASKS 3
#define UID_STATE_SIZE 4
#define MAX_TASK_COMM_LEN 256
@@ -71,8 +68,6 @@ struct uid_entry {
uid_t uid;
u64 utime;
u64 stime;
u64 active_utime;
u64 active_stime;
int state;
struct io_stats io[UID_STATE_SIZE];
struct hlist_node hash;
@@ -173,58 +168,47 @@ static struct uid_entry *find_or_register_uid(uid_t uid)
return uid_entry;
}
static void calc_uid_cputime(struct uid_entry *uid_entry,
u64 *total_utime, u64 *total_stime)
{
struct user_namespace *user_ns = current_user_ns();
struct task_struct *p, *t;
u64 utime, stime;
uid_t uid;
rcu_read_lock();
for_each_process(p) {
uid = from_kuid_munged(user_ns, task_uid(p));
if (uid != uid_entry->uid)
continue;
for_each_thread(p, t) {
/* avoid double accounting of dying threads */
if (!(t->flags & PF_EXITING)) {
task_cputime_adjusted(t, &utime, &stime);
*total_utime += utime;
*total_stime += stime;
}
}
}
rcu_read_unlock();
}
static int uid_cputime_show(struct seq_file *m, void *v)
{
struct uid_entry *uid_entry = NULL;
struct task_struct *task, *temp;
struct user_namespace *user_ns = current_user_ns();
u64 utime;
u64 stime;
u32 bkt;
uid_t uid;
for (bkt = 0, uid_entry = NULL; uid_entry == NULL &&
bkt < HASH_SIZE(hash_table); bkt++) {
lock_uid_by_bkt(bkt);
hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
uid_entry->active_stime = 0;
uid_entry->active_utime = 0;
}
unlock_uid_by_bkt(bkt);
}
u64 total_utime = uid_entry->utime;
u64 total_stime = uid_entry->stime;
rcu_read_lock();
do_each_thread(temp, task) {
uid = from_kuid_munged(user_ns, task_uid(task));
lock_uid(uid);
if (!uid_entry || uid_entry->uid != uid)
uid_entry = find_or_register_uid(uid);
if (!uid_entry) {
rcu_read_unlock();
unlock_uid(uid);
pr_err("%s: failed to find the uid_entry for uid %d\n",
__func__, uid);
return -ENOMEM;
}
/* avoid double accounting of dying threads */
if (!(task->flags & PF_EXITING)) {
task_cputime_adjusted(task, &utime, &stime);
uid_entry->active_utime += utime;
uid_entry->active_stime += stime;
}
unlock_uid(uid);
} while_each_thread(temp, task);
rcu_read_unlock();
for (bkt = 0, uid_entry = NULL; uid_entry == NULL &&
bkt < HASH_SIZE(hash_table); bkt++) {
lock_uid_by_bkt(bkt);
hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
u64 total_utime = uid_entry->utime +
uid_entry->active_utime;
u64 total_stime = uid_entry->stime +
uid_entry->active_stime;
calc_uid_cputime(uid_entry, &total_utime, &total_stime);
seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
ktime_to_us(total_utime), ktime_to_us(total_stime));
}
@@ -323,86 +307,52 @@ static void add_uid_io_stats(struct uid_entry *uid_entry,
__add_uid_io_stats(uid_entry, &task->ioac, slot);
}
static void update_io_stats_all(void)
static void update_io_stats_uid(struct uid_entry *uid_entry)
{
struct uid_entry *uid_entry = NULL;
struct task_struct *task, *temp;
struct user_namespace *user_ns = current_user_ns();
struct task_struct *p, *t;
struct io_stats io;
memset(&io, 0, sizeof(struct io_stats));
rcu_read_lock();
for_each_process(p) {
uid_t uid = from_kuid_munged(user_ns, task_uid(p));
if (uid != uid_entry->uid)
continue;
for_each_thread(p, t) {
/* avoid double accounting of dying threads */
if (!(t->flags & PF_EXITING)) {
io.read_bytes += t->ioac.read_bytes;
io.write_bytes += compute_write_bytes(&t->ioac);
io.rchar += t->ioac.rchar;
io.wchar += t->ioac.wchar;
io.fsync += t->ioac.syscfs;
}
}
}
rcu_read_unlock();
compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io,
&uid_entry->io[UID_STATE_TOTAL_LAST],
&uid_entry->io[UID_STATE_DEAD_TASKS]);
}
static int uid_io_show(struct seq_file *m, void *v)
{
struct uid_entry *uid_entry = NULL;
u32 bkt;
uid_t uid;
for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table);
bkt++) {
lock_uid_by_bkt(bkt);
hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
sizeof(struct io_stats));
}
unlock_uid_by_bkt(bkt);
}
rcu_read_lock();
do_each_thread(temp, task) {
uid = from_kuid_munged(user_ns, task_uid(task));
lock_uid(uid);
if (!uid_entry || uid_entry->uid != uid)
uid_entry = find_or_register_uid(uid);
if (!uid_entry) {
unlock_uid(uid);
continue;
}
add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
unlock_uid(uid);
} while_each_thread(temp, task);
rcu_read_unlock();
update_io_stats_uid(uid_entry);
for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table);
bkt++) {
lock_uid_by_bkt(bkt);
hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
&uid_entry->io[UID_STATE_TOTAL_CURR],
&uid_entry->io[UID_STATE_TOTAL_LAST],
&uid_entry->io[UID_STATE_DEAD_TASKS]);
}
unlock_uid_by_bkt(bkt);
}
}
static void update_io_stats_uid(struct uid_entry *uid_entry)
{
struct task_struct *task, *temp;
struct user_namespace *user_ns = current_user_ns();
memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
sizeof(struct io_stats));
rcu_read_lock();
do_each_thread(temp, task) {
if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid)
continue;
add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
} while_each_thread(temp, task);
rcu_read_unlock();
compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
&uid_entry->io[UID_STATE_TOTAL_CURR],
&uid_entry->io[UID_STATE_TOTAL_LAST],
&uid_entry->io[UID_STATE_DEAD_TASKS]);
}
static int uid_io_show(struct seq_file *m, void *v)
{
struct uid_entry *uid_entry;
u32 bkt;
update_io_stats_all();
for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table);
bkt++) {
lock_uid_by_bkt(bkt);
hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
uid_entry->uid,
uid_entry->io[UID_STATE_FOREGROUND].rchar,
@@ -446,7 +396,6 @@ static ssize_t uid_procstat_write(struct file *file,
uid_t uid;
int argc, state;
char input[128];
struct uid_entry uid_entry_tmp;
if (count >= sizeof(input))
return -EINVAL;
@@ -475,29 +424,8 @@ static ssize_t uid_procstat_write(struct file *file,
return count;
}
/*
* update_io_stats_uid() would otherwise hold uid_lock for a long time,
* because it walks every thread via do_each_thread to compute
* uid_entry->io, which can cause lock contention.
*
* Stage the result in uid_entry_tmp so that uid_lock can be dropped
* while update_io_stats_uid() runs, avoiding the unnecessary hold time.
*/
uid_entry_tmp = *uid_entry;
unlock_uid(uid);
update_io_stats_uid(&uid_entry_tmp);
lock_uid(uid);
hlist_for_each_entry(uid_entry, &hash_table[hash_min(uid, HASH_BITS(hash_table))], hash) {
if (uid_entry->uid == uid_entry_tmp.uid) {
memcpy(uid_entry->io, uid_entry_tmp.io,
sizeof(struct io_stats) * UID_STATE_SIZE);
uid_entry->state = state;
break;
}
}
update_io_stats_uid(uid_entry);
uid_entry->state = state;
unlock_uid(uid);
return count;


@@ -411,8 +411,10 @@ int dw_pcie_host_init(struct pcie_port *pp)
if (ret)
goto err_free_msi;
/* Ignore errors, the link may come up later */
dw_pcie_wait_for_link(pci);
if (pci->ops && pci->ops->start_link) {
/* Ignore errors, the link may come up later */
dw_pcie_wait_for_link(pci);
}
}
bridge->sysdata = pp;


@@ -752,6 +752,7 @@ static int sdebug_host_max_queue; /* per host */
static int sdebug_lowest_aligned = DEF_LOWEST_ALIGNED;
static int sdebug_max_luns = DEF_MAX_LUNS;
static int sdebug_max_queue = SDEBUG_CANQUEUE; /* per submit queue */
static unsigned int sdebug_max_segment_size = BLK_MAX_SEGMENT_SIZE;
static unsigned int sdebug_medium_error_start = OPT_MEDIUM_ERR_ADDR;
static int sdebug_medium_error_count = OPT_MEDIUM_ERR_NUM;
static atomic_t retired_max_queue; /* if > 0 then was prior max_queue */
@@ -5775,6 +5776,7 @@ module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO);
module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR);
module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR);
module_param_named(max_queue, sdebug_max_queue, int, S_IRUGO | S_IWUSR);
module_param_named(max_segment_size, sdebug_max_segment_size, uint, S_IRUGO);
module_param_named(medium_error_count, sdebug_medium_error_count, int,
S_IRUGO | S_IWUSR);
module_param_named(medium_error_start, sdebug_medium_error_start, int,
@@ -5851,6 +5853,7 @@ MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)");
MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method");
MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate (def=1)");
MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))");
MODULE_PARM_DESC(max_segment_size, "max bytes in a single segment");
MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error");
MODULE_PARM_DESC(medium_error_start, "starting sector number to return MEDIUM error");
MODULE_PARM_DESC(ndelay, "response delay in nanoseconds (def=0 -> ignore)");
@@ -7725,6 +7728,7 @@ static int sdebug_driver_probe(struct device *dev)
sdebug_driver_template.can_queue = sdebug_max_queue;
sdebug_driver_template.cmd_per_lun = sdebug_max_queue;
sdebug_driver_template.max_segment_size = sdebug_max_segment_size;
if (!sdebug_clustering)
sdebug_driver_template.dma_boundary = PAGE_SIZE - 1;
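Since the new parameter is declared with S_IRUGO only, it can be set at module load time but is read-only afterwards. A usage sketch (the value is illustrative):

/* Load-time only:
 *   modprobe scsi_debug max_segment_size=4096
 * Readable, but not writable, at runtime:
 *   cat /sys/module/scsi_debug/parameters/max_segment_size
 */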


@@ -315,7 +315,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
* Ensure that all tasks observe the host state change before the
* host_failed change.
*/
call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed);
}
/**


@@ -268,23 +268,29 @@ static int ufshcd_mcq_get_tag(struct ufs_hba *hba,
return div_u64(addr, sizeof(struct utp_transfer_cmd_desc));
}
static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
struct ufs_hw_queue *hwq)
/* Returns true if and only if @compl_cmd has been completed. */
static bool ufshcd_mcq_process_cqe(struct ufs_hba *hba,
struct ufs_hw_queue *hwq,
struct scsi_cmnd *compl_cmd)
{
struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq);
int tag = ufshcd_mcq_get_tag(hba, hwq, cqe);
ufshcd_compl_one_cqe(hba, tag, cqe);
return ufshcd_compl_one_cqe(hba, tag, cqe, compl_cmd);
}
/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. */
unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq)
struct ufs_hw_queue *hwq,
struct scsi_cmnd **compl_cmd)
{
unsigned long completed_reqs = 0;
ufshcd_mcq_update_cq_tail_slot(hwq);
while (!ufshcd_mcq_is_cq_empty(hwq)) {
ufshcd_mcq_process_cqe(hba, hwq);
if (ufshcd_mcq_process_cqe(hba, hwq,
compl_cmd ? *compl_cmd : NULL))
*compl_cmd = NULL;
ufshcd_mcq_inc_cq_head_slot(hwq);
completed_reqs++;
}
@@ -294,15 +300,16 @@ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
return completed_reqs;
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock);
/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. */
unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq)
struct ufs_hw_queue *hwq,
struct scsi_cmnd **compl_cmd)
{
unsigned long completed_reqs, flags;
spin_lock_irqsave(&hwq->cq_lock, flags);
completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq);
completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq, compl_cmd);
spin_unlock_irqrestore(&hwq->cq_lock, flags);
return completed_reqs;


@@ -56,8 +56,8 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
enum flag_idn idn, u8 index, bool *flag_res);
void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit);
void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
struct cq_entry *cqe);
bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
struct cq_entry *cqe, struct scsi_cmnd *compl_cmd);
int ufshcd_mcq_init(struct ufs_hba *hba);
int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba);
int ufshcd_mcq_memory_alloc(struct ufs_hba *hba);
@@ -67,11 +67,13 @@ void ufshcd_mcq_select_mcq_mode(struct ufs_hba *hba);
u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i);
void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i);
unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq);
struct ufs_hw_queue *hwq,
struct scsi_cmnd **compl_cmd);
struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
struct request *req);
unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq);
struct ufs_hw_queue *hwq,
struct scsi_cmnd **compl_cmd);
#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
#define SD_ASCII_STD true


@@ -5463,9 +5463,12 @@ static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
* @hba: per adapter instance
* @task_tag: the task tag of the request to be completed
* @cqe: pointer to the completion queue entry
* @compl_cmd: if not NULL, check whether this command has been completed
*
* Returns: true if and only if @compl_cmd has been completed.
*/
void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
struct cq_entry *cqe)
bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
struct cq_entry *cqe, struct scsi_cmnd *compl_cmd)
{
struct ufshcd_lrb *lrbp;
struct scsi_cmnd *cmd;
@@ -5482,6 +5485,7 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
ufshcd_release_scsi_cmd(hba, lrbp);
/* Do not touch lrbp after scsi done */
cmd->scsi_done(cmd);
return cmd == compl_cmd;
} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
if (hba->dev_cmd.complete) {
@@ -5492,20 +5496,26 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
ufshcd_clk_scaling_update_busy(hba);
}
}
return false;
}
/**
* __ufshcd_transfer_req_compl - handle SCSI and query command completion
* @hba: per adapter instance
* @completed_reqs: bitmask that indicates which requests to complete
* @compl_cmd: if not NULL, check whether *@compl_cmd has been completed.
* Clear *@compl_cmd if it has been completed.
*/
static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
unsigned long completed_reqs)
unsigned long completed_reqs,
struct scsi_cmnd **compl_cmd)
{
int tag;
for_each_set_bit(tag, &completed_reqs, hba->nutrs)
ufshcd_compl_one_cqe(hba, tag, NULL);
if (ufshcd_compl_one_cqe(hba, tag, NULL,
compl_cmd ? *compl_cmd : NULL))
*compl_cmd = NULL;
}
/* Any value that is not an existing queue number is fine for this constant. */
@@ -5532,7 +5542,8 @@ static void ufshcd_clear_polled(struct ufs_hba *hba,
* Returns > 0 if one or more commands have been completed or 0 if no
* requests have been completed.
*/
static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
static int __ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num,
struct scsi_cmnd **compl_cmd)
{
struct ufs_hba *hba = shost_priv(shost);
unsigned long completed_reqs, flags;
@@ -5540,9 +5551,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
struct ufs_hw_queue *hwq;
if (is_mcq_enabled(hba)) {
WARN_ON_ONCE(queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET];
return ufshcd_mcq_poll_cqe_lock(hba, hwq);
return ufshcd_mcq_poll_cqe_lock(hba, hwq, compl_cmd);
}
spin_lock_irqsave(&hba->outstanding_lock, flags);
@@ -5559,11 +5571,16 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
spin_unlock_irqrestore(&hba->outstanding_lock, flags);
if (completed_reqs)
__ufshcd_transfer_req_compl(hba, completed_reqs);
__ufshcd_transfer_req_compl(hba, completed_reqs, compl_cmd);
return completed_reqs != 0;
}
static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
{
return __ufshcd_poll(shost, queue_num, NULL);
}
/**
* ufshcd_transfer_req_compl - handle SCSI and query command completion
* @hba: per adapter instance
@@ -6819,7 +6836,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba)
ufshcd_mcq_write_cqis(hba, events, i);
if (events & UFSHCD_MCQ_CQIS_TAIL_ENT_PUSH_STS)
ufshcd_mcq_poll_cqe_nolock(hba, hwq);
ufshcd_mcq_poll_cqe_nolock(hba, hwq, NULL);
}
return IRQ_HANDLED;
@@ -7360,7 +7377,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
dev_err(hba->dev, "%s: failed to clear requests %#lx\n",
__func__, not_cleared);
}
__ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared);
__ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared, NULL);
out:
hba->req_abort_count = 0;
@@ -7521,7 +7538,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
dev_err(hba->dev,
"%s: cmd was completed, but without a notifying intr, tag = %d",
__func__, tag);
__ufshcd_transfer_req_compl(hba, 1UL << tag);
__ufshcd_transfer_req_compl(hba, 1UL << tag, NULL);
goto release;
}
@@ -8760,6 +8777,25 @@ out:
static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
{
struct ufs_hba *hba = shost_priv(scmd->device->host);
struct scsi_cmnd *cmd2 = scmd;
WARN_ON_ONCE(!scmd);
if (is_mcq_enabled(hba)) {
struct request *rq = scsi_cmd_to_rq(scmd);
struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq);
ufshcd_mcq_poll_cqe_lock(hba, hwq, &cmd2);
} else {
__ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT,
&cmd2);
}
if (cmd2 == NULL) {
sdev_printk(KERN_INFO, scmd->device,
"%s: cmd with tag %#x has already been completed\n",
__func__, blk_mq_unique_tag(scsi_cmd_to_rq(scmd)));
return SCSI_EH_DONE;
}
if (!hba->system_suspending) {
/* Activate the error handler in the SCSI core. */
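The net effect of the compl_cmd plumbing across these hunks: the timeout handler can now tell a genuinely stuck command apart from one that completed without its completion being observed. The calling protocol is narrow, so here it is condensed (same names as above):

struct scsi_cmnd *cmd2 = scmd;          /* in: the command to look for */

/* Every poll variant clears *compl_cmd if and only if that command was
 * completed during the poll; callers that don't care pass NULL. */
ufshcd_mcq_poll_cqe_lock(hba, hwq, &cmd2);

if (!cmd2)      /* it completed after all, nothing to recover */
        return SCSI_EH_DONE;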


@@ -546,23 +546,27 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR_RO(hpd);
static struct attribute *dp_altmode_attrs[] = {
static struct attribute *displayport_attrs[] = {
&dev_attr_configuration.attr,
&dev_attr_pin_assignment.attr,
&dev_attr_hpd.attr,
NULL
};
static const struct attribute_group dp_altmode_group = {
static const struct attribute_group displayport_group = {
.name = "displayport",
.attrs = dp_altmode_attrs,
.attrs = displayport_attrs,
};
static const struct attribute_group *displayport_groups[] = {
&displayport_group,
NULL,
};
int dp_altmode_probe(struct typec_altmode *alt)
{
const struct typec_altmode *port = typec_altmode_get_partner(alt);
struct dp_altmode *dp;
int ret;
/* FIXME: Port can only be DFP_U. */
@@ -573,10 +577,6 @@ int dp_altmode_probe(struct typec_altmode *alt)
DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo)))
return -ENODEV;
ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
if (ret)
return ret;
dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
if (!dp)
return -ENOMEM;
@@ -602,7 +602,6 @@ void dp_altmode_remove(struct typec_altmode *alt)
{
struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
cancel_work_sync(&dp->work);
}
EXPORT_SYMBOL_GPL(dp_altmode_remove);
@@ -620,6 +619,7 @@ static struct typec_altmode_driver dp_altmode_driver = {
.driver = {
.name = "typec_displayport",
.owner = THIS_MODULE,
.dev_groups = displayport_groups,
},
};
module_typec_altmode_driver(dp_altmode_driver);
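Moving the attribute group from a manual sysfs_create_group() in probe to the driver core's dev_groups closes a window where udev could see the device before its attributes existed, and makes removal automatic. The general idiom, sketched with hypothetical names (ATTRIBUTE_GROUPS() emits the foo_groups[] array; a bar_show() callback is assumed to exist):

static DEVICE_ATTR_RO(bar);

static struct attribute *foo_attrs[] = {
        &dev_attr_bar.attr,
        NULL
};
ATTRIBUTE_GROUPS(foo);

static struct platform_driver foo_driver = {
        .driver = {
                .name = "foo",
                .dev_groups = foo_groups,   /* created before the uevent fires */
        },
};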


@@ -74,40 +74,14 @@ static void __set_extent_info(struct extent_info *ei,
}
}
static bool __may_read_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (!test_opt(sbi, READ_EXTENT_CACHE))
return false;
if (is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
!f2fs_sb_has_readonly(sbi))
return false;
return S_ISREG(inode->i_mode);
}
static bool __may_age_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (!test_opt(sbi, AGE_EXTENT_CACHE))
return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
return false;
if (file_is_cold(inode))
return false;
return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}
static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
if (type == EX_READ)
return __may_read_extent_tree(inode);
else if (type == EX_BLOCK_AGE)
return __may_age_extent_tree(inode);
return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
S_ISREG(inode->i_mode);
if (type == EX_BLOCK_AGE)
return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
return false;
}
@@ -120,7 +94,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
if (list_empty(&F2FS_I_SB(inode)->s_list))
return false;
return __init_may_extent_tree(inode, type);
if (!__init_may_extent_tree(inode, type))
return false;
if (type == EX_READ) {
if (is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
return false;
} else if (type == EX_BLOCK_AGE) {
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
return false;
if (file_is_cold(inode))
return false;
}
return true;
}
static void __try_update_largest_extent(struct extent_tree *et,


@@ -2331,8 +2331,11 @@ static int filldir(struct dir_context *ctx, const char *name, int namelen,
return 0;
}
static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx)
static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx,
loff_t next_offset)
{
char *buffstart = buf;
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
@@ -2346,12 +2349,18 @@ static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx)
ctx->pos = dirent->off;
if (!dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
dirent->type))
break;
dirent->type)) {
/* If we can't make any progress, the user buffer is too small */
if (buf == buffstart)
return -EINVAL;
else
return 0;
}
buf += reclen;
nbytes -= reclen;
}
ctx->pos = next_offset;
return 0;
}
@@ -2398,13 +2407,12 @@ void *fuse_readdir_finalize(struct fuse_bpf_args *fa,
struct file *backing_dir = ff->backing_file;
int err = 0;
err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx);
err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx, fro->offset);
*force_again = !!fro->again;
if (*force_again && !*allow_force)
err = -EINVAL;
ctx->pos = fro->offset;
backing_dir->f_pos = fro->offset;
backing_dir->f_pos = ctx->pos;
free_page((unsigned long) fa->out_args[1].value);
return ERR_PTR(err);


@@ -10,6 +10,7 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/pgsize_migration.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
@@ -344,7 +345,14 @@ done:
static int show_map(struct seq_file *m, void *v)
{
show_map_vma(m, v);
struct vm_area_struct *pad_vma = get_pad_vma(v);
struct vm_area_struct *vma = get_data_vma(v);
if (vma_pages(vma))
show_map_vma(m, vma);
show_map_pad_vma(vma, pad_vma, m, show_map_vma);
return 0;
}
@@ -838,11 +846,15 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
static int show_smap(struct seq_file *m, void *v)
{
struct vm_area_struct *vma = v;
struct vm_area_struct *pad_vma = get_pad_vma(v);
struct vm_area_struct *vma = get_data_vma(v);
struct mem_size_stats mss;
memset(&mss, 0, sizeof(mss));
if (!vma_pages(vma))
goto show_pad;
smap_gather_stats(vma, &mss, 0);
show_map_vma(m, vma);
@@ -861,6 +873,9 @@ static int show_smap(struct seq_file *m, void *v)
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
show_smap_vma_flags(m, vma);
show_pad:
show_map_pad_vma(vma, pad_vma, m, (show_pad_vma_fn)show_smap);
return 0;
}


@@ -71,6 +71,7 @@ struct userfaultfd_ctx {
atomic_t mmap_changing;
/* mm with one ore more vmas attached to this userfaultfd_ctx */
struct mm_struct *mm;
struct rcu_head rcu_head;
};
struct userfaultfd_fork_ctx {
@@ -156,6 +157,13 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
refcount_inc(&ctx->refcount);
}
static void __free_userfaultfd_ctx(struct rcu_head *head)
{
struct userfaultfd_ctx *ctx = container_of(head, struct userfaultfd_ctx,
rcu_head);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
/**
* userfaultfd_ctx_put - Releases a reference to the internal userfaultfd
* context.
@@ -176,7 +184,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
call_rcu(&ctx->rcu_head, __free_userfaultfd_ctx);
}
}
@@ -350,6 +358,24 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
return TASK_UNINTERRUPTIBLE;
}
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
bool userfaultfd_using_sigbus(struct vm_area_struct *vma)
{
struct userfaultfd_ctx *ctx;
bool ret;
/*
* Do this inside an RCU read-side critical section so that the
* ctx cannot disappear under us.
*/
rcu_read_lock();
ctx = rcu_dereference(vma->vm_userfaultfd_ctx.ctx);
ret = ctx && (ctx->features & UFFD_FEATURE_SIGBUS);
rcu_read_unlock();
return ret;
}
#endif
/*
* The locking rules involved in returning VM_FAULT_RETRY depending on
* FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
@@ -394,7 +420,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
*/
mmap_assert_locked(mm);
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
ctx = rcu_dereference_protected(vmf->vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
if (!ctx)
goto out;
@@ -611,8 +638,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
mmap_write_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
if (rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) ==
release_new_ctx) {
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx,
NULL);
vma->vm_flags &= ~__VM_UFFD_FLAGS;
}
mmap_write_unlock(mm);
@@ -643,9 +672,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
struct userfaultfd_ctx *ctx = NULL, *octx;
struct userfaultfd_fork_ctx *fctx;
octx = vma->vm_userfaultfd_ctx.ctx;
octx = rcu_dereference_protected(
vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&vma->vm_mm->mmap_lock));
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
vma->vm_flags &= ~__VM_UFFD_FLAGS;
return 0;
}
@@ -682,7 +714,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
list_add_tail(&fctx->list, fcs);
}
vma->vm_userfaultfd_ctx.ctx = ctx;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx);
return 0;
}
@@ -715,7 +747,8 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
{
struct userfaultfd_ctx *ctx;
ctx = vma->vm_userfaultfd_ctx.ctx;
ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&vma->vm_mm->mmap_lock));
if (!ctx)
return;
@@ -726,7 +759,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
atomic_inc(&ctx->mmap_changing);
} else {
/* Drop uffd context if remap feature not enabled */
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
vma->vm_flags &= ~__VM_UFFD_FLAGS;
}
}
@@ -763,7 +796,8 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue ewq;
ctx = vma->vm_userfaultfd_ctx.ctx;
ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
return true;
@@ -801,7 +835,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
{
for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
struct userfaultfd_unmap_ctx *unmap_ctx;
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
struct userfaultfd_ctx *ctx =
rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&vma->vm_mm->mmap_lock));
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
has_unmap_ctx(ctx, unmaps, start, end))
@@ -866,10 +902,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
mmap_write_lock(mm);
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
struct userfaultfd_ctx *cur_uffd_ctx =
rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
cond_resched();
BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
BUG_ON(!!cur_uffd_ctx ^
!!(vma->vm_flags & __VM_UFFD_FLAGS));
if (vma->vm_userfaultfd_ctx.ctx != ctx) {
if (cur_uffd_ctx != ctx) {
prev = vma;
continue;
}
@@ -884,7 +923,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
else
prev = vma;
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
}
mmap_write_unlock(mm);
mmput(mm);
@@ -1350,9 +1389,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
found = false;
basic_ioctls = false;
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
struct userfaultfd_ctx *cur_uffd_ctx =
rcu_dereference_protected(cur->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
BUG_ON(!!cur_uffd_ctx ^
!!(cur->vm_flags & __VM_UFFD_FLAGS));
/* check not compatible vmas */
@@ -1395,8 +1437,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* wouldn't know which one to deliver the userfaults to.
*/
ret = -EBUSY;
if (cur->vm_userfaultfd_ctx.ctx &&
cur->vm_userfaultfd_ctx.ctx != ctx)
if (cur_uffd_ctx && cur_uffd_ctx != ctx)
goto out_unlock;
/*
@@ -1414,18 +1455,20 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = 0;
do {
struct userfaultfd_ctx *cur_uffd_ctx =
rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
cond_resched();
BUG_ON(!vma_can_userfault(vma, vm_flags));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
BUG_ON(cur_uffd_ctx && cur_uffd_ctx != ctx);
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
* userfaultfd and with the right tracking mode too.
*/
if (vma->vm_userfaultfd_ctx.ctx == ctx &&
if (cur_uffd_ctx == ctx &&
(vma->vm_flags & vm_flags) == vm_flags)
goto skip;
@@ -1460,7 +1503,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* the current one has not been updated yet.
*/
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx.ctx = ctx;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx);
if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
hugetlb_unshare_all_pmds(vma);
@@ -1559,7 +1602,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
BUG_ON(!!rcu_access_pointer(cur->vm_userfaultfd_ctx.ctx) ^
!!(cur->vm_flags & __VM_UFFD_FLAGS));
/*
@@ -1581,6 +1624,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
ret = 0;
do {
struct userfaultfd_ctx *cur_uffd_ctx =
rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
lockdep_is_held(&mm->mmap_lock));
cond_resched();
BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
@@ -1589,7 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* Nothing to do: this vma is already registered into this
* userfaultfd and with the right tracking mode too.
*/
if (!vma->vm_userfaultfd_ctx.ctx)
if (!cur_uffd_ctx)
goto skip;
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
@@ -1608,7 +1654,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range range;
range.start = start;
range.len = vma_end - start;
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
wake_userfault(cur_uffd_ctx, &range);
}
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
@@ -1637,7 +1683,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* the current one has not been updated yet.
*/
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
skip:
prev = vma;
@@ -1718,7 +1764,9 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
ret = -EINVAL;
if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
goto out;
if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|
UFFDIO_COPY_MODE_WP|
UFFDIO_COPY_MODE_MMAP_TRYLOCK))
goto out;
if (mmget_not_zero(ctx->mm)) {
ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
@@ -1769,13 +1817,14 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
if (ret)
goto out;
ret = -EINVAL;
if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE)
if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE|
UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK))
goto out;
if (mmget_not_zero(ctx->mm)) {
ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
uffdio_zeropage.range.len,
&ctx->mmap_changing);
&ctx->mmap_changing, uffdio_zeropage.mode);
mmput(ctx->mm);
} else {
return -ESRCH;
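Taken together, the userfaultfd changes follow the standard RCU-pointer lifecycle: publish with rcu_assign_pointer() under mmap_lock, read locklessly under rcu_read_lock() in the new userfaultfd_using_sigbus(), use rcu_dereference_protected() where mmap_lock is already held, and defer the final free through call_rcu(). A compressed sketch with generic names (obj, slot, SOME_FLAG, new_obj, old_obj and free_obj_cb are all placeholders):

struct obj {
        struct rcu_head rcu;
        int features;
};
struct obj __rcu *slot;

/* Publish (writer, under the owning lock): */
rcu_assign_pointer(slot, new_obj);

/* Lockless read (e.g. the speculative fault path): */
rcu_read_lock();
struct obj *o = rcu_dereference(slot);
bool flagged = o && (o->features & SOME_FLAG);
rcu_read_unlock();

/* Teardown: unpublish, then free only after a grace period. */
rcu_assign_pointer(slot, NULL);
call_rcu(&old_obj->rcu, free_obj_cb);   /* callback does the actual free */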


@@ -60,6 +60,13 @@
#define LED_FUNCTION_MICMUTE "micmute"
#define LED_FUNCTION_MUTE "mute"
/* Used for player LEDs as found on game controllers from e.g. Nintendo, Sony. */
#define LED_FUNCTION_PLAYER1 "player-1"
#define LED_FUNCTION_PLAYER2 "player-2"
#define LED_FUNCTION_PLAYER3 "player-3"
#define LED_FUNCTION_PLAYER4 "player-4"
#define LED_FUNCTION_PLAYER5 "player-5"
/* Miscellaneous functions. Use the functions above if you can. */
#define LED_FUNCTION_ACTIVITY "activity"
#define LED_FUNCTION_ALARM "alarm"


@@ -328,6 +328,11 @@ struct queue_limits {
unsigned char misaligned;
unsigned char discard_misaligned;
unsigned char raid_partial_stripes_expensive;
#ifndef __GENKSYMS__
bool sub_page_limits;
#endif
enum blk_zoned_model zoned;
ANDROID_KABI_RESERVE(1);
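__GENKSYMS__ is defined only while genksyms computes symbol CRCs, so guarding the new member keeps the ABI CRC of every interface mentioning struct queue_limits unchanged while the real build still sees the field. The shape of the trick on an illustrative struct (in practice it is paired with existing padding or a KABI reserve slot so that already-built modules see the same layout):

struct example {
        long existing_member;
#ifndef __GENKSYMS__
        bool new_member;        /* hidden from the CRC computation only */
#endif
        ANDROID_KABI_RESERVE(1);
};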


@@ -311,7 +311,7 @@ struct vm_region {
#ifdef CONFIG_USERFAULTFD
#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, })
struct vm_userfaultfd_ctx {
struct userfaultfd_ctx *ctx;
struct userfaultfd_ctx __rcu *ctx;
};
#else /* CONFIG_USERFAULTFD */
#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {})


@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGE_SIZE_MIGRATION_H
#define _LINUX_PAGE_SIZE_MIGRATION_H
/*
* Page Size Migration
*
* Copyright (c) 2024, Google LLC.
* Author: Kalesh Singh <kaleshsingh@google.com>
*
* This file contains the APIs for mitigations to ensure
* app compatibility during the transition from 4kB to 16kB
* page size in Android.
*/
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/sizes.h>
/*
* vm_flags representation of VMA padding pages.
*
* This allows the kernel to identify the portion of an ELF LOAD segment VMA
* that is padding.
*
* 4 high bits of vm_flags [63,60] are used to represent ELF segment padding
* up to 60kB, which is sufficient for ELFs of both 16kB and 64kB segment
* alignment (p_align).
*
* The representation is illustrated below.
*
* 63 62 61 60
* _________ _________ _________ _________
* | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
* | of 4kB | of 4kB | of 4kB | of 4kB |
* | chunks | chunks | chunks | chunks |
* |_________|_________|_________|_________|
*/
#define VM_PAD_WIDTH 4
#define VM_PAD_SHIFT (BITS_PER_LONG - VM_PAD_WIDTH)
#define VM_TOTAL_PAD_PAGES ((1ULL << VM_PAD_WIDTH) - 1)
#define VM_PAD_MASK (VM_TOTAL_PAD_PAGES << VM_PAD_SHIFT)
#define VMA_PAD_START(vma) (vma->vm_end - (vma_pad_pages(vma) << PAGE_SHIFT))
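A worked example of the encoding, under this header's own assumptions (4kB base pages, 64-bit vm_flags): an ELF LOAD segment padded out to 16kB alignment carries 12kB of padding, i.e. three 4kB pad pages, so bits [63:60] hold 0b0011:

/* Encode three pad pages: */
vma->vm_flags |= (3UL << VM_PAD_SHIFT); /* lands entirely in VM_PAD_MASK */

/* Decode, as the 4kB/64-bit vma_pad_pages() presumably does: */
unsigned long nr_pad = (vma->vm_flags & VM_PAD_MASK) >> VM_PAD_SHIFT; /* == 3 */

/* VMA_PAD_START() then points three pages below the VMA end:
 * vm_end - (3 << PAGE_SHIFT) == vm_end - 12kB. */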
typedef void (*show_pad_vma_fn)(struct seq_file *m, struct vm_area_struct *vma);
#if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT)
extern void vma_set_pad_pages(struct vm_area_struct *vma,
unsigned long nr_pages);
extern unsigned long vma_pad_pages(struct vm_area_struct *vma);
extern void madvise_vma_pad_pages(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma);
extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma);
extern void show_map_pad_vma(struct vm_area_struct *vma,
struct vm_area_struct *pad,
struct seq_file *m, show_pad_vma_fn func);
extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
unsigned long addr, int new_below);
#else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */
static inline void vma_set_pad_pages(struct vm_area_struct *vma,
unsigned long nr_pages)
{
}
static inline unsigned long vma_pad_pages(struct vm_area_struct *vma)
{
return 0;
}
static inline void madvise_vma_pad_pages(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
static inline struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
{
return NULL;
}
static inline struct vm_area_struct *get_data_vma(struct vm_area_struct *vma)
{
return vma;
}
static inline void show_map_pad_vma(struct vm_area_struct *vma,
struct vm_area_struct *pad,
struct seq_file *m, show_pad_vma_fn func)
{
}
static inline void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
unsigned long addr, int new_below)
{
}
#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */
static inline unsigned long vma_data_pages(struct vm_area_struct *vma)
{
return vma_pages(vma) - vma_pad_pages(vma);
}
/*
* Sets the correct padding bits / flags for a VMA split.
*/
static inline unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma,
unsigned long newflags)
{
if (newflags & VM_PAD_MASK)
return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK);
else
return newflags;
}
/*
* Merging of padding VMAs is uncommon, as padding is only allowed
* from the linker context.
*
* To simplify the semantics, adjacent VMAs with padding are not
* allowed to merge.
*/
static inline bool is_mergable_pad_vma(struct vm_area_struct *vma,
unsigned long vm_flags)
{
/* Padding VMAs cannot be merged with other padding or real VMAs */
return !((vma->vm_flags | vm_flags) & VM_PAD_MASK);
}
#endif /* _LINUX_PAGE_SIZE_MIGRATION_H */


@@ -81,6 +81,15 @@ static inline int rcu_preempt_depth(void)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_RCU_LAZY
void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
#else
static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
{
call_rcu(head, func);
}
#endif
/* Internal to kernel */
void rcu_init(void);
extern int rcu_scheduler_active __read_mostly;
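The CONFIG_RCU_LAZY=n stub keeps callers portable: call_rcu_hurry() simply degenerates to call_rcu(). With lazy RCU enabled, a plain call_rcu() may be batched for a long time to save power, so paths whose callbacks have an observable deadline (as in the scsi_error and af_unix changes in this merge) opt out explicitly. A usage sketch (obj and obj_free_cb are placeholders):

/* The callback's effect is time-sensitive (error handling, a reference
 * drop that userspace can observe): don't let laziness delay it. */
call_rcu_hurry(&obj->rcu, obj_free_cb);

/* Pure memory reclamation with no latency requirement stays lazy: */
call_rcu(&obj->rcu, obj_free_cb);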


@@ -33,9 +33,15 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
static_assert(UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK == UFFDIO_COPY_MODE_MMAP_TRYLOCK);
#define UFFDIO_MODE_MMAP_TRYLOCK UFFDIO_COPY_MODE_MMAP_TRYLOCK
extern int sysctl_unprivileged_userfaultfd;
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
extern bool userfaultfd_using_sigbus(struct vm_area_struct *vma);
#endif
/*
* The mode of operation for __mcopy_atomic and its helpers.
@@ -62,9 +68,8 @@ extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len,
atomic_t *mmap_changing, __u64 mode);
extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
unsigned long dst_start,
unsigned long len,
atomic_t *mmap_changing);
unsigned long dst_start, unsigned long len,
atomic_t *mmap_changing, __u64 mode);
extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long len, atomic_t *mmap_changing);
extern int mwriteprotect_range(struct mm_struct *dst_mm,
@@ -75,7 +80,7 @@ extern int mwriteprotect_range(struct mm_struct *dst_mm,
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
return rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == vm_ctx.ctx;
}
/*
@@ -154,6 +159,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
return VM_FAULT_SIGBUS;
}
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
static inline bool userfaultfd_using_sigbus(struct vm_area_struct *vma)
{
return false;
}
#endif
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{


@@ -56,7 +56,7 @@ struct unix_sock {
struct mutex iolock, bindlock;
struct sock *peer;
struct list_head link;
atomic_long_t inflight;
unsigned long inflight;
spinlock_t lock;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
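The atomic_long_t to plain unsigned long conversion is sound only because inflight is never touched outside the garbage collector's spinlock; the atomics were redundant rather than protective. The invariant, sketched:

/* All inflight accounting in net/unix is serialized by unix_gc_lock,
 * so a plain integer plus the lock suffices; no atomics needed. */
spin_lock(&unix_gc_lock);
unix_sk(sk)->inflight++;
spin_unlock(&unix_gc_lock);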


@@ -7,6 +7,8 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10))
TRACE_EVENT(oom_score_adj_update,
TP_PROTO(struct task_struct *task),
@@ -78,22 +80,37 @@ TRACE_EVENT(mark_victim,
TP_STRUCT__entry(
__field(int, pid)
__field(uid_t, uid)
__string(comm, task->comm)
__field(unsigned long, total_vm)
__field(unsigned long, anon_rss)
__field(unsigned long, file_rss)
__field(unsigned long, shmem_rss)
__field(uid_t, uid)
__field(unsigned long, pgtables)
__field(short, oom_score_adj)
),
TP_fast_assign(
__entry->pid = task->pid;
__entry->uid = uid;
__assign_str(comm, task->comm);
__entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm);
__entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES));
__entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES));
__entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES));
__entry->uid = uid;
__entry->pgtables = mm_pgtables_bytes(task->mm) >> 10;
__entry->oom_score_adj = task->signal->oom_score_adj;
),
TP_printk("pid=%d uid=%u comm=%s oom_score_adj=%hd",
TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd",
__entry->pid,
__entry->uid,
__get_str(comm),
__entry->total_vm,
__entry->anon_rss,
__entry->file_rss,
__entry->shmem_rss,
__entry->uid,
__entry->pgtables,
__entry->oom_score_adj
)
);


@@ -101,6 +101,47 @@ TRACE_EVENT(rpm_return_int,
__entry->ret)
);
#define RPM_STATUS_STRINGS \
EM(RPM_ACTIVE, "RPM_ACTIVE") \
EM(RPM_RESUMING, "RPM_RESUMING") \
EM(RPM_SUSPENDED, "RPM_SUSPENDED") \
EMe(RPM_SUSPENDING, "RPM_SUSPENDING")
/* Enums require being exported to userspace, for user tool parsing. */
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
RPM_STATUS_STRINGS
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings that
* will be printed in the output.
*/
#undef EM
#undef EMe
#define EM(a, b) { a, b },
#define EMe(a, b) { a, b }
TRACE_EVENT(rpm_status,
TP_PROTO(struct device *dev, enum rpm_status status),
TP_ARGS(dev, status),
TP_STRUCT__entry(
__string(name, dev_name(dev))
__field(int, status)
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__entry->status = status;
),
TP_printk("%s status=%s", __get_str(name),
__print_symbolic(__entry->status, RPM_STATUS_STRINGS))
);
#endif /* _TRACE_RUNTIME_POWER_H */
/* This part must be outside protection */


@@ -237,6 +237,7 @@ struct uffdio_copy {
* according to the uffdio_register.ioctls.
*/
#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
#define UFFDIO_COPY_MODE_MMAP_TRYLOCK ((__u64)1<<63)
__u64 mode;
/*
@@ -249,6 +250,7 @@ struct uffdio_copy {
struct uffdio_zeropage {
struct uffdio_range range;
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
#define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK ((__u64)1<<63)
__u64 mode;
/*


@@ -1298,8 +1298,6 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val);
void ufshcd_hba_stop(struct ufs_hba *hba);
void ufshcd_schedule_eh_work(struct ufs_hba *hba);
void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i);
unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq);
void ufshcd_mcq_enable_esi(struct ufs_hba *hba);
void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg);


@@ -4802,6 +4802,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
spin_unlock_irq(&css_set_lock);
}
EXPORT_SYMBOL_GPL(css_task_iter_start);
/**
* css_task_iter_next - return the next task for the iterator
@@ -4835,6 +4836,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
return it->cur_task;
}
EXPORT_SYMBOL_GPL(css_task_iter_next);
/**
* css_task_iter_end - finish task iteration
@@ -4857,6 +4859,7 @@ void css_task_iter_end(struct css_task_iter *it)
if (it->cur_task)
put_task_struct(it->cur_task);
}
EXPORT_SYMBOL_GPL(css_task_iter_end);
static void cgroup_procs_release(struct kernfs_open_file *of)
{


@@ -274,4 +274,45 @@ config TASKS_TRACE_RCU_READ_MB
Say N here if you hate read-side memory barriers.
Take the default if you are unsure.
config RCU_LAZY
bool "RCU callback lazy invocation functionality"
depends on RCU_NOCB_CPU
default n
help
To save power, batch RCU callbacks and flush them after a delay, under
memory pressure, or when the callback list grows too large.
Requires rcu_nocbs=all to be set.
Use rcutree.enable_rcu_lazy=0 to turn it off at boot time.
config RCU_LAZY_DEFAULT_OFF
bool "Turn RCU lazy invocation off by default"
depends on RCU_LAZY
default n
help
Allows building the kernel with CONFIG_RCU_LAZY=y while keeping it off
by default. The boot-time parameter rcutree.enable_rcu_lazy=1 can be
used to switch it back on.
config RCU_BOOT_END_DELAY
int "Minimum time before RCU may consider in-kernel boot as completed"
range 0 120000
default 20000
help
Default value of the minimum time in milliseconds from the start of boot
that must elapse before the boot sequence can be marked complete from RCU's
perspective, after which RCU's behavior becomes more relaxed.
Userspace can also mark the boot as completed sooner than this default
by writing the time in milliseconds (say, once userspace considers the
system booted) to /sys/module/rcupdate/parameters/rcu_boot_end_delay,
or even just writing 0 to that sysfs node. The sysfs node can also be
used to extend the delay beyond the default, provided the marking of
boot completion has not yet occurred.
The actual delay before RCU's view of the system is marked as booted
can be higher than this value if the kernel takes a long time to
initialize, but it will never be smaller.
Accept the default if unsure.
endmenu # "RCU Subsystem"
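Taken together, a configuration that enables lazy RCU with these knobs might look like the following sketch (a fragment assembled from the help texts above; exact defconfig contents may differ):

    CONFIG_RCU_NOCB_CPU=y
    CONFIG_RCU_LAZY=y
    # CONFIG_RCU_LAZY_DEFAULT_OFF is not set
    CONFIG_RCU_BOOT_END_DELAY=20000

booted with the command line:

    rcu_nocbs=all rcutree.enable_rcu_lazy=1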


@@ -428,14 +428,20 @@ do { \
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline bool rcu_async_should_hurry(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
bool rcu_async_should_hurry(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcu_async_hurry(void);
void rcu_async_relax(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
@@ -459,6 +465,14 @@ enum rcutorture_type {
INVALID_RCU_FLAVOR
};
#if defined(CONFIG_RCU_LAZY)
unsigned long rcu_lazy_get_jiffies_till_flush(void);
void rcu_lazy_set_jiffies_till_flush(unsigned long j);
#else
static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
#endif
#if defined(CONFIG_TREE_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
unsigned long *gp_seq);


@@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp);
static void rcu_sync_call(struct rcu_sync *rsp)
{
call_rcu(&rsp->cb_head, rcu_sync_func);
call_rcu_hurry(&rsp->cb_head, rcu_sync_func);
}
/**


@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
void rcu_barrier(void)
{
wait_rcu_gp(call_rcu);
wait_rcu_gp(call_rcu_hurry);
}
EXPORT_SYMBOL(rcu_barrier);


@@ -2976,12 +2976,12 @@ static void check_cb_ovld(struct rcu_data *rdp)
raw_spin_unlock_rcu_node(rnp);
}
/* Helper function for call_rcu() and friends. */
static void
__call_rcu(struct rcu_head *head, rcu_callback_t func)
__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
{
static atomic_t doublefrees;
unsigned long flags;
bool lazy;
struct rcu_data *rdp;
bool was_alldone;
@@ -3006,6 +3006,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
local_irq_save(flags);
kasan_record_aux_stack_noalloc(head);
rdp = this_cpu_ptr(&rcu_data);
lazy = lazy_in && !rcu_async_should_hurry();
/* Add the callback to our list. */
if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
@@ -3019,7 +3020,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
}
check_cb_ovld(rdp);
if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
return; // Enqueued onto ->nocb_bypass, so just leave.
// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
rcu_segcblist_enqueue(&rdp->cblist, head);
@@ -3042,8 +3043,45 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
}
}
#ifdef CONFIG_RCU_LAZY
static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF);
module_param(enable_rcu_lazy, bool, 0444);
/**
* call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
* flush all lazy callbacks (including the new one) to the main ->cblist while
* doing so.
*
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all pre-existing RCU read-side
* critical sections have completed.
*
* Use this API instead of call_rcu() if you don't want the callback to be
* invoked after very long periods of time, which can happen on systems without
* memory pressure and on systems which are lightly loaded or mostly idle.
* This function will cause callbacks to be invoked sooner than later at the
* expense of extra power. Other than that, this function is identical to, and
* reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
* ordering and other functionality.
*/
void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
{
return __call_rcu_common(head, func, false);
}
EXPORT_SYMBOL_GPL(call_rcu_hurry);
#else
#define enable_rcu_lazy false
#endif
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* By default the callbacks are 'lazy' and are kept hidden from the main
* ->cblist to prevent starting of grace periods too soon.
* If you desire grace periods to start very soon, use call_rcu_hurry().
*
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
@@ -3084,13 +3122,12 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func);
__call_rcu_common(head, func, enable_rcu_lazy);
}
EXPORT_SYMBOL_GPL(call_rcu);
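As a usage sketch (not part of the patch; struct foo and foo_release() are invented names), the two entry points divide a typical deferred-free pattern like this:

    struct foo {
            struct rcu_head rcu;
            /* ... payload ... */
    };

    static void foo_release(struct rcu_head *head)
    {
            kfree(container_of(head, struct foo, rcu));
    }

    /* Power-friendly default: the callback may be batched for seconds. */
    call_rcu(&f->rcu, foo_release);

    /* Alternatively, on a latency-sensitive path: runs soon and also
     * flushes previously queued lazy callbacks.
     */
    call_rcu_hurry(&f->rcu, foo_release);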
/* Maximum number of jiffies to wait before draining a batch. */
#define KFREE_DRAIN_JIFFIES (HZ / 50)
#define KFREE_DRAIN_JIFFIES (5 * HZ)
#define KFREE_N_BATCHES 2
#define FREE_N_CHANNELS 2
@@ -3136,7 +3173,6 @@ struct kfree_rcu_cpu_work {
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
* @lock: Synchronize access to this structure
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
* @monitor_todo: Tracks whether a @monitor_work delayed work is pending
* @initialized: The @rcu_work fields have been initialized
* @count: Number of objects for which GP not started
* @bkvcache:
@@ -3161,7 +3197,6 @@ struct kfree_rcu_cpu {
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
raw_spinlock_t lock;
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
int count;
@@ -3365,6 +3400,21 @@ need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
return !!krwp->head_free;
}
static void
schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
{
long delay, delay_left;
delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
if (delayed_work_pending(&krcp->monitor_work)) {
delay_left = krcp->monitor_work.timer.expires - jiffies;
if (delay < delay_left)
mod_delayed_work(system_wq, &krcp->monitor_work, delay);
return;
}
queue_delayed_work(system_wq, &krcp->monitor_work, delay);
}
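Concretely: KFREE_DRAIN_JIFFIES is now 5 * HZ, i.e. five seconds independent of HZ, so an idle channel drains at most every five seconds; once READ_ONCE(krcp->count) reaches KVFREE_BULK_MAX_ENTR the delay collapses to a single jiffy, and an already-pending timer is only ever shortened via mod_delayed_work(), never pushed further out.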
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
*/
@@ -3401,7 +3451,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
// objects queued on the linked list.
if (!krwp->head_free) {
krwp->head_free = krcp->head;
krcp->head = NULL;
WRITE_ONCE(krcp->head, NULL);
}
WRITE_ONCE(krcp->count, 0);
@@ -3415,17 +3465,15 @@ static void kfree_rcu_monitor(struct work_struct *work)
}
}
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// If there is nothing to detach, it means that our job is
// successfully done here. In case of having at least one
// of the channels that is still busy we should rearm the
// work to repeat an attempt. Because previous batches are
// still in progress.
if (!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head)
krcp->monitor_todo = false;
else
schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
if (need_offload_krc(krcp))
schedule_delayed_monitor_work(krcp);
}
static enum hrtimer_restart
@@ -3604,7 +3652,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
head->func = func;
head->next = krcp->head;
krcp->head = head;
WRITE_ONCE(krcp->head, head);
success = true;
}
@@ -3619,11 +3667,8 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
kmemleak_ignore(ptr);
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
krcp->monitor_todo = true;
schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
}
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
schedule_delayed_monitor_work(krcp);
unlock_return:
krc_this_cpu_unlock(krcp, flags);
@@ -3656,7 +3701,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
atomic_set(&krcp->backoff_page_cache_fill, 1);
}
return count;
return count == 0 ? SHRINK_EMPTY : count;
}
static unsigned long
@@ -3692,21 +3737,12 @@ static struct shrinker kfree_rcu_shrinker = {
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
unsigned long flags;
for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
raw_spin_lock_irqsave(&krcp->lock, flags);
if ((!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) ||
krcp->monitor_todo) {
raw_spin_unlock_irqrestore(&krcp->lock, flags);
continue;
}
krcp->monitor_todo = true;
schedule_delayed_work_on(cpu, &krcp->monitor_work,
KFREE_DRAIN_JIFFIES);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
if (need_offload_krc(krcp))
schedule_delayed_monitor_work(krcp);
}
}
@@ -3797,7 +3833,7 @@ void synchronize_rcu(void)
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
wait_rcu_gp(call_rcu);
wait_rcu_gp(call_rcu_hurry);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
@@ -4003,12 +4039,21 @@ static void rcu_barrier_func(void *cpu_in)
{
uintptr_t cpu = (uintptr_t)cpu_in;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
bool wake_nocb = false;
bool was_alldone = false;
rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
rdp->barrier_head.func = rcu_barrier_callback;
debug_rcu_head_queue(&rdp->barrier_head);
rcu_nocb_lock(rdp);
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
/*
* Flush bypass and wakeup rcuog if we add callbacks to an empty regular
* queue. This way we don't wait for bypass timer that can reach seconds
* if it's fully lazy.
*/
was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
atomic_inc(&rcu_state.barrier_cpu_count);
} else {
@@ -4017,6 +4062,8 @@ static void rcu_barrier_func(void *cpu_in)
rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
if (wake_nocb)
wake_nocb_gp(rdp, false);
}
/**
@@ -4399,7 +4446,7 @@ void rcutree_migrate_callbacks(int cpu)
my_rdp = this_cpu_ptr(&rcu_data);
my_rnp = my_rdp->mynode;
rcu_nocb_lock(my_rdp); /* irqs already disabled. */
WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
/* Leverage recent GPs and set GP for new callbacks. */
needwake = rcu_advance_cbs(my_rnp, rdp) ||
@@ -4437,11 +4484,13 @@ static int rcu_pm_notify(struct notifier_block *self,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
rcu_async_hurry();
rcu_expedite_gp();
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
rcu_unexpedite_gp();
rcu_async_relax();
break;
default:
break;


@@ -258,14 +258,16 @@ struct rcu_data {
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
long lazy_len; /* Length of buffered lazy callbacks. */
int cpu;
};
/* Values for nocb_defer_wakeup field in struct rcu_data. */
#define RCU_NOCB_WAKE_NOT 0
#define RCU_NOCB_WAKE_BYPASS 1
#define RCU_NOCB_WAKE 2
#define RCU_NOCB_WAKE_FORCE 3
#define RCU_NOCB_WAKE_LAZY 2
#define RCU_NOCB_WAKE 3
#define RCU_NOCB_WAKE_FORCE 4
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
@@ -437,10 +439,12 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j);
unsigned long j, bool lazy);
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags);
bool *was_alldone, unsigned long flags,
bool lazy);
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);


@@ -922,7 +922,7 @@ void synchronize_rcu_expedited(void)
/* If expedited grace periods are prohibited, fall back to normal. */
if (rcu_gp_is_normal()) {
wait_rcu_gp(call_rcu);
wait_rcu_gp(call_rcu_hurry);
return;
}


@@ -261,6 +261,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
return __wake_nocb_gp(rdp_gp, rdp, force, flags);
}
/*
* LAZY_FLUSH_JIFFIES decides the maximum amount of time that
* can elapse before lazy callbacks are flushed. Lazy callbacks
* could be flushed much earlier for a number of other reasons;
* however, LAZY_FLUSH_JIFFIES ensures that no lazy callbacks are
* left unsubmitted to RCU after that many jiffies.
*/
#define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
#ifdef CONFIG_RCU_LAZY
// To be called only from test code.
void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
{
jiffies_till_flush = jif;
}
EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
unsigned long rcu_lazy_get_jiffies_till_flush(void)
{
return jiffies_till_flush;
}
EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
#endif
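A test-only caller (rcutorture-style; the surrounding test scaffolding is assumed) could shrink the flush horizon like so:

    /* Sketch, CONFIG_RCU_LAZY=y assumed: flush lazy CBs after ~1s. */
    unsigned long saved = rcu_lazy_get_jiffies_till_flush();

    rcu_lazy_set_jiffies_till_flush(HZ);
    /* ... exercise call_rcu() lazy paths ... */
    rcu_lazy_set_jiffies_till_flush(saved);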
/*
* Arrange to wake the GP kthread for this NOCB group at some future
* time when it is safe to do so.
@@ -274,10 +299,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
/*
* Bypass wakeup overrides previous deferments. In case
* of callback storm, no need to wake up too early.
* Bypass wakeup overrides previous deferments. In case of
* callback storms, no need to wake up too early.
*/
if (waketype == RCU_NOCB_WAKE_BYPASS) {
if (waketype == RCU_NOCB_WAKE_LAZY &&
rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else {
@@ -298,12 +327,16 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
* proves to be initially empty, just return false because the no-CB GP
* kthread may need to be awakened in this case.
*
* Return true if there was something to be flushed and it succeeded, otherwise
* false.
*
* Note that this function always returns true if rhp is NULL.
*/
static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j)
static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
unsigned long j, bool lazy)
{
struct rcu_cblist rcl;
struct rcu_head *rhp = rhp_in;
WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
rcu_lockdep_assert_cblist_protected(rdp);
@@ -315,7 +348,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
if (rhp)
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
/*
* If the new CB requested was a lazy one, queue it onto the main
* ->cblist so that we can take advantage of the grace-period that will
* happen regardless. But queue it onto the bypass list first so that
* the lazy CB is ordered with the existing CBs in the bypass list.
*/
if (lazy && rhp) {
rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
rhp = NULL;
}
rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
WRITE_ONCE(rdp->lazy_len, 0);
rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
WRITE_ONCE(rdp->nocb_bypass_first, j);
rcu_nocb_bypass_unlock(rdp);
@@ -331,13 +377,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
* Note that this function always returns true if rhp is NULL.
*/
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j)
unsigned long j, bool lazy)
{
if (!rcu_rdp_is_offloaded(rdp))
return true;
rcu_lockdep_assert_cblist_protected(rdp);
rcu_nocb_bypass_lock(rdp);
return rcu_nocb_do_flush_bypass(rdp, rhp, j);
return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
}
/*
@@ -350,7 +396,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
if (!rcu_rdp_is_offloaded(rdp) ||
!rcu_nocb_bypass_trylock(rdp))
return;
WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
}
/*
@@ -372,12 +418,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
* there is only one CPU in operation.
*/
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags)
bool *was_alldone, unsigned long flags,
bool lazy)
{
unsigned long c;
unsigned long cur_gp_seq;
unsigned long j = jiffies;
long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
lockdep_assert_irqs_disabled();
@@ -422,24 +470,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// If there hasn't yet been all that many ->cblist enqueues
// this jiffy, tell the caller to enqueue onto ->cblist. But flush
// ->nocb_bypass first.
if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
// Lazy CBs throttle this back and do immediate bypass queuing.
if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ"));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
return false; // Caller must enqueue the callback.
}
// If ->nocb_bypass has been used too long or is too full,
// flush ->nocb_bypass to ->cblist.
if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
(ncbs && bypass_is_lazy &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
ncbs >= qhimark) {
rcu_nocb_lock(rdp);
if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ"));
@@ -452,7 +505,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j;
}
rcu_nocb_unlock_irqrestore(rdp, flags);
// The flush succeeded and we moved CBs into the regular list.
// Don't wait for the wake up timer as it may be too far ahead.
// Wake up the GP thread now instead, if the cblist was empty.
__call_rcu_nocb_wake(rdp, *was_alldone, flags);
return true; // Callback already enqueued.
}
@@ -462,13 +520,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
if (lazy)
WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);
if (!ncbs) {
WRITE_ONCE(rdp->nocb_bypass_first, j);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
}
rcu_nocb_bypass_unlock(rdp);
smp_mb(); /* Order enqueue before wake. */
if (ncbs) {
// A wake up of the grace period kthread or timer adjustment
// needs to be done only if:
// 1. Bypass list was fully empty before (this is the first
// bypass list entry), or:
// 2. Both of these conditions are met:
// a. The bypass list previously had only lazy CBs, and:
// b. The new CB is non-lazy.
if (ncbs && (!bypass_is_lazy || lazy)) {
local_irq_restore(flags);
} else {
// No-CBs GP kthread might be indefinitely asleep, if so, wake.
@@ -496,8 +565,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
unsigned long flags)
__releases(rdp->nocb_lock)
{
long bypass_len;
unsigned long cur_gp_seq;
unsigned long j;
long lazy_len;
long len;
struct task_struct *t;
@@ -511,9 +582,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
}
// Need to actually do a wakeup.
len = rcu_segcblist_n_cbs(&rdp->cblist);
bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
lazy_len = READ_ONCE(rdp->lazy_len);
if (was_alldone) {
rdp->qlen_last_fqs_check = len;
if (!irqs_disabled_flags(flags)) {
// Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
@@ -606,12 +684,12 @@ static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp,
static void nocb_gp_wait(struct rcu_data *my_rdp)
{
bool bypass = false;
long bypass_ncbs;
int __maybe_unused cpu = my_rdp->cpu;
unsigned long cur_gp_seq;
unsigned long flags;
bool gotcbs = false;
unsigned long j = jiffies;
bool lazy = false;
bool needwait_gp = false; // This prevents actual uninitialized use.
bool needwake;
bool needwake_gp;
@@ -628,9 +706,13 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
bool needwake_state = false;
long bypass_ncbs;
bool flush_bypass = false;
long lazy_ncbs;
if (!nocb_gp_enabled_cb(rdp))
continue;
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
rcu_nocb_lock_irqsave(rdp, flags);
if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) {
@@ -640,22 +722,37 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
continue;
}
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
if (bypass_ncbs &&
lazy_ncbs = READ_ONCE(rdp->lazy_len);
if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
bypass_ncbs > 2 * qhimark)) {
flush_bypass = true;
} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
bypass_ncbs > 2 * qhimark)) {
// Bypass full or old, so flush it.
(void)rcu_nocb_try_flush_bypass(rdp, j);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
flush_bypass = true;
} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
if (needwake_state)
swake_up_one(&rdp->nocb_state_wq);
continue; /* No callbacks here, try next. */
}
if (flush_bypass) {
// Bypass full or old, so flush it.
(void)rcu_nocb_try_flush_bypass(rdp, j);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
lazy_ncbs = READ_ONCE(rdp->lazy_len);
}
if (bypass_ncbs) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("Bypass"));
bypass = true;
bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
if (bypass_ncbs == lazy_ncbs)
lazy = true;
else
bypass = true;
}
rnp = rdp->mynode;
@@ -705,12 +802,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
my_rdp->nocb_gp_gp = needwait_gp;
my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
if (bypass && !rcu_nocb_poll) {
// At least one child with non-empty ->nocb_bypass, so set
// timer in order to avoid stranding its callbacks.
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
TPS("WakeBypassIsDeferred"));
// At least one child with non-empty ->nocb_bypass, so set
// timer in order to avoid stranding its callbacks.
if (!rcu_nocb_poll) {
// If the bypass list has only lazy CBs, add a deferred lazy wake up.
if (lazy && !bypass) {
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazyIsDeferred"));
// Otherwise add a deferred bypass wake up.
} else if (bypass) {
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
TPS("WakeBypassIsDeferred"));
}
}
if (rcu_nocb_poll) {
/* Polling, so trace if first poll in the series. */
if (gotcbs)
@@ -989,7 +1094,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
* return false, which means that future calls to rcu_nocb_try_bypass()
* will refuse to put anything into the bypass.
*/
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
ret = rdp_offload_toggle(rdp, false, flags);
swait_event_exclusive(rdp->nocb_state_wq,
!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
@@ -1111,6 +1216,55 @@ int rcu_nocb_cpu_offload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
static unsigned long
lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
int cpu;
unsigned long count = 0;
/* Snapshot count of all CPUs */
for_each_possible_cpu(cpu) {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
count += READ_ONCE(rdp->lazy_len);
}
return count ? count : SHRINK_EMPTY;
}
static unsigned long
lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
int cpu;
unsigned long flags;
unsigned long count = 0;
/* Snapshot count of all CPUs */
for_each_possible_cpu(cpu) {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int _count = READ_ONCE(rdp->lazy_len);
if (_count == 0)
continue;
rcu_nocb_lock_irqsave(rdp, flags);
WRITE_ONCE(rdp->lazy_len, 0);
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count;
count += _count;
if (sc->nr_to_scan <= 0)
break;
}
return count ? count : SHRINK_STOP;
}
static struct shrinker lazy_rcu_shrinker = {
.count_objects = lazy_rcu_shrink_count,
.scan_objects = lazy_rcu_shrink_scan,
.batch = 0,
.seeks = DEFAULT_SEEKS,
};
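Like the kfree_rcu_shrink_count() change earlier in this series, the count callback reports SHRINK_EMPTY rather than 0 when no CPU holds lazy callbacks. The scan callback zeroes each non-empty ->lazy_len and wakes the corresponding nocb GP kthread, so under memory pressure the bypass lists stop being treated as lazy and their callbacks are processed promptly.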
void __init rcu_init_nohz(void)
{
int cpu;
@@ -1136,6 +1290,9 @@ void __init rcu_init_nohz(void)
cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (register_shrinker(&lazy_rcu_shrinker))
pr_err("Failed to register lazy_rcu shrinker!\n");
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
@@ -1171,6 +1328,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
raw_spin_lock_init(&rdp->nocb_gp_lock);
timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
rcu_cblist_init(&rdp->nocb_bypass);
WRITE_ONCE(rdp->lazy_len, 0);
}
/*
@@ -1449,14 +1607,19 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}
static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
{
return false;
}
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j)
unsigned long j, bool lazy)
{
return true;
}
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags)
bool *was_alldone, unsigned long flags, bool lazy)
{
return false;
}


@@ -43,6 +43,7 @@
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/rcupdate_trace.h>
#include <linux/jiffies.h>
#define CREATE_TRACE_POINTS
@@ -144,8 +145,45 @@ bool rcu_gp_is_normal(void)
}
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1);
/*
* Should call_rcu() callbacks be processed with urgency or are
* they OK being executed with arbitrary delays?
*/
bool rcu_async_should_hurry(void)
{
return !IS_ENABLED(CONFIG_RCU_LAZY) ||
atomic_read(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_should_hurry);
/**
* rcu_async_hurry - Make future async RCU callbacks not lazy.
*
* After a call to this function, future calls to call_rcu()
* will be processed in a timely fashion.
*/
void rcu_async_hurry(void)
{
if (IS_ENABLED(CONFIG_RCU_LAZY))
atomic_inc(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_hurry);
/**
* rcu_async_relax - Make future async RCU callbacks lazy.
*
* After a call to this function, future calls to call_rcu()
* will be processed in a lazy fashion.
*/
void rcu_async_relax(void)
{
if (IS_ENABLED(CONFIG_RCU_LAZY))
atomic_dec(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_relax);
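The pair nests like a counter (initialized to 1 until boot completes), so a subsystem bracketing a latency-critical window pairs the calls, as the PM-notifier hunk earlier in this merge does around suspend. A minimal sketch:

    rcu_async_hurry();      /* subsequent call_rcu() callbacks are not lazy */
    /* ... suspend path or other latency-critical window ... */
    rcu_async_relax();      /* laziness resumes once the nesting count drops */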
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
/*
* Should normal grace-period primitives be expedited? Intended for
* use within RCU. Note that this function takes the rcu_expedited
@@ -187,19 +225,91 @@ void rcu_unexpedite_gp(void)
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
/*
* Minimum time in milliseconds from the start of boot until RCU can consider
* in-kernel boot as completed. This can also be tuned at runtime to end the
* boot earlier, by userspace init code writing the time in milliseconds (even
* 0) to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay. The sysfs
* node can also be used to extend the delay to be larger than the default,
* assuming the marking of boot complete has not yet occurred.
*/
static int android_rcu_boot_end_delay = CONFIG_RCU_BOOT_END_DELAY;
static bool rcu_boot_ended __read_mostly;
static bool rcu_boot_end_called __read_mostly;
static DEFINE_MUTEX(rcu_boot_end_lock);
/*
* Inform RCU of the end of the in-kernel boot sequence.
* Inform RCU of the end of the in-kernel boot sequence. The boot sequence will
* not be marked ended until at least android_rcu_boot_end_delay milliseconds
* have passed.
*/
void rcu_end_inkernel_boot(void)
void rcu_end_inkernel_boot(void);
static void rcu_boot_end_work_fn(struct work_struct *work)
{
rcu_end_inkernel_boot();
}
static DECLARE_DELAYED_WORK(rcu_boot_end_work, rcu_boot_end_work_fn);
/* Must be called with rcu_boot_end_lock held. */
static void rcu_end_inkernel_boot_locked(void)
{
rcu_boot_end_called = true;
if (rcu_boot_ended)
return;
if (android_rcu_boot_end_delay) {
u64 boot_ms = div_u64(ktime_get_boot_fast_ns(), 1000000UL);
if (boot_ms < android_rcu_boot_end_delay) {
schedule_delayed_work(&rcu_boot_end_work,
msecs_to_jiffies(android_rcu_boot_end_delay - boot_ms));
return;
}
}
cancel_delayed_work(&rcu_boot_end_work);
rcu_unexpedite_gp();
rcu_async_relax();
if (rcu_normal_after_boot)
WRITE_ONCE(rcu_normal, 1);
rcu_boot_ended = true;
}
void rcu_end_inkernel_boot(void)
{
mutex_lock(&rcu_boot_end_lock);
rcu_end_inkernel_boot_locked();
mutex_unlock(&rcu_boot_end_lock);
}
static int param_set_rcu_boot_end(const char *val, const struct kernel_param *kp)
{
uint end_ms;
int ret = kstrtouint(val, 0, &end_ms);
if (ret)
return ret;
/*
* rcu_end_inkernel_boot() should be called at least once during init
* before we can allow param changes to end the boot.
*/
mutex_lock(&rcu_boot_end_lock);
android_rcu_boot_end_delay = end_ms;
if (!rcu_boot_ended && rcu_boot_end_called) {
rcu_end_inkernel_boot_locked();
}
mutex_unlock(&rcu_boot_end_lock);
return ret;
}
static const struct kernel_param_ops rcu_boot_end_ops = {
.set = param_set_rcu_boot_end,
.get = param_get_uint,
};
module_param_cb(android_rcu_boot_end_delay, &rcu_boot_end_ops, &android_rcu_boot_end_delay, 0644);
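Userspace init code can end the boot window early by writing to this parameter; a minimal sketch (hypothetical daemon helper, error handling elided):

    #include <fcntl.h>
    #include <unistd.h>

    /* Tell RCU the in-kernel boot has ended now. */
    static void rcu_mark_boot_done(void)
    {
            int fd = open("/sys/module/rcupdate/parameters/android_rcu_boot_end_delay",
                          O_WRONLY);

            if (fd >= 0) {
                    write(fd, "0", 1);  /* 0 ms: end the boot window immediately */
                    close(fd);
            }
    }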
/*
* Let rcutorture know when it is OK to turn it up to eleven.
*/


@@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
percpu_ref_noop_confirm_switch;
percpu_ref_get(ref); /* put after confirmation */
call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu);
call_rcu_hurry(&ref->data->rcu,
percpu_ref_switch_to_atomic_rcu);
}
static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)


@@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
mm_init.o percpu.o slab_common.o \
compaction.o vmacache.o \
interval_tree.o list_lru.o workingset.o \
debug.o gup.o mmap_lock.o $(mmu-y)
debug.o gup.o mmap_lock.o pgsize_migration.o $(mmu-y)
# Give 'page_alloc' its own module-parameter namespace
page-alloc-y := page_alloc.o


@@ -1269,7 +1269,7 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
block_end_pfn, cc->zone))
continue;
pfn = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn,
ret = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn,
ISOLATE_UNEVICTABLE);
if (ret)


@@ -1710,7 +1710,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
if (flags & FAULT_FLAG_RETRY_NOWAIT)
return 0;
mmap_read_unlock(mm);
if (!(flags & FAULT_FLAG_SPECULATIVE))
mmap_read_unlock(mm);
if (flags & FAULT_FLAG_KILLABLE)
wait_on_page_locked_killable(page);
else
@@ -1722,7 +1723,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
ret = __lock_page_killable(page);
if (ret) {
mmap_read_unlock(mm);
if (!(flags & FAULT_FLAG_SPECULATIVE))
mmap_read_unlock(mm);
return 0;
}
} else {


@@ -11,6 +11,7 @@
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
#include <linux/pgsize_migration.h>
#include <linux/page_idle.h>
#include <linux/userfaultfd_k.h>
#include <linux/hugetlb.h>
@@ -788,6 +789,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
madvise_vma_pad_pages(vma, start, end);
zap_page_range(vma, start, end - start);
return 0;
}


@@ -58,6 +58,7 @@
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/pfn_t.h>
#include <linux/pgsize_migration.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
@@ -4461,7 +4462,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
end_pgoff = start_pgoff -
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1;
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
end_pgoff = min3(end_pgoff, vma_data_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1);
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
@@ -4939,6 +4940,17 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
pgd_t pgdval;
p4d_t p4dval;
pud_t pudval;
bool uffd_missing_sigbus = false;
#ifdef CONFIG_USERFAULTFD
/*
* Only support SPF for SIGBUS+MISSING userfaults in private
* anonymous VMAs.
*/
uffd_missing_sigbus = vma_is_anonymous(vma) &&
(vma->vm_flags & VM_UFFD_MISSING) &&
userfaultfd_using_sigbus(vma);
#endif
vmf.seq = seq;
@@ -5018,11 +5030,19 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
speculative_page_walk_end();
if (!vmf.pte && uffd_missing_sigbus)
return VM_FAULT_SIGBUS;
return handle_pte_fault(&vmf);
spf_fail:
speculative_page_walk_end();
return VM_FAULT_RETRY;
/*
* A failing page-table walk is similar to page-missing, so give
* SIGBUS+MISSING userfault an opportunity to handle it before
* retrying with mmap_lock.
*/
return uffd_missing_sigbus ? VM_FAULT_SIGBUS : VM_FAULT_RETRY;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */


@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pgsize_migration.h>
#include <linux/pagevec.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
@@ -547,7 +548,7 @@ success:
*/
if (lock)
vma->vm_flags = newflags;
vma->vm_flags = vma_pad_fixup_flags(vma, newflags);
else
munlock_vma_pages_range(vma, start, end);


@@ -24,6 +24,7 @@
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pgsize_migration.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
@@ -1053,6 +1054,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
return 0;
if (!is_mergable_pad_vma(vma, vm_flags))
return 0;
return 1;
}
@@ -2778,8 +2781,10 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
/* Success. */
if (!err)
if (!err) {
split_pad_vma(vma, new, addr, new_below);
return 0;
}
/* Clean everything up if vma_adjust failed. */
if (new->vm_ops && new->vm_ops->close)


@@ -17,6 +17,7 @@
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/pgsize_migration.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
@@ -490,7 +491,8 @@ success:
* vm_flags and vm_page_prot are protected by the mmap_lock
* held in write mode.
*/
vma->vm_flags = newflags;
vma->vm_flags = vma_pad_fixup_flags(vma, newflags);
dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
vma_set_page_prot(vma);

mm/pgsize_migration.c (new file, 396 lines)

@@ -0,0 +1,396 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Page Size Migration
*
* This file contains the core logic of mitigations to ensure
* app compatibility during the transition from 4kB to 16kB
* page size in Android.
*
* Copyright (c) 2024, Google LLC.
* Author: Kalesh Singh <kaleshsingh@google.com>
*/
#include <linux/pgsize_migration.h>
#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#ifdef CONFIG_64BIT
#if PAGE_SIZE == SZ_4K
DEFINE_STATIC_KEY_TRUE(pgsize_migration_enabled);
#define is_pgsize_migration_enabled() (static_branch_likely(&pgsize_migration_enabled))
#else /* PAGE_SIZE != SZ_4K */
DEFINE_STATIC_KEY_FALSE(pgsize_migration_enabled);
#define is_pgsize_migration_enabled() (static_branch_unlikely(&pgsize_migration_enabled))
#endif /* PAGE_SIZE == SZ_4K */
static ssize_t show_pgsize_migration_enabled(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
if (is_pgsize_migration_enabled())
return sprintf(buf, "%d\n", 1);
else
return sprintf(buf, "%d\n", 0);
}
static ssize_t store_pgsize_migration_enabled(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t n)
{
unsigned long val;
/* Migration is only applicable to 4kB kernels */
if (PAGE_SIZE != SZ_4K)
return n;
if (kstrtoul(buf, 10, &val))
return -EINVAL;
if (val > 1)
return -EINVAL;
if (val == 1)
static_branch_enable(&pgsize_migration_enabled);
else if (val == 0)
static_branch_disable(&pgsize_migration_enabled);
return n;
}
static struct kobj_attribute pgsize_migration_enabled_attr = __ATTR(
enabled,
0644,
show_pgsize_migration_enabled,
store_pgsize_migration_enabled
);
static struct attribute *pgsize_migration_attrs[] = {
&pgsize_migration_enabled_attr.attr,
NULL
};
static struct attribute_group pgsize_migration_attr_group = {
.name = "pgsize_migration",
.attrs = pgsize_migration_attrs,
};
/**
* What: /sys/kernel/mm/pgsize_migration/enabled
* Date: April 2024
* KernelVersion: v5.4+ (GKI kernels)
* Contact: Kalesh Singh <kaleshsingh@google.com>
* Description: /sys/kernel/mm/pgsize_migration/enabled
* allows userspace to turn on or off page size
* migration mitigations necessary for app compatibility
* during Android's transition from 4kB to 16kB page size.
* Such mitigations include preserving /proc/<pid>/[s]maps
* output as if there was no segment extension by the
* dynamic loader; and preventing fault around in the padding
* sections of ELF LOAD segment mappings.
* Users: Bionic's dynamic linker
*/
static int __init init_pgsize_migration(void)
{
if (sysfs_create_group(mm_kobj, &pgsize_migration_attr_group))
pr_err("pgsize_migration: failed to create sysfs group\n");
return 0;
};
late_initcall(init_pgsize_migration);
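In practice the toggle is driven from userspace, e.g. Bionic's dynamic linker (per the Users: entry above) writing 1 or 0 to /sys/kernel/mm/pgsize_migration/enabled. Values other than 0 or 1 are rejected with -EINVAL, while on non-4kB kernels any write is accepted as a no-op.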
#if PAGE_SIZE == SZ_4K
void vma_set_pad_pages(struct vm_area_struct *vma,
unsigned long nr_pages)
{
if (!is_pgsize_migration_enabled())
return;
vma->vm_flags &= ~VM_PAD_MASK;
vma->vm_flags |= (nr_pages << VM_PAD_SHIFT);
}
unsigned long vma_pad_pages(struct vm_area_struct *vma)
{
if (!is_pgsize_migration_enabled())
return 0;
return vma->vm_flags >> VM_PAD_SHIFT;
}
static __always_inline bool str_has_suffix(const char *str, const char *suffix)
{
size_t str_len = strlen(str);
size_t suffix_len = strlen(suffix);
if (str_len < suffix_len)
return false;
return !strncmp(str + str_len - suffix_len, suffix, suffix_len);
}
/*
* The dynamic linker, or interpreter, operates within the process context
* of the binary that necessitated dynamic linking.
*
* Consequently, process context identifiers; like PID, comm, ...; cannot
* be used to differentiate whether the execution context belongs to the
* dynamic linker or not.
*
* linker_ctx() deduces whether execution is currently in the dynamic linker's
* context by correlating the current userspace instruction pointer with the
* VMAs of the current task.
*
* Returns true if in linker context, otherwise false.
*
* Caller must hold mmap lock in read mode.
*/
static inline bool linker_ctx(void)
{
struct pt_regs *regs = task_pt_regs(current);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct file *file;
if (!regs)
return false;
vma = find_vma(mm, instruction_pointer(regs));
/* Current execution context, the VMA must be present */
BUG_ON(!vma);
file = vma->vm_file;
if (!file)
return false;
if ((vma->vm_flags & VM_EXEC)) {
char buf[64];
const int bufsize = sizeof(buf);
char *path;
memset(buf, 0, bufsize);
path = d_path(&file->f_path, buf, bufsize);
if (!strcmp(path, "/system/bin/linker64"))
return true;
}
return false;
}
/*
* Saves the number of padding pages for an ELF segment mapping
* in vm_flags.
*
* The number of padding pages is deduced from the madvise DONTNEED range [start, end)
* if the following conditions are met:
* 1) The range is enclosed by a single VMA
* 2) The range ends at the end address of the VMA
* 3) The range starts at an address greater than the start address of the VMA
* 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES.
* 5) The VMA is a file backed VMA.
* 6) The file backing the VMA is a shared library (*.so)
* 7) The madvise was requested by bionic's dynamic linker.
*/
void madvise_vma_pad_pages(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
unsigned long nr_pad_pages;
if (!is_pgsize_migration_enabled())
return;
/*
* If the madvise range is at the end of the VMA, save the number of
* pages in vm_flags (only 4 bits are needed for up to 64kB-aligned ELFs).
*/
if (start <= vma->vm_start || end != vma->vm_end)
return;
nr_pad_pages = (end - start) >> PAGE_SHIFT;
if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES)
return;
/* Only handle this for file backed VMAs */
if (!vma->vm_file)
return;
/* Limit this to only shared libraries (*.so) */
if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so"))
return;
/* Only bionic's dynamic linker needs to hint padding pages. */
if (!linker_ctx())
return;
vma_set_pad_pages(vma, nr_pad_pages);
}
static const char *pad_vma_name(struct vm_area_struct *vma)
{
return "[page size compat]";
}
static const struct vm_operations_struct pad_vma_ops = {
.name = pad_vma_name,
};
/*
* Returns a new VMA representing the padding in @vma, if no padding
* in @vma returns NULL.
*/
struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
{
struct vm_area_struct *pad;
if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
return NULL;
pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
*pad = *vma;
/* Remove file */
pad->vm_file = NULL;
/* Add vm_ops->name */
pad->vm_ops = &pad_vma_ops;
/* Adjust the start to begin at the start of the padding section */
pad->vm_start = VMA_PAD_START(pad);
/* Make the pad vma PROT_NONE */
pad->vm_flags &= ~(VM_READ|VM_WRITE|VM_EXEC);
/* Remove padding bits */
pad->vm_flags &= ~VM_PAD_MASK;
return pad;
}
/*
* Returns a new VMA excluding the padding from @vma; if no padding in
* @vma returns @vma.
*/
struct vm_area_struct *get_data_vma(struct vm_area_struct *vma)
{
struct vm_area_struct *data;
if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
return vma;
data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
*data = *vma;
/* Adjust the end to the start of the padding section */
data->vm_end = VMA_PAD_START(data);
return data;
}
/*
* Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma
* and @pad.
*/
void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad,
struct seq_file *m, show_pad_vma_fn func)
{
if (!pad)
return;
/*
* This cannot happen. If @pad vma was allocated the corresponding
* @vma should have the VM_PAD_MASK bit(s) set.
*/
BUG_ON(!(vma->vm_flags & VM_PAD_MASK));
/*
* This cannot happen. @pad is a section of the original VMA.
* Therefore @vma cannot be null if @pad is not null.
*/
BUG_ON(!vma);
func(m, pad);
kfree(pad);
kfree(vma);
}
/*
* When splitting a padding VMA there are a couple of cases to handle.
*
* Given:
*
* | DDDDPPPP |
*
* where:
* - D represents 1 page of data;
* - P represents 1 page of padding;
* - | represents the boundaries (start/end) of the VMA
*
*
* 1) Split exactly at the padding boundary
*
* | DDDDPPPP | --> | DDDD | PPPP |
*
* - Remove padding flags from the first VMA.
* - The second VMA is all padding
*
* 2) Split within the padding area
*
* | DDDDPPPP | --> | DDDDPP | PP |
*
* - Subtract the length of the second VMA from the first VMA's padding.
* - The second VMA is all padding, adjust its padding length (flags)
*
* 3) Split within the data area
*
* | DDDDPPPP | --> | DD | DDPPPP |
*
* - Remove padding flags from the first VMA.
* - The second VMA has the same padding as before the split.
*/
void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
unsigned long addr, int new_below)
{
unsigned long nr_pad_pages = vma_pad_pages(vma);
unsigned long nr_vma2_pages;
struct vm_area_struct *first;
struct vm_area_struct *second;
if (!nr_pad_pages)
return;
if (new_below) {
first = new;
second = vma;
} else {
first = vma;
second = new;
}
nr_vma2_pages = vma_pages(second);
if (nr_vma2_pages == nr_pad_pages) { /* Case 1 */
first->vm_flags &= ~VM_PAD_MASK;
vma_set_pad_pages(second, nr_pad_pages);
} else if (nr_vma2_pages < nr_pad_pages) { /* Case 2 */
vma_set_pad_pages(first, nr_pad_pages - nr_vma2_pages);
vma_set_pad_pages(second, nr_vma2_pages);
} else { /* Case 3 */
first->vm_flags &= ~VM_PAD_MASK;
vma_set_pad_pages(second, nr_pad_pages);
}
}
#endif /* PAGE_SIZE == SZ_4K */
#endif /* CONFIG_64BIT */
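As a worked example, take a 6-page VMA 'DDDDPP' (nr_pad_pages == 2). A split that leaves the second VMA with exactly 2 pages is case 1: the first VMA drops VM_PAD_MASK and the second becomes pure padding. A split leaving it with 1 page is case 2: the first VMA keeps 1 padding page and the second records 1. A split leaving it with 3 pages is case 3: the first VMA drops the flags and the second keeps the full 2-page padding.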


@@ -42,7 +42,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
* enforce the VM_MAYWRITE check done at uffd registration
* time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
if (!rcu_access_pointer(dst_vma->vm_userfaultfd_ctx.ctx))
return NULL;
return dst_vma;
@@ -530,14 +530,19 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
copied = 0;
page = NULL;
retry:
mmap_read_lock(dst_mm);
err = -EAGAIN;
if (mode & UFFDIO_MODE_MMAP_TRYLOCK) {
if (!mmap_read_trylock(dst_mm))
goto out;
} else {
mmap_read_lock(dst_mm);
}
/*
* If memory mappings are changing because of non-cooperative
* operation (e.g. mremap) running in parallel, bail out and
* request the user to retry later
*/
err = -EAGAIN;
if (mmap_changing && atomic_read(mmap_changing))
goto out_unlock;
@@ -631,6 +636,15 @@ retry:
if (unlikely(err == -ENOENT)) {
void *page_kaddr;
/*
* Return early due to mmap_lock contention only after
* some pages are copied to ensure that jank-sensitive
* threads don't keep retrying for progress-critical
* pages.
*/
if (copied && mmap_lock_is_contended(dst_mm))
break;
mmap_read_unlock(dst_mm);
BUG_ON(!page);
@@ -655,6 +669,9 @@ retry:
if (fatal_signal_pending(current))
err = -EINTR;
if (mmap_lock_is_contended(dst_mm))
err = -EAGAIN;
}
if (err)
break;
@@ -680,10 +697,10 @@ ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
}
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, atomic_t *mmap_changing)
unsigned long len, atomic_t *mmap_changing, __u64 mode)
{
return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
mmap_changing, 0);
mmap_changing, mode);
}
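On the caller side, the trylock mode turns mmap_lock contention into -EAGAIN instead of blocking. A userspace sketch (flag name from the uapi hunk above; the retry policy and partial-progress handling are assumptions):

    struct uffdio_copy copy = {
            .dst = dst_addr,
            .src = src_addr,
            .len = len,
            .mode = UFFDIO_COPY_MODE_MMAP_TRYLOCK,
    };

    if (ioctl(uffd, UFFDIO_COPY, &copy) == -1 && errno == EAGAIN) {
            /* mmap_lock was contended or only part was copied; real
             * code should advance dst/src/len by copy.copy before
             * retrying on the blocking path.
             */
            copy.mode = 0;
            ioctl(uffd, UFFDIO_COPY, &copy);
    }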
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,


@@ -8,7 +8,6 @@ This module contains a full list of kernel modules
_COMMON_GKI_MODULES_LIST = [
# keep sorted
"drivers/block/null_blk/null_blk.ko",
"drivers/block/zram/zram.ko",
"drivers/bluetooth/btbcm.ko",
"drivers/bluetooth/btqca.ko",


@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst)
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu);
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);


@@ -231,13 +231,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa)
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
static void in_dev_free_rcu(struct rcu_head *head)
{
struct in_device *idev = container_of(head, struct in_device, rcu_head);
kfree(rcu_dereference_protected(idev->mc_hash, 1));
kfree(idev);
}
void in_dev_finish_destroy(struct in_device *idev)
{
struct net_device *dev = idev->dev;
WARN_ON(idev->ifa_list);
WARN_ON(idev->mc_list);
kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
@@ -245,7 +252,7 @@ void in_dev_finish_destroy(struct in_device *idev)
if (!idev->dead)
pr_err("Freeing alive in_device %p\n", idev);
else
kfree(idev);
call_rcu(&idev->rcu_head, in_dev_free_rcu);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
@@ -295,12 +302,6 @@ out_kfree:
goto out;
}
static void in_dev_rcu_put(struct rcu_head *head)
{
struct in_device *idev = container_of(head, struct in_device, rcu_head);
in_dev_put(idev);
}
static void inetdev_destroy(struct in_device *in_dev)
{
struct net_device *dev;
@@ -325,7 +326,7 @@ static void inetdev_destroy(struct in_device *in_dev)
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
arp_ifdown(dev);
call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
in_dev_put(in_dev);
}
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)


@@ -4682,6 +4682,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
if (flags & NFT_SET_ANONYMOUS)
return -EOPNOTSUPP;
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4690,6 +4693,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
if (flags & NFT_SET_ANONYMOUS)
return -EOPNOTSUPP;
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -5055,6 +5062,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -9683,10 +9691,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
return -EAGAIN;
err = -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
@@ -9857,12 +9866,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nf_tables_abort_release(trans);
}
if (action == NFNL_ABORT_AUTOLOAD)
nf_tables_module_autoload(net);
else
nf_tables_module_autoload_cleanup(net);
return 0;
return err;
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -9876,6 +9880,16 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
/* module autoload needs to happen after GC sequence update because it
* temporarily releases and grabs mutex again.
*/
if (action == NFNL_ABORT_AUTOLOAD)
nf_tables_module_autoload(net);
else
nf_tables_module_autoload_cleanup(net);
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10673,9 +10687,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
__nft_release_tables(net);


@@ -1994,6 +1994,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
@@ -2006,16 +2008,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
return;
if (last && f->mt[rulemap[i].to].e == e) {
priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
}
first_rule += rules_f0;
}
WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
@@ -2253,8 +2257,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->clone) {
m = priv->clone;
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
nft_set_pipapo_match_destroy(ctx, set, m);
for_each_possible_cpu(cpu)
pipapo_free_scratch(priv->clone, cpu);


@@ -253,7 +253,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
* must carry a ref on the connection to prevent us getting here whilst
* it is queued or running.
*/
call_rcu(&conn->rcu, rxrpc_destroy_connection);
call_rcu_hurry(&conn->rcu, rxrpc_destroy_connection);
}
/*


@@ -877,11 +877,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
u = unix_sk(sk);
u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */


@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
static void dec_inflight(struct unix_sock *usk)
{
atomic_long_dec(&usk->inflight);
usk->inflight--;
}
static void inc_inflight(struct unix_sock *usk)
{
atomic_long_inc(&usk->inflight);
usk->inflight++;
}
static void inc_inflight_move_tail(struct unix_sock *u)
{
atomic_long_inc(&u->inflight);
u->inflight++;
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
@@ -234,20 +235,34 @@ void unix_gc(void)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
*
* Embryos, though never candidates themselves, affect which
* candidates are reachable by the garbage collector. Before
* being added to a listener's queue, an embryo may already
* receive data carrying SCM_RIGHTS, potentially making the
* passed socket a candidate that is not yet reachable by the
* collector. It becomes reachable once the embryo is
* enqueued. Therefore, we must ensure that no SCM-laden
* embryo appears in a (candidate) listener's queue between
* consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
struct sock *sk = &u->sk;
long total_refs;
long inflight_refs;
total_refs = file_count(u->sk.sk_socket->file);
inflight_refs = atomic_long_read(&u->inflight);
total_refs = file_count(sk->sk_socket->file);
BUG_ON(inflight_refs < 1);
BUG_ON(total_refs < inflight_refs);
if (total_refs == inflight_refs) {
BUG_ON(!u->inflight);
BUG_ON(total_refs < u->inflight);
if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
if (sk->sk_state == TCP_LISTEN) {
unix_state_lock(sk);
unix_state_unlock(sk);
}
}
}
@@ -271,7 +286,7 @@ void unix_gc(void)
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);
if (atomic_long_read(&u->inflight) > 0) {
if (u->inflight) {
list_move_tail(&u->link, &not_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);


@@ -54,12 +54,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);
if (atomic_long_inc_return(&u->inflight) == 1) {
if (!u->inflight) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
} else {
BUG_ON(list_empty(&u->link));
}
u->inflight++;
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
@@ -76,10 +77,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);
BUG_ON(!atomic_long_read(&u->inflight));
BUG_ON(!u->inflight);
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
u->inflight--;
if (!u->inflight)
list_del_init(&u->link);
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);