mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-08 20:07:46 +09:00
Merge tag 'locking-debug-2021-09-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull memory model updates from Ingo Molnar:
"LKMM updates:
- Update documentation and code example
KCSAN updates:
- Introduce CONFIG_KCSAN_STRICT (which RCU uses)
- Optimize use of get_ctx() by kcsan_found_watchpoint()
- Rework atomic.h into permissive.h
- Add the ability to ignore writes that change only one bit of a
given data-racy variable.
- Improve comments"
* tag 'locking-debug-2021-09-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
tools/memory-model: Document data_race(READ_ONCE())
tools/memory-model: Heuristics using data_race() must handle all values
tools/memory-model: Add example for heuristic lockless reads
tools/memory-model: Make read_foo_diagnostic() more clearly diagnostic
kcsan: Make strict mode imply interruptible watchers
kcsan: permissive: Ignore data-racy 1-bit value changes
kcsan: Print if strict or non-strict during init
kcsan: Rework atomic.h into permissive.h
kcsan: Reduce get_ctx() uses in kcsan_found_watchpoint()
kcsan: Introduce CONFIG_KCSAN_STRICT
kcsan: Remove CONFIG_KCSAN_DEBUG
kcsan: Improve some Kconfig comments
This commit is contained in:
@@ -127,6 +127,18 @@ Kconfig options:
|
|||||||
causes KCSAN to not report data races due to conflicts where the only plain
|
causes KCSAN to not report data races due to conflicts where the only plain
|
||||||
accesses are aligned writes up to word size.
|
accesses are aligned writes up to word size.
|
||||||
|
|
||||||
|
* ``CONFIG_KCSAN_PERMISSIVE``: Enable additional permissive rules to ignore
|
||||||
|
certain classes of common data races. Unlike the above, the rules are more
|
||||||
|
complex involving value-change patterns, access type, and address. This
|
||||||
|
option depends on ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y``. For details
|
||||||
|
please see ``kernel/kcsan/permissive.h``. Testers and maintainers that
|
||||||
|
only focus on reports from specific subsystems and not the whole kernel are
|
||||||
|
recommended to disable this option.
|
||||||
|
|
||||||
|
To use the strictest possible rules, select ``CONFIG_KCSAN_STRICT=y``, which
|
||||||
|
configures KCSAN to follow the Linux-kernel memory consistency model (LKMM) as
|
||||||
|
closely as possible.
|
||||||
|
|
||||||
DebugFS interface
|
DebugFS interface
|
||||||
~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
|
||||||
* Rules for implicitly atomic memory accesses.
|
|
||||||
*
|
|
||||||
* Copyright (C) 2019, Google LLC.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _KERNEL_KCSAN_ATOMIC_H
|
|
||||||
#define _KERNEL_KCSAN_ATOMIC_H
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Special rules for certain memory where concurrent conflicting accesses are
|
|
||||||
* common, however, the current convention is to not mark them; returns true if
|
|
||||||
* access to @ptr should be considered atomic. Called from slow-path.
|
|
||||||
*/
|
|
||||||
static bool kcsan_is_atomic_special(const volatile void *ptr)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* _KERNEL_KCSAN_ATOMIC_H */
|
|
||||||
@@ -20,9 +20,9 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
|
|
||||||
#include "atomic.h"
|
|
||||||
#include "encoding.h"
|
#include "encoding.h"
|
||||||
#include "kcsan.h"
|
#include "kcsan.h"
|
||||||
|
#include "permissive.h"
|
||||||
|
|
||||||
static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
|
static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
|
||||||
unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
|
unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
|
||||||
@@ -301,9 +301,9 @@ static inline void reset_kcsan_skip(void)
|
|||||||
this_cpu_write(kcsan_skip, skip_count);
|
this_cpu_write(kcsan_skip, skip_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline bool kcsan_is_enabled(void)
|
static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx)
|
||||||
{
|
{
|
||||||
return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
|
return READ_ONCE(kcsan_enabled) && !ctx->disable_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Introduce delay depending on context and configuration. */
|
/* Introduce delay depending on context and configuration. */
|
||||||
@@ -353,10 +353,18 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
|
|||||||
atomic_long_t *watchpoint,
|
atomic_long_t *watchpoint,
|
||||||
long encoded_watchpoint)
|
long encoded_watchpoint)
|
||||||
{
|
{
|
||||||
|
const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
|
||||||
|
struct kcsan_ctx *ctx = get_ctx();
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
bool consumed;
|
bool consumed;
|
||||||
|
|
||||||
if (!kcsan_is_enabled())
|
/*
|
||||||
|
* We know a watchpoint exists. Let's try to keep the race-window
|
||||||
|
* between here and finally consuming the watchpoint below as small as
|
||||||
|
* possible -- avoid unnecessarily complex code until consumed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!kcsan_is_enabled(ctx))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -364,14 +372,22 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
|
|||||||
* reporting a race where e.g. the writer set up the watchpoint, but the
|
* reporting a race where e.g. the writer set up the watchpoint, but the
|
||||||
* reader has access_mask!=0, we have to ignore the found watchpoint.
|
* reader has access_mask!=0, we have to ignore the found watchpoint.
|
||||||
*/
|
*/
|
||||||
if (get_ctx()->access_mask != 0)
|
if (ctx->access_mask)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Consume the watchpoint as soon as possible, to minimize the chances
|
* If the other thread does not want to ignore the access, and there was
|
||||||
* of !consumed. Consuming the watchpoint must always be guarded by
|
* a value change as a result of this thread's operation, we will still
|
||||||
* kcsan_is_enabled() check, as otherwise we might erroneously
|
* generate a report of unknown origin.
|
||||||
* triggering reports when disabled.
|
*
|
||||||
|
* Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter.
|
||||||
|
*/
|
||||||
|
if (!is_assert && kcsan_ignore_address(ptr))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Consuming the watchpoint must be guarded by kcsan_is_enabled() to
|
||||||
|
* avoid erroneously triggering reports if the context is disabled.
|
||||||
*/
|
*/
|
||||||
consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
|
consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
|
||||||
|
|
||||||
@@ -391,7 +407,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
|
|||||||
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
|
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((type & KCSAN_ACCESS_ASSERT) != 0)
|
if (is_assert)
|
||||||
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
|
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
|
||||||
else
|
else
|
||||||
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
|
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
|
||||||
@@ -409,6 +425,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
|
|||||||
unsigned long access_mask;
|
unsigned long access_mask;
|
||||||
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
|
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
|
||||||
unsigned long ua_flags = user_access_save();
|
unsigned long ua_flags = user_access_save();
|
||||||
|
struct kcsan_ctx *ctx = get_ctx();
|
||||||
unsigned long irq_flags = 0;
|
unsigned long irq_flags = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -417,16 +434,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
|
|||||||
*/
|
*/
|
||||||
reset_kcsan_skip();
|
reset_kcsan_skip();
|
||||||
|
|
||||||
if (!kcsan_is_enabled())
|
if (!kcsan_is_enabled(ctx))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Special atomic rules: unlikely to be true, so we check them here in
|
* Check to-ignore addresses after kcsan_is_enabled(), as we may access
|
||||||
* the slow-path, and not in the fast-path in is_atomic(). Call after
|
* memory that is not yet initialized during early boot.
|
||||||
* kcsan_is_enabled(), as we may access memory that is not yet
|
|
||||||
* initialized during early boot.
|
|
||||||
*/
|
*/
|
||||||
if (!is_assert && kcsan_is_atomic_special(ptr))
|
if (!is_assert && kcsan_ignore_address(ptr))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (!check_encodable((unsigned long)ptr, size)) {
|
if (!check_encodable((unsigned long)ptr, size)) {
|
||||||
@@ -479,15 +494,6 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
|
|||||||
break; /* ignore; we do not diff the values */
|
break; /* ignore; we do not diff the values */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) {
|
|
||||||
kcsan_disable_current();
|
|
||||||
pr_err("watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n",
|
|
||||||
is_write ? "write" : "read", size, ptr,
|
|
||||||
watchpoint_slot((unsigned long)ptr),
|
|
||||||
encode_watchpoint((unsigned long)ptr, size, is_write));
|
|
||||||
kcsan_enable_current();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delay this thread, to increase probability of observing a racy
|
* Delay this thread, to increase probability of observing a racy
|
||||||
* conflicting access.
|
* conflicting access.
|
||||||
@@ -498,7 +504,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
|
|||||||
* Re-read value, and check if it is as expected; if not, we infer a
|
* Re-read value, and check if it is as expected; if not, we infer a
|
||||||
* racy access.
|
* racy access.
|
||||||
*/
|
*/
|
||||||
access_mask = get_ctx()->access_mask;
|
access_mask = ctx->access_mask;
|
||||||
new = 0;
|
new = 0;
|
||||||
switch (size) {
|
switch (size) {
|
||||||
case 1:
|
case 1:
|
||||||
@@ -521,8 +527,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
|
|||||||
if (access_mask)
|
if (access_mask)
|
||||||
diff &= access_mask;
|
diff &= access_mask;
|
||||||
|
|
||||||
/* Were we able to observe a value-change? */
|
/*
|
||||||
if (diff != 0)
|
* Check if we observed a value change.
|
||||||
|
*
|
||||||
|
* Also check if the data race should be ignored (the rules depend on
|
||||||
|
* non-zero diff); if it is to be ignored, the below rules for
|
||||||
|
* KCSAN_VALUE_CHANGE_MAYBE apply.
|
||||||
|
*/
|
||||||
|
if (diff && !kcsan_ignore_data_race(size, type, old, new, diff))
|
||||||
value_change = KCSAN_VALUE_CHANGE_TRUE;
|
value_change = KCSAN_VALUE_CHANGE_TRUE;
|
||||||
|
|
||||||
/* Check if this access raced with another. */
|
/* Check if this access raced with another. */
|
||||||
@@ -644,6 +656,15 @@ void __init kcsan_init(void)
|
|||||||
pr_info("enabled early\n");
|
pr_info("enabled early\n");
|
||||||
WRITE_ONCE(kcsan_enabled, true);
|
WRITE_ONCE(kcsan_enabled, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) ||
|
||||||
|
IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) ||
|
||||||
|
IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) ||
|
||||||
|
IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
|
||||||
|
pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n");
|
||||||
|
} else {
|
||||||
|
pr_info("strict mode configured\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* === Exported interface =================================================== */
|
/* === Exported interface =================================================== */
|
||||||
|
|||||||
@@ -414,6 +414,14 @@ static noinline void test_kernel_atomic_builtins(void)
|
|||||||
__atomic_load_n(&test_var, __ATOMIC_RELAXED);
|
__atomic_load_n(&test_var, __ATOMIC_RELAXED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static noinline void test_kernel_xor_1bit(void)
|
||||||
|
{
|
||||||
|
/* Do not report data races between the read-writes. */
|
||||||
|
kcsan_nestable_atomic_begin();
|
||||||
|
test_var ^= 0x10000;
|
||||||
|
kcsan_nestable_atomic_end();
|
||||||
|
}
|
||||||
|
|
||||||
/* ===== Test cases ===== */
|
/* ===== Test cases ===== */
|
||||||
|
|
||||||
/* Simple test with normal data race. */
|
/* Simple test with normal data race. */
|
||||||
@@ -952,6 +960,29 @@ static void test_atomic_builtins(struct kunit *test)
|
|||||||
KUNIT_EXPECT_FALSE(test, match_never);
|
KUNIT_EXPECT_FALSE(test, match_never);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__no_kcsan
|
||||||
|
static void test_1bit_value_change(struct kunit *test)
|
||||||
|
{
|
||||||
|
const struct expect_report expect = {
|
||||||
|
.access = {
|
||||||
|
{ test_kernel_read, &test_var, sizeof(test_var), 0 },
|
||||||
|
{ test_kernel_xor_1bit, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
bool match = false;
|
||||||
|
|
||||||
|
begin_test_checks(test_kernel_read, test_kernel_xor_1bit);
|
||||||
|
do {
|
||||||
|
match = IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)
|
||||||
|
? report_available()
|
||||||
|
: report_matches(&expect);
|
||||||
|
} while (!end_test_checks(match));
|
||||||
|
if (IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
|
||||||
|
KUNIT_EXPECT_FALSE(test, match);
|
||||||
|
else
|
||||||
|
KUNIT_EXPECT_TRUE(test, match);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate thread counts for all test cases. Values generated are in interval
|
* Generate thread counts for all test cases. Values generated are in interval
|
||||||
* [2, 5] followed by exponentially increasing thread counts from 8 to 32.
|
* [2, 5] followed by exponentially increasing thread counts from 8 to 32.
|
||||||
@@ -1024,6 +1055,7 @@ static struct kunit_case kcsan_test_cases[] = {
|
|||||||
KCSAN_KUNIT_CASE(test_jiffies_noreport),
|
KCSAN_KUNIT_CASE(test_jiffies_noreport),
|
||||||
KCSAN_KUNIT_CASE(test_seqlock_noreport),
|
KCSAN_KUNIT_CASE(test_seqlock_noreport),
|
||||||
KCSAN_KUNIT_CASE(test_atomic_builtins),
|
KCSAN_KUNIT_CASE(test_atomic_builtins),
|
||||||
|
KCSAN_KUNIT_CASE(test_1bit_value_change),
|
||||||
{},
|
{},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
94
kernel/kcsan/permissive.h
Normal file
94
kernel/kcsan/permissive.h
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/*
|
||||||
|
* Special rules for ignoring entire classes of data-racy memory accesses. None
|
||||||
|
* of the rules here imply that such data races are generally safe!
|
||||||
|
*
|
||||||
|
* All rules in this file can be configured via CONFIG_KCSAN_PERMISSIVE. Keep
|
||||||
|
* them separate from core code to make it easier to audit.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2019, Google LLC.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _KERNEL_KCSAN_PERMISSIVE_H
|
||||||
|
#define _KERNEL_KCSAN_PERMISSIVE_H
|
||||||
|
|
||||||
|
#include <linux/bitops.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Access ignore rules based on address.
|
||||||
|
*/
|
||||||
|
static __always_inline bool kcsan_ignore_address(const volatile void *ptr)
|
||||||
|
{
|
||||||
|
if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Data-racy bitops on current->flags are too common, ignore completely
|
||||||
|
* for now.
|
||||||
|
*/
|
||||||
|
return ptr == &current->flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Data race ignore rules based on access type and value change patterns.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
kcsan_ignore_data_race(size_t size, int type, u64 old, u64 new, u64 diff)
|
||||||
|
{
|
||||||
|
if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rules here are only for plain read accesses, so that we still report
|
||||||
|
* data races between plain read-write accesses.
|
||||||
|
*/
|
||||||
|
if (type || size > sizeof(long))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A common pattern is checking/setting just 1 bit in a variable; for
|
||||||
|
* example:
|
||||||
|
*
|
||||||
|
* if (flags & SOME_FLAG) { ... }
|
||||||
|
*
|
||||||
|
* and elsewhere flags is updated concurrently:
|
||||||
|
*
|
||||||
|
* flags |= SOME_OTHER_FLAG; // just 1 bit
|
||||||
|
*
|
||||||
|
* While it is still recommended that such accesses be marked
|
||||||
|
* appropriately, in many cases these types of data races are so common
|
||||||
|
* that marking them all is often unrealistic and left to maintainer
|
||||||
|
* preference.
|
||||||
|
*
|
||||||
|
* The assumption in all cases is that with all known compiler
|
||||||
|
* optimizations (including those that tear accesses), because no more
|
||||||
|
* than 1 bit changed, the plain accesses are safe despite the presence
|
||||||
|
* of data races.
|
||||||
|
*
|
||||||
|
* The rules here will ignore the data races if we observe no more than
|
||||||
|
* 1 bit changed.
|
||||||
|
*
|
||||||
|
* Of course many operations can effectively change just 1 bit, but the
|
||||||
|
* general assumption that data races involving 1-bit changes can be
|
||||||
|
* tolerated still applies.
|
||||||
|
*
|
||||||
|
* And in case a true bug is missed, the bug likely manifests as a
|
||||||
|
* reportable data race elsewhere.
|
||||||
|
*/
|
||||||
|
if (hweight64(diff) == 1) {
|
||||||
|
/*
|
||||||
|
* Exception: Report data races where the values look like
|
||||||
|
* ordinary booleans (one of them was 0 and the 0th bit was
|
||||||
|
* changed). More often than not, they come with interesting
|
||||||
|
* memory ordering requirements, so let's report them.
|
||||||
|
*/
|
||||||
|
if (!((!old || !new) && diff == 1))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _KERNEL_KCSAN_PERMISSIVE_H */
|
||||||
@@ -40,10 +40,14 @@ menuconfig KCSAN
|
|||||||
|
|
||||||
if KCSAN
|
if KCSAN
|
||||||
|
|
||||||
# Compiler capabilities that should not fail the test if they are unavailable.
|
|
||||||
config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
|
config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
|
||||||
def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
|
def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
|
||||||
(CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
|
(CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
|
||||||
|
help
|
||||||
|
The compiler instruments plain compound read-write operations
|
||||||
|
differently (++, --, +=, -=, |=, &=, etc.), which allows KCSAN to
|
||||||
|
distinguish them from other plain accesses. This is currently
|
||||||
|
supported by Clang 12 or later.
|
||||||
|
|
||||||
config KCSAN_VERBOSE
|
config KCSAN_VERBOSE
|
||||||
bool "Show verbose reports with more information about system state"
|
bool "Show verbose reports with more information about system state"
|
||||||
@@ -58,9 +62,6 @@ config KCSAN_VERBOSE
|
|||||||
generated from any one of them, system stability may suffer due to
|
generated from any one of them, system stability may suffer due to
|
||||||
deadlocks or recursion. If in doubt, say N.
|
deadlocks or recursion. If in doubt, say N.
|
||||||
|
|
||||||
config KCSAN_DEBUG
|
|
||||||
bool "Debugging of KCSAN internals"
|
|
||||||
|
|
||||||
config KCSAN_SELFTEST
|
config KCSAN_SELFTEST
|
||||||
bool "Perform short selftests on boot"
|
bool "Perform short selftests on boot"
|
||||||
default y
|
default y
|
||||||
@@ -149,7 +150,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
|
|||||||
KCSAN_WATCH_SKIP.
|
KCSAN_WATCH_SKIP.
|
||||||
|
|
||||||
config KCSAN_INTERRUPT_WATCHER
|
config KCSAN_INTERRUPT_WATCHER
|
||||||
bool "Interruptible watchers"
|
bool "Interruptible watchers" if !KCSAN_STRICT
|
||||||
|
default KCSAN_STRICT
|
||||||
help
|
help
|
||||||
If enabled, a task that set up a watchpoint may be interrupted while
|
If enabled, a task that set up a watchpoint may be interrupted while
|
||||||
delayed. This option will allow KCSAN to detect races between
|
delayed. This option will allow KCSAN to detect races between
|
||||||
@@ -169,13 +171,9 @@ config KCSAN_REPORT_ONCE_IN_MS
|
|||||||
reporting to avoid flooding the console with reports. Setting this
|
reporting to avoid flooding the console with reports. Setting this
|
||||||
to 0 disables rate limiting.
|
to 0 disables rate limiting.
|
||||||
|
|
||||||
# The main purpose of the below options is to control reported data races (e.g.
|
# The main purpose of the below options is to control reported data races, and
|
||||||
# in fuzzer configs), and are not expected to be switched frequently by other
|
# are not expected to be switched frequently by non-testers or at runtime.
|
||||||
# users. We could turn some of them into boot parameters, but given they should
|
# The defaults are chosen to be conservative, and can miss certain bugs.
|
||||||
# not be switched normally, let's keep them here to simplify configuration.
|
|
||||||
#
|
|
||||||
# The defaults below are chosen to be very conservative, and may miss certain
|
|
||||||
# bugs.
|
|
||||||
|
|
||||||
config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
|
config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
|
||||||
bool "Report races of unknown origin"
|
bool "Report races of unknown origin"
|
||||||
@@ -186,9 +184,17 @@ config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
|
|||||||
reported if it was only possible to infer a race due to a data value
|
reported if it was only possible to infer a race due to a data value
|
||||||
change while an access is being delayed on a watchpoint.
|
change while an access is being delayed on a watchpoint.
|
||||||
|
|
||||||
|
config KCSAN_STRICT
|
||||||
|
bool "Strict data-race checking"
|
||||||
|
help
|
||||||
|
KCSAN will report data races with the strictest possible rules, which
|
||||||
|
closely aligns with the rules defined by the Linux-kernel memory
|
||||||
|
consistency model (LKMM).
|
||||||
|
|
||||||
config KCSAN_REPORT_VALUE_CHANGE_ONLY
|
config KCSAN_REPORT_VALUE_CHANGE_ONLY
|
||||||
bool "Only report races where watcher observed a data value change"
|
bool "Only report races where watcher observed a data value change"
|
||||||
default y
|
default y
|
||||||
|
depends on !KCSAN_STRICT
|
||||||
help
|
help
|
||||||
If enabled and a conflicting write is observed via a watchpoint, but
|
If enabled and a conflicting write is observed via a watchpoint, but
|
||||||
the data value of the memory location was observed to remain
|
the data value of the memory location was observed to remain
|
||||||
@@ -197,6 +203,7 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY
|
|||||||
config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
|
config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
|
||||||
bool "Assume that plain aligned writes up to word size are atomic"
|
bool "Assume that plain aligned writes up to word size are atomic"
|
||||||
default y
|
default y
|
||||||
|
depends on !KCSAN_STRICT
|
||||||
help
|
help
|
||||||
Assume that plain aligned writes up to word size are atomic by
|
Assume that plain aligned writes up to word size are atomic by
|
||||||
default, and also not subject to other unsafe compiler optimizations
|
default, and also not subject to other unsafe compiler optimizations
|
||||||
@@ -209,6 +216,7 @@ config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
|
|||||||
|
|
||||||
config KCSAN_IGNORE_ATOMICS
|
config KCSAN_IGNORE_ATOMICS
|
||||||
bool "Do not instrument marked atomic accesses"
|
bool "Do not instrument marked atomic accesses"
|
||||||
|
depends on !KCSAN_STRICT
|
||||||
help
|
help
|
||||||
Never instrument marked atomic accesses. This option can be used for
|
Never instrument marked atomic accesses. This option can be used for
|
||||||
additional filtering. Conflicting marked atomic reads and plain
|
additional filtering. Conflicting marked atomic reads and plain
|
||||||
@@ -224,4 +232,14 @@ config KCSAN_IGNORE_ATOMICS
|
|||||||
due to two conflicting plain writes will be reported (aligned and
|
due to two conflicting plain writes will be reported (aligned and
|
||||||
unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n).
|
unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n).
|
||||||
|
|
||||||
|
config KCSAN_PERMISSIVE
|
||||||
|
bool "Enable all additional permissive rules"
|
||||||
|
depends on KCSAN_REPORT_VALUE_CHANGE_ONLY
|
||||||
|
help
|
||||||
|
Enable additional permissive rules to ignore certain classes of data
|
||||||
|
races (also see kernel/kcsan/permissive.h). None of the permissive
|
||||||
|
rules imply that such data races are generally safe, but can be used
|
||||||
|
to further reduce reported data races due to data-racy patterns
|
||||||
|
common across the kernel.
|
||||||
|
|
||||||
endif # KCSAN
|
endif # KCSAN
|
||||||
|
|||||||
@@ -37,7 +37,9 @@ compiler's use of code-motion and common-subexpression optimizations.
|
|||||||
Therefore, if a given access is involved in an intentional data race,
|
Therefore, if a given access is involved in an intentional data race,
|
||||||
using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
|
using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
|
||||||
preferable to data_race(), which in turn is usually preferable to plain
|
preferable to data_race(), which in turn is usually preferable to plain
|
||||||
C-language accesses.
|
C-language accesses. It is permissible to combine #2 and #3, for example,
|
||||||
|
data_race(READ_ONCE(a)), which will both restrict compiler optimizations
|
||||||
|
and disable KCSAN diagnostics.
|
||||||
|
|
||||||
KCSAN will complain about many types of data races involving plain
|
KCSAN will complain about many types of data races involving plain
|
||||||
C-language accesses, but marking all accesses involved in a given data
|
C-language accesses, but marking all accesses involved in a given data
|
||||||
@@ -86,6 +88,10 @@ that fail to exclude the updates. In this case, it is important to use
|
|||||||
data_race() for the diagnostic reads because otherwise KCSAN would give
|
data_race() for the diagnostic reads because otherwise KCSAN would give
|
||||||
false-positive warnings about these diagnostic reads.
|
false-positive warnings about these diagnostic reads.
|
||||||
|
|
||||||
|
If it is necessary to both restrict compiler optimizations and disable
|
||||||
|
KCSAN diagnostics, use both data_race() and READ_ONCE(), for example,
|
||||||
|
data_race(READ_ONCE(a)).
|
||||||
|
|
||||||
In theory, plain C-language loads can also be used for this use case.
|
In theory, plain C-language loads can also be used for this use case.
|
||||||
However, in practice this will have the disadvantage of causing KCSAN
|
However, in practice this will have the disadvantage of causing KCSAN
|
||||||
to generate false positives because KCSAN will have no way of knowing
|
to generate false positives because KCSAN will have no way of knowing
|
||||||
@@ -126,6 +132,11 @@ consistent errors, which in turn are quite capable of breaking heuristics.
|
|||||||
Therefore use of data_race() should be limited to cases where some other
|
Therefore use of data_race() should be limited to cases where some other
|
||||||
code (such as a barrier() call) will force the occasional reload.
|
code (such as a barrier() call) will force the occasional reload.
|
||||||
|
|
||||||
|
Note that this use case requires that the heuristic be able to handle
|
||||||
|
any possible error. In contrast, if the heuristics might be fatally
|
||||||
|
confused by one or more of the possible erroneous values, use READ_ONCE()
|
||||||
|
instead of data_race().
|
||||||
|
|
||||||
In theory, plain C-language loads can also be used for this use case.
|
In theory, plain C-language loads can also be used for this use case.
|
||||||
However, in practice this will have the disadvantage of causing KCSAN
|
However, in practice this will have the disadvantage of causing KCSAN
|
||||||
to generate false positives because KCSAN will have no way of knowing
|
to generate false positives because KCSAN will have no way of knowing
|
||||||
@@ -259,9 +270,9 @@ diagnostic purposes. The code might look as follows:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int read_foo_diagnostic(void)
|
void read_foo_diagnostic(void)
|
||||||
{
|
{
|
||||||
return data_race(foo);
|
pr_info("Current value of foo: %d\n", data_race(foo));
|
||||||
}
|
}
|
||||||
|
|
||||||
The reader-writer lock prevents the compiler from introducing concurrency
|
The reader-writer lock prevents the compiler from introducing concurrency
|
||||||
@@ -274,19 +285,34 @@ tells KCSAN that data races are expected, and should be silently
|
|||||||
ignored. This data_race() also tells the human reading the code that
|
ignored. This data_race() also tells the human reading the code that
|
||||||
read_foo_diagnostic() might sometimes return a bogus value.
|
read_foo_diagnostic() might sometimes return a bogus value.
|
||||||
|
|
||||||
However, please note that your kernel must be built with
|
If it is necessary to suppress compiler optimization and also detect
|
||||||
CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to
|
buggy lockless writes, read_foo_diagnostic() can be updated as follows:
|
||||||
detect a buggy lockless write. If you need KCSAN to detect such a
|
|
||||||
write even if that write did not change the value of foo, you also
|
void read_foo_diagnostic(void)
|
||||||
need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to
|
{
|
||||||
detect such a write happening in an interrupt handler running on the
|
pr_info("Current value of foo: %d\n", data_race(READ_ONCE(foo)));
|
||||||
same CPU doing the legitimate lock-protected write, you also need
|
}
|
||||||
CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig
|
|
||||||
options set properly, KCSAN can be quite helpful, although it is not
|
Alternatively, given that KCSAN is to ignore all accesses in this function,
|
||||||
necessarily a full replacement for hardware watchpoints. On the other
|
this function can be marked __no_kcsan and the data_race() can be dropped:
|
||||||
hand, neither are hardware watchpoints a full replacement for KCSAN
|
|
||||||
because it is not always easy to tell hardware watchpoint to conditionally
|
void __no_kcsan read_foo_diagnostic(void)
|
||||||
trap on accesses.
|
{
|
||||||
|
pr_info("Current value of foo: %d\n", READ_ONCE(foo));
|
||||||
|
}
|
||||||
|
|
||||||
|
However, in order for KCSAN to detect buggy lockless writes, your kernel
|
||||||
|
must be built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. If you
|
||||||
|
need KCSAN to detect such a write even if that write did not change
|
||||||
|
the value of foo, you also need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.
|
||||||
|
If you need KCSAN to detect such a write happening in an interrupt handler
|
||||||
|
running on the same CPU doing the legitimate lock-protected write, you
|
||||||
|
also need CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these
|
||||||
|
Kconfig options set properly, KCSAN can be quite helpful, although
|
||||||
|
it is not necessarily a full replacement for hardware watchpoints.
|
||||||
|
On the other hand, neither are hardware watchpoints a full replacement
|
||||||
|
for KCSAN because it is not always easy to tell hardware watchpoints to
|
||||||
|
conditionally trap on accesses.
|
||||||
|
|
||||||
|
|
||||||
Lock-Protected Writes With Lockless Reads
|
Lock-Protected Writes With Lockless Reads
|
||||||
@@ -319,6 +345,99 @@ of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy
|
|||||||
concurrent lockless write.
|
concurrent lockless write.
|
||||||
|
|
||||||
|
|
||||||
|
Lock-Protected Writes With Heuristic Lockless Reads
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
For another example, suppose that the code can normally make use of
|
||||||
|
a per-data-structure lock, but there are times when a global lock
|
||||||
|
is required. These times are indicated via a global flag. The code
|
||||||
|
might look as follows, and is based loosely on nf_conntrack_lock(),
|
||||||
|
nf_conntrack_all_lock(), and nf_conntrack_all_unlock():
|
||||||
|
|
||||||
|
bool global_flag;
|
||||||
|
DEFINE_SPINLOCK(global_lock);
|
||||||
|
struct foo {
|
||||||
|
spinlock_t f_lock;
|
||||||
|
int f_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* All foo structures are in the following array. */
|
||||||
|
int nfoo;
|
||||||
|
struct foo *foo_array;
|
||||||
|
|
||||||
|
void do_something_locked(struct foo *fp)
|
||||||
|
{
|
||||||
|
/* This works even if data_race() returns nonsense. */
|
||||||
|
if (!data_race(global_flag)) {
|
||||||
|
spin_lock(&fp->f_lock);
|
||||||
|
if (!smp_load_acquire(&global_flag)) {
|
||||||
|
do_something(fp);
|
||||||
|
spin_unlock(&fp->f_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
spin_unlock(&fp->f_lock);
|
||||||
|
}
|
||||||
|
spin_lock(&global_lock);
|
||||||
|
/* global_lock held, thus global_flag cannot be set. */
|
||||||
|
spin_lock(&fp->f_lock);
|
||||||
|
spin_unlock(&global_lock);
|
||||||
|
/*
|
||||||
|
* global_flag might be set here, but begin_global()
|
||||||
|
* will wait for ->f_lock to be released.
|
||||||
|
*/
|
||||||
|
do_something(fp);
|
||||||
|
spin_unlock(&fp->f_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin_global(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
spin_lock(&global_lock);
|
||||||
|
WRITE_ONCE(global_flag, true);
|
||||||
|
for (i = 0; i < nfoo; i++) {
|
||||||
|
/*
|
||||||
|
* Wait for pre-existing local locks. One at
|
||||||
|
* a time to avoid lockdep limitations.
|
||||||
|
*/
|
||||||
|
spin_lock(&foo_array[i].f_lock);
|
||||||
|
spin_unlock(&foo_array[i].f_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void end_global(void)
|
||||||
|
{
|
||||||
|
smp_store_release(&global_flag, false);
|
||||||
|
spin_unlock(&global_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
All code paths leading from the do_something_locked() function's first
|
||||||
|
read from global_flag acquire a lock, so endless load fusing cannot
|
||||||
|
happen.
|
||||||
|
|
||||||
|
If the value read from global_flag is false, then global_flag is
|
||||||
|
rechecked while holding ->f_lock, which, if global_flag is now false,
|
||||||
|
prevents begin_global() from completing. It is therefore safe to invoke
|
||||||
|
do_something().
|
||||||
|
|
||||||
|
Otherwise, if either value read from global_flag is true, then after
|
||||||
|
global_lock is acquired global_flag must be false. The acquisition of
|
||||||
|
->f_lock will prevent any call to begin_global() from returning, which
|
||||||
|
means that it is safe to release global_lock and invoke do_something().
|
||||||
|
|
||||||
|
For this to work, only those foo structures in foo_array[] may be passed
|
||||||
|
to do_something_locked(). The reason for this is that the synchronization
|
||||||
|
with begin_global() relies on momentarily holding the lock of each and
|
||||||
|
every foo structure.
|
||||||
|
|
||||||
|
The smp_load_acquire() and smp_store_release() are required because
|
||||||
|
changes to a foo structure between calls to begin_global() and
|
||||||
|
end_global() are carried out without holding that structure's ->f_lock.
|
||||||
|
The smp_load_acquire() and smp_store_release() ensure that the next
|
||||||
|
invocation of do_something() from do_something_locked() will see those
|
||||||
|
changes.
|
||||||
|
|
||||||
|
|
||||||
Lockless Reads and Writes
|
Lockless Reads and Writes
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user