Merge e6a7cf70a3 ("Merge tag 'filelock-v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux") into android-mainline

Steps on the way to 6.0-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I7555a50c3e1a03cefed70662205462cf8028f09f
This commit is contained in:
Greg Kroah-Hartman
2022-08-03 15:40:04 +02:00
36 changed files with 1145 additions and 923 deletions

View File

@@ -7485,6 +7485,8 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
R: Yue Hu <huyue2@coolpad.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git

View File

@@ -9,6 +9,15 @@ menuconfig DLM
A general purpose distributed lock manager for kernel or userspace
applications.
config DLM_DEPRECATED_API
bool "DLM deprecated API"
depends on DLM
help
Enables deprecated DLM timeout features that will be removed in
later Linux kernel releases.
If you are unsure, say N.
config DLM_DEBUG
bool "DLM debugging"
depends on DLM

View File

@@ -9,7 +9,6 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
netlink.o \
lowcomms.o \
plock.o \
rcom.o \
@@ -18,5 +17,6 @@ dlm-y := ast.o \
requestqueue.o \
user.o \
util.o
dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o

View File

@@ -255,13 +255,13 @@ void dlm_callback_work(struct work_struct *work)
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
bastfn(lkb->lkb_astparam, callbacks[i].mode);
trace_dlm_bast(ls, lkb, callbacks[i].mode);
bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
trace_dlm_ast(ls, lkb);
castfn(lkb->lkb_astparam);
trace_dlm_ast(ls, lkb, lkb->lkb_lksb);
}
}

View File

@@ -75,8 +75,9 @@ struct dlm_cluster {
unsigned int cl_log_info;
unsigned int cl_protocol;
unsigned int cl_mark;
#ifdef CONFIG_DLM_DEPRECATED_API
unsigned int cl_timewarn_cs;
unsigned int cl_waitwarn_us;
#endif
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
@@ -102,8 +103,9 @@ enum {
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_MARK,
#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR_TIMEWARN_CS,
CLUSTER_ATTR_WAITWARN_US,
#endif
CLUSTER_ATTR_NEW_RSB_COUNT,
CLUSTER_ATTR_RECOVER_CALLBACKS,
CLUSTER_ATTR_CLUSTER_NAME,
@@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
CLUSTER_ATTR(waitwarn_us, NULL);
#endif
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
@@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
[CLUSTER_ATTR_MARK] = &cluster_attr_mark,
#ifdef CONFIG_DLM_DEPRECATED_API
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
#endif
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks,
[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name,
@@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_log_info = dlm_config.ci_log_info;
cl->cl_protocol = dlm_config.ci_protocol;
#ifdef CONFIG_DLM_DEPRECATED_API
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
#endif
cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
@@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL DLM_PROTO_TCP
#define DEFAULT_MARK 0
#ifdef CONFIG_DLM_DEPRECATED_API
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
#define DEFAULT_WAITWARN_US 0
#endif
#define DEFAULT_NEW_RSB_COUNT 128
#define DEFAULT_RECOVER_CALLBACKS 0
#define DEFAULT_CLUSTER_NAME ""
@@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = {
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_mark = DEFAULT_MARK,
#ifdef CONFIG_DLM_DEPRECATED_API
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
.ci_waitwarn_us = DEFAULT_WAITWARN_US,
#endif
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
.ci_cluster_name = DEFAULT_CLUSTER_NAME

View File

@@ -37,8 +37,9 @@ struct dlm_config_info {
int ci_log_info;
int ci_protocol;
int ci_mark;
#ifdef CONFIG_DLM_DEPRECATED_API
int ci_timewarn_cs;
int ci_waitwarn_us;
#endif
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];

View File

@@ -145,7 +145,9 @@ struct dlm_args {
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
#ifdef CONFIG_DLM_DEPRECATED_API
unsigned long timeout;
#endif
};
@@ -203,10 +205,20 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
#ifdef CONFIG_DLM_DEPRECATED_API
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
/* least significant 2 bytes are message changed, they are full transmitted
* but at receive side only the 2 bytes LSB will be set.
*
* Even wireshark dlm dissector does only evaluate the lower bytes and note
* that they may not be used on transceiver side, we assume the higher bytes
* are for internal use or reserved so long they are not parsed on receiver
* side.
*/
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -249,10 +261,12 @@ struct dlm_lkb {
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
ktime_t lkb_wait_time;
#ifdef CONFIG_DLM_DEPRECATED_API
struct list_head lkb_time_list;
unsigned long lkb_timeout_cs;
#endif
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
@@ -568,8 +582,10 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
#ifdef CONFIG_DLM_DEPRECATED_API
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
#endif
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
@@ -606,8 +622,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
struct completion ls_members_done;
int ls_members_result;
struct completion ls_recovery_done;
int ls_recovery_result;
struct miscdevice ls_device;
@@ -688,7 +704,9 @@ struct dlm_ls {
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
#ifdef CONFIG_DLM_DEPRECATED_API
#define LSFL_TIMEWARN 8
#endif
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
@@ -741,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls)
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
/*
 * Netlink and timeout-warning entry points.
 *
 * With CONFIG_DLM_DEPRECATED_API they are declared here for an out-of-line
 * implementation; without it they collapse to empty inline stubs so callers
 * need no #ifdef of their own. dlm_netlink_init() reports success (0) in the
 * stub case.
 */
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
#else
static inline int dlm_netlink_init(void) { return 0; }
/* no trailing ';' after the body: a lone ';' at file scope is not valid ISO C */
static inline void dlm_netlink_exit(void) { }
static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { }
#endif
int dlm_plock_init(void);
void dlm_plock_exit(void);

View File

@@ -296,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
#ifdef CONFIG_DLM_DEPRECATED_API
/* if the operation was a cancel, then return -DLM_ECANCEL, if a
timeout caused the cancel then return -ETIMEDOUT */
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
rv = -ETIMEDOUT;
}
#endif
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
@@ -1210,7 +1212,9 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&lkb->lkb_time_list);
#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
@@ -1306,6 +1310,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
kref_get(&lkb->lkb_ref);
}
/*
 * Release callback passed to kref_put() by unhold_lkb(). unhold_lkb() is only
 * meant to drop a reference that is known not to be the last one, so getting
 * here is a bug: dump the lkb and trip the assertion.
 */
static void unhold_lkb_assert(struct kref *kref)
{
	struct dlm_lkb *last_ref_lkb;

	last_ref_lkb = container_of(kref, struct dlm_lkb, lkb_ref);
	DLM_ASSERT(false, dlm_print_lkb(last_ref_lkb););
}
/* This is called when we need to remove a reference and are certain
it's not the last ref. e.g. del_lkb is always called between a
find_lkb/put_lkb and is always the inverse of a previous add_lkb.
@@ -1313,9 +1324,7 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
static inline void unhold_lkb(struct dlm_lkb *lkb)
{
int rv;
rv = kref_put(&lkb->lkb_ref, kill_lkb);
DLM_ASSERT(!rv, dlm_print_lkb(lkb););
kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}
static void lkb_add_ordered(struct list_head *new, struct list_head *head,
@@ -1402,75 +1411,6 @@ static int msg_reply_type(int mstype)
return -1;
}
/*
 * Remember @nodeid in the @warned table and report whether it was already
 * there.
 *
 * @warned has @num_nodes slots and a zero slot counts as unused. Slots are
 * visited in order: the first unused slot receives @nodeid and 0 is returned;
 * a slot already equal to @nodeid returns 1. If every slot holds some other
 * id, nothing is stored and 0 is returned.
 */
static int nodeid_warned(int nodeid, int num_nodes, int *warned)
{
	int idx = 0;

	while (idx < num_nodes) {
		int seen = warned[idx];

		if (seen == 0) {
			/* first free slot: record this node as warned now */
			warned[idx] = nodeid;
			return 0;
		}
		if (seen == nodeid)
			return 1;
		idx++;
	}

	return 0;
}
/*
 * Walk the lockspace's waiters list and complain about lkbs that have been
 * waiting for a reply for at least dlm_config.ci_waitwarn_us microseconds.
 *
 * Does nothing when ci_waitwarn_us is zero. An lkb that has expired gets its
 * lkb_wait_time cleared so it is skipped on later scans. At most one
 * "waitwarn" error is logged per node id per scan, tracked in a scratch
 * table allocated lazily on the first expired lkb; if that allocation fails
 * the per-node messages are simply skipped. A debug summary is logged when
 * at least one lkb expired.
 */
void dlm_scan_waiters(struct dlm_ls *ls)
{
	int *warned = NULL;
	int num_nodes = 0;
	u32 debug_scanned = 0;
	u32 debug_expired = 0;
	s64 debug_maxus = 0;
	struct dlm_lkb *lkb;
	s64 us;

	if (!dlm_config.ci_waitwarn_us)
		return;

	mutex_lock(&ls->ls_waiters_mutex);

	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
		/* zero wait time: not being timed (or already reported) */
		if (!lkb->lkb_wait_time)
			continue;

		debug_scanned++;

		us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
		if (us < dlm_config.ci_waitwarn_us)
			continue;

		/* expired: stop timing this lkb and account for it */
		lkb->lkb_wait_time = 0;
		debug_expired++;
		if (debug_maxus < us)
			debug_maxus = us;

		/* allocate the per-node table on the first expired lkb only */
		if (!num_nodes) {
			num_nodes = ls->ls_num_nodes;
			warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
		}

		if (warned &&
		    !nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
			log_error(ls, "waitwarn %x %lld %d us check connection to "
				  "node %d", lkb->lkb_id, (long long)us,
				  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
	}

	mutex_unlock(&ls->ls_waiters_mutex);
	kfree(warned);

	if (debug_expired)
		log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
			  debug_scanned, debug_expired,
			  dlm_config.ci_waitwarn_us, (long long)debug_maxus);
}
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
@@ -1514,7 +1454,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
lkb->lkb_wait_time = ktime_get();
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
@@ -1842,6 +1781,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
@@ -1962,17 +1902,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
if (!dlm_config.ci_waitwarn_us)
return;
mutex_lock(&ls->ls_waiters_mutex);
list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
if (ktime_to_us(lkb->lkb_wait_time))
lkb->lkb_wait_time = ktime_get();
}
mutex_unlock(&ls->ls_waiters_mutex);
}
#else
static void add_timeout(struct dlm_lkb *lkb) { }
static void del_timeout(struct dlm_lkb *lkb) { }
#endif
/* lkb is master or local copy */
@@ -2837,12 +2771,20 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, unsigned long timeout_cs,
void (*ast) (void *astparam),
void *astparam,
void (*bast) (void *astparam, int mode),
struct dlm_args *args)
#else
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, void (*ast)(void *astparam),
void *astparam,
void (*bast)(void *astparam, int mode),
struct dlm_args *args)
#endif
{
int rv = -EINVAL;
@@ -2895,7 +2837,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
args->astfn = ast;
args->astparam = astparam;
args->bastfn = bast;
#ifdef CONFIG_DLM_DEPRECATED_API
args->timeout = timeout_cs;
#endif
args->mode = mode;
args->lksb = lksb;
rv = 0;
@@ -2951,7 +2895,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
#ifdef CONFIG_DLM_DEPRECATED_API
lkb->lkb_timeout_cs = args->timeout;
#endif
rv = 0;
out:
if (rv)
@@ -3472,10 +3418,15 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error)
goto out;
trace_dlm_lock_start(ls, lkb, mode, flags);
trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
#else
error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
&args);
#endif
if (error)
goto out_put;
@@ -3487,7 +3438,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
trace_dlm_lock_end(ls, lkb, mode, flags, error);
trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
if (convert || error)
__put_lkb(ls, lkb);
@@ -5839,9 +5790,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
return 0;
}
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs)
#else
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen)
#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5864,8 +5820,13 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
#else
error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
fake_bastfn, &args);
#endif
if (error) {
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
@@ -5904,9 +5865,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
return error;
}
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs)
#else
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5941,8 +5907,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua->bastaddr = ua_tmp->bastaddr;
ua->user_lksb = ua_tmp->user_lksb;
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
#else
error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
fake_bastfn, &args);
#endif
if (error)
goto out_put;
@@ -5966,7 +5937,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs, uint32_t *lkid)
uint32_t *lkid)
{
struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;

View File

@@ -24,9 +24,15 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
void dlm_scan_waiters(struct dlm_ls *ls);
/* Timeout scanning/adjustment is declared only with
 * CONFIG_DLM_DEPRECATED_API; otherwise both calls are empty inline stubs.
 */
#ifdef CONFIG_DLM_DEPRECATED_API
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
#else
static inline void dlm_scan_timeout(struct dlm_ls *ls) { }
static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
#endif
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
@@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
#else
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
#endif
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs, uint32_t *lkid);
uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,

View File

@@ -275,7 +275,6 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
@@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster,
ls->ls_ops_arg = ops_arg;
}
if (flags & DLM_LSFL_TIMEWARN)
#ifdef CONFIG_DLM_DEPRECATED_API
if (flags & DLM_LSFL_TIMEWARN) {
pr_warn_once("===============================================================\n"
"WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n"
" will be removed in v6.2!\n"
" Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n"
"===============================================================\n");
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
}
/* ls_exflags are forced to match among nodes, and we don't
need to require all nodes to have some flags set */
* need to require all nodes to have some flags set
*/
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
#else
/* ls_exflags are forced to match among nodes, and we don't
* need to require all nodes to have some flags set
*/
ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL));
#endif
size = READ_ONCE(dlm_config.ci_rsbtbl_size);
ls->ls_rsbtbl_size = size;
@@ -527,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster,
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
#endif
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
@@ -548,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
init_completion(&ls->ls_recovery_done);
ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
@@ -645,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster,
if (error)
goto out_recoverd;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
/* wait until recovery is successful or failed */
wait_for_completion(&ls->ls_recovery_done);
error = ls->ls_recovery_result;
if (error)
goto out_members;

View File

@@ -529,7 +529,7 @@ static void lowcomms_write_space(struct sock *sk)
return;
if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
log_print("successful connected to node %d", con->nodeid);
log_print("connected to node %d", con->nodeid);
queue_work(send_workqueue, &con->swork);
return;
}
@@ -1931,7 +1931,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
return ret;
if (!test_and_set_bit(CF_CONNECTED, &con->flags))
log_print("successful connected to node %d", con->nodeid);
log_print("connected to node %d", con->nodeid);
return 0;
}

View File

@@ -534,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
* count as a negative change so the "neg" recovery steps will happen
*
* This functionality must report all member changes to lsops or
* midcomms layer and must never return before.
*/
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
@@ -583,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg;
error = ping_members(ls);
/* error -EINTR means that a new recovery action is triggered.
* We ignore this recovery action and let run the new one which might
* have new member configuration.
*/
if (error == -EINTR)
error = 0;
/* new_lockspace() may be waiting to know if the config
* is good or bad
*/
ls->ls_members_result = error;
complete(&ls->ls_members_done);
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
@@ -675,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
dlm_lsop_recover_prep(ls);
/* call recover_prep ops only once and not multiple times
* for each possible dlm_ls_stop() when recovery is already
* stopped.
*
* If we successfully cleared the LSFL_RUNNING bit and it was
* previously set, we know this is the first dlm_ls_stop() call.
*/
if (new)
dlm_lsop_recover_prep(ls);
return 0;
}

View File

@@ -29,6 +29,8 @@ struct plock_async_data {
struct plock_op {
struct list_head list;
int done;
/* if lock op got interrupted while waiting dlm_controld reply */
bool sigint;
struct dlm_plock_info info;
/* if set indicates async handling */
struct plock_async_data *data;
@@ -79,8 +81,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
static void do_unlock_close(struct dlm_ls *ls, u64 number,
struct file *file, struct file_lock *fl)
static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
@@ -89,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
op->info.pid = fl->fl_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.pid = info->pid;
op->info.fsid = info->fsid;
op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
@@ -161,16 +159,24 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = wait_event_interruptible(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
spin_lock(&ops_lock);
list_del(&op->list);
/* recheck under ops_lock if we got a done != 0,
* if so this interrupt case should be ignored
*/
if (op->done != 0) {
spin_unlock(&ops_lock);
goto do_lock_wait;
}
op->sigint = true;
spin_unlock(&ops_lock);
log_print("%s: wait interrupted %x %llx, op removed",
log_debug(ls, "%s: wait interrupted %x %llx pid %d",
__func__, ls->ls_global_id,
(unsigned long long)number);
dlm_release_plock_op(op);
do_unlock_close(ls, number, file, fl);
(unsigned long long)number, op->info.pid);
goto out;
}
do_lock_wait:
WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
@@ -378,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_entry(send_list.next, struct plock_op, list);
op = list_first_entry(&send_list, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
@@ -425,6 +431,19 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (iter->info.fsid == info.fsid &&
iter->info.number == info.number &&
iter->info.owner == info.owner) {
if (iter->sigint) {
list_del(&iter->list);
spin_unlock(&ops_lock);
pr_debug("%s: sigint cleanup %x %llx pid %d",
__func__, iter->info.fsid,
(unsigned long long)iter->info.number,
iter->info.pid);
do_unlock_close(&iter->info);
memcpy(&iter->info, &info, sizeof(info));
dlm_release_plock_op(iter);
return count;
}
list_del_init(&iter->list);
memcpy(&iter->info, &info, sizeof(info));
if (iter->data)
@@ -443,7 +462,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
else
wake_up(&recv_wq);
} else
log_print("%s: no op %x %llx - may got interrupted?", __func__,
log_print("%s: no op %x %llx", __func__,
info.fsid, (unsigned long long)info.number);
return count;
}

View File

@@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
/*
* Add or remove nodes from the lockspace's ls_nodes list.
*
* Due to the fact that we must report all membership changes to lsops
* or midcomms layer, it is not permitted to abort ls_recover() until
* this is done.
*/
error = dlm_recover_members(ls, rv, &neg);
@@ -239,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
log_rinfo(ls, "dlm_recover %llu error %d",
(unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
return error;
}
@@ -257,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
int error;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
@@ -266,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_unlock(&ls->ls_recover_lock);
if (rv) {
ls_recover(ls, rv);
error = ls_recover(ls, rv);
switch (error) {
case 0:
ls->ls_recovery_result = 0;
complete(&ls->ls_recovery_done);
dlm_lsop_recover_done(ls);
break;
case -EINTR:
/* if recovery was interrupted -EINTR we wait for the next
* ls_recover() iteration until it hopefully succeeds.
*/
log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
__func__, (unsigned long long)rv->seq);
break;
default:
log_rinfo(ls, "%s %llu error %d", __func__,
(unsigned long long)rv->seq, error);
/* let new_lockspace() get aware of critical error */
ls->ls_recovery_result = error;
complete(&ls->ls_recovery_done);
break;
}
kfree(rv->nodes);
kfree(rv);
}

View File

@@ -250,6 +250,14 @@ static int device_user_lock(struct dlm_user_proc *proc,
goto out;
}
#ifdef CONFIG_DLM_DEPRECATED_API
if (params->timeout)
pr_warn_once("========================================================\n"
"WARNING: the lkb timeout feature is being deprecated and\n"
" will be removed in v6.2!\n"
"========================================================\n");
#endif
ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
if (!ua)
goto out;
@@ -262,23 +270,34 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT) {
#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
#else
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb);
#endif
} else if (params->flags & DLM_LKF_ORPHAN) {
error = dlm_user_adopt_orphan(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout,
&lkid);
if (!error)
error = lkid;
} else {
#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout);
#else
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen);
#endif
if (!error)
error = ua->lksb.sb_lkid;
}

View File

@@ -17,7 +17,7 @@ struct z_erofs_decompress_req {
/* indicate the algorithm will be used for decompression */
unsigned int alg;
bool inplace_io, partial_decoding;
bool inplace_io, partial_decoding, fillgaps;
};
struct z_erofs_decompressor {

View File

@@ -366,42 +366,33 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
/*
 * Check that a direct-I/O read is block aligned.
 *
 * The file position, the remaining byte count of @to and the alignment of
 * its buffers are OR-ed together and tested against a block-size mask. The
 * mask comes from the backing block device's logical block size (rounded
 * down to a power of two via ilog2) when the superblock has a bdev, and
 * from the inode's i_blkbits otherwise.
 *
 * Returns 0 when everything is aligned, -EINVAL otherwise.
 */
static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct block_device *bdev = inode->i_sb->s_bdev;
	loff_t align = iocb->ki_pos | iov_iter_count(to) |
		       iov_iter_alignment(to);
	unsigned int blkbits;
	unsigned int blksize_mask;

	blkbits = bdev ? ilog2(bdev_logical_block_size(bdev)) :
			 inode->i_blkbits;
	blksize_mask = (1 << blkbits) - 1;

	return (align & blksize_mask) ? -EINVAL : 0;
}
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
/* no need taking (shared) inode lock since it's a ro filesystem */
if (!iov_iter_count(to))
return 0;
#ifdef CONFIG_FS_DAX
if (IS_DAX(iocb->ki_filp->f_mapping->host))
if (IS_DAX(inode))
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
if (iocb->ki_flags & IOCB_DIRECT) {
int err = erofs_prepare_dio(iocb, to);
struct block_device *bdev = inode->i_sb->s_bdev;
unsigned int blksize_mask;
if (!err)
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
NULL, 0, NULL, 0);
if (err < 0)
return err;
if (bdev)
blksize_mask = bdev_logical_block_size(bdev) - 1;
else
blksize_mask = (1 << inode->i_blkbits) - 1;
if ((iocb->ki_pos | iov_iter_count(to) |
iov_iter_alignment(to)) & blksize_mask)
return -EINVAL;
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
NULL, 0, NULL, 0);
}
return filemap_read(iocb, to, 0);
}

View File

@@ -83,7 +83,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
j = 0;
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
if (!rq->fillgaps && test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
@@ -91,14 +91,18 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
if (page) {
__clear_bit(j, bounced);
if (kaddr) {
if (kaddr + PAGE_SIZE == page_address(page))
if (!PageHighMem(page)) {
if (!i) {
kaddr = page_address(page);
continue;
}
if (kaddr &&
kaddr + PAGE_SIZE == page_address(page)) {
kaddr += PAGE_SIZE;
else
kaddr = NULL;
} else if (!i) {
kaddr = page_address(page);
continue;
}
}
kaddr = NULL;
continue;
}
kaddr = NULL;

View File

@@ -143,6 +143,7 @@ again:
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);
wake_up_all(&z_erofs_lzma_wq);
z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);

View File

@@ -22,10 +22,9 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name,
}
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
void *dentry_blk, unsigned int *ofs,
void *dentry_blk, struct erofs_dirent *de,
unsigned int nameoff, unsigned int maxsize)
{
struct erofs_dirent *de = dentry_blk + *ofs;
const struct erofs_dirent *end = dentry_blk + nameoff;
while (de < end) {
@@ -59,9 +58,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
/* stopped by some reason */
return 1;
++de;
*ofs += sizeof(struct erofs_dirent);
ctx->pos += sizeof(struct erofs_dirent);
}
*ofs = maxsize;
return 0;
}
@@ -90,33 +88,33 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
nameoff >= EROFS_BLKSIZ) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
goto skip_this;
break;
}
maxsize = min_t(unsigned int,
dirsize - ctx->pos + ofs, PAGE_SIZE);
dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
/* search dirents at the arbitrary position */
if (initial) {
initial = false;
ofs = roundup(ofs, sizeof(struct erofs_dirent));
ctx->pos = blknr_to_addr(i) + ofs;
if (ofs >= nameoff)
goto skip_this;
}
err = erofs_fill_dentries(dir, ctx, de, &ofs,
err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs,
nameoff, maxsize);
skip_this:
ctx->pos = blknr_to_addr(i) + ofs;
if (err)
break;
skip_this:
ctx->pos = blknr_to_addr(i) + maxsize;
++i;
ofs = 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -7,13 +7,10 @@
#define __EROFS_FS_ZDATA_H
#include "internal.h"
#include "zpvec.h"
#include "tagptr.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_NR_INLINE_PAGEVECS 3
#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001
#define Z_EROFS_PCLUSTER_LENGTH_BIT 1
#define Z_EROFS_INLINE_BVECS 2
/*
* let's leave a type here in case of introducing
@@ -21,6 +18,21 @@
*/
typedef void *z_erofs_next_pcluster_t;
/*
 * One page reference recorded for a pcluster: a page plus two integers
 * qualifying it.
 * NOTE(review): the exact meaning of 'offset' and 'end' is not visible in
 * this header - presumably they bound the part of the page that is
 * covered; confirm against the users in zdata.c.
 */
struct z_erofs_bvec {
struct page *page;
int offset;
unsigned int end;
};
/*
 * Declare a "bvec set" structure type called @name: a link to the page
 * holding the following bvecs, plus an array of @total z_erofs_bvec entries.
 */
#define __Z_EROFS_BVSET(name, total) \
struct name { \
/* point to the next page which contains the following bvecs */ \
struct page *nextpage; \
struct z_erofs_bvec bvec[total]; \
}
/* empty @total: bvec[] is declared without a size (flexible array member) */
__Z_EROFS_BVSET(z_erofs_bvset,);
/* fixed-size variant holding Z_EROFS_INLINE_BVECS entries */
__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -38,24 +50,21 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
/* A: lower limit of decompressed length and if full length or not */
/* L: the maximum decompression size of this round */
unsigned int length;
/* L: total number of bvecs */
unsigned int vcnt;
/* I: page offset of start position of decompression */
unsigned short pageofs_out;
/* I: page offset of inline compressed data */
unsigned short pageofs_in;
/* L: maximum relative page index in pagevec[] */
unsigned short nr_pages;
/* L: total number of pages in pagevec[] */
unsigned int vcnt;
union {
/* L: inline a certain number of pagevecs for bootstrap */
erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
/* L: inline a certain number of bvec for bootstrap */
struct z_erofs_bvset_inline bvset;
/* I: can be used to free the pcluster by RCU. */
struct rcu_head rcu;
@@ -72,8 +81,14 @@ struct z_erofs_pcluster {
/* I: compression algorithm format */
unsigned char algorithmformat;
/* A: compressed pages (can be cached or inplaced pages) */
struct page *compressed_pages[];
/* L: whether partial decompression or not */
bool partial;
/* L: indicate several pageofs_outs or not */
bool multibases;
/* A: compressed bvecs (can be cached or inplaced pages) */
struct z_erofs_bvec compressed_bvecs[];
};
/* let's avoid the valid 32-bit kernel addresses */
@@ -94,6 +109,8 @@ struct z_erofs_decompressqueue {
struct completion done;
struct work_struct work;
} u;
bool eio;
};
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
@@ -108,38 +125,17 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return pcl->pclusterpages;
}
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
/*
* waiters (aka. ongoing_packs): # to unlock the page
* sub-index: 0 - for partial page, >= 1 full page sub-index
* bit 31: I/O error occurred on this page
* bit 0 - 30: remaining parts to complete this page
*/
typedef atomic_t z_erofs_onlinepage_t;
/* type punning */
union z_erofs_onlinepage_converter {
z_erofs_onlinepage_t *o;
unsigned long *v;
};
static inline unsigned int z_erofs_onlinepage_index(struct page *page)
{
union z_erofs_onlinepage_converter u;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
}
#define Z_EROFS_PAGE_EIO (1 << 31)
static inline void z_erofs_onlinepage_init(struct page *page)
{
union {
z_erofs_onlinepage_t o;
atomic_t o;
unsigned long v;
/* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
@@ -147,49 +143,36 @@ static inline void z_erofs_onlinepage_init(struct page *page)
SetPagePrivate(page);
}
static inline void z_erofs_onlinepage_fixup(struct page *page,
uintptr_t index, bool down)
static inline void z_erofs_onlinepage_split(struct page *page)
{
union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
int orig, orig_index, val;
atomic_inc((atomic_t *)&page->private);
}
repeat:
orig = atomic_read(u.o);
orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
if (orig_index) {
if (!index)
return;
static inline void z_erofs_page_mark_eio(struct page *page)
{
int orig;
DBG_BUGON(orig_index != index);
}
val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
if (atomic_cmpxchg(u.o, orig, val) != orig)
goto repeat;
do {
orig = atomic_read((atomic_t *)&page->private);
} while (atomic_cmpxchg((atomic_t *)&page->private, orig,
orig | Z_EROFS_PAGE_EIO) != orig);
}
static inline void z_erofs_onlinepage_endio(struct page *page)
{
union z_erofs_onlinepage_converter u;
unsigned int v;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
v = atomic_dec_return((atomic_t *)&page->private);
if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
if (!(v & Z_EROFS_PAGE_EIO))
SetPageUptodate(page);
unlock_page(page);
}
erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
#define Z_EROFS_VMAP_ONSTACK_PAGES \
min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
#define Z_EROFS_ONSTACK_PAGES 32
#endif

View File

@@ -1,159 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_ZPVEC_H
#define __EROFS_FS_ZPVEC_H
#include "tagptr.h"
/*
 * Page type in pagevec for the decompress subsystem.
 *
 * The type is stored as the tag bits of a tagged pointer next to the page
 * pointer itself (see tagptr_fold() in z_erofs_pagevec_enqueue()).
 */
enum z_erofs_page_type {
/*
 * including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE
 *
 * Must remain the first enumerator (value 0): z_erofs_pagevec_enqueue()
 * checks it against (uintptr_t)NULL.
 */
Z_EROFS_PAGE_TYPE_EXCLUSIVE,
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
Z_EROFS_VLE_PAGE_TYPE_HEAD,
/* number of page types; not a valid type itself */
Z_EROFS_VLE_PAGE_TYPE_MAX
};
/*
 * Declared with __compiletime_error() and not defined in this header.
 * Its only call site, in z_erofs_pagevec_enqueue(), is guarded by a
 * constant condition, so the call is presumably eliminated by the compiler
 * unless Z_EROFS_PAGE_TYPE_EXCLUSIVE stops being 0, in which case the
 * build is expected to fail with the message below.
 */
extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
__bad_page_type_exclusive(void);
/* pagevec tagged pointer: a page pointer folded with its z_erofs_page_type */
typedef tagptr2_t erofs_vtptr_t;
/*
 * pagevec collector: cursor used to enqueue/dequeue tagged page pointers
 */
struct z_erofs_pagevec_ctor {
/*
 * curr: page currently mapped as the pagevec array; NULL while the
 *       caller-supplied array passed to z_erofs_pagevec_ctor_init() is
 *       still in use (nothing to unmap in that case).
 * next: page that becomes the pagevec array on the next pagedown.
 */
struct page *curr, *next;
/* array of tagged page pointers currently being accessed */
erofs_vtptr_t *pages;
/* nr: number of slots in pages[]; index: next slot to access */
unsigned int nr, index;
};
/*
 * Release the mapping of the page currently used as the pagevec array.
 *
 * @ctor:   collector to tear down
 * @atomic: true if the current page was mapped with kmap_atomic(),
 *          false if it was mapped with kmap()
 *
 * Nothing is unmapped when no page is mapped (ctor->curr is NULL).
 */
static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
					     bool atomic)
{
	if (ctor->curr) {
		if (atomic)
			kunmap_atomic(ctor->pages);
		else
			kunmap(ctor->curr);
	}
}
/*
 * Find the page that will serve as the next pagevec array.
 *
 * @ctor: collector being walked
 * @nr:   number of leading entries of ctor->pages[] to inspect
 *
 * Returns ctor->next if one is already recorded; otherwise the first page
 * among the inspected entries tagged Z_EROFS_PAGE_TYPE_EXCLUSIVE, or NULL
 * if there is none.
 */
static inline struct page *
z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
			       unsigned int nr)
{
	unsigned int slot = 0;

	/* keep away from occupied pages */
	if (ctor->next)
		return ctor->next;

	while (slot < nr) {
		const erofs_vtptr_t entry = ctor->pages[slot++];

		if (tagptr_unfold_tags(entry) == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
			return tagptr_unfold_ptr(entry);
	}

	/* a fully scanned array is expected to contain an exclusive page */
	DBG_BUGON(nr >= ctor->nr);
	return NULL;
}
/*
 * Advance the collector to the next pagevec page.
 *
 * @ctor:   collector to advance
 * @atomic: map the new page with kmap_atomic() (true) or kmap() (false);
 *          the same mode is used to unmap the current page
 *
 * The next page is looked up before the current mapping is dropped because
 * the lookup may read ctor->pages[].
 */
static inline void
z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
			      bool atomic)
{
	struct page *const following =
		z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);

	z_erofs_pagevec_ctor_exit(ctor, atomic);

	ctor->curr = following;
	ctor->next = NULL;
	if (atomic)
		ctor->pages = kmap_atomic(ctor->curr);
	else
		ctor->pages = kmap(ctor->curr);

	/* a whole page of pointer-sized slots is now available */
	ctor->nr = PAGE_SIZE / sizeof(struct page *);
	ctor->index = 0;
}
/*
 * Initialise a pagevec collector and position it at entry @i.
 *
 * @ctor:  collector to initialise
 * @nr:    number of slots in the caller-supplied array @pages
 * @pages: initial (already addressable) array of tagged page pointers
 * @i:     entry position to start from, counted from the start of @pages
 *         and continuing through the chained pagevec pages
 */
static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
unsigned int nr,
erofs_vtptr_t *pages,
unsigned int i)
{
ctor->nr = nr;
/* no page is mapped yet: the caller-supplied array is used directly */
ctor->curr = ctor->next = NULL;
ctor->pages = pages;
/* start position lies beyond the initial array: walk the page chain */
if (i >= nr) {
i -= nr;
z_erofs_pagevec_ctor_pagedown(ctor, false);
/* ctor->nr now holds the per-page capacity set by pagedown */
while (i > ctor->nr) {
i -= ctor->nr;
z_erofs_pagevec_ctor_pagedown(ctor, false);
}
}
/* pre-compute the next pagevec page from the entries before position i */
ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
ctor->index = i;
}
/*
 * Append @page, tagged with @type, to the pagevec.
 *
 * @ctor:           collector positioned at the slot to fill
 * @page:           page to record
 * @type:           page type to store in the tag bits
 * @pvec_safereuse: whether an exclusive page may be recorded as exclusive;
 *                  when false and no next page is known, the type is
 *                  downgraded to Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED
 *
 * Returns true once the entry has been stored. Returns false, storing
 * nothing, when no next page is known and a non-exclusive page would be
 * placed in the last slot of the current array.
 */
static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
struct page *page,
enum z_erofs_page_type type,
bool pvec_safereuse)
{
if (!ctor->next) {
/* some pages cannot be reused as pvec safely without I/O */
if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
/*
 * NOTE(review): presumably the last slot is held back so that an
 * exclusive page can still be recorded to continue the pagevec -
 * confirm against the callers.
 */
if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
ctor->index + 1 == ctor->nr)
return false;
}
/* current array is full: switch to the next pagevec page */
if (ctor->index >= ctor->nr)
z_erofs_pagevec_ctor_pagedown(ctor, false);
/* exclusive page type must be 0 */
if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
__bad_page_type_exclusive();
/*
 * should remind that collector->next never equal to 1, 2
 *
 * The comparison below can therefore only match when ctor->next is NULL
 * and type is Z_EROFS_PAGE_TYPE_EXCLUSIVE (0): the first exclusive page
 * seen becomes the next pagevec page.
 */
if (type == (uintptr_t)ctor->next) {
ctor->next = page;
}
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
return true;
}
/*
 * Take the entry at the current position out of the pagevec.
 *
 * @ctor: collector positioned at the slot to read
 * @type: output; receives the page type stored in the entry's tag bits
 *
 * Returns the page pointer stored in the entry. The slot is overwritten
 * with a NULL/0 tagged pointer and the position advances by one.
 */
static inline struct page *
z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
enum z_erofs_page_type *type)
{
erofs_vtptr_t t;
/* current array exhausted: move on to the next page (atomic mapping) */
if (ctor->index >= ctor->nr) {
DBG_BUGON(!ctor->next);
z_erofs_pagevec_ctor_pagedown(ctor, true);
}
t = ctor->pages[ctor->index];
*type = tagptr_unfold_tags(t);
/*
 * should remind that collector->next never equal to 1, 2
 *
 * So this only matches when ctor->next is NULL and the entry is
 * Z_EROFS_PAGE_TYPE_EXCLUSIVE (0): that page becomes the next one.
 */
if (*type == (uintptr_t)ctor->next)
ctor->next = tagptr_unfold_ptr(t);
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
return tagptr_unfold_ptr(t);
}
#endif

View File

@@ -1059,9 +1059,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_frags_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #inodes per group too big: %lu",
"error: invalid #inodes per group: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -1071,6 +1072,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
le32_to_cpu(es->s_inodes_count)) {
ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
le32_to_cpu(es->s_inodes_count),
(u64)sbi->s_groups_count * sbi->s_inodes_per_group);
goto failed_mount;
}
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc_array(db_count,
@@ -1490,8 +1498,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
tocopy = sb->s_blocksize - offset < toread ?
sb->s_blocksize - offset : toread;
tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
@@ -1529,8 +1536,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;

View File

@@ -425,21 +425,9 @@ static inline int flock_translate_cmd(int cmd) {
}
/* Fill in a file_lock structure with an appropriate FLOCK lock. */
static struct file_lock *
flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
{
int type = flock_translate_cmd(cmd);
if (type < 0)
return ERR_PTR(type);
if (fl == NULL) {
fl = locks_alloc_lock();
if (fl == NULL)
return ERR_PTR(-ENOMEM);
} else {
locks_init_lock(fl);
}
locks_init_lock(fl);
fl->fl_file = filp;
fl->fl_owner = filp;
@@ -447,8 +435,6 @@ flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
return fl;
}
static int assign_type(struct file_lock *fl, long type)
@@ -2097,21 +2083,9 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
*/
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
struct fd f = fdget(fd);
struct file_lock *lock;
int can_sleep, unlock;
int error;
error = -EBADF;
if (!f.file)
goto out;
can_sleep = !(cmd & LOCK_NB);
cmd &= ~LOCK_NB;
unlock = (cmd == LOCK_UN);
if (!unlock && !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
goto out_putf;
int can_sleep, error, type;
struct file_lock fl;
struct fd f;
/*
* LOCK_MAND locks were broken for a long time in that they never
@@ -2123,36 +2097,41 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
if (cmd & LOCK_MAND) {
pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
error = 0;
goto out_putf;
return 0;
}
lock = flock_make_lock(f.file, cmd, NULL);
if (IS_ERR(lock)) {
error = PTR_ERR(lock);
type = flock_translate_cmd(cmd & ~LOCK_NB);
if (type < 0)
return type;
error = -EBADF;
f = fdget(fd);
if (!f.file)
return error;
if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
goto out_putf;
}
if (can_sleep)
lock->fl_flags |= FL_SLEEP;
flock_make_lock(f.file, &fl, type);
error = security_file_lock(f.file, lock->fl_type);
error = security_file_lock(f.file, fl.fl_type);
if (error)
goto out_free;
goto out_putf;
can_sleep = !(cmd & LOCK_NB);
if (can_sleep)
fl.fl_flags |= FL_SLEEP;
if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
(can_sleep) ? F_SETLKW : F_SETLK,
&fl);
else
error = locks_lock_file_wait(f.file, lock);
out_free:
locks_free_lock(lock);
error = locks_lock_file_wait(f.file, &fl);
out_putf:
fdput(f);
out:
return error;
}
@@ -2614,7 +2593,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
if (list_empty(&flctx->flc_flock))
return;
flock_make_lock(filp, LOCK_UN, &fl);
flock_make_lock(filp, &fl, F_UNLCK);
fl.fl_flags |= FL_CLOSE;
if (filp->f_op->flock)

View File

@@ -295,12 +295,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
const void *data, int data_type,
struct inode *dir)
{
__u32 marks_mask = 0, marks_ignored_mask = 0;
__u32 marks_mask = 0, marks_ignore_mask = 0;
__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
FANOTIFY_EVENT_FLAGS;
const struct path *path = fsnotify_data_path(data, data_type);
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
struct fsnotify_mark *mark;
bool ondir = event_mask & FAN_ONDIR;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
@@ -315,19 +316,21 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
return 0;
} else if (!(fid_mode & FAN_REPORT_FID)) {
/* Do we have a directory inode to report? */
if (!dir && !(event_mask & FS_ISDIR))
if (!dir && !ondir)
return 0;
}
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
/* Apply ignore mask regardless of mark's ISDIR flag */
marks_ignored_mask |= mark->ignored_mask;
/*
* Apply ignore mask depending on event flags in ignore mask.
*/
marks_ignore_mask |=
fsnotify_effective_ignore_mask(mark, ondir, type);
/*
* If the event is on dir and this mark doesn't care about
* events on dir, don't send it!
* Send the event depending on event flags in mark mask.
*/
if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
if (!fsnotify_mask_applicable(mark->mask, ondir, type))
continue;
marks_mask |= mark->mask;
@@ -336,7 +339,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
*match_mask |= 1U << type;
}
test_mask = event_mask & marks_mask & ~marks_ignored_mask;
test_mask = event_mask & marks_mask & ~marks_ignore_mask;
/*
* For dirent modification events (create/delete/move) that do not carry

View File

@@ -499,6 +499,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
mflags |= FAN_MARK_EVICTABLE;
if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
mflags |= FAN_MARK_IGNORE;
return mflags;
}

View File

@@ -1009,10 +1009,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
mask &= ~umask;
spin_lock(&fsn_mark->lock);
oldmask = fsnotify_calc_mask(fsn_mark);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
fsn_mark->mask &= ~mask;
} else {
fsn_mark->ignored_mask &= ~mask;
fsn_mark->ignore_mask &= ~mask;
}
newmask = fsnotify_calc_mask(fsn_mark);
/*
@@ -1021,7 +1021,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
* changes to the mask.
* Destroy mark when only umask bits remain.
*/
*destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
*destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
spin_unlock(&fsn_mark->lock);
return oldmask & ~newmask;
@@ -1085,15 +1085,24 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
bool recalc = false;
/*
* When using FAN_MARK_IGNORE for the first time, mark starts using
* independent event flags in ignore mask. After that, trying to
* update the ignore mask with the old FAN_MARK_IGNORED_MASK API
* will result in EEXIST error.
*/
if (ignore == FAN_MARK_IGNORE)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;
/*
* Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
* the removal of the FS_MODIFY bit in calculated mask if it was set
* because of an ignored mask that is now going to survive FS_MODIFY.
* because of an ignore mask that is now going to survive FS_MODIFY.
*/
if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
if (!(fsn_mark->mask & FS_MODIFY))
@@ -1120,10 +1129,10 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
bool recalc;
spin_lock(&fsn_mark->lock);
if (!(fan_flags & FAN_MARK_IGNORED_MASK))
if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
fsn_mark->mask |= mask;
else
fsn_mark->ignored_mask |= mask;
fsn_mark->ignore_mask |= mask;
recalc = fsnotify_calc_mask(fsn_mark) &
~fsnotify_conn_mask(fsn_mark->connector);
@@ -1187,6 +1196,37 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
sizeof(struct fanotify_error_event));
}
static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
/*
* Non evictable mark cannot be downgraded to evictable mark.
*/
if (fan_flags & FAN_MARK_EVICTABLE &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
return -EEXIST;
/*
* New ignore mask semantics cannot be downgraded to old semantics.
*/
if (fan_flags & FAN_MARK_IGNORED_MASK &&
fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
return -EEXIST;
/*
* An ignore mask that survives modify could never be downgraded to not
* survive modify. With new FAN_MARK_IGNORE semantics we make that rule
* explicit and return an error when trying to update the ignore mask
* without the original FAN_MARK_IGNORED_SURV_MODIFY value.
*/
if (fan_flags & FAN_MARK_IGNORE &&
!(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
return -EEXIST;
return 0;
}
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type,
__u32 mask, unsigned int fan_flags,
@@ -1208,19 +1248,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
}
/*
* Non evictable mark cannot be downgraded to evictable mark.
* Check if the requested mark flags conflict with the existing mark's flags.
*/
if (fan_flags & FAN_MARK_EVICTABLE &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
ret = -EEXIST;
ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
if (ret)
goto out;
}
/*
* Error events are pre-allocated per group, only if strictly
* needed (i.e. FAN_FS_ERROR was requested).
*/
if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
(mask & FAN_FS_ERROR)) {
ret = fanotify_group_init_error_pool(group);
if (ret)
goto out;
@@ -1261,10 +1300,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
/*
* If some other task has this inode open for write we should not add
* an ignored mark, unless that ignored mark is supposed to survive
* an ignore mask, unless that ignore mask is supposed to survive
* modification changes anyway.
*/
if ((flags & FAN_MARK_IGNORED_MASK) &&
if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
inode_is_open_for_write(inode))
return 0;
@@ -1520,7 +1559,8 @@ static int fanotify_events_supported(struct fsnotify_group *group,
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
(mask & FAN_RENAME);
(mask & FAN_RENAME) ||
(flags & FAN_MARK_IGNORE);
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
@@ -1557,7 +1597,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
__kernel_fsid_t __fsid, *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
bool ignored = flags & FAN_MARK_IGNORED_MASK;
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
u32 umask = 0;
int ret;
@@ -1586,7 +1627,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
}
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
switch (mark_cmd) {
case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
if (!mask)
@@ -1606,9 +1647,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ~valid_mask)
return -EINVAL;
/* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
if (ignored)
/* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
return -EINVAL;
/*
* Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
* FAN_MARK_IGNORED_MASK.
*/
if (ignore == FAN_MARK_IGNORED_MASK) {
mask &= ~FANOTIFY_EVENT_FLAGS;
umask = FANOTIFY_EVENT_FLAGS;
}
f = fdget(fanotify_fd);
if (unlikely(!f.file))
@@ -1672,7 +1723,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
goto fput_and_out;
if (flags & FAN_MARK_FLUSH) {
if (mark_cmd == FAN_MARK_FLUSH) {
ret = 0;
if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
@@ -1688,7 +1739,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
if (flags & FAN_MARK_ADD) {
if (mark_cmd == FAN_MARK_ADD) {
ret = fanotify_events_supported(group, &path, mask, flags);
if (ret)
goto path_put_and_out;
@@ -1712,6 +1763,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
ret = mnt ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
(mnt || S_ISDIR(inode->i_mode)) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY))
goto path_put_and_out;
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
@@ -1721,12 +1779,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* events with parent/name info for non-directory.
*/
if ((fid_mode & FAN_REPORT_DIR_FID) &&
(flags & FAN_MARK_ADD) && !ignored)
(flags & FAN_MARK_ADD) && !ignore)
mask |= FAN_EVENT_ON_CHILD;
}
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
switch (mark_cmd) {
case FAN_MARK_ADD:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask,
@@ -1804,7 +1862,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);

View File

@@ -113,7 +113,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
return;
seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
inode->i_ino, inode->i_sb->s_dev,
mflags, mark->mask, mark->ignored_mask);
mflags, mark->mask, mark->ignore_mask);
show_mark_fhandle(m, inode);
seq_putc(m, '\n');
iput(inode);
@@ -121,12 +121,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
struct mount *mnt = fsnotify_conn_mount(mark->connector);
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
mnt->mnt_id, mflags, mark->mask, mark->ignore_mask);
} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
struct super_block *sb = fsnotify_conn_sb(mark->connector);
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignored_mask);
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
}
}

View File

@@ -100,7 +100,7 @@ void fsnotify_sb_delete(struct super_block *sb)
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
* parent cares. Thus when an event happens on a child it can quickly tell if
* parent cares. Thus when an event happens on a child it can quickly tell
* if there is a need to find a parent and send the event to the parent.
*/
void __fsnotify_update_child_dentry_flags(struct inode *inode)
@@ -324,7 +324,8 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
struct fsnotify_group *group = NULL;
__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
__u32 marks_mask = 0;
__u32 marks_ignored_mask = 0;
__u32 marks_ignore_mask = 0;
bool is_dir = mask & FS_ISDIR;
struct fsnotify_mark *mark;
int type;
@@ -336,7 +337,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
if (!(mark->flags &
FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mark->ignored_mask = 0;
mark->ignore_mask = 0;
}
}
@@ -344,14 +345,15 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
group = mark->group;
marks_mask |= mark->mask;
marks_ignored_mask |= mark->ignored_mask;
marks_ignore_mask |=
fsnotify_effective_ignore_mask(mark, is_dir, type);
}
pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
__func__, group, mask, marks_mask, marks_ignored_mask,
pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
__func__, group, mask, marks_mask, marks_ignore_mask,
data, data_type, dir, cookie);
if (!(test_mask & marks_mask & ~marks_ignored_mask))
if (!(test_mask & marks_mask & ~marks_ignore_mask))
return 0;
if (group->ops->handle_event) {
@@ -423,7 +425,8 @@ static bool fsnotify_iter_select_report_types(
* But is *this mark* watching children?
*/
if (type == FSNOTIFY_ITER_TYPE_PARENT &&
!(mark->mask & FS_EVENT_ON_CHILD))
!(mark->mask & FS_EVENT_ON_CHILD) &&
!(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
continue;
fsnotify_iter_set_report_type(iter_info, type);
@@ -532,8 +535,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
/*
* If this is a modify event we may need to clear some ignored masks.
* In that case, the object with ignored masks will have the FS_MODIFY
* If this is a modify event we may need to clear some ignore masks.
* In that case, the object with ignore masks will have the FS_MODIFY
* event in its mask.
* Otherwise, return if none of the marks care about this type of event.
*/

View File

@@ -136,7 +136,7 @@ static inline u32 inotify_mask_to_arg(__u32 mask)
IN_Q_OVERFLOW);
}
/* intofiy userspace file descriptor functions */
/* inotify userspace file descriptor functions */
static __poll_t inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;

View File

@@ -290,7 +290,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
struct buffer_head *bh;
struct item_head *ih, tmp_ih;
b_blocknr_t blocknr;
char *p = NULL;
char *p;
int chars;
int ret;
int result;
@@ -305,8 +305,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
result = search_for_position_by_key(inode->i_sb, &key, &path);
if (result != POSITION_FOUND) {
pathrelse(&path);
if (p)
kunmap(bh_result->b_page);
if (result == IO_ERROR)
return -EIO;
/*
@@ -352,8 +350,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
}
pathrelse(&path);
if (p)
kunmap(bh_result->b_page);
return ret;
}
/* requested data are in direct item(s) */
@@ -363,8 +359,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* when it is stored in direct item(s)
*/
pathrelse(&path);
if (p)
kunmap(bh_result->b_page);
return -ENOENT;
}
@@ -396,9 +390,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* sure we need to. But, this means the item might move if
* kmap schedules
*/
if (!p)
p = (char *)kmap(bh_result->b_page);
p = (char *)kmap(bh_result->b_page);
p += offset;
memset(p, 0, inode->i_sb->s_blocksize);
do {

View File

@@ -59,15 +59,19 @@
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORE)
#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \
FAN_MARK_ADD | \
FAN_MARK_REMOVE | \
FANOTIFY_MARK_CMD_BITS | \
FANOTIFY_MARK_IGNORE_BITS | \
FAN_MARK_DONT_FOLLOW | \
FAN_MARK_ONLYDIR | \
FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORED_SURV_MODIFY | \
FAN_MARK_EVICTABLE | \
FAN_MARK_FLUSH)
FAN_MARK_EVICTABLE)
/*
* Events that can be reported with data type FSNOTIFY_EVENT_PATH.

View File

@@ -518,8 +518,8 @@ struct fsnotify_mark {
struct hlist_node obj_list;
/* Head of list of marks for an object [mark ref] */
struct fsnotify_mark_connector *connector;
/* Events types to ignore [mark->lock, group->mark_mutex] */
__u32 ignored_mask;
/* Events types and flags to ignore [mark->lock, group->mark_mutex] */
__u32 ignore_mask;
/* General fsnotify mark flags */
#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
@@ -529,6 +529,7 @@ struct fsnotify_mark {
/* fanotify mark flags */
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400
unsigned int flags; /* flags [mark->lock] */
};
@@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
/* functions used to manipulate the marks attached to inodes */
/* Get mask for calculating object interest taking ignored mask into account */
/*
 * Return the canonical "ignore mask" of @mark, event flags included.
 *
 * This differs subtly from the legacy ->ignored_mask: the legacy field
 * only said which events to ignore, whereas ->ignore_mask additionally
 * carries flags describing the type of objects on which those events
 * should be ignored.
 */
static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
{
	/* Mark carries explicit ignore flags: the stored mask is used as is */
	if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
		return mark->ignore_mask;

	/*
	 * Legacy behavior, synthesized on top of the stored mask:
	 * - FS_ISDIR is always set, so events on directories are ignored
	 * - FS_EVENT_ON_CHILD is taken from ->mask, so events on children
	 *   are ignored only if the mark is watching children
	 */
	return ((mark->ignore_mask | FS_ISDIR) & ~FS_EVENT_ON_CHILD) |
	       (mark->mask & FS_EVENT_ON_CHILD);
}
/*
 * Legacy view of the ignore mask: only the event type bits of
 * ->ignore_mask, with everything outside ALL_FSNOTIFY_EVENTS masked off.
 */
static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
{
	__u32 events = mark->ignore_mask;

	events &= ALL_FSNOTIFY_EVENTS;
	return events;
}
/*
 * Decide whether @mask (a mask or an ignore mask) applies to an event,
 * depending on whether the victim is a directory and on whether the
 * event is being reported to a watching parent.
 *
 * Returns false if the victim is a directory and @mask lacks FS_ISDIR,
 * or if @iter_type is FSNOTIFY_ITER_TYPE_PARENT and @mask lacks
 * FS_EVENT_ON_CHILD; returns true otherwise.
 */
static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
					    int iter_type)
{
	/* A directory victim needs FS_ISDIR in the mask */
	bool dir_ok = !is_dir || (mask & FS_ISDIR);
	/* An event reported to the parent needs FS_EVENT_ON_CHILD */
	bool child_ok = iter_type != FSNOTIFY_ITER_TYPE_PARENT ||
			(mask & FS_EVENT_ON_CHILD);

	return dir_ok && child_ok;
}
/*
 * Compute the ignore mask that is actually in effect for an event,
 * given whether the victim is a directory and whether the event is
 * being reported to a watching parent.
 *
 * Returns the event bits to ignore, or 0 if nothing is to be ignored.
 */
static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
						   bool is_dir, int iter_type)
{
	__u32 events = fsnotify_ignored_events(mark);
	__u32 full_mask;

	if (!events)
		return 0;

	/* Only dir victims and parent-reported events consult the flags */
	if (is_dir || iter_type == FSNOTIFY_ITER_TYPE_PARENT) {
		full_mask = fsnotify_ignore_mask(mark);
		if (!fsnotify_mask_applicable(full_mask, is_dir, iter_type))
			return 0;

		return full_mask & ALL_FSNOTIFY_EVENTS;
	}

	/* Non-dir, non-child event: the plain event bits are enough */
	return events;
}
/* Get mask for calculating object interest taking ignore mask into account */
static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
{
__u32 mask = mark->mask;
if (!mark->ignored_mask)
if (!fsnotify_ignored_events(mark))
return mask;
/* Interest in FS_MODIFY may be needed for clearing ignored mask */
/* Interest in FS_MODIFY may be needed for clearing ignore mask */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mask |= FS_MODIFY;
@@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
* If mark is interested in ignoring events on children, the object must
* show interest in those events for fsnotify_parent() to notice it.
*/
return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
return mask | mark->ignore_mask;
}
/* Get mask of events for a list of marks */

View File

@@ -49,38 +49,52 @@
/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
TRACE_EVENT(dlm_lock_start,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
__u32 flags),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
unsigned int namelen, int mode, __u32 flags),
TP_ARGS(ls, lkb, mode, flags),
TP_ARGS(ls, lkb, name, namelen, mode, flags),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__field(__u32, flags)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
else if (name)
memcpy(__get_dynamic_array(res_name), name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
show_lock_flags(__entry->flags))
show_lock_flags(__entry->flags),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_lock_end,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
int error),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
unsigned int namelen, int mode, __u32 flags, int error),
TP_ARGS(ls, lkb, mode, flags, error),
TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
TP_STRUCT__entry(
__field(__u32, ls_id)
@@ -88,14 +102,26 @@ TRACE_EVENT(dlm_lock_end,
__field(int, mode)
__field(__u32, flags)
__field(int, error)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
else if (name)
memcpy(__get_dynamic_array(res_name), name,
__get_dynamic_array_len(res_name));
/* return value will be zeroed in those cases by dlm_lock()
* we do it here again to not introduce more overhead if
* trace isn't running and error reflects the return value.
@@ -104,12 +130,15 @@ TRACE_EVENT(dlm_lock_end,
__entry->error = 0;
else
__entry->error = error;
),
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
show_lock_flags(__entry->flags), __entry->error)
show_lock_flags(__entry->flags), __entry->error,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
@@ -123,42 +152,65 @@ TRACE_EVENT(dlm_bast,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
__entry->lkb_id, show_lock_mode(__entry->mode))
TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_ast,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb),
TP_ARGS(ls, lkb, lksb),
TP_ARGS(ls, lkb),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(u8, sb_flags)
__field(int, sb_status)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->sb_flags = lksb->sb_flags;
__entry->sb_status = lksb->sb_status;
__entry->sb_flags = lkb->lkb_lksb->sb_flags;
__entry->sb_status = lkb->lkb_lksb->sb_status;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
@@ -173,17 +225,28 @@ TRACE_EVENT(dlm_unlock_start,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(__u32, flags)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x flags=%s",
TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_flags(__entry->flags))
show_lock_flags(__entry->flags),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
@@ -199,18 +262,29 @@ TRACE_EVENT(dlm_unlock_end,
__field(__u32, lkb_id)
__field(__u32, flags)
__field(int, error)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
__entry->error = error;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_flags(__entry->flags), __entry->error)
show_lock_flags(__entry->flags), __entry->error,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);

View File

@@ -83,12 +83,20 @@
#define FAN_MARK_FLUSH 0x00000080
/* FAN_MARK_FILESYSTEM is 0x00000100 */
#define FAN_MARK_EVICTABLE 0x00000200
/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
#define FAN_MARK_IGNORE 0x00000400
/* These are NOT bitwise flags. Both bits can be used together. */
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
/*
* Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
* for non-inode mark types.
*/
#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
/* Deprecated - do not use this in programs and do not add new flags here! */
#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
FAN_MARK_REMOVE |\