diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4f81b9b181d0..8e724e1bf087 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -490,6 +490,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) req->in.h.nodeid = args->nodeid; req->in.h.padding = args->error_in; req->args = args; + if (args->is_ext) + req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8; if (args->end) __set_bit(FR_ASYNC, &req->flags); } @@ -1512,7 +1514,7 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, buf[outarg.namelen] = 0; down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name); + err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); up_read(&fc->killsb); kfree(buf); return err; @@ -1560,7 +1562,7 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, buf[outarg.namelen] = 0; down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name); + err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); up_read(&fc->killsb); kfree(buf); return err; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index de655966cf52..ba0d20a57d4a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -662,7 +662,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } static int get_security_context(struct dentry *entry, umode_t mode, - void **security_ctx, u32 *security_ctxlen) + struct fuse_in_arg *ext) { struct fuse_secctx *fctx; struct fuse_secctx_header *header; @@ -709,14 +709,42 @@ static int get_security_context(struct dentry *entry, umode_t mode, memcpy(ptr, ctx, ctxlen); } - *security_ctxlen = total_len; - *security_ctx = header; + ext->size = total_len; + ext->value = header; err = 0; out_err: kfree(ctx); return err; } +static int get_create_ext(struct fuse_args *args, struct dentry *dentry, + umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); + struct fuse_in_arg ext = { .size = 0, .value = NULL }; + int err = 0; + + if (fc->init_security) + err = get_security_context(dentry, mode, &ext); + + if (!err && ext.size) { + WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); + args->is_ext = true; + args->ext_idx = args->in_numargs++; + args->in_args[args->ext_idx] = ext; + } else { + kfree(ext.value); + } + + return err; +} + +static void free_ext_value(struct fuse_args *args) +{ + if (args->is_ext) + kfree(args->in_args[args->ext_idx].value); +} + /* * Atomic create+open operation * @@ -738,8 +766,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_inode *fi; struct fuse_file *ff; - void *security_ctx = NULL; - u32 security_ctxlen; bool trunc = flags & O_TRUNC; /* Userspace expects S_IFREG in create mode */ @@ -797,19 +823,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[1].size = sizeof(outopen); args.out_args[1].value = &outopen; - if (fm->fc->init_security) { - err = get_security_context(entry, mode, &security_ctx, - &security_ctxlen); - if (err) - goto out_put_forget_req; - - args.in_numargs = 3; - args.in_args[2].size = security_ctxlen; - args.in_args[2].value = security_ctx; - } + err = get_create_ext(&args, entry, mode); + if (err) + goto out_free_ff; err = fuse_simple_request(fm, &args); - kfree(security_ctx); + free_ext_value(&args); if (err) goto out_free_ff; @@ -917,8 +936,6 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, struct dentry *d; int err; struct fuse_forget_link *forget; - void *security_ctx = NULL; - u32 security_ctxlen; if (fuse_is_bad(dir)) return -EIO; @@ -933,21 +950,14 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_args[0].size = sizeof(outarg); args->out_args[0].value = &outarg; - if (fm->fc->init_security && args->opcode != FUSE_LINK) { - err = get_security_context(entry, mode, &security_ctx, - &security_ctxlen); + if (args->opcode != FUSE_LINK) { + err = get_create_ext(args, entry, mode); if (err) goto out_put_forget_req; - - BUG_ON(args->in_numargs != 2); - - args->in_numargs = 3; - args->in_args[2].size = security_ctxlen; - args->in_args[2].value = security_ctx; } err = fuse_simple_request(fm, args); - kfree(security_ctx); + free_ext_value(args); if (err) goto out_put_forget_req; @@ -1479,7 +1489,7 @@ int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, - u64 child_nodeid, struct qstr *name) + u64 child_nodeid, struct qstr *name, u32 flags) { int err = -ENOTDIR; struct inode *parent; @@ -1506,7 +1516,9 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, goto unlock; fuse_dir_changed(parent); - fuse_invalidate_entry(entry); + if (!(flags & FUSE_EXPIRE_ONLY)) + d_invalidate(entry); + fuse_invalidate_entry_cache(entry); if (child_nodeid != 0 && d_really_is_positive(entry)) { inode_lock(d_inode(entry)); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 069a344d34a9..dbcc71e3b755 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1629,14 +1629,47 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) return res; } +static bool fuse_direct_write_extending_i_size(struct kiocb *iocb, + struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode); +} + static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); ssize_t res; + bool exclusive_lock = + !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) || + iocb->ki_flags & IOCB_APPEND || + fuse_direct_write_extending_i_size(iocb, from); + + /* + * Take exclusive lock if + * - Parallel direct writes are disabled - a user space decision + * - Parallel direct writes are enabled and i_size is being extended. + * This might not be needed at all, but needs further investigation. + */ + if (exclusive_lock) + inode_lock(inode); + else { + inode_lock_shared(inode); + + /* A race with truncate might have come up as the decision for + * the lock type was done without holding the lock, check again. + */ + if (fuse_direct_write_extending_i_size(iocb, from)) { + inode_unlock_shared(inode); + inode_lock(inode); + exclusive_lock = true; + } + } - /* Don't allow parallel writes to the same file */ - inode_lock(inode); res = generic_write_checks(iocb, from); if (res > 0) { if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { @@ -1647,7 +1680,10 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) fuse_write_update_attr(inode, iocb->ki_pos, res); } } - inode_unlock(inode); + if (exclusive_lock) + inode_unlock(inode); + else + inode_unlock_shared(inode); return res; } @@ -3074,6 +3110,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { fuse_write_update_attr(inode, pos, ret); + /* For extending writes we already hold exclusive lock */ if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 573b566849f1..8e166a4941a0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -345,8 +345,9 @@ struct fuse_args { uint64_t nodeid; uint32_t opcode; uint32_t error_in; - unsigned short in_numargs; - unsigned short out_numargs; + uint8_t in_numargs; + uint8_t out_numargs; + uint8_t ext_idx; bool force:1; bool noreply:1; bool nocreds:1; @@ -357,6 +358,7 @@ struct fuse_args { bool page_zeroing:1; bool page_replace:1; bool may_block:1; + bool is_ext:1; struct fuse_in_arg in_args[FUSE_MAX_IN_ARGS]; struct fuse_arg out_args[FUSE_MAX_OUT_ARGS]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); @@ -1339,7 +1341,7 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, * then the dentry is unhashed (d_delete()). */ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, - u64 child_nodeid, struct qstr *name); + u64 child_nodeid, struct qstr *name, u32 flags); int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, bool isdir); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 9b691de22b84..f0cb03e3d877 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -197,6 +197,13 @@ * * 7.37 * - add FUSE_TMPFILE + * + * 7.38 + * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry + * - add FOPEN_PARALLEL_DIRECT_WRITES + * - add total_extlen to fuse_in_header + * - add FUSE_MAX_NR_SECCTX + * - add extension header */ #ifndef _LINUX_FUSE_H @@ -232,7 +239,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 37 +#define FUSE_KERNEL_MINOR_VERSION 38 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -304,6 +311,7 @@ struct fuse_file_lock { * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -311,6 +319,7 @@ struct fuse_file_lock { #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) +#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) /** * INIT request/reply flags @@ -503,6 +512,21 @@ struct fuse_file_lock { */ #define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) +/** + * notify_inval_entry flags + * FUSE_EXPIRE_ONLY + */ +#define FUSE_EXPIRE_ONLY (1 << 0) + +/** + * extension type + * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx + */ +enum fuse_ext_type { + /* Types 0..31 are reserved for fuse_secctx_header */ + FUSE_MAX_NR_SECCTX = 31, +}; + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -887,7 +911,8 @@ struct fuse_in_header { uint32_t uid; uint32_t gid; uint32_t pid; - uint32_t padding; + uint16_t total_extlen; /* length of extensions in 8byte units */ + uint16_t padding; }; struct fuse_out_header { @@ -932,7 +957,7 @@ struct fuse_notify_inval_inode_out { struct fuse_notify_inval_entry_out { uint64_t parent; uint32_t namelen; - uint32_t padding; + uint32_t flags; }; struct fuse_notify_delete_out { @@ -1051,4 +1076,17 @@ struct fuse_secctx_header { uint32_t nr_secctx; }; +/** + * struct fuse_ext_header - extension header + * @size: total size of this extension including this header + * @type: type of extension + * + * This is made compatible with fuse_secctx_header by using type values > + * FUSE_MAX_NR_SECCTX + */ +struct fuse_ext_header { + uint32_t size; + uint32_t type; +}; + #endif /* _LINUX_FUSE_H */