mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Merge 9836e93c0a ("Merge tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block") into android-mainline
Steps on the way to 5.19-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I7a509815a37aac73516657cef841b64f7601187f
This commit is contained in:
109
block/blk-mq.c
109
block/blk-mq.c
@@ -1169,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
|
||||
complete(waiting);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
|
||||
* queues. This is important for md arrays to benefit from merging
|
||||
* requests.
|
||||
*/
|
||||
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
|
||||
{
|
||||
if (plug->multiple_queues)
|
||||
return BLK_MAX_REQUEST_COUNT * 2;
|
||||
return BLK_MAX_REQUEST_COUNT;
|
||||
}
|
||||
|
||||
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
{
|
||||
struct request *last = rq_list_peek(&plug->mq_list);
|
||||
|
||||
if (!plug->rq_count) {
|
||||
trace_block_plug(rq->q);
|
||||
} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
|
||||
(!blk_queue_nomerges(rq->q) &&
|
||||
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
|
||||
blk_mq_flush_plug_list(plug, false);
|
||||
trace_block_plug(rq->q);
|
||||
}
|
||||
|
||||
if (!plug->multiple_queues && last && last->q != rq->q)
|
||||
plug->multiple_queues = true;
|
||||
if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
|
||||
plug->has_elevator = true;
|
||||
rq->rq_next = NULL;
|
||||
rq_list_add(&plug->mq_list, rq);
|
||||
plug->rq_count++;
|
||||
}
|
||||
|
||||
static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
|
||||
rq_end_io_fn *done, bool use_plug)
|
||||
{
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(!blk_rq_is_passthrough(rq));
|
||||
|
||||
rq->end_io = done;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
|
||||
if (use_plug && current->plug) {
|
||||
blk_add_rq_to_plug(current->plug, rq);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* don't check dying flag for MQ because the request won't
|
||||
* be reused after dying flag is set
|
||||
*/
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
|
||||
* @rq: request to insert
|
||||
@@ -1184,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
|
||||
*/
|
||||
void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
|
||||
{
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(!blk_rq_is_passthrough(rq));
|
||||
__blk_execute_rq_nowait(rq, at_head, done, true);
|
||||
|
||||
rq->end_io = done;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
|
||||
/*
|
||||
* don't check dying flag for MQ because the request won't
|
||||
* be reused after dying flag is set
|
||||
*/
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
|
||||
@@ -1233,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
|
||||
DECLARE_COMPLETION_ONSTACK(wait);
|
||||
unsigned long hang_check;
|
||||
|
||||
/*
|
||||
* iopoll requires request to be submitted to driver, so can't
|
||||
* use plug
|
||||
*/
|
||||
rq->end_io_data = &wait;
|
||||
blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
|
||||
__blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
|
||||
!blk_rq_is_poll(rq));
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
@@ -2676,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
hctx->queue->mq_ops->commit_rqs(hctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
|
||||
* queues. This is important for md arrays to benefit from merging
|
||||
* requests.
|
||||
*/
|
||||
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
|
||||
{
|
||||
if (plug->multiple_queues)
|
||||
return BLK_MAX_REQUEST_COUNT * 2;
|
||||
return BLK_MAX_REQUEST_COUNT;
|
||||
}
|
||||
|
||||
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
{
|
||||
struct request *last = rq_list_peek(&plug->mq_list);
|
||||
|
||||
if (!plug->rq_count) {
|
||||
trace_block_plug(rq->q);
|
||||
} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
|
||||
(!blk_queue_nomerges(rq->q) &&
|
||||
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
|
||||
blk_mq_flush_plug_list(plug, false);
|
||||
trace_block_plug(rq->q);
|
||||
}
|
||||
|
||||
if (!plug->multiple_queues && last && last->q != rq->q)
|
||||
plug->multiple_queues = true;
|
||||
if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
|
||||
plug->has_elevator = true;
|
||||
rq->rq_next = NULL;
|
||||
rq_list_add(&plug->mq_list, rq);
|
||||
plug->rq_count++;
|
||||
}
|
||||
|
||||
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
|
||||
struct bio *bio, unsigned int nr_segs)
|
||||
{
|
||||
|
||||
@@ -3146,6 +3146,7 @@ static const struct file_operations nvme_dev_fops = {
|
||||
.release = nvme_dev_release,
|
||||
.unlocked_ioctl = nvme_dev_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
.uring_cmd = nvme_dev_uring_cmd,
|
||||
};
|
||||
|
||||
static ssize_t nvme_sysfs_reset(struct device *dev,
|
||||
@@ -3699,6 +3700,7 @@ static const struct file_operations nvme_ns_chr_fops = {
|
||||
.release = nvme_ns_chr_release,
|
||||
.unlocked_ioctl = nvme_ns_chr_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
.uring_cmd = nvme_ns_chr_uring_cmd,
|
||||
};
|
||||
|
||||
static int nvme_add_ns_cdev(struct nvme_ns *ns)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
#include <linux/ptrace.h> /* for force_successful_syscall_return */
|
||||
#include <linux/nvme_ioctl.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include "nvme.h"
|
||||
|
||||
/*
|
||||
@@ -53,10 +54,21 @@ out:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
|
||||
void *meta, unsigned len, int ret)
|
||||
{
|
||||
if (!ret && req_op(req) == REQ_OP_DRV_IN &&
|
||||
copy_to_user(ubuf, meta, len))
|
||||
ret = -EFAULT;
|
||||
kfree(meta);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct request *nvme_alloc_user_request(struct request_queue *q,
|
||||
struct nvme_command *cmd, void __user *ubuffer,
|
||||
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
|
||||
u32 meta_seed, u64 *result, unsigned timeout, bool vec)
|
||||
u32 meta_seed, void **metap, unsigned timeout, bool vec,
|
||||
unsigned int rq_flags, blk_mq_req_flags_t blk_flags)
|
||||
{
|
||||
bool write = nvme_is_write(cmd);
|
||||
struct nvme_ns *ns = q->queuedata;
|
||||
@@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
void *meta = NULL;
|
||||
int ret;
|
||||
|
||||
req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
|
||||
req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
return req;
|
||||
nvme_init_request(req, cmd);
|
||||
|
||||
if (timeout)
|
||||
@@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
goto out_unmap;
|
||||
}
|
||||
req->cmd_flags |= REQ_INTEGRITY;
|
||||
*metap = meta;
|
||||
}
|
||||
}
|
||||
|
||||
ret = nvme_execute_passthru_rq(req);
|
||||
if (result)
|
||||
*result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
if (meta && !ret && !write) {
|
||||
if (copy_to_user(meta_buffer, meta, meta_len))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
kfree(meta);
|
||||
out_unmap:
|
||||
return req;
|
||||
|
||||
out_unmap:
|
||||
if (bio)
|
||||
blk_rq_unmap_user(bio);
|
||||
out:
|
||||
blk_mq_free_request(req);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
struct nvme_command *cmd, void __user *ubuffer,
|
||||
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
|
||||
u32 meta_seed, u64 *result, unsigned timeout, bool vec)
|
||||
{
|
||||
struct request *req;
|
||||
void *meta = NULL;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
|
||||
meta_len, meta_seed, &meta, timeout, vec, 0, 0);
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
|
||||
bio = req->bio;
|
||||
|
||||
ret = nvme_execute_passthru_rq(req);
|
||||
|
||||
if (result)
|
||||
*result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
if (meta)
|
||||
ret = nvme_finish_user_metadata(req, meta_buffer, meta,
|
||||
meta_len, ret);
|
||||
if (bio)
|
||||
blk_rq_unmap_user(bio);
|
||||
out:
|
||||
blk_mq_free_request(req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
|
||||
{
|
||||
struct nvme_user_io io;
|
||||
@@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
return status;
|
||||
}
|
||||
|
||||
struct nvme_uring_data {
|
||||
__u64 metadata;
|
||||
__u64 addr;
|
||||
__u32 data_len;
|
||||
__u32 metadata_len;
|
||||
__u32 timeout_ms;
|
||||
};
|
||||
|
||||
/*
|
||||
* This overlays struct io_uring_cmd pdu.
|
||||
* Expect build errors if this grows larger than that.
|
||||
*/
|
||||
struct nvme_uring_cmd_pdu {
|
||||
union {
|
||||
struct bio *bio;
|
||||
struct request *req;
|
||||
};
|
||||
void *meta; /* kernel-resident buffer */
|
||||
void __user *meta_buffer;
|
||||
u32 meta_len;
|
||||
};
|
||||
|
||||
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
|
||||
struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
|
||||
}
|
||||
|
||||
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||
struct request *req = pdu->req;
|
||||
struct bio *bio = req->bio;
|
||||
int status;
|
||||
u64 result;
|
||||
|
||||
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
|
||||
status = -EINTR;
|
||||
else
|
||||
status = nvme_req(req)->status;
|
||||
|
||||
result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
|
||||
if (pdu->meta)
|
||||
status = nvme_finish_user_metadata(req, pdu->meta_buffer,
|
||||
pdu->meta, pdu->meta_len, status);
|
||||
if (bio)
|
||||
blk_rq_unmap_user(bio);
|
||||
blk_mq_free_request(req);
|
||||
|
||||
io_uring_cmd_done(ioucmd, status, result);
|
||||
}
|
||||
|
||||
static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = req->end_io_data;
|
||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||
/* extract bio before reusing the same field for request */
|
||||
struct bio *bio = pdu->bio;
|
||||
|
||||
pdu->req = req;
|
||||
req->bio = bio;
|
||||
/* this takes care of moving rest of completion-work to task context */
|
||||
io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
|
||||
}
|
||||
|
||||
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
|
||||
{
|
||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||
const struct nvme_uring_cmd *cmd = ioucmd->cmd;
|
||||
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
|
||||
struct nvme_uring_data d;
|
||||
struct nvme_command c;
|
||||
struct request *req;
|
||||
unsigned int rq_flags = 0;
|
||||
blk_mq_req_flags_t blk_flags = 0;
|
||||
void *meta = NULL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
c.common.opcode = READ_ONCE(cmd->opcode);
|
||||
c.common.flags = READ_ONCE(cmd->flags);
|
||||
if (c.common.flags)
|
||||
return -EINVAL;
|
||||
|
||||
c.common.command_id = 0;
|
||||
c.common.nsid = cpu_to_le32(cmd->nsid);
|
||||
if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
|
||||
return -EINVAL;
|
||||
|
||||
c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
|
||||
c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
|
||||
c.common.metadata = 0;
|
||||
c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
|
||||
c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
|
||||
c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
|
||||
c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
|
||||
c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
|
||||
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
|
||||
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
|
||||
|
||||
d.metadata = READ_ONCE(cmd->metadata);
|
||||
d.addr = READ_ONCE(cmd->addr);
|
||||
d.data_len = READ_ONCE(cmd->data_len);
|
||||
d.metadata_len = READ_ONCE(cmd->metadata_len);
|
||||
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK) {
|
||||
rq_flags = REQ_NOWAIT;
|
||||
blk_flags = BLK_MQ_REQ_NOWAIT;
|
||||
}
|
||||
|
||||
req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
|
||||
d.data_len, nvme_to_user_ptr(d.metadata),
|
||||
d.metadata_len, 0, &meta, d.timeout_ms ?
|
||||
msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
|
||||
blk_flags);
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
req->end_io_data = ioucmd;
|
||||
|
||||
/* to free bio on completion, as req->bio will be null at that time */
|
||||
pdu->bio = req->bio;
|
||||
pdu->meta = meta;
|
||||
pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
|
||||
pdu->meta_len = d.metadata_len;
|
||||
|
||||
blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io);
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
static bool is_ctrl_ioctl(unsigned int cmd)
|
||||
{
|
||||
if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
|
||||
@@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return __nvme_ioctl(ns, cmd, (void __user *)arg);
|
||||
}
|
||||
|
||||
static int nvme_uring_cmd_checks(unsigned int issue_flags)
|
||||
{
|
||||
/* IOPOLL not supported yet */
|
||||
if (issue_flags & IO_URING_F_IOPOLL)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* NVMe passthrough requires big SQE/CQE support */
|
||||
if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
|
||||
(IO_URING_F_SQE128|IO_URING_F_CQE32))
|
||||
return -EOPNOTSUPP;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
|
||||
|
||||
ret = nvme_uring_cmd_checks(issue_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (ioucmd->cmd_op) {
|
||||
case NVME_URING_CMD_IO:
|
||||
ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
|
||||
break;
|
||||
case NVME_URING_CMD_IO_VEC:
|
||||
ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
|
||||
break;
|
||||
default:
|
||||
ret = -ENOTTY;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
|
||||
{
|
||||
struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
|
||||
struct nvme_ns, cdev);
|
||||
|
||||
return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
|
||||
void __user *argp, struct nvme_ns_head *head, int srcu_idx)
|
||||
@@ -453,8 +669,46 @@ out_unlock:
|
||||
srcu_read_unlock(&head->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
|
||||
struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
|
||||
int srcu_idx = srcu_read_lock(&head->srcu);
|
||||
struct nvme_ns *ns = nvme_find_path(head);
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (ns)
|
||||
ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
|
||||
srcu_read_unlock(&head->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_NVME_MULTIPATH */
|
||||
|
||||
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = ioucmd->file->private_data;
|
||||
int ret;
|
||||
|
||||
ret = nvme_uring_cmd_checks(issue_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (ioucmd->cmd_op) {
|
||||
case NVME_URING_CMD_ADMIN:
|
||||
ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
|
||||
break;
|
||||
case NVME_URING_CMD_ADMIN_VEC:
|
||||
ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
|
||||
break;
|
||||
default:
|
||||
ret = -ENOTTY;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
@@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
|
||||
.release = nvme_ns_head_chr_release,
|
||||
.unlocked_ioctl = nvme_ns_head_chr_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
.uring_cmd = nvme_ns_head_chr_uring_cmd,
|
||||
};
|
||||
|
||||
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
|
||||
|
||||
@@ -782,7 +782,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags);
|
||||
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags);
|
||||
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
|
||||
|
||||
extern const struct attribute_group *nvme_ns_id_attr_groups[];
|
||||
extern const struct pr_ops nvme_pr_ops;
|
||||
|
||||
@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
|
||||
struct pipe_inode_info *opipe,
|
||||
loff_t *offset,
|
||||
size_t len, unsigned int flags);
|
||||
|
||||
/*
|
||||
* fs/xattr.c:
|
||||
*/
|
||||
struct xattr_name {
|
||||
char name[XATTR_NAME_MAX + 1];
|
||||
};
|
||||
|
||||
struct xattr_ctx {
|
||||
/* Value of attribute */
|
||||
union {
|
||||
const void __user *cvalue;
|
||||
void __user *value;
|
||||
};
|
||||
void *kvalue;
|
||||
size_t size;
|
||||
/* Attribute name */
|
||||
struct xattr_name *kname;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
|
||||
ssize_t do_getxattr(struct user_namespace *mnt_userns,
|
||||
struct dentry *d,
|
||||
struct xattr_ctx *ctx);
|
||||
|
||||
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
|
||||
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
struct xattr_ctx *ctx);
|
||||
|
||||
@@ -871,7 +871,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
|
||||
|
||||
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
|
||||
{
|
||||
set_notify_signal(worker->task);
|
||||
__set_notify_signal(worker->task);
|
||||
wake_up_process(worker->task);
|
||||
return false;
|
||||
}
|
||||
@@ -991,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
|
||||
{
|
||||
if (work && match->fn(work, match->data)) {
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
set_notify_signal(worker->task);
|
||||
__set_notify_signal(worker->task);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
|
||||
struct io_wq_work {
|
||||
struct io_wq_work_node list;
|
||||
unsigned flags;
|
||||
int cancel_seq;
|
||||
};
|
||||
|
||||
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
|
||||
|
||||
3576
fs/io_uring.c
3576
fs/io_uring.c
File diff suppressed because it is too large
Load Diff
143
fs/xattr.c
143
fs/xattr.c
@@ -25,6 +25,8 @@
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
static const char *
|
||||
strcmp_prefix(const char *a, const char *a_prefix)
|
||||
{
|
||||
@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
|
||||
/*
|
||||
* Extended attribute SET operations
|
||||
*/
|
||||
static long
|
||||
setxattr(struct user_namespace *mnt_userns, struct dentry *d,
|
||||
const char __user *name, const void __user *value, size_t size,
|
||||
int flags)
|
||||
|
||||
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
|
||||
{
|
||||
int error;
|
||||
void *kvalue = NULL;
|
||||
char kname[XATTR_NAME_MAX + 1];
|
||||
|
||||
if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
|
||||
if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
|
||||
return -EINVAL;
|
||||
|
||||
error = strncpy_from_user(kname, name, sizeof(kname));
|
||||
if (error == 0 || error == sizeof(kname))
|
||||
error = -ERANGE;
|
||||
error = strncpy_from_user(ctx->kname->name, name,
|
||||
sizeof(ctx->kname->name));
|
||||
if (error == 0 || error == sizeof(ctx->kname->name))
|
||||
return -ERANGE;
|
||||
if (error < 0)
|
||||
return error;
|
||||
|
||||
if (size) {
|
||||
if (size > XATTR_SIZE_MAX)
|
||||
error = 0;
|
||||
if (ctx->size) {
|
||||
if (ctx->size > XATTR_SIZE_MAX)
|
||||
return -E2BIG;
|
||||
kvalue = kvmalloc(size, GFP_KERNEL);
|
||||
if (!kvalue)
|
||||
return -ENOMEM;
|
||||
if (copy_from_user(kvalue, value, size)) {
|
||||
error = -EFAULT;
|
||||
goto out;
|
||||
|
||||
ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
|
||||
if (IS_ERR(ctx->kvalue)) {
|
||||
error = PTR_ERR(ctx->kvalue);
|
||||
ctx->kvalue = NULL;
|
||||
}
|
||||
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
|
||||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
|
||||
posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
|
||||
kvalue, size);
|
||||
}
|
||||
|
||||
error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
|
||||
out:
|
||||
kvfree(kvalue);
|
||||
return error;
|
||||
}
|
||||
|
||||
static void setxattr_convert(struct user_namespace *mnt_userns,
|
||||
struct dentry *d, struct xattr_ctx *ctx)
|
||||
{
|
||||
if (ctx->size &&
|
||||
((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
|
||||
(strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
|
||||
posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
|
||||
ctx->kvalue, ctx->size);
|
||||
}
|
||||
|
||||
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
struct xattr_ctx *ctx)
|
||||
{
|
||||
setxattr_convert(mnt_userns, dentry, ctx);
|
||||
return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
|
||||
ctx->kvalue, ctx->size, ctx->flags);
|
||||
}
|
||||
|
||||
static long
|
||||
setxattr(struct user_namespace *mnt_userns, struct dentry *d,
|
||||
const char __user *name, const void __user *value, size_t size,
|
||||
int flags)
|
||||
{
|
||||
struct xattr_name kname;
|
||||
struct xattr_ctx ctx = {
|
||||
.cvalue = value,
|
||||
.kvalue = NULL,
|
||||
.size = size,
|
||||
.kname = &kname,
|
||||
.flags = flags,
|
||||
};
|
||||
int error;
|
||||
|
||||
error = setxattr_copy(name, &ctx);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = do_setxattr(mnt_userns, d, &ctx);
|
||||
|
||||
kvfree(ctx.kvalue);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
|
||||
/*
|
||||
* Extended attribute GET operations
|
||||
*/
|
||||
static ssize_t
|
||||
getxattr(struct user_namespace *mnt_userns, struct dentry *d,
|
||||
const char __user *name, void __user *value, size_t size)
|
||||
ssize_t
|
||||
do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
|
||||
struct xattr_ctx *ctx)
|
||||
{
|
||||
ssize_t error;
|
||||
void *kvalue = NULL;
|
||||
char kname[XATTR_NAME_MAX + 1];
|
||||
char *kname = ctx->kname->name;
|
||||
|
||||
error = strncpy_from_user(kname, name, sizeof(kname));
|
||||
if (error == 0 || error == sizeof(kname))
|
||||
error = -ERANGE;
|
||||
if (error < 0)
|
||||
return error;
|
||||
|
||||
if (size) {
|
||||
if (size > XATTR_SIZE_MAX)
|
||||
size = XATTR_SIZE_MAX;
|
||||
kvalue = kvzalloc(size, GFP_KERNEL);
|
||||
if (!kvalue)
|
||||
if (ctx->size) {
|
||||
if (ctx->size > XATTR_SIZE_MAX)
|
||||
ctx->size = XATTR_SIZE_MAX;
|
||||
ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
|
||||
if (!ctx->kvalue)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
|
||||
error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
|
||||
if (error > 0) {
|
||||
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
|
||||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
|
||||
posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
|
||||
kvalue, error);
|
||||
if (size && copy_to_user(value, kvalue, error))
|
||||
ctx->kvalue, error);
|
||||
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
|
||||
error = -EFAULT;
|
||||
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
|
||||
} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
|
||||
/* The file system tried to return a value bigger
|
||||
than XATTR_SIZE_MAX bytes. Not possible. */
|
||||
error = -E2BIG;
|
||||
}
|
||||
|
||||
kvfree(kvalue);
|
||||
return error;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
getxattr(struct user_namespace *mnt_userns, struct dentry *d,
|
||||
const char __user *name, void __user *value, size_t size)
|
||||
{
|
||||
ssize_t error;
|
||||
struct xattr_name kname;
|
||||
struct xattr_ctx ctx = {
|
||||
.value = value,
|
||||
.kvalue = NULL,
|
||||
.size = size,
|
||||
.kname = &kname,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
error = strncpy_from_user(kname.name, name, sizeof(kname.name));
|
||||
if (error == 0 || error == sizeof(kname.name))
|
||||
error = -ERANGE;
|
||||
if (error < 0)
|
||||
return error;
|
||||
|
||||
error = do_getxattr(mnt_userns, d, &ctx);
|
||||
|
||||
kvfree(ctx.kvalue);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
@@ -1953,6 +1953,7 @@ struct dir_context {
|
||||
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
|
||||
|
||||
struct iov_iter;
|
||||
struct io_uring_cmd;
|
||||
|
||||
struct file_operations {
|
||||
struct module *owner;
|
||||
@@ -1995,6 +1996,7 @@ struct file_operations {
|
||||
struct file *file_out, loff_t pos_out,
|
||||
loff_t len, unsigned int remap_flags);
|
||||
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
|
||||
} __randomize_layout;
|
||||
|
||||
struct inode_operations {
|
||||
|
||||
@@ -5,11 +5,37 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = 4,
|
||||
IO_URING_F_CQE32 = 8,
|
||||
IO_URING_F_IOPOLL = 16,
|
||||
};
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const void *cmd;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd);
|
||||
u32 cmd_op;
|
||||
u32 pad;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
|
||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *));
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@@ -29,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
#else
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
@@ -42,6 +76,10 @@ static inline void io_uring_files_cancel(void)
|
||||
static inline void io_uring_free(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
static inline const char *io_uring_get_opcode(u8 opcode)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -355,14 +355,23 @@ static inline void clear_notify_signal(void)
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 'true' if kick_process() is needed to force a transition from
|
||||
* user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work.
|
||||
*/
|
||||
static inline bool __set_notify_signal(struct task_struct *task)
|
||||
{
|
||||
return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
|
||||
!wake_up_state(task, TASK_INTERRUPTIBLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to break out of interruptible wait loops, and enter the
|
||||
* exit_to_user_mode_loop().
|
||||
*/
|
||||
static inline void set_notify_signal(struct task_struct *task)
|
||||
{
|
||||
if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
|
||||
!wake_up_state(task, TASK_INTERRUPTIBLE))
|
||||
if (__set_notify_signal(task))
|
||||
kick_process(task);
|
||||
}
|
||||
|
||||
|
||||
@@ -50,6 +50,9 @@ struct linger {
|
||||
struct msghdr {
|
||||
void *msg_name; /* ptr to socket address structure */
|
||||
int msg_namelen; /* size of socket address structure */
|
||||
|
||||
int msg_inq; /* output, data left in socket */
|
||||
|
||||
struct iov_iter msg_iter; /* data */
|
||||
|
||||
/*
|
||||
@@ -62,8 +65,9 @@ struct msghdr {
|
||||
void __user *msg_control_user;
|
||||
};
|
||||
bool msg_control_is_user : 1;
|
||||
__kernel_size_t msg_controllen; /* ancillary data buffer length */
|
||||
bool msg_get_inq : 1;/* return INQ after receive */
|
||||
unsigned int msg_flags; /* flags on received message */
|
||||
__kernel_size_t msg_controllen; /* ancillary data buffer length */
|
||||
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
|
||||
};
|
||||
|
||||
@@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
|
||||
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
|
||||
int __user *upeer_addrlen, int flags);
|
||||
extern int __sys_socket(int family, int type, int protocol);
|
||||
extern struct file *__sys_socket_file(int family, int type, int protocol);
|
||||
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
|
||||
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
|
||||
int addrlen, int file_flags);
|
||||
|
||||
@@ -17,6 +17,7 @@ enum task_work_notify_mode {
|
||||
TWA_NONE,
|
||||
TWA_RESUME,
|
||||
TWA_SIGNAL,
|
||||
TWA_SIGNAL_NO_IPI,
|
||||
};
|
||||
|
||||
static inline bool task_work_pending(struct task_struct *task)
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring.h>
|
||||
|
||||
struct io_wq_work;
|
||||
|
||||
@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
|
||||
__entry->rw = rw;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
|
||||
);
|
||||
|
||||
@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
|
||||
__entry->opcode = opcode;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
|
||||
__entry->ctx, __entry->req, __entry->data, __entry->opcode)
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
|
||||
__entry->ctx, __entry->req, __entry->data,
|
||||
io_uring_get_opcode(__entry->opcode))
|
||||
);
|
||||
|
||||
/**
|
||||
@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
|
||||
__entry->link = link;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
__entry->link)
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode), __entry->link)
|
||||
);
|
||||
|
||||
/**
|
||||
@@ -318,13 +321,16 @@ TRACE_EVENT(io_uring_fail_link,
|
||||
* @user_data: user data associated with the request
|
||||
* @res: result of the request
|
||||
* @cflags: completion flags
|
||||
* @extra1: extra 64-bit data for CQE32
|
||||
* @extra2: extra 64-bit data for CQE32
|
||||
*
|
||||
*/
|
||||
TRACE_EVENT(io_uring_complete,
|
||||
|
||||
TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
|
||||
TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
|
||||
u64 extra1, u64 extra2),
|
||||
|
||||
TP_ARGS(ctx, req, user_data, res, cflags),
|
||||
TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
|
||||
|
||||
TP_STRUCT__entry (
|
||||
__field( void *, ctx )
|
||||
@@ -332,6 +338,8 @@ TRACE_EVENT(io_uring_complete,
|
||||
__field( u64, user_data )
|
||||
__field( int, res )
|
||||
__field( unsigned, cflags )
|
||||
__field( u64, extra1 )
|
||||
__field( u64, extra2 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@@ -340,12 +348,17 @@ TRACE_EVENT(io_uring_complete,
|
||||
__entry->user_data = user_data;
|
||||
__entry->res = res;
|
||||
__entry->cflags = cflags;
|
||||
__entry->extra1 = extra1;
|
||||
__entry->extra2 = extra2;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
|
||||
"extra1 %llu extra2 %llu ",
|
||||
__entry->ctx, __entry->req,
|
||||
__entry->user_data,
|
||||
__entry->res, __entry->cflags)
|
||||
__entry->res, __entry->cflags,
|
||||
(unsigned long long) __entry->extra1,
|
||||
(unsigned long long) __entry->extra2)
|
||||
);
|
||||
|
||||
/**
|
||||
@@ -389,9 +402,9 @@ TRACE_EVENT(io_uring_submit_sqe,
|
||||
__entry->sq_thread = sq_thread;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
|
||||
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
|
||||
__entry->user_data, __entry->opcode,
|
||||
__entry->user_data, io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
|
||||
);
|
||||
|
||||
@@ -433,8 +446,9 @@ TRACE_EVENT(io_uring_poll_arm,
|
||||
__entry->events = events;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->mask, __entry->events)
|
||||
);
|
||||
|
||||
@@ -470,8 +484,9 @@ TRACE_EVENT(io_uring_task_add,
|
||||
__entry->mask = mask;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->mask)
|
||||
);
|
||||
|
||||
@@ -506,7 +521,7 @@ TRACE_EVENT(io_uring_req_failed,
|
||||
__field( u16, personality )
|
||||
__field( u32, file_index )
|
||||
__field( u64, pad1 )
|
||||
__field( u64, pad2 )
|
||||
__field( u64, addr3 )
|
||||
__field( int, error )
|
||||
),
|
||||
|
||||
@@ -520,27 +535,69 @@ TRACE_EVENT(io_uring_req_failed,
|
||||
__entry->off = sqe->off;
|
||||
__entry->addr = sqe->addr;
|
||||
__entry->len = sqe->len;
|
||||
__entry->op_flags = sqe->rw_flags;
|
||||
__entry->op_flags = sqe->poll32_events;
|
||||
__entry->buf_index = sqe->buf_index;
|
||||
__entry->personality = sqe->personality;
|
||||
__entry->file_index = sqe->file_index;
|
||||
__entry->pad1 = sqe->__pad2[0];
|
||||
__entry->pad2 = sqe->__pad2[1];
|
||||
__entry->addr3 = sqe->addr3;
|
||||
__entry->error = error;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, "
|
||||
"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
|
||||
"opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
|
||||
"len=%u, rw_flags=0x%x, buf_index=%d, "
|
||||
"personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
|
||||
"personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
|
||||
"error=%d",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
__entry->opcode, __entry->flags, __entry->ioprio,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->ioprio,
|
||||
(unsigned long long)__entry->off,
|
||||
(unsigned long long) __entry->addr, __entry->len,
|
||||
__entry->op_flags,
|
||||
__entry->buf_index, __entry->personality, __entry->file_index,
|
||||
(unsigned long long) __entry->pad1,
|
||||
(unsigned long long) __entry->pad2, __entry->error)
|
||||
(unsigned long long) __entry->addr3, __entry->error)
|
||||
);
|
||||
|
||||
|
||||
/*
|
||||
* io_uring_cqe_overflow - a CQE overflowed
|
||||
*
|
||||
* @ctx: pointer to a ring context structure
|
||||
* @user_data: user data associated with the request
|
||||
* @res: CQE result
|
||||
* @cflags: CQE flags
|
||||
* @ocqe: pointer to the overflow cqe (if available)
|
||||
*
|
||||
*/
|
||||
TRACE_EVENT(io_uring_cqe_overflow,
|
||||
|
||||
TP_PROTO(void *ctx, unsigned long long user_data, s32 res, u32 cflags,
|
||||
void *ocqe),
|
||||
|
||||
TP_ARGS(ctx, user_data, res, cflags, ocqe),
|
||||
|
||||
TP_STRUCT__entry (
|
||||
__field( void *, ctx )
|
||||
__field( unsigned long long, user_data )
|
||||
__field( s32, res )
|
||||
__field( u32, cflags )
|
||||
__field( void *, ocqe )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->ctx = ctx;
|
||||
__entry->user_data = user_data;
|
||||
__entry->res = res;
|
||||
__entry->cflags = cflags;
|
||||
__entry->ocqe = ocqe;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, "
|
||||
"overflow_cqe %p",
|
||||
__entry->ctx, __entry->user_data, __entry->res,
|
||||
__entry->cflags, __entry->ocqe)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_IO_URING_H */
|
||||
|
||||
@@ -22,6 +22,7 @@ struct io_uring_sqe {
|
||||
union {
|
||||
__u64 off; /* offset into file */
|
||||
__u64 addr2;
|
||||
__u32 cmd_op;
|
||||
};
|
||||
union {
|
||||
__u64 addr; /* pointer to buffer or iovecs */
|
||||
@@ -45,6 +46,7 @@ struct io_uring_sqe {
|
||||
__u32 rename_flags;
|
||||
__u32 unlink_flags;
|
||||
__u32 hardlink_flags;
|
||||
__u32 xattr_flags;
|
||||
};
|
||||
__u64 user_data; /* data to be passed back at completion time */
|
||||
/* pack this to avoid bogus arm OABI complaints */
|
||||
@@ -60,9 +62,28 @@ struct io_uring_sqe {
|
||||
__s32 splice_fd_in;
|
||||
__u32 file_index;
|
||||
};
|
||||
__u64 __pad2[2];
|
||||
union {
|
||||
struct {
|
||||
__u64 addr3;
|
||||
__u64 __pad2[1];
|
||||
};
|
||||
/*
|
||||
* If the ring is initialized with IORING_SETUP_SQE128, then
|
||||
* this field is used for 80 bytes of arbitrary command data
|
||||
*/
|
||||
__u8 cmd[0];
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* If sqe->file_index is set to this for opcodes that instantiate a new
|
||||
* direct descriptor (like openat/openat2/accept), then io_uring will allocate
|
||||
* an available direct descriptor instead of having the application pass one
|
||||
* in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
|
||||
* if the space is full.
|
||||
*/
|
||||
#define IORING_FILE_INDEX_ALLOC (~0U)
|
||||
|
||||
enum {
|
||||
IOSQE_FIXED_FILE_BIT,
|
||||
IOSQE_IO_DRAIN_BIT,
|
||||
@@ -102,8 +123,25 @@ enum {
|
||||
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
|
||||
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
|
||||
#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
|
||||
/*
|
||||
* Cooperative task running. When requests complete, they often require
|
||||
* forcing the submitter to transition to the kernel to complete. If this
|
||||
* flag is set, work will be done when the task transitions anyway, rather
|
||||
* than force an inter-processor interrupt reschedule. This avoids interrupting
|
||||
* a task running in userspace, and saves an IPI.
|
||||
*/
|
||||
#define IORING_SETUP_COOP_TASKRUN (1U << 8)
|
||||
/*
|
||||
* If COOP_TASKRUN is set, get notified if task work is available for
|
||||
* running and a kernel transition would be needed to run it. This sets
|
||||
* IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN.
|
||||
*/
|
||||
#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
|
||||
|
||||
enum {
|
||||
#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
|
||||
#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
|
||||
|
||||
enum io_uring_op {
|
||||
IORING_OP_NOP,
|
||||
IORING_OP_READV,
|
||||
IORING_OP_WRITEV,
|
||||
@@ -145,6 +183,12 @@ enum {
|
||||
IORING_OP_SYMLINKAT,
|
||||
IORING_OP_LINKAT,
|
||||
IORING_OP_MSG_RING,
|
||||
IORING_OP_FSETXATTR,
|
||||
IORING_OP_SETXATTR,
|
||||
IORING_OP_FGETXATTR,
|
||||
IORING_OP_GETXATTR,
|
||||
IORING_OP_SOCKET,
|
||||
IORING_OP_URING_CMD,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
@@ -187,6 +231,33 @@ enum {
|
||||
#define IORING_POLL_UPDATE_EVENTS (1U << 1)
|
||||
#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
|
||||
|
||||
/*
|
||||
* ASYNC_CANCEL flags.
|
||||
*
|
||||
* IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key
|
||||
* IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the
|
||||
* request 'user_data'
|
||||
* IORING_ASYNC_CANCEL_ANY Match any request
|
||||
*/
|
||||
#define IORING_ASYNC_CANCEL_ALL (1U << 0)
|
||||
#define IORING_ASYNC_CANCEL_FD (1U << 1)
|
||||
#define IORING_ASYNC_CANCEL_ANY (1U << 2)
|
||||
|
||||
/*
|
||||
* send/sendmsg and recv/recvmsg flags (sqe->addr2)
|
||||
*
|
||||
* IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
|
||||
* or receive and arm poll if that yields an
|
||||
* -EAGAIN result, arm poll upfront and skip
|
||||
* the initial transfer attempt.
|
||||
*/
|
||||
#define IORING_RECVSEND_POLL_FIRST (1U << 0)
|
||||
|
||||
/*
|
||||
* accept flags stored in sqe->ioprio
|
||||
*/
|
||||
#define IORING_ACCEPT_MULTISHOT (1U << 0)
|
||||
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
*/
|
||||
@@ -194,6 +265,12 @@ struct io_uring_cqe {
|
||||
__u64 user_data; /* sqe->data submission passed back */
|
||||
__s32 res; /* result code for this event */
|
||||
__u32 flags;
|
||||
|
||||
/*
|
||||
* If the ring is initialized with IORING_SETUP_CQE32, then this field
|
||||
* contains 16 bytes of padding, doubling the size of the CQE.
|
||||
*/
|
||||
__u64 big_cqe[];
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -201,9 +278,11 @@ struct io_uring_cqe {
|
||||
*
|
||||
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
|
||||
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
|
||||
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
|
||||
*/
|
||||
#define IORING_CQE_F_BUFFER (1U << 0)
|
||||
#define IORING_CQE_F_MORE (1U << 1)
|
||||
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
|
||||
|
||||
enum {
|
||||
IORING_CQE_BUFFER_SHIFT = 16,
|
||||
@@ -236,6 +315,7 @@ struct io_sqring_offsets {
|
||||
*/
|
||||
#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
|
||||
#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
|
||||
#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */
|
||||
|
||||
struct io_cqring_offsets {
|
||||
__u32 head;
|
||||
@@ -333,6 +413,10 @@ enum {
|
||||
IORING_REGISTER_RING_FDS = 20,
|
||||
IORING_UNREGISTER_RING_FDS = 21,
|
||||
|
||||
/* register a ring-based provided buffer group */
|
||||
IORING_REGISTER_PBUF_RING = 22,
|
||||
IORING_UNREGISTER_PBUF_RING = 23,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST
|
||||
};
|
||||
@@ -350,9 +434,15 @@ struct io_uring_files_update {
|
||||
__aligned_u64 /* __s32 * */ fds;
|
||||
};
|
||||
|
||||
/*
|
||||
* Register a fully sparse file space, rather than pass in an array of all
|
||||
* -1 file descriptors.
|
||||
*/
|
||||
#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
|
||||
|
||||
struct io_uring_rsrc_register {
|
||||
__u32 nr;
|
||||
__u32 resv;
|
||||
__u32 flags;
|
||||
__u64 resv2;
|
||||
__aligned_u64 data;
|
||||
__aligned_u64 tags;
|
||||
@@ -404,6 +494,38 @@ struct io_uring_restriction {
|
||||
__u32 resv2[3];
|
||||
};
|
||||
|
||||
struct io_uring_buf {
|
||||
__u64 addr;
|
||||
__u32 len;
|
||||
__u16 bid;
|
||||
__u16 resv;
|
||||
};
|
||||
|
||||
struct io_uring_buf_ring {
|
||||
union {
|
||||
/*
|
||||
* To avoid spilling into more pages than we need to, the
|
||||
* ring tail is overlaid with the io_uring_buf->resv field.
|
||||
*/
|
||||
struct {
|
||||
__u64 resv1;
|
||||
__u32 resv2;
|
||||
__u16 resv3;
|
||||
__u16 tail;
|
||||
};
|
||||
struct io_uring_buf bufs[0];
|
||||
};
|
||||
};
|
||||
|
||||
/* argument for IORING_(UN)REGISTER_PBUF_RING */
|
||||
struct io_uring_buf_reg {
|
||||
__u64 ring_addr;
|
||||
__u32 ring_entries;
|
||||
__u16 bgid;
|
||||
__u16 pad;
|
||||
__u64 resv[3];
|
||||
};
|
||||
|
||||
/*
|
||||
* io_uring_restriction->opcode values
|
||||
*/
|
||||
|
||||
@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
|
||||
__u64 result;
|
||||
};
|
||||
|
||||
/* same as struct nvme_passthru_cmd64, minus the 8b result field */
|
||||
struct nvme_uring_cmd {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
__u16 rsvd1;
|
||||
__u32 nsid;
|
||||
__u32 cdw2;
|
||||
__u32 cdw3;
|
||||
__u64 metadata;
|
||||
__u64 addr;
|
||||
__u32 metadata_len;
|
||||
__u32 data_len;
|
||||
__u32 cdw10;
|
||||
__u32 cdw11;
|
||||
__u32 cdw12;
|
||||
__u32 cdw13;
|
||||
__u32 cdw14;
|
||||
__u32 cdw15;
|
||||
__u32 timeout_ms;
|
||||
__u32 rsvd2;
|
||||
};
|
||||
|
||||
#define nvme_admin_cmd nvme_passthru_cmd
|
||||
|
||||
#define NVME_IOCTL_ID _IO('N', 0x40)
|
||||
@@ -83,4 +105,10 @@ struct nvme_passthru_cmd64 {
|
||||
#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
|
||||
#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
|
||||
|
||||
/* io_uring async commands: */
|
||||
#define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd)
|
||||
#define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd)
|
||||
#define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd)
|
||||
#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
|
||||
|
||||
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
|
||||
|
||||
@@ -12,12 +12,22 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
|
||||
* @notify: how to notify the targeted task
|
||||
*
|
||||
* Queue @work for task_work_run() below and notify the @task if @notify
|
||||
* is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
|
||||
* it will interrupt the targeted task and run the task_work. @TWA_RESUME
|
||||
* work is run only when the task exits the kernel and returns to user mode,
|
||||
* or before entering guest mode. Fails if the @task is exiting/exited and thus
|
||||
* it can't process this @work. Otherwise @work->func() will be called when the
|
||||
* @task goes through one of the aforementioned transitions, or exits.
|
||||
* is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
|
||||
*
|
||||
* @TWA_SIGNAL works like signals, in that it will interrupt the targeted
|
||||
* task and run the task_work, regardless of whether the task is currently
|
||||
* running in the kernel or userspace.
|
||||
* @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
|
||||
* reschedule IPI to force the targeted task to reschedule and run task_work.
|
||||
* This can be advantageous if there's no strict requirement that the
|
||||
* task_work be run as soon as possible, just whenever the task enters the
|
||||
* kernel anyway.
|
||||
* @TWA_RESUME work is run only when the task exits the kernel and returns to
|
||||
* user mode, or before entering guest mode.
|
||||
*
|
||||
* Fails if the @task is exiting/exited and thus it can't process this @work.
|
||||
* Otherwise @work->func() will be called when the @task goes through one of
|
||||
* the aforementioned transitions, or exits.
|
||||
*
|
||||
* If the targeted task is exiting, then an error is returned and the work item
|
||||
* is not queued. It's up to the caller to arrange for an alternative mechanism
|
||||
@@ -53,6 +63,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
|
||||
case TWA_SIGNAL:
|
||||
set_notify_signal(task);
|
||||
break;
|
||||
case TWA_SIGNAL_NO_IPI:
|
||||
__set_notify_signal(task);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
|
||||
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
if (sk->sk_state == TCP_LISTEN)
|
||||
goto out;
|
||||
|
||||
if (tp->recvmsg_inq)
|
||||
if (tp->recvmsg_inq) {
|
||||
*cmsg_flags = TCP_CMSG_INQ;
|
||||
msg->msg_get_inq = 1;
|
||||
}
|
||||
timeo = sock_rcvtimeo(sk, nonblock);
|
||||
|
||||
/* Urgent data needs to be handled specially. */
|
||||
@@ -2559,7 +2561,7 @@ recv_sndq:
|
||||
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
|
||||
int flags, int *addr_len)
|
||||
{
|
||||
int cmsg_flags = 0, ret, inq;
|
||||
int cmsg_flags = 0, ret;
|
||||
struct scm_timestamping_internal tss;
|
||||
|
||||
if (unlikely(flags & MSG_ERRQUEUE))
|
||||
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
|
||||
release_sock(sk);
|
||||
sk_defer_free_flush(sk);
|
||||
|
||||
if (cmsg_flags && ret >= 0) {
|
||||
if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
|
||||
if (cmsg_flags & TCP_CMSG_TS)
|
||||
tcp_recv_timestamp(msg, sk, &tss);
|
||||
if (cmsg_flags & TCP_CMSG_INQ) {
|
||||
inq = tcp_inq_hint(sk);
|
||||
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
|
||||
if (msg->msg_get_inq) {
|
||||
msg->msg_inq = tcp_inq_hint(sk);
|
||||
if (cmsg_flags & TCP_CMSG_INQ)
|
||||
put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
|
||||
sizeof(msg->msg_inq), &msg->msg_inq);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
54
net/socket.c
54
net/socket.c
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
|
||||
struct socket *sock_from_file(struct file *file)
|
||||
{
|
||||
if (file->f_op == &socket_file_ops)
|
||||
return file->private_data; /* set in sock_map_fd */
|
||||
return file->private_data; /* set in sock_alloc_file */
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
|
||||
}
|
||||
EXPORT_SYMBOL(sock_create_kern);
|
||||
|
||||
int __sys_socket(int family, int type, int protocol)
|
||||
static struct socket *__sys_socket_create(int family, int type, int protocol)
|
||||
{
|
||||
int retval;
|
||||
struct socket *sock;
|
||||
int flags;
|
||||
int retval;
|
||||
|
||||
/* Check the SOCK_* constants for consistency. */
|
||||
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
|
||||
@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
|
||||
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
|
||||
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
||||
return -EINVAL;
|
||||
if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
||||
return ERR_PTR(-EINVAL);
|
||||
type &= SOCK_TYPE_MASK;
|
||||
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
retval = sock_create(family, type, protocol, &sock);
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
return ERR_PTR(retval);
|
||||
|
||||
return sock;
|
||||
}
|
||||
|
||||
struct file *__sys_socket_file(int family, int type, int protocol)
|
||||
{
|
||||
struct socket *sock;
|
||||
struct file *file;
|
||||
int flags;
|
||||
|
||||
sock = __sys_socket_create(family, type, protocol);
|
||||
if (IS_ERR(sock))
|
||||
return ERR_CAST(sock);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
file = sock_alloc_file(sock, flags, NULL);
|
||||
if (IS_ERR(file))
|
||||
sock_release(sock);
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
int __sys_socket(int family, int type, int protocol)
|
||||
{
|
||||
struct socket *sock;
|
||||
int flags;
|
||||
|
||||
sock = __sys_socket_create(family, type, protocol);
|
||||
if (IS_ERR(sock))
|
||||
return PTR_ERR(sock);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user