From e9a4d187cd1589745c205e9936057ca48d41817d Mon Sep 17 00:00:00 2001 From: wenlylinux Date: Wed, 8 Mar 2023 06:52:57 +0000 Subject: [PATCH 1/2] update README. ANBZ: #133 commit 580077855a40741cf511766129702d97ff02f4d9 upstream. In nf_tables_updtable, if nf_tables_table_enable returns an error, nft_trans_destroy is called to free the transaction object. --- README | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README b/README index 669ac7c32292..98f5fe01cbfa 100644 --- a/README +++ b/README @@ -16,3 +16,5 @@ several of them using the Restructured Text markup notation. Please read the Documentation/process/changes.rst file, as it contains the requirements for building and running the kernel, and information about the problems which may result by upgrading your kernel. + +test123123 -- Gitee From ed2834dc98f5ae68d72170c6481e2ce8b3a66e55 Mon Sep 17 00:00:00 2001 From: wenlylinux Date: Thu, 23 Mar 2023 07:30:35 +0000 Subject: [PATCH 2/2] update fs/aio.c. ANBZ: #133 commit 580077855a40741cf511766129702d97ff02f4d9 upstream. In nf_tables_updtable, if nf_tables_table_enable returns an error, nft_trans_destroy is called to free the transaction object. --- fs/aio.c | 510 ++++++++++++++++++------------------------------------- 1 file changed, 161 insertions(+), 349 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 2a9dfa58ec3a..983c8179ac92 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1,4 +1,5 @@ /* + * test * An async IO implementation for Linux * Written by Benjamin LaHaise * @@ -27,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +43,6 @@ #include #include #include -#include #include #include @@ -67,15 +68,9 @@ struct aio_ring { unsigned header_length; /* size of aio_ring */ - struct io_event io_events[]; + struct io_event io_events[0]; }; /* 128 bytes + ring size */ -/* - * Plugging is meant to work with larger batches of IOs. If we don't - * have more than the below, then don't bother setting up a plug. 
- */ -#define AIO_PLUG_THRESHOLD 2 - #define AIO_RING_PAGES 8 struct kioctx_table { @@ -182,9 +177,8 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; + bool done; bool cancelled; - bool work_scheduled; - bool work_need_resched; struct wait_queue_entry wait; struct work_struct work; }; @@ -251,12 +245,15 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) return file; } -static int aio_init_fs_context(struct fs_context *fc) +static struct dentry *aio_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - if (!init_pseudo(fc, AIO_RING_MAGIC)) - return -ENOMEM; - fc->s_iflags |= SB_I_NOEXEC; - return 0; + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, + AIO_RING_MAGIC); + + if (!IS_ERR(root)) + root->d_sb->s_iflags |= SB_I_NOEXEC; + return root; } /* aio_setup @@ -267,7 +264,7 @@ static int __init aio_setup(void) { static struct file_system_type aio_fs = { .name = "aio", - .init_fs_context = aio_init_fs_context, + .mount = aio_mount, .kill_sb = kill_anon_super, }; aio_mnt = kern_mount(&aio_fs); @@ -424,7 +421,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, BUG_ON(PageWriteback(old)); get_page(new); - rc = migrate_page_move_mapping(mapping, new, old, 1); + rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1); if (rc != MIGRATEPAGE_SUCCESS) { put_page(new); goto out_unlock; @@ -520,16 +517,16 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ctx->mmap_size = nr_pages * PAGE_SIZE; pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size); - if (mmap_write_lock_killable(mm)) { + if (down_write_killable(&mm->mmap_sem)) { ctx->mmap_size = 0; aio_free_ring(ctx); return -EINTR; } - ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, 0, &unused, NULL); - mmap_write_unlock(mm); + ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, 0, &unused, NULL); + up_write(&mm->mmap_sem); if (IS_ERR((void *)ctx->mmap_base)) { ctx->mmap_size = 0; aio_free_ring(ctx); @@ -1032,11 +1029,6 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - if (unlikely(!get_reqs_available(ctx))) { - kmem_cache_free(kiocb_cachep, req); - return NULL; - } - percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); @@ -1075,8 +1067,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) static inline void iocb_destroy(struct aio_kiocb *iocb) { - if (iocb->ki_eventfd) - eventfd_ctx_put(iocb->ki_eventfd); if (iocb->ki_filp) fput(iocb->ki_filp); percpu_ref_put(&iocb->ki_ctx->reqs); @@ -1144,8 +1134,10 @@ static void aio_complete(struct aio_kiocb *iocb) * eventfd. The eventfd_signal() function is safe to be called * from IRQ context. 
*/ - if (iocb->ki_eventfd) + if (iocb->ki_eventfd) { eventfd_signal(iocb->ki_eventfd, 1); + eventfd_ctx_put(iocb->ki_eventfd); + } /* * We have to order our ring_info tail store above and test @@ -1468,7 +1460,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_ioprio = iocb->aio_reqprio; } else - req->ki_ioprio = get_current_ioprio(); + req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); if (unlikely(ret)) @@ -1490,8 +1482,12 @@ static ssize_t aio_setup_rw(int rw, const struct iocb *iocb, *iovec = NULL; return ret; } - - return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat); +#ifdef CONFIG_COMPAT + if (compat) + return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec, + iter); +#endif + return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter); } static inline void aio_rw_done(struct kiocb *req, ssize_t ret) @@ -1508,19 +1504,19 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) * may be already running. Just fail this IO with EINTR. */ ret = -EINTR; - fallthrough; + /*FALLTHRU*/ default: req->ki_complete(req, ret, 0); } } -static int aio_read(struct kiocb *req, const struct iocb *iocb, +static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - int ret; + ssize_t ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1542,13 +1538,13 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, return ret; } -static int aio_write(struct kiocb *req, const struct iocb *iocb, +static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - int ret; + ssize_t ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1573,7 +1569,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, * we return to userspace. */ if (S_ISREG(file_inode(file)->i_mode)) { - sb_start_write(file_inode(file)->i_sb); + __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } req->ki_flags |= IOCB_WRITE; @@ -1622,51 +1618,6 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } -/* - * Safely lock the waitqueue which the request is on, synchronizing with the - * case where the ->poll() provider decides to free its waitqueue early. - * - * Returns true on success, meaning that req->head->lock was locked, req->wait - * is on req->head, and an RCU read lock was taken. Returns false if the - * request was already removed from its waitqueue (which might no longer exist). - */ -static bool poll_iocb_lock_wq(struct poll_iocb *req) -{ - wait_queue_head_t *head; - - /* - * While we hold the waitqueue lock and the waitqueue is nonempty, - * wake_up_pollfree() will wait for us. However, taking the waitqueue - * lock in the first place can race with the waitqueue being freed. - * - * We solve this as eventpoll does: by taking advantage of the fact that - * all users of wake_up_pollfree() will RCU-delay the actual free. If - * we enter rcu_read_lock() and see that the pointer to the queue is - * non-NULL, we can then lock it without the memory being freed out from - * under us, then check whether the request is still on the queue. 
- * - * Keep holding rcu_read_lock() as long as we hold the queue lock, in - * case the caller deletes the entry from the queue, leaving it empty. - * In that case, only RCU prevents the queue memory from being freed. - */ - rcu_read_lock(); - head = smp_load_acquire(&req->head); - if (head) { - spin_lock(&head->lock); - if (!list_empty(&req->wait.entry)) - return true; - spin_unlock(&head->lock); - } - rcu_read_unlock(); - return false; -} - -static void poll_iocb_unlock_wq(struct poll_iocb *req) -{ - spin_unlock(&req->head->lock); - rcu_read_unlock(); -} - static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1686,27 +1637,14 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); - if (poll_iocb_lock_wq(req)) { - if (!mask && !READ_ONCE(req->cancelled)) { - /* - * The request isn't actually ready to be completed yet. - * Reschedule completion if another wakeup came in. - */ - if (req->work_need_resched) { - schedule_work(&req->work); - req->work_need_resched = false; - } else { - req->work_scheduled = false; - } - poll_iocb_unlock_wq(req); - spin_unlock_irq(&ctx->ctx_lock); - return; - } - list_del_init(&req->wait.entry); - poll_iocb_unlock_wq(req); - } /* else, POLLFREE has freed the waitqueue, so we must complete */ + if (!mask && !READ_ONCE(req->cancelled)) { + add_wait_queue(req->head, &req->wait); + spin_unlock_irq(&ctx->ctx_lock); + return; + } list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); + req->done = true; spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); @@ -1718,14 +1656,13 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - if (poll_iocb_lock_wq(req)) { - WRITE_ONCE(req->cancelled, true); - if (!req->work_scheduled) { - schedule_work(&aiocb->poll.work); - req->work_scheduled = true; - } - poll_iocb_unlock_wq(req); - } /* else, the request was force-cancelled by POLLFREE already */ + spin_lock(&req->head->lock); + WRITE_ONCE(req->cancelled, true); + if (!list_empty(&req->wait.entry)) { + list_del_init(&req->wait.entry); + schedule_work(&aiocb->poll.work); + } + spin_unlock(&req->head->lock); return 0; } @@ -1742,26 +1679,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - /* - * Complete the request inline if possible. This requires that three - * conditions be met: - * 1. An event mask must have been passed. If a plain wakeup was done - * instead, then mask == 0 and we have to call vfs_poll() to get - * the events, so inline completion isn't possible. - * 2. The completion work must not have already been scheduled. - * 3. ctx_lock must not be busy. We have to use trylock because we - * already hold the waitqueue lock, so this inverts the normal - * locking order. Use irqsave/irqrestore because not all - * filesystems (e.g. fuse) call this function with IRQs disabled, - * yet IRQs have to be disabled before ctx_lock is obtained. - */ - if (mask && !req->work_scheduled && - spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + list_del_init(&req->wait.entry); + + if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { struct kioctx *ctx = iocb->ki_ctx; - list_del_init(&req->wait.entry); + /* + * Try to complete the iocb inline if we can. 
Use + * irqsave/irqrestore because not all filesystems (e.g. fuse) + * call this function with IRQs disabled and because IRQs + * have to be disabled before ctx_lock is obtained. + */ list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); + req->done = true; if (iocb->ki_eventfd && eventfd_signal_count()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); @@ -1771,43 +1702,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { - /* - * Schedule the completion work if needed. If it was already - * scheduled, record that another wakeup came in. - * - * Don't remove the request from the waitqueue here, as it might - * not actually be complete yet (we won't know until vfs_poll() - * is called), and we must not miss any wakeups. POLLFREE is an - * exception to this; see below. - */ - if (req->work_scheduled) { - req->work_need_resched = true; - } else { - schedule_work(&req->work); - req->work_scheduled = true; - } - - /* - * If the waitqueue is being freed early but we can't complete - * the request inline, we have to tear down the request as best - * we can. That means immediately removing the request from its - * waitqueue and preventing all further accesses to the - * waitqueue via the request. We also need to schedule the - * completion work (done above). Also mark the request as - * cancelled, to potentially skip an unneeded call to ->poll(). - */ - if (mask & POLLFREE) { - WRITE_ONCE(req->cancelled, true); - list_del_init(&req->wait.entry); - - /* - * Careful: this *must* be the last step, since as soon - * as req->head is NULL'ed out, the request can be - * completed and freed, since aio_poll_complete_work() - * will no longer need to take the waitqueue lock. - */ - smp_store_release(&req->head, NULL); - } + schedule_work(&req->work); } return 1; } @@ -1815,7 +1710,6 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; - bool queued; int error; }; @@ -1826,18 +1720,17 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ - if (unlikely(pt->queued)) { + if (unlikely(pt->iocb->poll.head)) { pt->error = -EINVAL; return; } - pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); } -static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) +static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) { struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; @@ -1856,14 +1749,12 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; + req->done = false; req->cancelled = false; - req->work_scheduled = false; - req->work_need_resched = false; apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; - apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ @@ -1872,35 +1763,23 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); - if (likely(apt.queued)) { - bool on_queue = poll_iocb_lock_wq(req); - - if (!on_queue || req->work_scheduled) { - /* - * 
aio_poll_wake() already either scheduled the async - * completion work, or completed the request inline. - */ - if (apt.error) /* unsupported case: multiple queues */ + if (likely(req->head)) { + spin_lock(&req->head->lock); + if (unlikely(list_empty(&req->wait.entry))) { + if (apt.error) cancel = true; apt.error = 0; mask = 0; } if (mask || apt.error) { - /* Steal to complete synchronously. */ list_del_init(&req->wait.entry); } else if (cancel) { - /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (on_queue) { - /* - * Actually waiting for an event, so add the request to - * active_reqs so that it can be cancelled if needed. - */ + } else if (!req->done) { /* actually waiting for an event */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } - if (on_queue) - poll_iocb_unlock_wq(req); + spin_unlock(&req->head->lock); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); @@ -1913,31 +1792,59 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, - struct iocb __user *user_iocb, struct aio_kiocb *req, - bool compat) + struct iocb __user *user_iocb, bool compat) { + struct aio_kiocb *req; + ssize_t ret; + + /* enforce forwards compatibility on users */ + if (unlikely(iocb->aio_reserved2)) { + pr_debug("EINVAL: reserve field set\n"); + return -EINVAL; + } + + /* prevent overflows */ + if (unlikely( + (iocb->aio_buf != (unsigned long)iocb->aio_buf) || + (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || + ((ssize_t)iocb->aio_nbytes < 0) + )) { + pr_debug("EINVAL: overflow check\n"); + return -EINVAL; + } + + if (!get_reqs_available(ctx)) + return -EAGAIN; + + ret = -EAGAIN; + req = aio_get_req(ctx); + if (unlikely(!req)) + goto out_put_reqs_available; + req->ki_filp = fget(iocb->aio_fildes); + ret = -EBADF; if (unlikely(!req->ki_filp)) - return -EBADF; + goto out_put_req; if (iocb->aio_flags & IOCB_FLAG_RESFD) { - struct eventfd_ctx *eventfd; /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * instance of the file* now. The file descriptor must be * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. 
*/ - eventfd = eventfd_ctx_fdget(iocb->aio_resfd); - if (IS_ERR(eventfd)) - return PTR_ERR(eventfd); - - req->ki_eventfd = eventfd; + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); + if (IS_ERR(req->ki_eventfd)) { + ret = PTR_ERR(req->ki_eventfd); + req->ki_eventfd = NULL; + goto out_put_req; + } } - if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) { + ret = put_user(KIOCB_KEY, &user_iocb->aio_key); + if (unlikely(ret)) { pr_debug("EFAULT: aio_key\n"); - return -EFAULT; + goto out_put_req; } req->ki_res.obj = (u64)(unsigned long)user_iocb; @@ -1947,70 +1854,61 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: - return aio_read(&req->rw, iocb, false, compat); + ret = aio_read(&req->rw, iocb, false, compat); + break; case IOCB_CMD_PWRITE: - return aio_write(&req->rw, iocb, false, compat); + ret = aio_write(&req->rw, iocb, false, compat); + break; case IOCB_CMD_PREADV: - return aio_read(&req->rw, iocb, true, compat); + ret = aio_read(&req->rw, iocb, true, compat); + break; case IOCB_CMD_PWRITEV: - return aio_write(&req->rw, iocb, true, compat); + ret = aio_write(&req->rw, iocb, true, compat); + break; case IOCB_CMD_FSYNC: - return aio_fsync(&req->fsync, iocb, false); + ret = aio_fsync(&req->fsync, iocb, false); + break; case IOCB_CMD_FDSYNC: - return aio_fsync(&req->fsync, iocb, true); + ret = aio_fsync(&req->fsync, iocb, true); + break; case IOCB_CMD_POLL: - return aio_poll(req, iocb); + ret = aio_poll(req, iocb); + break; default: pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); - return -EINVAL; + ret = -EINVAL; + break; } + + /* Done with the synchronous reference */ + iocb_put(req); + + /* + * If ret is 0, we'd either done aio_complete() ourselves or have + * arranged for that to be done asynchronously. Anything non-zero + * means that we need to destroy req ourselves. + */ + if (!ret) + return 0; + +out_put_req: + if (req->ki_eventfd) + eventfd_ctx_put(req->ki_eventfd); + iocb_destroy(req); +out_put_reqs_available: + put_reqs_available(ctx, 1); + return ret; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { - struct aio_kiocb *req; struct iocb iocb; - int err; if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) return -EFAULT; - /* enforce forwards compatibility on users */ - if (unlikely(iocb.aio_reserved2)) { - pr_debug("EINVAL: reserve field set\n"); - return -EINVAL; - } - - /* prevent overflows */ - if (unlikely( - (iocb.aio_buf != (unsigned long)iocb.aio_buf) || - (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || - ((ssize_t)iocb.aio_nbytes < 0) - )) { - pr_debug("EINVAL: overflow check\n"); - return -EINVAL; - } - - req = aio_get_req(ctx); - if (unlikely(!req)) - return -EAGAIN; - - err = __io_submit_one(ctx, &iocb, user_iocb, req, compat); - - /* Done with the synchronous reference */ - iocb_put(req); - - /* - * If err is 0, we'd either done aio_complete() ourselves or have - * arranged for that to be done asynchronously. Anything non-zero - * means that we need to destroy req ourselves. 
- */ - if (unlikely(err)) { - iocb_destroy(req); - put_reqs_available(ctx, 1); - } - return err; + return __io_submit_one(ctx, &iocb, user_iocb, compat); } /* sys_io_submit: @@ -2045,8 +1943,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, if (nr > ctx->nr_events) nr = ctx->nr_events; - if (nr > AIO_PLUG_THRESHOLD) - blk_start_plug(&plug); + blk_start_plug(&plug); for (i = 0; i < nr; i++) { struct iocb __user *user_iocb; @@ -2059,8 +1956,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, if (ret) break; } - if (nr > AIO_PLUG_THRESHOLD) - blk_finish_plug(&plug); + blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? i : ret; @@ -2087,8 +1983,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (nr > ctx->nr_events) nr = ctx->nr_events; - if (nr > AIO_PLUG_THRESHOLD) - blk_start_plug(&plug); + blk_start_plug(&plug); for (i = 0; i < nr; i++) { compat_uptr_t user_iocb; @@ -2101,8 +1996,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (ret) break; } - if (nr > AIO_PLUG_THRESHOLD) - blk_finish_plug(&plug); + blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? i : ret; @@ -2193,13 +2087,11 @@ static long do_io_getevents(aio_context_t ctx_id, * specifies an infinite timeout. Note that the timeout pointed to by * timeout is relative. Will fail with -ENOSYS if not implemented. */ -#ifdef CONFIG_64BIT - SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, long, min_nr, long, nr, struct io_event __user *, events, - struct __kernel_timespec __user *, timeout) + struct timespec __user *, timeout) { struct timespec64 ts; int ret; @@ -2213,8 +2105,6 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, return ret; } -#endif - struct __aio_sigset { const sigset_t __user *sigmask; size_t sigsetsize; @@ -2225,7 +2115,7 @@ SYSCALL_DEFINE6(io_pgetevents, long, min_nr, long, nr, struct io_event __user *, events, - struct __kernel_timespec __user *, timeout, + struct timespec __user *, timeout, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; @@ -2239,41 +2129,6 @@ SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize); - if (ret) - return ret; - - ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&ts : NULL); - - interrupted = signal_pending(current); - restore_saved_sigmask_unless(interrupted); - if (interrupted && !ret) - ret = -ERESTARTNOHAND; - - return ret; -} - -#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) - -SYSCALL_DEFINE6(io_pgetevents_time32, - aio_context_t, ctx_id, - long, min_nr, - long, nr, - struct io_event __user *, events, - struct old_timespec32 __user *, timeout, - const struct __aio_sigset __user *, usig) -{ - struct __aio_sigset ksig = { NULL, }; - struct timespec64 ts; - bool interrupted; - int ret; - - if (timeout && unlikely(get_old_timespec32(&ts, timeout))) - return -EFAULT; - - if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) - return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) @@ -2289,20 +2144,17 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; } -#endif - -#if defined(CONFIG_COMPAT_32BIT_TIME) - -SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, - __s32, min_nr, - __s32, nr, - struct io_event __user *, events, - struct old_timespec32 __user *, timeout) +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) { struct timespec64 t; int ret; - if (timeout && get_old_timespec32(&t, timeout)) + if (timeout && compat_get_timespec64(&t, timeout)) return -EFAULT; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); @@ -2311,72 +2163,32 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, return ret; } -#endif - -#ifdef CONFIG_COMPAT struct __compat_aio_sigset { - compat_uptr_t sigmask; + compat_sigset_t __user *sigmask; compat_size_t sigsetsize; }; -#if defined(CONFIG_COMPAT_32BIT_TIME) - COMPAT_SYSCALL_DEFINE6(io_pgetevents, compat_aio_context_t, ctx_id, compat_long_t, min_nr, compat_long_t, nr, struct io_event __user *, events, - struct old_timespec32 __user *, timeout, - const struct __compat_aio_sigset __user *, usig) -{ - struct __compat_aio_sigset ksig = { 0, }; - struct timespec64 t; - bool interrupted; - int ret; - - if (timeout && get_old_timespec32(&t, timeout)) - return -EFAULT; - - if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) - return -EFAULT; - - ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); - if (ret) - return ret; - - ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - - interrupted = signal_pending(current); - restore_saved_sigmask_unless(interrupted); - if (interrupted && !ret) - ret = -ERESTARTNOHAND; - - return ret; -} - -#endif - -COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, - compat_aio_context_t, ctx_id, - compat_long_t, min_nr, - compat_long_t, nr, - struct io_event __user *, events, - struct __kernel_timespec __user *, timeout, + struct compat_timespec __user *, timeout, const struct __compat_aio_sigset __user *, usig) { - struct __compat_aio_sigset ksig = { 0, }; + struct __compat_aio_sigset ksig = { NULL, }; struct timespec64 t; bool interrupted; int ret; - if (timeout && get_timespec64(&t, timeout)) + if (timeout && compat_get_timespec64(&t, timeout)) return -EFAULT; if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); + ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; -- Gitee
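
The fs/aio.c hunks above touch two pieces of the AIO ABI that are easy to misread from the diff alone, so two illustrative userspace sketches follow. Neither is part of the patch.

First, the ring-header hunk swaps the C99 flexible array member io_events[] back to the older zero-length-array spelling io_events[0]. Both describe the same layout: the io_event slots begin immediately after the ring header, and the mapping is sized as the header plus nr_events entries. A standalone sketch of that sizing, using stand-in struct names rather than the kernel definitions:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for struct io_event / struct aio_ring; fields trimmed. */
struct demo_event {
	unsigned long long data, obj;
	long long res, res2;
};

struct demo_ring {
	unsigned id, nr, head, tail;
	unsigned magic, compat_features, incompat_features, header_length;
	struct demo_event io_events[];	/* events start right after the header */
};

int main(void)
{
	unsigned nr_events = 128;
	size_t sz = sizeof(struct demo_ring)
		  + nr_events * sizeof(struct demo_event);

	printf("header=%zu bytes, mapping for %u events=%zu bytes\n",
	       sizeof(struct demo_ring), nr_events, sz);
	return 0;
}

Second, __io_submit_one() keeps the rule that an iocb submitted with IOCB_FLAG_RESFD must carry an eventfd() descriptor in aio_resfd, which the completion path signals once per finished event. A minimal sketch of that pattern, assuming the standard <linux/aio_abi.h> definitions; it calls the raw syscalls because glibc does not wrap native AIO, error handling is trimmed, and /etc/hostname is only a placeholder path:

#include <fcntl.h>
#include <linux/aio_abi.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };
	struct io_event ev;
	char buf[4096];
	uint64_t count;

	int fd = open("/etc/hostname", O_RDONLY);
	int efd = eventfd(0, 0);

	if (syscall(SYS_io_setup, 1, &ctx) < 0) {
		perror("io_setup");
		return 1;
	}

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_fildes = fd;
	cb.aio_buf = (uint64_t)(uintptr_t)buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;
	cb.aio_flags = IOCB_FLAG_RESFD;		/* signal efd on completion */
	cb.aio_resfd = efd;

	if (syscall(SYS_io_submit, ctx, 1, cbs) != 1) {
		perror("io_submit");
		return 1;
	}

	/* Blocks until the kernel's completion path signals the eventfd. */
	read(efd, &count, sizeof(count));

	/* The completion itself is still reaped through io_getevents(). */
	syscall(SYS_io_getevents, ctx, 1, 1, &ev, NULL);
	printf("res=%lld, eventfd count=%llu\n",
	       (long long)ev.res, (unsigned long long)count);

	syscall(SYS_io_destroy, ctx);
	close(efd);
	close(fd);
	return 0;
}

The eventfd read only reports how many completions have been signaled; the results themselves always come back through the io_event ring via io_getevents().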