Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 5 Jan 2019 17:16:18 +0000 (09:16 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 5 Jan 2019 17:16:18 +0000 (09:16 -0800)
Merge more updates from Andrew Morton:

 - procfs updates

 - various misc bits

 - lib/ updates

 - epoll updates

 - autofs

 - fatfs

 - a few more MM bits

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (58 commits)
  mm/page_io.c: fix polled swap page in
  checkpatch: add Co-developed-by to signature tags
  docs: fix Co-Developed-by docs
  drivers/base/platform.c: kmemleak ignore a known leak
  fs: don't open code lru_to_page()
  fs/: remove caller signal_pending branch predictions
  mm/: remove caller signal_pending branch predictions
  arch/arc/mm/fault.c: remove caller signal_pending_branch predictions
  kernel/sched/: remove caller signal_pending branch predictions
  kernel/locking/mutex.c: remove caller signal_pending branch predictions
  mm: select HAVE_MOVE_PMD on x86 for faster mremap
  mm: speed up mremap by 20x on large regions
  mm: treewide: remove unused address argument from pte_alloc functions
  initramfs: cleanup incomplete rootfs
  scripts/gdb: fix lx-version string output
  kernel/kcov.c: mark write_comp_data() as notrace
  kernel/sysctl: add panic_print into sysctl
  panic: add options to print system info when panic happens
  bfs: extra sanity checking and static inode bitmap
  exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
  ...

fs/eventpoll.c
fs/fat/dir.c
kernel/sched/core.c
mm/gup.c

diff --combined fs/eventpoll.c
index 7ebae39fbcb3750a6cb264c2e84b844ddba7e7da,2329f96469e2d719fa074c6486396c32ebce3d90..a5d219d920e755aa7761253c87a1da6470f26782
@@@ -381,7 -381,8 +381,8 @@@ static void ep_nested_calls_init(struc
   */
  static inline int ep_events_available(struct eventpoll *ep)
  {
-       return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+       return !list_empty_careful(&ep->rdllist) ||
+               READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
  }
  
  #ifdef CONFIG_NET_RX_BUSY_POLL
@@@ -471,7 -472,6 +472,6 @@@ static inline void ep_set_busy_poll_nap
   *                  no re-entered.
   *
   * @ncalls: Pointer to the nested_calls structure to be used for this call.
-  * @max_nests: Maximum number of allowed nesting calls.
   * @nproc: Nested call core function pointer.
   * @priv: Opaque data to be passed to the @nproc callback.
   * @cookie: Cookie to be used to identify this nested call.
   * Returns: Returns the code returned by the @nproc callback, or -1 if
   *          the maximum recursion limit has been exceeded.
   */
- static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+ static int ep_call_nested(struct nested_calls *ncalls,
                          int (*nproc)(void *, void *, int), void *priv,
                          void *cookie, void *ctx)
  {
         */
        list_for_each_entry(tncur, lsthead, llink) {
                if (tncur->ctx == ctx &&
-                   (tncur->cookie == cookie || ++call_nests > max_nests)) {
+                   (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
                        /*
                         * Ops ... loop detected or maximum nest level reached.
                         * We abort this wake by breaking the cycle itself.
@@@ -573,7 -573,7 +573,7 @@@ static void ep_poll_safewake(wait_queue
  {
        int this_cpu = get_cpu();
  
-       ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+       ep_call_nested(&poll_safewake_ncalls,
                       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  
        put_cpu();
@@@ -699,7 -699,7 +699,7 @@@ static __poll_t ep_scan_ready_list(stru
         */
        spin_lock_irq(&ep->wq.lock);
        list_splice_init(&ep->rdllist, &txlist);
-       ep->ovflist = NULL;
+       WRITE_ONCE(ep->ovflist, NULL);
        spin_unlock_irq(&ep->wq.lock);
  
        /*
         * other events might have been queued by the poll callback.
         * We re-insert them inside the main ready-list here.
         */
-       for (nepi = ep->ovflist; (epi = nepi) != NULL;
+       for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
             nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
                /*
                 * We need to check if the item is already in the list.
         * releasing the lock, events will be queued in the normal way inside
         * ep->rdllist.
         */
-       ep->ovflist = EP_UNACTIVE_PTR;
+       WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
  
        /*
         * Quickly re-inject items left on "txlist".
@@@ -1154,10 -1154,10 +1154,10 @@@ static int ep_poll_callback(wait_queue_
         * semantics). All the events that happen during that period of time are
         * chained in ep->ovflist and requeued later on.
         */
-       if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+       if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
                if (epi->next == EP_UNACTIVE_PTR) {
-                       epi->next = ep->ovflist;
-                       ep->ovflist = epi;
+                       epi->next = READ_ONCE(ep->ovflist);
+                       WRITE_ONCE(ep->ovflist, epi);
                        if (epi->ws) {
                                /*
                                 * Activate ep->ws since epi->ws may get
@@@ -1333,7 -1333,6 +1333,6 @@@ static int reverse_path_check_proc(voi
                                }
                        } else {
                                error = ep_call_nested(&poll_loop_ncalls,
-                                                       EP_MAX_NESTS,
                                                        reverse_path_check_proc,
                                                        child_file, child_file,
                                                        current);
@@@ -1367,7 -1366,7 +1366,7 @@@ static int reverse_path_check(void
        /* let's call this for all tfiles */
        list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
                path_count_init();
-               error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+               error = ep_call_nested(&poll_loop_ncalls,
                                        reverse_path_check_proc, current_file,
                                        current_file, current);
                if (error)
@@@ -1626,21 -1625,24 +1625,24 @@@ static __poll_t ep_send_events_proc(str
  {
        struct ep_send_events_data *esed = priv;
        __poll_t revents;
-       struct epitem *epi;
-       struct epoll_event __user *uevent;
+       struct epitem *epi, *tmp;
+       struct epoll_event __user *uevent = esed->events;
        struct wakeup_source *ws;
        poll_table pt;
  
        init_poll_funcptr(&pt, NULL);
+       esed->res = 0;
  
        /*
         * We can loop without lock because we are passed a task private list.
         * Items cannot vanish during the loop because ep_scan_ready_list() is
         * holding "mtx" during this call.
         */
-       for (esed->res = 0, uevent = esed->events;
-            !list_empty(head) && esed->res < esed->maxevents;) {
-               epi = list_first_entry(head, struct epitem, rdllink);
+       lockdep_assert_held(&ep->mtx);
+       list_for_each_entry_safe(epi, tmp, head, rdllink) {
+               if (esed->res >= esed->maxevents)
+                       break;
  
                /*
                 * Activate ep->ws before deactivating epi->ws to prevent
  
                list_del_init(&epi->rdllink);
  
-               revents = ep_item_poll(epi, &pt, 1);
                /*
                 * If the event mask intersect the caller-requested one,
                 * deliver the event to userspace. Again, ep_scan_ready_list()
-                * is holding "mtx", so no operations coming from userspace
+                * is holding ep->mtx, so no operations coming from userspace
                 * can change the item.
                 */
-               if (revents) {
-                       if (__put_user(revents, &uevent->events) ||
-                           __put_user(epi->event.data, &uevent->data)) {
-                               list_add(&epi->rdllink, head);
-                               ep_pm_stay_awake(epi);
-                               if (!esed->res)
-                                       esed->res = -EFAULT;
-                               return 0;
-                       }
-                       esed->res++;
-                       uevent++;
-                       if (epi->event.events & EPOLLONESHOT)
-                               epi->event.events &= EP_PRIVATE_BITS;
-                       else if (!(epi->event.events & EPOLLET)) {
-                               /*
-                                * If this file has been added with Level
-                                * Trigger mode, we need to insert back inside
-                                * the ready list, so that the next call to
-                                * epoll_wait() will check again the events
-                                * availability. At this point, no one can insert
-                                * into ep->rdllist besides us. The epoll_ctl()
-                                * callers are locked out by
-                                * ep_scan_ready_list() holding "mtx" and the
-                                * poll callback will queue them in ep->ovflist.
-                                */
-                               list_add_tail(&epi->rdllink, &ep->rdllist);
-                               ep_pm_stay_awake(epi);
-                       }
+               revents = ep_item_poll(epi, &pt, 1);
+               if (!revents)
+                       continue;
+               if (__put_user(revents, &uevent->events) ||
+                   __put_user(epi->event.data, &uevent->data)) {
+                       list_add(&epi->rdllink, head);
+                       ep_pm_stay_awake(epi);
+                       if (!esed->res)
+                               esed->res = -EFAULT;
+                       return 0;
+               }
+               esed->res++;
+               uevent++;
+               if (epi->event.events & EPOLLONESHOT)
+                       epi->event.events &= EP_PRIVATE_BITS;
+               else if (!(epi->event.events & EPOLLET)) {
+                       /*
+                        * If this file has been added with Level
+                        * Trigger mode, we need to insert back inside
+                        * the ready list, so that the next call to
+                        * epoll_wait() will check again the events
+                        * availability. At this point, no one can insert
+                        * into ep->rdllist besides us. The epoll_ctl()
+                        * callers are locked out by
+                        * ep_scan_ready_list() holding "mtx" and the
+                        * poll callback will queue them in ep->ovflist.
+                        */
+                       list_add_tail(&epi->rdllink, &ep->rdllist);
+                       ep_pm_stay_awake(epi);
                }
        }
  
@@@ -1747,6 -1749,7 +1749,7 @@@ static int ep_poll(struct eventpoll *ep
  {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
+       bool waiter = false;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
  
        } else if (timeout == 0) {
                /*
                 * Avoid the unnecessary trip to the wait queue loop, if the
-                * caller specified a non blocking operation.
+                * caller specified a non blocking operation. We still need
+                * lock because we could race and not see an epi being added
+                * to the ready list while in irq callback. Thus incorrectly
+                * returning 0 back to userspace.
                 */
                timed_out = 1;
                spin_lock_irq(&ep->wq.lock);
-               goto check_events;
+               eavail = ep_events_available(ep);
+               spin_unlock_irq(&ep->wq.lock);
+               goto send_events;
        }
  
  fetch_events:
        if (!ep_events_available(ep))
                ep_busy_loop(ep, timed_out);
  
-       spin_lock_irq(&ep->wq.lock);
+       eavail = ep_events_available(ep);
+       if (eavail)
+               goto send_events;
  
-       if (!ep_events_available(ep)) {
-               /*
-                * Busy poll timed out.  Drop NAPI ID for now, we can add
-                * it back in when we have moved a socket with a valid NAPI
-                * ID onto the ready list.
-                */
-               ep_reset_busy_poll_napi_id(ep);
+       /*
+        * Busy poll timed out.  Drop NAPI ID for now, we can add
+        * it back in when we have moved a socket with a valid NAPI
+        * ID onto the ready list.
+        */
+       ep_reset_busy_poll_napi_id(ep);
  
-               /*
-                * We don't have any available event to return to the caller.
-                * We need to sleep here, and we will be wake up by
-                * ep_poll_callback() when events will become available.
-                */
+       /*
+        * We don't have any available event to return to the caller.  We need
+        * to sleep here, and we will be woken by ep_poll_callback() when events
+        * become available.
+        */
+       if (!waiter) {
+               waiter = true;
                init_waitqueue_entry(&wait, current);
-               __add_wait_queue_exclusive(&ep->wq, &wait);
  
-               for (;;) {
-                       /*
-                        * We don't want to sleep if the ep_poll_callback() sends us
-                        * a wakeup in between. That's why we set the task state
-                        * to TASK_INTERRUPTIBLE before doing the checks.
-                        */
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       /*
-                        * Always short-circuit for fatal signals to allow
-                        * threads to make a timely exit without the chance of
-                        * finding more events available and fetching
-                        * repeatedly.
-                        */
-                       if (fatal_signal_pending(current)) {
-                               res = -EINTR;
-                               break;
-                       }
-                       if (ep_events_available(ep) || timed_out)
-                               break;
-                       if (signal_pending(current)) {
-                               res = -EINTR;
-                               break;
-                       }
+               spin_lock_irq(&ep->wq.lock);
+               __add_wait_queue_exclusive(&ep->wq, &wait);
+               spin_unlock_irq(&ep->wq.lock);
+       }
  
-                       spin_unlock_irq(&ep->wq.lock);
-                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
-                               timed_out = 1;
+       for (;;) {
+               /*
+                * We don't want to sleep if the ep_poll_callback() sends us
+                * a wakeup in between. That's why we set the task state
+                * to TASK_INTERRUPTIBLE before doing the checks.
+                */
+               set_current_state(TASK_INTERRUPTIBLE);
+               /*
+                * Always short-circuit for fatal signals to allow
+                * threads to make a timely exit without the chance of
+                * finding more events available and fetching
+                * repeatedly.
+                */
+               if (fatal_signal_pending(current)) {
+                       res = -EINTR;
+                       break;
+               }
  
-                       spin_lock_irq(&ep->wq.lock);
+               eavail = ep_events_available(ep);
+               if (eavail)
+                       break;
+               if (signal_pending(current)) {
+                       res = -EINTR;
+                       break;
                }
  
-               __remove_wait_queue(&ep->wq, &wait);
-               __set_current_state(TASK_RUNNING);
+               if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+                       timed_out = 1;
+                       break;
+               }
        }
- check_events:
-       /* Is it worth to try to dig for events ? */
-       eavail = ep_events_available(ep);
  
-       spin_unlock_irq(&ep->wq.lock);
+       __set_current_state(TASK_RUNNING);
  
+ send_events:
        /*
         * Try to transfer events to user space. In case we get 0 events and
         * there's still timeout left over, we go trying again in search of
            !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
                goto fetch_events;
  
+       if (waiter) {
+               spin_lock_irq(&ep->wq.lock);
+               __remove_wait_queue(&ep->wq, &wait);
+               spin_unlock_irq(&ep->wq.lock);
+       }
        return res;
  }
  
@@@ -1876,7 -1894,7 +1894,7 @@@ static int ep_loop_check_proc(void *pri
                        ep_tovisit = epi->ffd.file->private_data;
                        if (ep_tovisit->visited)
                                continue;
-                       error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                       error = ep_call_nested(&poll_loop_ncalls,
                                        ep_loop_check_proc, epi->ffd.file,
                                        ep_tovisit, current);
                        if (error != 0)
@@@ -1916,7 -1934,7 +1934,7 @@@ static int ep_loop_check(struct eventpo
        int ret;
        struct eventpoll *ep_cur, *ep_next;
  
-       ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+       ret = ep_call_nested(&poll_loop_ncalls,
                              ep_loop_check_proc, file, ep, current);
        /* clear visited list */
        list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@@ -2172,7 -2190,7 +2190,7 @@@ static int do_epoll_wait(int epfd, stru
                return -EINVAL;
  
        /* Verify that the area passed by the user is writeable */
 -      if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
 +      if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
                return -EFAULT;
  
        /* Get the "struct file *" for the eventpoll file */
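
For context on the ep_poll() rework above: with timeout == 0, ep->wq.lock is now taken only to sample ep_events_available(), and a waiter is added to ep->wq at most once per call. Below is a minimal userspace sketch of the two epoll_wait() modes those paths serve (non-blocking poll, then a blocking wait); it is illustrative only and not part of the patch, and the helper name is made up:

	#include <sys/epoll.h>
	#include <unistd.h>

	/* Hypothetical helper: poll fd once, then block until it is readable. */
	int wait_for_input(int fd)
	{
		struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
		struct epoll_event out;
		int epfd, n;

		epfd = epoll_create1(0);
		if (epfd < 0)
			return -1;
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
			close(epfd);
			return -1;
		}

		/* timeout == 0: ep_poll() only samples ep_events_available() */
		n = epoll_wait(epfd, &out, 1, 0);
		if (n == 0)
			/* nothing ready: block; ep_poll_callback() wakes the waiter */
			n = epoll_wait(epfd, &out, 1, -1);

		close(epfd);
		return n;	/* 1 if fd became readable, -1 on error */
	}
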
diff --combined fs/fat/dir.c
index 0295a095b9205352c6c993edf58f51a4147c1aea,20acaea8a7e6f2c5c0187481335dd8b973fce089..9d01db37183f2616feec648fd63c9ab292a45b8f
@@@ -57,7 -57,7 +57,7 @@@ static inline void fat_dir_readahead(st
        if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
                return;
        /* root dir of FAT12/FAT16 */
-       if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+       if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
                return;
  
        bh = sb_find_get_block(sb, phys);
@@@ -805,7 -805,7 +805,7 @@@ static long fat_dir_ioctl(struct file *
                return fat_generic_ioctl(filp, cmd, arg);
        }
  
 -      if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
 +      if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
                return -EFAULT;
        /*
         * Yes, we don't need this put_user() absolutely. However old
@@@ -845,7 -845,7 +845,7 @@@ static long fat_compat_dir_ioctl(struc
                return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
        }
  
 -      if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
 +      if (!access_ok(d1, sizeof(struct compat_dirent[2])))
                return -EFAULT;
        /*
         * Yes, we don't need this put_user() absolutely. However old
@@@ -1313,7 -1313,7 +1313,7 @@@ int fat_add_entries(struct inode *dir, 
                }
        }
        if (dir->i_ino == MSDOS_ROOT_INO) {
-               if (sbi->fat_bits != 32)
+               if (!is_fat32(sbi))
                        goto error;
        } else if (MSDOS_I(dir)->i_start == 0) {
                fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
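
The hunks above replace open-coded sbi->fat_bits tests with is_fat32(), a helper introduced elsewhere in this series (in fs/fat/fat.h) and not visible in this combined diff. A sketch of what such a helper amounts to, reconstructed from the call sites above rather than quoted from the actual header, assuming the usual struct msdos_sb_info layout:

	/* Sketch only: the real helper lives in fs/fat/fat.h. */
	static inline bool is_fat32(struct msdos_sb_info *sbi)
	{
		return sbi->fat_bits == 32;
	}

	/* The readahead check above then reads as: */
	if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
		return;
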
diff --combined kernel/sched/core.c
index 1f3e19fd6dc6c7cf09790732c4873d92d43eeec9,17a954c9e15377adf4e97a53c8760a6a03797ca0..223f78d5c1110d90d7d64e0ee55d1efb99f0ec6a
@@@ -3416,7 -3416,7 +3416,7 @@@ static void __sched notrace __schedule(
  
        switch_count = &prev->nivcsw;
        if (!preempt && prev->state) {
-               if (unlikely(signal_pending_state(prev->state, prev))) {
+               if (signal_pending_state(prev->state, prev)) {
                        prev->state = TASK_RUNNING;
                } else {
                        deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
@@@ -4450,7 -4450,7 +4450,7 @@@ static int sched_copy_attr(struct sched
        u32 size;
        int ret;
  
 -      if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
 +      if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
                return -EFAULT;
  
        /* Zero the full structure, so that a short copy will be nice: */
@@@ -4650,7 -4650,7 +4650,7 @@@ static int sched_read_attr(struct sched
  {
        int ret;
  
 -      if (!access_ok(VERIFY_WRITE, uattr, usize))
 +      if (!access_ok(uattr, usize))
                return -EFAULT;
  
        /*
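
Several hunks in this merge (here in __schedule(), and in mm/gup.c below) drop likely()/unlikely() annotations at signal_pending*() call sites; per the series, the branch hint is meant to live inside the helpers themselves. A hedged sketch of that pattern, not a quote of the actual <linux/sched/signal.h>:

	/* Sketch: the hint moves into the helper so callers stay plain. */
	static inline int signal_pending(struct task_struct *p)
	{
		return unlikely(test_tsk_thread_flag(p, TIF_SIGPENDING));
	}

	/* Caller side, as in __schedule() above: */
	if (signal_pending_state(prev->state, prev))
		prev->state = TASK_RUNNING;
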
diff --combined mm/gup.c
index 6f591ccb8eca74282049fdefaadcd0c9dacb1e71,6dd33e16a8063ca21c653cbdf68537ebefdd710c..05acd7e2eb22e0849c5125d0cabc671fdc58f71f
+++ b/mm/gup.c
@@@ -727,7 -727,7 +727,7 @@@ retry
                 * If we have a pending SIGKILL, don't keep faulting pages and
                 * potentially allocating memory.
                 */
-               if (unlikely(fatal_signal_pending(current))) {
+               if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        goto out;
                }
@@@ -1813,7 -1813,8 +1813,7 @@@ int __get_user_pages_fast(unsigned lon
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
  
 -      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -                                      (void __user *)start, len)))
 +      if (unlikely(!access_ok((void __user *)start, len)))
                return 0;
  
        /*
@@@ -1867,7 -1868,8 +1867,7 @@@ int get_user_pages_fast(unsigned long s
        if (nr_pages <= 0)
                return 0;
  
 -      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -                                      (void __user *)start, len)))
 +      if (unlikely(!access_ok((void __user *)start, len)))
                return -EFAULT;
  
        if (gup_fast_permitted(start, nr_pages, write)) {
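
The access_ok() hunks throughout this merge (eventpoll, fat, sched, gup) drop the old VERIFY_READ/VERIFY_WRITE type argument in favour of the two-argument form. A hedged sketch of a caller after this change; the struct and function here are hypothetical, only the access_ok()/__put_user() calling pattern is the point:

	/* Hypothetical ioctl-style copy-out using two-argument access_ok(). */
	struct foo_info {
		__u32 version;
		__u32 flags;
	};

	static long foo_copy_to_user(struct foo_info __user *uinfo)
	{
		if (!access_ok(uinfo, sizeof(*uinfo)))
			return -EFAULT;
		if (__put_user(1, &uinfo->version) ||
		    __put_user(0, &uinfo->flags))
			return -EFAULT;
		return 0;
	}
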