Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 5 Jan 2019 17:16:18 +0000 (09:16 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 5 Jan 2019 17:16:18 +0000 (09:16 -0800)
Merge more updates from Andrew Morton:

 - procfs updates

 - various misc bits

 - lib/ updates

 - epoll updates

 - autofs

 - fatfs

 - a few more MM bits

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (58 commits)
  mm/page_io.c: fix polled swap page in
  checkpatch: add Co-developed-by to signature tags
  docs: fix Co-Developed-by docs
  drivers/base/platform.c: kmemleak ignore a known leak
  fs: don't open code lru_to_page()
  fs/: remove caller signal_pending branch predictions
  mm/: remove caller signal_pending branch predictions
  arch/arc/mm/fault.c: remove caller signal_pending_branch predictions
  kernel/sched/: remove caller signal_pending branch predictions
  kernel/locking/mutex.c: remove caller signal_pending branch predictions
  mm: select HAVE_MOVE_PMD on x86 for faster mremap
  mm: speed up mremap by 20x on large regions
  mm: treewide: remove unused address argument from pte_alloc functions
  initramfs: cleanup incomplete rootfs
  scripts/gdb: fix lx-version string output
  kernel/kcov.c: mark write_comp_data() as notrace
  kernel/sysctl: add panic_print into sysctl
  panic: add options to print system info when panic happens
  bfs: extra sanity checking and static inode bitmap
  exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
  ...

fs/eventpoll.c
fs/fat/dir.c
kernel/sched/core.c
mm/gup.c

diff --combined fs/eventpoll.c
index 7ebae39fbcb3750a6cb264c2e84b844ddba7e7da,2329f96469e2d719fa074c6486396c32ebce3d90..a5d219d920e755aa7761253c87a1da6470f26782
@@@ -381,7 -381,8 +381,8 @@@ static void ep_nested_calls_init(struc
   */
  static inline int ep_events_available(struct eventpoll *ep)
  {
-       return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+       return !list_empty_careful(&ep->rdllist) ||
+               READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
  }
  
  #ifdef CONFIG_NET_RX_BUSY_POLL
@@@ -471,7 -472,6 +472,6 @@@ static inline void ep_set_busy_poll_nap
   *                  no re-entered.
   *
   * @ncalls: Pointer to the nested_calls structure to be used for this call.
-  * @max_nests: Maximum number of allowed nesting calls.
   * @nproc: Nested call core function pointer.
   * @priv: Opaque data to be passed to the @nproc callback.
   * @cookie: Cookie to be used to identify this nested call.
   * Returns: Returns the code returned by the @nproc callback, or -1 if
   *          the maximum recursion limit has been exceeded.
   */
- static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+ static int ep_call_nested(struct nested_calls *ncalls,
                          int (*nproc)(void *, void *, int), void *priv,
                          void *cookie, void *ctx)
  {
         */
        list_for_each_entry(tncur, lsthead, llink) {
                if (tncur->ctx == ctx &&
-                   (tncur->cookie == cookie || ++call_nests > max_nests)) {
+                   (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
                        /*
                         * Ops ... loop detected or maximum nest level reached.
                         * We abort this wake by breaking the cycle itself.
@@@ -573,7 -573,7 +573,7 @@@ static void ep_poll_safewake(wait_queue
  {
        int this_cpu = get_cpu();
  
-       ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+       ep_call_nested(&poll_safewake_ncalls,
                       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  
        put_cpu();
@@@ -699,7 -699,7 +699,7 @@@ static __poll_t ep_scan_ready_list(stru
         */
        spin_lock_irq(&ep->wq.lock);
        list_splice_init(&ep->rdllist, &txlist);
-       ep->ovflist = NULL;
+       WRITE_ONCE(ep->ovflist, NULL);
        spin_unlock_irq(&ep->wq.lock);
  
        /*
         * other events might have been queued by the poll callback.
         * We re-insert them inside the main ready-list here.
         */
-       for (nepi = ep->ovflist; (epi = nepi) != NULL;
+       for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
             nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
                /*
                 * We need to check if the item is already in the list.
         * releasing the lock, events will be queued in the normal way inside
         * ep->rdllist.
         */
-       ep->ovflist = EP_UNACTIVE_PTR;
+       WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
  
        /*
         * Quickly re-inject items left on "txlist".
@@@ -1154,10 -1154,10 +1154,10 @@@ static int ep_poll_callback(wait_queue_
         * semantics). All the events that happen during that period of time are
         * chained in ep->ovflist and requeued later on.
         */
-       if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+       if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
                if (epi->next == EP_UNACTIVE_PTR) {
-                       epi->next = ep->ovflist;
-                       ep->ovflist = epi;
+                       epi->next = READ_ONCE(ep->ovflist);
+                       WRITE_ONCE(ep->ovflist, epi);
                        if (epi->ws) {
                                /*
                                 * Activate ep->ws since epi->ws may get
@@@ -1333,7 -1333,6 +1333,6 @@@ static int reverse_path_check_proc(voi
                                }
                        } else {
                                error = ep_call_nested(&poll_loop_ncalls,
-                                                       EP_MAX_NESTS,
                                                        reverse_path_check_proc,
                                                        child_file, child_file,
                                                        current);
@@@ -1367,7 -1366,7 +1366,7 @@@ static int reverse_path_check(void
        /* let's call this for all tfiles */
        list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
                path_count_init();
-               error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+               error = ep_call_nested(&poll_loop_ncalls,
                                        reverse_path_check_proc, current_file,
                                        current_file, current);
                if (error)
@@@ -1626,21 -1625,24 +1625,24 @@@ static __poll_t ep_send_events_proc(str
  {
        struct ep_send_events_data *esed = priv;
        __poll_t revents;
-       struct epitem *epi;
-       struct epoll_event __user *uevent;
+       struct epitem *epi, *tmp;
+       struct epoll_event __user *uevent = esed->events;
        struct wakeup_source *ws;
        poll_table pt;
  
        init_poll_funcptr(&pt, NULL);
+       esed->res = 0;
  
        /*
         * We can loop without lock because we are passed a task private list.
         * Items cannot vanish during the loop because ep_scan_ready_list() is
         * holding "mtx" during this call.
         */
-       for (esed->res = 0, uevent = esed->events;
-            !list_empty(head) && esed->res < esed->maxevents;) {
-               epi = list_first_entry(head, struct epitem, rdllink);
+       lockdep_assert_held(&ep->mtx);
+       list_for_each_entry_safe(epi, tmp, head, rdllink) {
+               if (esed->res >= esed->maxevents)
+                       break;
  
                /*
                 * Activate ep->ws before deactivating epi->ws to prevent
  
                list_del_init(&epi->rdllink);
  
-               revents = ep_item_poll(epi, &pt, 1);
                /*
                 * If the event mask intersect the caller-requested one,
                 * deliver the event to userspace. Again, ep_scan_ready_list()
-                * is holding "mtx", so no operations coming from userspace
+                * is holding ep->mtx, so no operations coming from userspace
                 * can change the item.
                 */
-               if (revents) {
-                       if (__put_user(revents, &uevent->events) ||
-                           __put_user(epi->event.data, &uevent->data)) {
-                               list_add(&epi->rdllink, head);
-                               ep_pm_stay_awake(epi);
-                               if (!esed->res)
-                                       esed->res = -EFAULT;
-                               return 0;
-                       }
-                       esed->res++;
-                       uevent++;
-                       if (epi->event.events & EPOLLONESHOT)
-                               epi->event.events &= EP_PRIVATE_BITS;
-                       else if (!(epi->event.events & EPOLLET)) {
-                               /*
-                                * If this file has been added with Level
-                                * Trigger mode, we need to insert back inside
-                                * the ready list, so that the next call to
-                                * epoll_wait() will check again the events
-                                * availability. At this point, no one can insert
-                                * into ep->rdllist besides us. The epoll_ctl()
-                                * callers are locked out by
-                                * ep_scan_ready_list() holding "mtx" and the
-                                * poll callback will queue them in ep->ovflist.
-                                */
-                               list_add_tail(&epi->rdllink, &ep->rdllist);
-                               ep_pm_stay_awake(epi);
-                       }
+               revents = ep_item_poll(epi, &pt, 1);
+               if (!revents)
+                       continue;
+               if (__put_user(revents, &uevent->events) ||
+                   __put_user(epi->event.data, &uevent->data)) {
+                       list_add(&epi->rdllink, head);
+                       ep_pm_stay_awake(epi);
+                       if (!esed->res)
+                               esed->res = -EFAULT;
+                       return 0;
+               }
+               esed->res++;
+               uevent++;
+               if (epi->event.events & EPOLLONESHOT)
+                       epi->event.events &= EP_PRIVATE_BITS;
+               else if (!(epi->event.events & EPOLLET)) {
+                       /*
+                        * If this file has been added with Level
+                        * Trigger mode, we need to insert back inside
+                        * the ready list, so that the next call to
+                        * epoll_wait() will check again the events
+                        * availability. At this point, no one can insert
+                        * into ep->rdllist besides us. The epoll_ctl()
+                        * callers are locked out by
+                        * ep_scan_ready_list() holding "mtx" and the
+                        * poll callback will queue them in ep->ovflist.
+                        */
+                       list_add_tail(&epi->rdllink, &ep->rdllist);
+                       ep_pm_stay_awake(epi);
                }
        }
  
@@@ -1747,6 -1749,7 +1749,7 @@@ static int ep_poll(struct eventpoll *ep
  {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
+       bool waiter = false;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
  
        } else if (timeout == 0) {
                /*
                 * Avoid the unnecessary trip to the wait queue loop, if the
-                * caller specified a non blocking operation.
+                * caller specified a non blocking operation. We still need
+                * lock because we could race and not see an epi being added
+                * to the ready list while in irq callback. Thus incorrectly
+                * returning 0 back to userspace.
                 */
                timed_out = 1;
                spin_lock_irq(&ep->wq.lock);
-               goto check_events;
+               eavail = ep_events_available(ep);
+               spin_unlock_irq(&ep->wq.lock);
+               goto send_events;
        }
  
  fetch_events:
        if (!ep_events_available(ep))
                ep_busy_loop(ep, timed_out);
  
-       spin_lock_irq(&ep->wq.lock);
+       eavail = ep_events_available(ep);
+       if (eavail)
+               goto send_events;
  
-       if (!ep_events_available(ep)) {
-               /*
-                * Busy poll timed out.  Drop NAPI ID for now, we can add
-                * it back in when we have moved a socket with a valid NAPI
-                * ID onto the ready list.
-                */
-               ep_reset_busy_poll_napi_id(ep);
+       /*
+        * Busy poll timed out.  Drop NAPI ID for now, we can add
+        * it back in when we have moved a socket with a valid NAPI
+        * ID onto the ready list.
+        */
+       ep_reset_busy_poll_napi_id(ep);
  
-               /*
-                * We don't have any available event to return to the caller.
-                * We need to sleep here, and we will be wake up by
-                * ep_poll_callback() when events will become available.
-                */
+       /*
+        * We don't have any available event to return to the caller.  We need
+        * to sleep here, and we will be woken by ep_poll_callback() when events
+        * become available.
+        */
+       if (!waiter) {
+               waiter = true;
                init_waitqueue_entry(&wait, current);
-               __add_wait_queue_exclusive(&ep->wq, &wait);
  
-               for (;;) {
-                       /*
-                        * We don't want to sleep if the ep_poll_callback() sends us
-                        * a wakeup in between. That's why we set the task state
-                        * to TASK_INTERRUPTIBLE before doing the checks.
-                        */
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       /*
-                        * Always short-circuit for fatal signals to allow
-                        * threads to make a timely exit without the chance of
-                        * finding more events available and fetching
-                        * repeatedly.
-                        */
-                       if (fatal_signal_pending(current)) {
-                               res = -EINTR;
-                               break;
-                       }
-                       if (ep_events_available(ep) || timed_out)
-                               break;
-                       if (signal_pending(current)) {
-                               res = -EINTR;
-                               break;
-                       }
+               spin_lock_irq(&ep->wq.lock);
+               __add_wait_queue_exclusive(&ep->wq, &wait);
+               spin_unlock_irq(&ep->wq.lock);
+       }
  
-                       spin_unlock_irq(&ep->wq.lock);
-                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
-                               timed_out = 1;
+       for (;;) {
+               /*
+                * We don't want to sleep if the ep_poll_callback() sends us
+                * a wakeup in between. That's why we set the task state
+                * to TASK_INTERRUPTIBLE before doing the checks.
+                */
+               set_current_state(TASK_INTERRUPTIBLE);
+               /*
+                * Always short-circuit for fatal signals to allow
+                * threads to make a timely exit without the chance of
+                * finding more events available and fetching
+                * repeatedly.
+                */
+               if (fatal_signal_pending(current)) {
+                       res = -EINTR;
+                       break;
+               }
  
-                       spin_lock_irq(&ep->wq.lock);
+               eavail = ep_events_available(ep);
+               if (eavail)
+                       break;
+               if (signal_pending(current)) {
+                       res = -EINTR;
+                       break;
                }
  
-               __remove_wait_queue(&ep->wq, &wait);
-               __set_current_state(TASK_RUNNING);
+               if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+                       timed_out = 1;
+                       break;
+               }
        }
- check_events:
-       /* Is it worth to try to dig for events ? */
-       eavail = ep_events_available(ep);
  
-       spin_unlock_irq(&ep->wq.lock);
+       __set_current_state(TASK_RUNNING);
  
+ send_events:
        /*
         * Try to transfer events to user space. In case we get 0 events and
         * there's still timeout left over, we go trying again in search of
            !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
                goto fetch_events;
  
+       if (waiter) {
+               spin_lock_irq(&ep->wq.lock);
+               __remove_wait_queue(&ep->wq, &wait);
+               spin_unlock_irq(&ep->wq.lock);
+       }
        return res;
  }
  
@@@ -1876,7 -1894,7 +1894,7 @@@ static int ep_loop_check_proc(void *pri
                        ep_tovisit = epi->ffd.file->private_data;
                        if (ep_tovisit->visited)
                                continue;
-                       error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                       error = ep_call_nested(&poll_loop_ncalls,
                                        ep_loop_check_proc, epi->ffd.file,
                                        ep_tovisit, current);
                        if (error != 0)
@@@ -1916,7 -1934,7 +1934,7 @@@ static int ep_loop_check(struct eventpo
        int ret;
        struct eventpoll *ep_cur, *ep_next;
  
-       ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+       ret = ep_call_nested(&poll_loop_ncalls,
                              ep_loop_check_proc, file, ep, current);
        /* clear visited list */
        list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@@ -2172,7 -2190,7 +2190,7 @@@ static int do_epoll_wait(int epfd, stru
                return -EINVAL;
  
        /* Verify that the area passed by the user is writeable */
 -      if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
 +      if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
                return -EFAULT;
  
        /* Get the "struct file *" for the eventpoll file */
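
For context on the ep_poll() rework above: with timeout == 0, ep->wq.lock is now taken only to sample ep_events_available(), and a waiter is added to ep->wq at most once per call. Below is a minimal userspace sketch of the two epoll_wait() modes those paths serve (non-blocking poll, then a blocking wait); it is illustrative only and not part of the patch, and the helper name is made up:

	#include <sys/epoll.h>
	#include <unistd.h>

	/* Hypothetical helper: poll fd once, then block until it is readable. */
	int wait_for_input(int fd)
	{
		struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
		struct epoll_event out;
		int epfd, n;

		epfd = epoll_create1(0);
		if (epfd < 0)
			return -1;
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
			close(epfd);
			return -1;
		}

		/* timeout == 0: ep_poll() only samples ep_events_available() */
		n = epoll_wait(epfd, &out, 1, 0);
		if (n == 0)
			/* nothing ready: block; ep_poll_callback() wakes the waiter */
			n = epoll_wait(epfd, &out, 1, -1);

		close(epfd);
		return n;	/* 1 if fd became readable, -1 on error */
	}
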
diff --combined fs/fat/dir.c
index 0295a095b9205352c6c993edf58f51a4147c1aea,20acaea8a7e6f2c5c0187481335dd8b973fce089..9d01db37183f2616feec648fd63c9ab292a45b8f
@@@ -57,7 -57,7 +57,7 @@@ static inline void fat_dir_readahead(st
        if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
                return;
        /* root dir of FAT12/FAT16 */
-       if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+       if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
                return;
  
        bh = sb_find_get_block(sb, phys);
@@@ -805,7 -805,7 +805,7 @@@ static long fat_dir_ioctl(struct file *
                return fat_generic_ioctl(filp, cmd, arg);
        }
  
 -      if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
 +      if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
                return -EFAULT;
        /*
         * Yes, we don't need this put_user() absolutely. However old
@@@ -845,7 -845,7 +845,7 @@@ static long fat_compat_dir_ioctl(struc
                return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
        }
  
 -      if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
 +      if (!access_ok(d1, sizeof(struct compat_dirent[2])))
                return -EFAULT;
        /*
         * Yes, we don't need this put_user() absolutely. However old
@@@ -1313,7 -1313,7 +1313,7 @@@ int fat_add_entries(struct inode *dir, 
                }
        }
        if (dir->i_ino == MSDOS_ROOT_INO) {
-               if (sbi->fat_bits != 32)
+               if (!is_fat32(sbi))
                        goto error;
        } else if (MSDOS_I(dir)->i_start == 0) {
                fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
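
The hunks above replace open-coded sbi->fat_bits tests with is_fat32(), a helper introduced elsewhere in this series (in fs/fat/fat.h) and not visible in this combined diff. A sketch of what such a helper amounts to, reconstructed from the call sites above rather than quoted from the actual header, assuming the usual struct msdos_sb_info layout:

	/* Sketch only: the real helper lives in fs/fat/fat.h. */
	static inline bool is_fat32(struct msdos_sb_info *sbi)
	{
		return sbi->fat_bits == 32;
	}

	/* The readahead check above then reads as: */
	if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
		return;
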
diff --combined kernel/sched/core.c
index 1f3e19fd6dc6c7cf09790732c4873d92d43eeec9,17a954c9e15377adf4e97a53c8760a6a03797ca0..223f78d5c1110d90d7d64e0ee55d1efb99f0ec6a
@@@ -3416,7 -3416,7 +3416,7 @@@ static void __sched notrace __schedule(
  
        switch_count = &prev->nivcsw;
        if (!preempt && prev->state) {
-               if (unlikely(signal_pending_state(prev->state, prev))) {
+               if (signal_pending_state(prev->state, prev)) {
                        prev->state = TASK_RUNNING;
                } else {
                        deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
@@@ -4450,7 -4450,7 +4450,7 @@@ static int sched_copy_attr(struct sched
        u32 size;
        int ret;
  
 -      if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
 +      if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
                return -EFAULT;
  
        /* Zero the full structure, so that a short copy will be nice: */
@@@ -4650,7 -4650,7 +4650,7 @@@ static int sched_read_attr(struct sched
  {
        int ret;
  
 -      if (!access_ok(VERIFY_WRITE, uattr, usize))
 +      if (!access_ok(uattr, usize))
                return -EFAULT;
  
        /*
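
Several hunks in this merge (here in __schedule(), and in mm/gup.c below) drop likely()/unlikely() annotations at signal_pending*() call sites; per the series, the branch hint is meant to live inside the helpers themselves. A hedged sketch of that pattern, not a quote of the actual <linux/sched/signal.h>:

	/* Sketch: the hint moves into the helper so callers stay plain. */
	static inline int signal_pending(struct task_struct *p)
	{
		return unlikely(test_tsk_thread_flag(p, TIF_SIGPENDING));
	}

	/* Caller side, as in __schedule() above: */
	if (signal_pending_state(prev->state, prev))
		prev->state = TASK_RUNNING;
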
diff --combined mm/gup.c
index 6f591ccb8eca74282049fdefaadcd0c9dacb1e71,6dd33e16a8063ca21c653cbdf68537ebefdd710c..05acd7e2eb22e0849c5125d0cabc671fdc58f71f
+++ b/mm/gup.c
@@@ -727,7 -727,7 +727,7 @@@ retry
                 * If we have a pending SIGKILL, don't keep faulting pages and
                 * potentially allocating memory.
                 */
-               if (unlikely(fatal_signal_pending(current))) {
+               if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        goto out;
                }
@@@ -1813,7 -1813,8 +1813,7 @@@ int __get_user_pages_fast(unsigned lon
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
  
 -      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -                                      (void __user *)start, len)))
 +      if (unlikely(!access_ok((void __user *)start, len)))
                return 0;
  
        /*
@@@ -1867,7 -1868,8 +1867,7 @@@ int get_user_pages_fast(unsigned long s
        if (nr_pages <= 0)
                return 0;
  
 -      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -                                      (void __user *)start, len)))
 +      if (unlikely(!access_ok((void __user *)start, len)))
                return -EFAULT;
  
        if (gup_fast_permitted(start, nr_pages, write)) {
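
The access_ok() hunks throughout this merge (eventpoll, fat, sched, gup) drop the old VERIFY_READ/VERIFY_WRITE type argument in favour of the two-argument form. A hedged sketch of a caller after this change; the struct and function here are hypothetical, only the access_ok()/__put_user() calling pattern is the point:

	/* Hypothetical ioctl-style copy-out using two-argument access_ok(). */
	struct foo_info {
		__u32 version;
		__u32 flags;
	};

	static long foo_copy_to_user(struct foo_info __user *uinfo)
	{
		if (!access_ok(uinfo, sizeof(*uinfo)))
			return -EFAULT;
		if (__put_user(1, &uinfo->version) ||
		    __put_user(0, &uinfo->flags))
			return -EFAULT;
		return 0;
	}
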