int regions_needed)
__must_hold(&resv->lock)
{
- struct list_head allocated_regions;
+ LIST_HEAD(allocated_regions);
int to_allocate = 0, i = 0;
struct file_region *trg = NULL, *rg = NULL;
VM_BUG_ON(regions_needed < 0);
- INIT_LIST_HEAD(&allocated_regions);
-
/*
* Check for sufficient descriptors in the cache to accommodate
* the number of in progress add operations plus regions_needed.
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
set_page_private(page, 0);
+ /*
+ * We have to set HPageVmemmapOptimized again as above
+ * set_page_private(page, 0) cleared it.
+ */
SetHPageVmemmapOptimized(page);
/*
static int gather_surplus_pages(struct hstate *h, long delta)
__must_hold(&hugetlb_lock)
{
- struct list_head surplus_list;
+ LIST_HEAD(surplus_list);
struct page *page, *tmp;
int ret;
long i;
}
allocated = 0;
- INIT_LIST_HEAD(&surplus_list);
ret = -ENOMEM;
retry:
* based on pool changes for the demoted page.
*/
h->max_huge_pages--;
- target_hstate->max_huge_pages += pages_per_huge_page(h);
+ target_hstate->max_huge_pages +=
+ pages_per_huge_page(h) / pages_per_huge_page(target_hstate);
return rc;
}
static ssize_t demote_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int nid;
- struct hstate *h = kobj_to_hstate(kobj, &nid);
+ struct hstate *h = kobj_to_hstate(kobj, NULL);
unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K;
return sysfs_emit(buf, "%lukB\n", demote_size);
struct hstate *h, *demote_hstate;
unsigned long demote_size;
unsigned int demote_order;
- int nid;
demote_size = (unsigned long)memparse(buf, NULL);
return -EINVAL;
/* demote order must be smaller than hstate order */
- h = kobj_to_hstate(kobj, &nid);
+ h = kobj_to_hstate(kobj, NULL);
if (demote_order >= h->order)
return -EINVAL;
if (retval) {
kobject_put(hstate_kobjs[hi]);
hstate_kobjs[hi] = NULL;
+ return retval;
}
if (h->demote_order) {
- if (sysfs_create_group(hstate_kobjs[hi],
- &hstate_demote_attr_group))
+ retval = sysfs_create_group(hstate_kobjs[hi],
+ &hstate_demote_attr_group);
+ if (retval) {
pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name);
+ sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group);
+ kobject_put(hstate_kobjs[hi]);
+ hstate_kobjs[hi] = NULL;
+ return retval;
+ }
}
- return retval;
+ return 0;
}
static void __init hugetlb_sysfs_init(void)
for_each_hstate(h) {
int idx = hstate_index(h);
- if (nhs->hstate_kobjs[idx]) {
- kobject_put(nhs->hstate_kobjs[idx]);
- nhs->hstate_kobjs[idx] = NULL;
- }
+ struct kobject *hstate_kobj = nhs->hstate_kobjs[idx];
+
+ if (!hstate_kobj)
+ continue;
+ if (h->demote_order)
+ sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group);
+ sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group);
+ kobject_put(hstate_kobj);
+ nhs->hstate_kobjs[idx] = NULL;
}
kobject_put(nhs->hugepages_kobj);
#endif
+#ifdef CONFIG_CMA
+static void __init hugetlb_cma_check(void);
+#else
+static inline __init void hugetlb_cma_check(void)
+{
+}
+#endif
+
static int __init hugetlb_init(void)
{
int i;
h->next_nid_to_alloc = first_memory_node;
h->next_nid_to_free = first_memory_node;
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
- huge_page_size(h)/1024);
+ huge_page_size(h)/SZ_1K);
parsed_hstate = h;
}
if (!hugetlb_max_hstate) {
default_hstate_max_huge_pages = 0;
memset(default_hugepages_in_node, 0,
- MAX_NUMNODES * sizeof(unsigned int));
+ sizeof(default_hugepages_in_node));
} else {
parsed_hstate->max_huge_pages = 0;
memset(parsed_hstate->max_huge_pages_node, 0,
- MAX_NUMNODES * sizeof(unsigned int));
+ sizeof(parsed_hstate->max_huge_pages_node));
}
}
}
__setup("default_hugepagesz=", default_hugepagesz_setup);
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+ struct mempolicy *mpol = get_task_policy(current);
+
+ /*
+ * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+ * (from policy_nodemask) specifically for hugetlb case
+ */
+ if (mpol->mode == MPOL_BIND &&
+ (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+ cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+ return &mpol->nodes;
+#endif
+ return NULL;
+}
+
static unsigned int allowed_mems_nr(struct hstate *h)
{
int node;
unsigned int nr = 0;
- nodemask_t *mpol_allowed;
+ nodemask_t *mbind_nodemask;
unsigned int *array = h->free_huge_pages_node;
gfp_t gfp_mask = htlb_alloc_mask(h);
- mpol_allowed = policy_nodemask_current(gfp_mask);
-
+ mbind_nodemask = policy_mbind_nodemask(gfp_mask);
for_each_node_mask(node, cpuset_current_mems_allowed) {
- if (!mpol_allowed || node_isset(node, *mpol_allowed))
+ if (!mbind_nodemask || node_isset(node, *mbind_nodemask))
nr += array[node];
}
struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma)
{
- pte_t *src_pte, *dst_pte, entry, dst_entry;
+ pte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
unsigned long addr;
bool cow = is_cow_mapping(src_vma->vm_flags);
/*
* If the pagetables are shared don't copy or take references.
- * dst_pte == src_pte is the common case of src/dest sharing.
*
+ * dst_pte == src_pte is the common case of src/dest sharing.
* However, src could have 'unshared' and dst shares with
- * another vma. If dst_pte !none, this implies sharing.
- * Check here before taking page table lock, and once again
- * after taking the lock below.
+ * another vma. So page_count of ptep page is checked instead
+ * to reliably determine whether pte is shared.
*/
- dst_entry = huge_ptep_get(dst_pte);
- if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+ if (page_count(virt_to_page(dst_pte)) > 1) {
addr |= last_addr_mask;
continue;
}
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
- dst_entry = huge_ptep_get(dst_pte);
again:
- if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+ if (huge_pte_none(entry)) {
/*
- * Skip if src entry none. Also, skip in the
- * unlikely case dst entry !none as this implies
- * sharing with another vma.
+ * Skip if src entry none.
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
restore_reserve_on_error(h, dst_vma, addr,
new);
put_page(new);
- /* dst_entry won't change as in child */
+ /* huge_ptep of dst_pte won't change as in child */
goto again;
}
hugetlb_install_page(dst_vma, dst_pte, addr, new);
u32 hash;
put_page(old_page);
- BUG_ON(huge_pte_none(pte));
/*
* Drop hugetlb_fault_mutex and i_mmap_rwsem before
* unmapping. unmapping needs to hold i_mmap_rwsem
return ret;
}
-/* Return the pagecache page at a given address within a VMA */
-static struct page *hugetlbfs_pagecache_page(struct hstate *h,
- struct vm_area_struct *vma, unsigned long address)
-{
- struct address_space *mapping;
- pgoff_t idx;
-
- mapping = vma->vm_file->f_mapping;
- idx = vma_hugecache_offset(h, vma, address);
-
- return find_lock_page(mapping, idx);
-}
-
/*
* Return whether there is a pagecache page to back given address within VMA.
* Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page.
if (idx >= size)
goto out;
-retry:
new_page = false;
page = find_lock_page(mapping, idx);
if (!page) {
if (vma->vm_flags & VM_MAYSHARE) {
int err = huge_add_to_page_cache(page, mapping, idx);
if (err) {
+ /*
+ * err can't be -EEXIST which implies someone
+ * else consumed the reservation since hugetlb
+ * fault mutex is held when add a hugetlb page
+ * to the page cache. So it's safe to call
+ * restore_reserve_on_error() here.
+ */
+ restore_reserve_on_error(h, vma, haddr, page);
put_page(page);
- if (err == -EEXIST)
- goto retry;
goto out;
}
new_pagecache_page = true;
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
+ pagecache_page = find_lock_page(mapping, idx);
}
ptl = huge_pte_lock(h, mm, ptep);
page_in_pagecache = true;
}
- ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
- spin_lock(ptl);
+ ptl = huge_pte_lock(h, dst_mm, dst_pte);
/*
* Recheck the i_size after holding PT lock to make sure not
hugetlb_cma_size = 0;
}
-void __init hugetlb_cma_check(void)
+static void __init hugetlb_cma_check(void)
{
if (!hugetlb_cma_size || cma_reserve_called)
return;