/*
* We want to release as many surplus pages as possible, spread
- * evenly across all nodes. Iterate across all nodes until we
- * can no longer free unreserved surplus pages. This occurs when
- * the nodes with surplus pages have no free pages.
- * free_pool_huge_page() will balance the the frees across the
- * on-line nodes for us and will handle the hstate accounting.
+ * evenly across all nodes with memory. Iterate across these nodes
+ * until we can no longer free unreserved surplus pages. This occurs
+ * when the nodes with surplus pages have no free pages.
+ * free_pool_huge_page() will balance the freed pages across the
+ * on-line nodes with memory and will handle the hstate accounting.
*/
while (nr_pages--) {
- if (!free_pool_huge_page(h, &node_online_map, 1))
+ if (!free_pool_huge_page(h, &node_states[N_HIGH_MEMORY], 1))
break;
}
}
int __weak alloc_bootmem_huge_page(struct hstate *h)
{
struct huge_bootmem_page *m;
- int nr_nodes = nodes_weight(node_online_map);
+ int nr_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
while (nr_nodes) {
void *addr;
addr = __alloc_bootmem_node_nopanic(
NODE_DATA(hstate_next_node_to_alloc(h,
- &node_online_map)),
+ &node_states[N_HIGH_MEMORY])),
huge_page_size(h), huge_page_size(h), 0);
if (addr) {
if (h->order >= MAX_ORDER) {
if (!alloc_bootmem_huge_page(h))
break;
- } else if (!alloc_fresh_huge_page(h, &node_online_map))
+ } else if (!alloc_fresh_huge_page(h,
+ &node_states[N_HIGH_MEMORY]))
break;
}
h->max_huge_pages = i;
if (!ret)
goto out;
+ /* Bail for signals. Probably ctrl-c from user */
+ if (signal_pending(current))
+ goto out;
}
/*
int nid;
unsigned long count;
struct hstate *h;
- NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+ NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
err = strict_strtoul(buf, 10, &count);
if (err)
h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
- if (nodes_allowed != &node_online_map)
+ if (nodes_allowed != &node_states[N_HIGH_MEMORY])
NODEMASK_FREE(nodes_allowed);
return len;
struct node_hstate *nhs = &node_hstates[node->sysdev.id];
if (!nhs->hugepages_kobj)
- return;
+ return; /* no hstate attributes */
for_each_hstate(h)
if (nhs->hstate_kobjs[h - hstates]) {
}
/*
- * hugetlb init time: register hstate attributes for all registered
- * node sysdevs. All on-line nodes should have registered their
- * associated sysdev by the time the hugetlb module initializes.
+ * hugetlb init time: register hstate attributes for all registered node
+ * sysdevs of nodes that have memory. All on-line nodes should have
+ * registered their associated sysdev by this time.
*/
static void hugetlb_register_all_nodes(void)
{
int nid;
- for (nid = 0; nid < nr_node_ids; nid++) {
+ for_each_node_state(nid, N_HIGH_MEMORY) {
struct node *node = &node_devices[nid];
if (node->sysdev.id == nid)
hugetlb_register_node(node);
h->free_huge_pages = 0;
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&h->hugepage_freelists[i]);
- h->next_nid_to_alloc = first_node(node_online_map);
- h->next_nid_to_free = first_node(node_online_map);
+ h->next_nid_to_alloc = first_node(node_states[N_HIGH_MEMORY]);
+ h->next_nid_to_free = first_node(node_states[N_HIGH_MEMORY]);
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024);
proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write) {
- NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+ NODEMASK_ALLOC(nodemask_t, nodes_allowed,
+ GFP_KERNEL | __GFP_NORETRY);
if (!(obey_mempolicy &&
init_nodemask_of_mempolicy(nodes_allowed))) {
NODEMASK_FREE(nodes_allowed);
+ (vma->vm_pgoff >> PAGE_SHIFT);
mapping = (struct address_space *)page_private(page);
+ /*
+ * Take the mapping lock for the duration of the table walk. As
+ * this mapping should be shared between all the VMAs,
+ * __unmap_hugepage_range() is called directly, since the lock is
+ * already held here.
+ spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
/* Do not unmap the current VMA */
if (iter_vma == vma)
* from the time of fork. This would look like data corruption
*/
if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
- unmap_hugepage_range(iter_vma,
+ __unmap_hugepage_range(iter_vma,
address, address + huge_page_size(h),
page);
}
+ spin_unlock(&mapping->i_mmap_lock);
return 1;
}
outside_reserve = 1;
page_cache_get(old_page);
+
+ /* Drop page_table_lock as buddy allocator may be called */
+ spin_unlock(&mm->page_table_lock);
new_page = alloc_huge_page(vma, address, outside_reserve);
if (IS_ERR(new_page)) {
if (unmap_ref_private(mm, vma, old_page, address)) {
BUG_ON(page_count(old_page) != 1);
BUG_ON(huge_pte_none(pte));
+ spin_lock(&mm->page_table_lock);
goto retry_avoidcopy;
}
WARN_ON_ONCE(1);
}
+ /* Caller expects lock to be held */
+ spin_lock(&mm->page_table_lock);
return -PTR_ERR(new_page);
}
- spin_unlock(&mm->page_table_lock);
copy_huge_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);
- spin_lock(&mm->page_table_lock);
+ /*
+ * Retake the page_table_lock to check for racing updates
+ * before the page tables are altered
+ */
+ spin_lock(&mm->page_table_lock);
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
if (likely(pte_same(huge_ptep_get(ptep), pte))) {
/* Break COW */