Fix bug
[can-eth-gw-linux.git] / mm / page_alloc.c
index 5a8d339d282a88cd0c5687596eca8023f93846ba..83637dfba110c8308570d7f75ec46bca7386dc3a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -89,6 +89,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
        [N_NORMAL_MEMORY] = { { [0] = 1UL } },
 #ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = { { [0] = 1UL } },
+#endif
+#ifdef CONFIG_MOVABLE_NODE
+       [N_MEMORY] = { { [0] = 1UL } },
 #endif
        [N_CPU] = { { [0] = 1UL } },
 #endif /* NUMA */
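
The new [N_MEMORY] initializer is only needed when N_MEMORY is a distinct array
index, i.e. under CONFIG_MOVABLE_NODE; in every other configuration it aliases
an entry that is already initialized above. A sketch of how the companion
nodemask.h change is expected to define that aliasing (an assumption for
illustration, not a hunk from this diff):

    enum node_states {
            N_POSSIBLE,             /* The node could become online at some point */
            N_ONLINE,               /* The node is online */
            N_NORMAL_MEMORY,        /* The node has regular memory */
    #ifdef CONFIG_HIGHMEM
            N_HIGH_MEMORY,          /* The node has regular or high memory */
    #else
            N_HIGH_MEMORY = N_NORMAL_MEMORY,
    #endif
    #ifdef CONFIG_MOVABLE_NODE
            N_MEMORY,               /* The node has memory (regular, high, movable) */
    #else
            N_MEMORY = N_HIGH_MEMORY,
    #endif
            N_CPU,                  /* The node has one or more cpus */
            NR_NODE_STATES
    };

With that aliasing, tests against node_states[N_MEMORY] keep working on
!CONFIG_HIGHMEM and !CONFIG_MOVABLE_NODE kernels without any further
initializer here, which is exactly why only the CONFIG_MOVABLE_NODE case adds one.
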
@@ -523,7 +526,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * If a block is freed, and its buddy is also free, then this
  * triggers coalescing into a block of larger size.
  *
- * -- wli
+ * -- nyc
  */
 
 static inline void __free_one_page(struct page *page,
@@ -732,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() other than put_page_bootmem() run only at boot
+ * time, so to keep boot time short we shift the serialization burden to
+ * put_page_bootmem().
+ */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
@@ -747,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
                set_page_count(p, 0);
        }
 
+       page_zone(page)->managed_pages += 1 << order;
        set_page_refcounted(page);
        __free_pages(page, order);
 }
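
The bare "managed_pages += 1 << order" above is an unlocked read-modify-write.
Per the new comment it is safe only because every caller other than
put_page_bootmem() runs single-threaded at boot, while lockless readers are
fine since a word-sized load can only observe either the old or the new value.
A hypothetical helper (the name and the choice of zone->lock are assumptions,
not code from this patch) showing what a post-boot writer has to do instead:

    /* Illustration only: grow managed_pages safely after boot. */
    static void grow_managed_pages(struct zone *zone, unsigned long nr_pages)
    {
            unsigned long flags;

            spin_lock_irqsave(&zone->lock, flags);
            zone->managed_pages += nr_pages;  /* read-modify-write under the lock */
            spin_unlock_irqrestore(&zone->lock, flags);
    }
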
@@ -782,7 +793,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
  * large block of memory acted on by a series of small allocations.
  * This behavior is a critical factor in sglist merging's success.
  *
- * -- wli
+ * -- nyc
  */
 static inline void expand(struct zone *zone, struct page *page,
        int low, int high, struct free_area *area,
@@ -1695,7 +1706,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * task's mems_allowed, or node_states[N_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1724,7 +1735,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 
        allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
                                        &cpuset_current_mems_allowed :
-                                       &node_states[N_HIGH_MEMORY];
+                                       &node_states[N_MEMORY];
        return allowednodes;
 }
 
@@ -2981,6 +2992,7 @@ void show_free_areas(unsigned int filter)
                        " isolated(anon):%lukB"
                        " isolated(file):%lukB"
                        " present:%lukB"
+                       " managed:%lukB"
                        " mlocked:%lukB"
                        " dirty:%lukB"
                        " writeback:%lukB"
@@ -3010,6 +3022,7 @@ void show_free_areas(unsigned int filter)
                        K(zone_page_state(zone, NR_ISOLATED_ANON)),
                        K(zone_page_state(zone, NR_ISOLATED_FILE)),
                        K(zone->present_pages),
+                       K(zone->managed_pages),
                        K(zone_page_state(zone, NR_MLOCK)),
                        K(zone_page_state(zone, NR_FILE_DIRTY)),
                        K(zone_page_state(zone, NR_WRITEBACK)),
@@ -3238,7 +3251,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
                return node;
        }
 
-       for_each_node_state(n, N_HIGH_MEMORY) {
+       for_each_node_state(n, N_MEMORY) {
 
                /* Don't want a node to appear more than once */
                if (node_isset(n, *used_node_mask))
@@ -3380,7 +3393,7 @@ static int default_zonelist_order(void)
         * local memory, NODE_ORDER may be suitable.
          */
        average_size = total_size /
-                               (nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+                               (nodes_weight(node_states[N_MEMORY]) + 1);
        for_each_online_node(nid) {
                low_kmem_size = 0;
                total_size = 0;
@@ -4476,6 +4489,26 @@ void __init set_pageblock_order(void)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
+static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
+                                                  unsigned long present_pages)
+{
+       unsigned long pages = spanned_pages;
+
+       /*
+        * Provide a more accurate estimation if there are holes within
+        * the zone and SPARSEMEM is in use. If there are holes within the
+        * zone, each populated memory region may cost us one or two extra
+        * memmap pages due to alignment because memmap pages for each
+        * populated region may not be naturally aligned on a page boundary.
+        * So the (present_pages >> 4) heuristic is a tradeoff for that.
+        */
+       if (spanned_pages > present_pages + (present_pages >> 4) &&
+           IS_ENABLED(CONFIG_SPARSEMEM))
+               pages = present_pages;
+
+       return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
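
A worked example of the calc_memmap_size() heuristic, with assumed sizes
(4 KiB pages, 64-byte struct page): a zone spans 1,048,576 pages but only
786,432 are present. Because 1,048,576 > 786,432 + (786,432 >> 4) = 835,584,
a SPARSEMEM kernel estimates the memmap from present_pages:
PAGE_ALIGN(786,432 * 64) >> PAGE_SHIFT = 12,288 pages rather than the 16,384
pages a spanned-based estimate would charge. A standalone userspace sketch of
the same arithmetic (the struct page size is an assumption):

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
    #define STRUCT_PAGE_SZ  64UL    /* assumed sizeof(struct page) */

    static unsigned long memmap_size(unsigned long spanned, unsigned long present,
                                     int sparsemem)
    {
            unsigned long pages = spanned;

            if (sparsemem && spanned > present + (present >> 4))
                    pages = present;        /* sparse enough: trust present_pages */

            return PAGE_ALIGN(pages * STRUCT_PAGE_SZ) >> PAGE_SHIFT;
    }

    int main(void)
    {
            printf("%lu\n", memmap_size(1048576, 786432, 0));      /* 16384 */
            printf("%lu\n", memmap_size(1048576, 786432, 1));      /* 12288 */
            return 0;
    }
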
@@ -4499,48 +4532,56 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
-               unsigned long size, realsize, memmap_pages;
+               unsigned long size, realsize, freesize, memmap_pages;
 
                size = zone_spanned_pages_in_node(nid, j, zones_size);
-               realsize = size - zone_absent_pages_in_node(nid, j,
+               realsize = freesize = size - zone_absent_pages_in_node(nid, j,
                                                                zholes_size);
 
                /*
-                * Adjust realsize so that it accounts for how much memory
+                * Adjust freesize so that it accounts for how much memory
                 * is used by this zone for memmap. This affects the watermark
                 * and per-cpu initialisations
                 */
-               memmap_pages =
-                       PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
-               if (realsize >= memmap_pages) {
-                       realsize -= memmap_pages;
+               memmap_pages = calc_memmap_size(size, realsize);
+               if (freesize >= memmap_pages) {
+                       freesize -= memmap_pages;
                        if (memmap_pages)
                                printk(KERN_DEBUG
                                       "  %s zone: %lu pages used for memmap\n",
                                       zone_names[j], memmap_pages);
                } else
                        printk(KERN_WARNING
-                               "  %s zone: %lu pages exceeds realsize %lu\n",
-                               zone_names[j], memmap_pages, realsize);
+                               "  %s zone: %lu pages exceeds freesize %lu\n",
+                               zone_names[j], memmap_pages, freesize);
 
                /* Account for reserved pages */
-               if (j == 0 && realsize > dma_reserve) {
-                       realsize -= dma_reserve;
+               if (j == 0 && freesize > dma_reserve) {
+                       freesize -= dma_reserve;
                        printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                                        zone_names[0], dma_reserve);
                }
 
                if (!is_highmem_idx(j))
-                       nr_kernel_pages += realsize;
-               nr_all_pages += realsize;
+                       nr_kernel_pages += freesize;
+               /* Charge for highmem memmap if there are enough kernel pages */
+               else if (nr_kernel_pages > memmap_pages * 2)
+                       nr_kernel_pages -= memmap_pages;
+               nr_all_pages += freesize;
 
                zone->spanned_pages = size;
-               zone->present_pages = realsize;
+               zone->present_pages = freesize;
+               /*
+                * Set an approximate value for lowmem here; it will be adjusted
+                * when the bootmem allocator frees pages into the buddy system.
+                * All highmem pages will be managed by the buddy system.
+                */
+               zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
                zone->node = nid;
-               zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+               zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
                                                / 100;
-               zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+               zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
 #endif
                zone->name = zone_names[j];
                spin_lock_init(&zone->lock);
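
To see the reworked accounting with made-up numbers: suppose a lowmem zone
spans 262,144 pages of which 12,144 fall into holes, so realsize = freesize =
250,000. calc_memmap_size(262,144, 250,000) keeps the spanned-based estimate
(the zone is not sparse enough to trip the heuristic), which is 4,096 memmap
pages with the sizes assumed earlier, so freesize drops to 245,904; assuming
dma_reserve is zero, nr_kernel_pages, zone->present_pages and
zone->managed_pages all end up at 245,904. For a highmem zone the memmap is
charged to lowmem instead, so managed_pages is set to realsize rather than
freesize and is then refined as the bootmem allocator releases pages into the
buddy system.
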
@@ -4731,7 +4772,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
@@ -4744,7 +4785,7 @@ static unsigned long __init early_calculate_totalpages(void)
 
                totalpages += pages;
                if (pages)
-                       node_set_state(nid, N_HIGH_MEMORY);
+                       node_set_state(nid, N_MEMORY);
        }
        return totalpages;
 }
@@ -4761,9 +4802,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
        unsigned long usable_startpfn;
        unsigned long kernelcore_node, kernelcore_remaining;
        /* save the state before borrow the nodemask */
-       nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+       nodemask_t saved_node_state = node_states[N_MEMORY];
        unsigned long totalpages = early_calculate_totalpages();
-       int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+       int usable_nodes = nodes_weight(node_states[N_MEMORY]);
 
        /*
         * If movablecore was specified, calculate what size of
@@ -4798,7 +4839,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 restart:
        /* Spread kernelcore memory as evenly as possible throughout nodes */
        kernelcore_node = required_kernelcore / usable_nodes;
-       for_each_node_state(nid, N_HIGH_MEMORY) {
+       for_each_node_state(nid, N_MEMORY) {
                unsigned long start_pfn, end_pfn;
 
                /*
@@ -4890,23 +4931,27 @@ restart:
 
 out:
        /* restore the node_state */
-       node_states[N_HIGH_MEMORY] = saved_node_state;
+       node_states[N_MEMORY] = saved_node_state;
 }
 
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
 {
-#ifdef CONFIG_HIGHMEM
        enum zone_type zone_type;
 
-       for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+       if (N_MEMORY == N_NORMAL_MEMORY)
+               return;
+
+       for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
                struct zone *zone = &pgdat->node_zones[zone_type];
                if (zone->present_pages) {
-                       node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+                       node_set_state(nid, N_HIGH_MEMORY);
+                       if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+                           zone_type <= ZONE_NORMAL)
+                               node_set_state(nid, N_NORMAL_MEMORY);
                        break;
                }
        }
-#endif
 }
 
 /**
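
Following the reworked check_for_memory() above: on a CONFIG_HIGHMEM kernel, a
node whose only memory is highmem now ends up with N_HIGH_MEMORY set but not
N_NORMAL_MEMORY, matching what the old check_for_regular_memory() arranged;
with CONFIG_MOVABLE_NODE, a node populated solely by ZONE_MOVABLE gets N_MEMORY
(set by free_area_init_nodes() in the next hunk) but neither of the other two
states, since no zone below ZONE_MOVABLE is populated. Where the three states
alias each other, the "N_MEMORY == N_NORMAL_MEMORY" test is a compile-time
constant and the function folds away entirely.
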
@@ -4989,8 +5034,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
                /* Any memory on that node */
                if (pgdat->node_present_pages)
-                       node_set_state(nid, N_HIGH_MEMORY);
-               check_for_regular_memory(pgdat);
+                       node_set_state(nid, N_MEMORY);
+               check_for_memory(pgdat, nid);
        }
 }
 
@@ -5727,7 +5772,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
        unsigned int tries = 0;
        int ret = 0;
 
-       migrate_prep_local();
+       migrate_prep();
 
        while (pfn < end || !list_empty(&cc->migratepages)) {
                if (fatal_signal_pending(current)) {
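
The final hunk's switch from migrate_prep_local() to migrate_prep() matters
because alloc_contig_range() may have to migrate pages still sitting on other
CPUs' per-CPU LRU pagevecs, where isolate_lru_page() cannot find them. Roughly,
the two helpers differ only in how widely they drain those pagevecs (a
paraphrase of the usual mm/migrate.c definitions, not code quoted from this
tree):

    /* Drain the per-CPU LRU pagevecs on every online CPU. */
    int migrate_prep(void)
    {
            lru_add_drain_all();
            return 0;
    }

    /* Drain only the pagevecs of the CPU we are currently running on. */
    int migrate_prep_local(void)
    {
            lru_add_drain();
            return 0;
    }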