From: Pavel Machek This is patch from Rafael, it eliminates order-5 (or worse) allocations during suspend. I did few style/whitespace modifications. It was tested by me, Rafael, and Stefan from SuSE. Signed-off-by: Rafael J. Wysocki Signed-off-by: Pavel Machek Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton --- 25-akpm/include/linux/suspend.h | 17 +++ 25-akpm/kernel/power/swsusp.c | 175 +++++++++++++++++++++------------------- 2 files changed, 106 insertions(+), 86 deletions(-) diff -puN include/linux/suspend.h~swsusp-do-not-use-higher-order-memory-allocations-on-suspend include/linux/suspend.h --- 25/include/linux/suspend.h~swsusp-do-not-use-higher-order-memory-allocations-on-suspend 2005-02-23 02:07:56.000000000 -0800 +++ 25-akpm/include/linux/suspend.h 2005-02-23 02:07:56.000000000 -0800 @@ -15,11 +15,22 @@ typedef struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ swp_entry_t swap_address; - swp_entry_t dummy; /* we need scratch space at - * end of page (see link, diskpage) - */ + + struct pbe *next; /* also used as scratch space at + * end of page (see link, diskpage) + */ } suspend_pagedir_t; +#define for_each_pbe(pbe, pblist) \ + for (pbe = pblist ; pbe ; pbe = pbe->next) + +#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe)) +#define PB_PAGE_SKIP (PBES_PER_PAGE-1) + +#define for_each_pb_page(pbe, pblist) \ + for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) + + #define SWAP_FILENAME_MAXLENGTH 32 diff -puN kernel/power/swsusp.c~swsusp-do-not-use-higher-order-memory-allocations-on-suspend kernel/power/swsusp.c --- 25/kernel/power/swsusp.c~swsusp-do-not-use-higher-order-memory-allocations-on-suspend 2005-02-23 02:07:56.000000000 -0800 +++ 25-akpm/kernel/power/swsusp.c 2005-02-23 02:08:07.000000000 -0800 @@ -76,7 +76,6 @@ extern const void __nosave_begin, __nosave_end; /* Variables to be preserved over suspend */ -static int pagedir_order_check; static int nr_copy_pages_check; extern char resume_file[]; @@ -225,8 +224,6 @@ static void lock_swapdevices(void) swap_list_unlock(); } - - /** * write_swap_page - Write one page to a fresh swap location. * @addr: Address we're writing. @@ -239,7 +236,6 @@ static void lock_swapdevices(void) * This is a partial improvement, since we will at least return other * errors, though we need to eventually fix the damn code. */ - static int write_page(unsigned long addr, swp_entry_t * loc) { swp_entry_t entry; @@ -259,14 +255,12 @@ static int write_page(unsigned long addr return error; } - /** * data_free - Free the swap entries used by the saved image. * * Walk the list of used swap entries and free each one. * This is only used for cleanup when suspend fails. */ - static void data_free(void) { swp_entry_t entry; @@ -282,28 +276,27 @@ static void data_free(void) } } - /** * data_write - Write saved image to swap. * * Walk the list of pages in the image and sync each one to swap. */ - static int data_write(void) { - int error = 0; - int i; + int error = 0, i = 0; unsigned int mod = nr_copy_pages / 100; + struct pbe *p; if (!mod) mod = 1; printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); - for (i = 0; i < nr_copy_pages && !error; i++) { + for_each_pbe(p, pagedir_nosave) { if (!(i%mod)) printk( "\b\b\b\b%3d%%", i / mod ); - error = write_page((pagedir_nosave+i)->address, - &((pagedir_nosave+i)->swap_address)); + if ((error = write_page(p->address, &(p->swap_address)))) + return error; + i++; } printk("\b\b\b\bdone\n"); return error; @@ -326,15 +319,14 @@ static void dump_info(void) static void init_header(void) { - memset(&swsusp_info,0,sizeof(swsusp_info)); + memset(&swsusp_info, 0, sizeof(swsusp_info)); swsusp_info.version_code = LINUX_VERSION_CODE; swsusp_info.num_physpages = num_physpages; - memcpy(&swsusp_info.uts,&system_utsname,sizeof(system_utsname)); + memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); swsusp_info.suspend_pagedir = pagedir_nosave; swsusp_info.cpus = num_online_cpus(); swsusp_info.image_pages = nr_copy_pages; - dump_info(); } static int close_swap(void) @@ -342,7 +334,8 @@ static int close_swap(void) swp_entry_t entry; int error; - error = write_page((unsigned long)&swsusp_info,&entry); + dump_info(); + error = write_page((unsigned long)&swsusp_info, &entry); if (!error) { printk( "S" ); error = mark_swapfiles(entry); @@ -373,15 +366,18 @@ static void free_pagedir_entries(void) static int write_pagedir(void) { - unsigned long addr = (unsigned long)pagedir_nosave; int error = 0; - int n = SUSPEND_PD_PAGES(nr_copy_pages); - int i; + unsigned n = 0; + struct pbe * pbe; + + printk( "Writing pagedir..."); + for_each_pb_page(pbe, pagedir_nosave) { + if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) + return error; + } swsusp_info.pagedir_pages = n; - printk( "Writing pagedir (%d pages)\n", n); - for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) - error = write_page(addr, &swsusp_info.pagedir[i]); + printk("done (%u pages)\n", n); return error; } @@ -567,8 +563,8 @@ static void copy_data_pages(void) struct zone *zone; unsigned long zone_pfn; struct pbe * pbe = pagedir_nosave; - int to_copy = nr_copy_pages; + pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages); for_each_zone(zone) { if (is_highmem(zone)) continue; @@ -577,78 +573,94 @@ static void copy_data_pages(void) if (saveable(zone, &zone_pfn)) { struct page * page; page = pfn_to_page(zone_pfn + zone->zone_start_pfn); + BUG_ON(!pbe); pbe->orig_address = (long) page_address(page); /* copy_page is not usable for copying task structs. */ memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); - pbe++; - to_copy--; + pbe = pbe->next; } } } - BUG_ON(to_copy); + BUG_ON(pbe); } /** - * calc_order - Determine the order of allocation needed for pagedir_save. - * - * This looks tricky, but is just subtle. Please fix it some time. - * Since there are %nr_copy_pages worth of pages in the snapshot, we need - * to allocate enough contiguous space to hold - * (%nr_copy_pages * sizeof(struct pbe)), - * which has the saved/orig locations of the page.. - * - * SUSPEND_PD_PAGES() tells us how many pages we need to hold those - * structures, then we call get_bitmask_order(), which will tell us the - * last bit set in the number, starting with 1. (If we need 30 pages, that - * is 0x0000001e in hex. The last bit is the 5th, which is the order we - * would use to allocate 32 contiguous pages). - * - * Since we also need to save those pages, we add the number of pages that - * we need to nr_copy_pages, and in case of an overflow, do the - * calculation again to update the number of pages needed. - * - * With this model, we will tend to waste a lot of memory if we just cross - * an order boundary. Plus, the higher the order of allocation that we try - * to do, the more likely we are to fail in a low-memory situtation - * (though we're unlikely to get this far in such a case, since swsusp - * requires half of memory to be free anyway). + * calc_nr - Determine the number of pages needed for a pbe list. */ - -static void calc_order(void) +static int calc_nr(int nr_copy) { - int diff = 0; - int order = 0; + int extra = 0; + int mod = !!(nr_copy % PBES_PER_PAGE); + int diff = (nr_copy / PBES_PER_PAGE) + mod; do { - diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order; - if (diff) { - order += diff; - nr_copy_pages += 1 << diff; - } - } while(diff); - pagedir_order = order; + extra += diff; + nr_copy += diff; + mod = !!(nr_copy % PBES_PER_PAGE); + diff = (nr_copy / PBES_PER_PAGE) + mod - extra; + } while (diff > 0); + + return nr_copy; } +/** + * free_pagedir - free pages allocated with alloc_pagedir() + */ + +static inline void free_pagedir(struct pbe *pblist) +{ + struct pbe *pbe; + + while (pblist) { + pbe = (pblist + PB_PAGE_SKIP)->next; + free_page((unsigned long)pblist); + pblist = pbe; + } +} /** * alloc_pagedir - Allocate the page directory. * - * First, determine exactly how many contiguous pages we need and + * First, determine exactly how many pages we need and * allocate them. + * + * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE + * struct pbe elements (pbes) and the last element in the page points + * to the next page. + * + * On each page we set up a list of struct_pbe elements. */ -static int alloc_pagedir(void) +static struct pbe * alloc_pagedir(unsigned nr_pages) { - calc_order(); - pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, - pagedir_order); - if (!pagedir_save) - return -ENOMEM; - memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE); - pagedir_nosave = pagedir_save; - return 0; + unsigned num; + struct pbe *pblist, *pbe, *p; + + if (!nr_pages) + return NULL; + + pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); + pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; + pbe = pbe->next, num += PBES_PER_PAGE) { + p = pbe; + pbe += PB_PAGE_SKIP; + do + p->next = p + 1; + while (p++ < pbe); + pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + } + if (pbe) { + for (num -= PBES_PER_PAGE - 1, p = pbe; num < nr_pages; p++, num++) + p->next = p + 1; + } else { /* get_zeroed_page() failed */ + free_pagedir(pblist); + pblist = NULL; + } + pr_debug("alloc_pagedir(): allocated %d PBEs\n", num); + return pblist; } /** @@ -658,10 +670,8 @@ static int alloc_pagedir(void) static void free_image_pages(void) { struct pbe * p; - int i; - p = pagedir_save; - for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) { + for_each_pbe(p, pagedir_save) { if (p->address) { ClearPageNosave(virt_to_page(p->address)); free_page(p->address); @@ -672,15 +682,13 @@ static void free_image_pages(void) /** * alloc_image_pages - Allocate pages for the snapshot. - * */ static int alloc_image_pages(void) { struct pbe * p; - int i; - for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) { + for_each_pbe(p, pagedir_save) { p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); if (!p->address) return -ENOMEM; @@ -694,7 +702,7 @@ void swsusp_free(void) BUG_ON(PageNosave(virt_to_page(pagedir_save))); BUG_ON(PageNosaveFree(virt_to_page(pagedir_save))); free_image_pages(); - free_pages((unsigned long) pagedir_save, pagedir_order); + free_pagedir(pagedir_save); } @@ -752,10 +760,13 @@ static int swsusp_alloc(void) if (!enough_swap()) return -ENOSPC; - if ((error = alloc_pagedir())) { + nr_copy_pages = calc_nr(nr_copy_pages); + + if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); - return error; + return -ENOMEM; } + pagedir_nosave = pagedir_save; if ((error = alloc_image_pages())) { printk(KERN_ERR "suspend: Allocating image pages failed.\n"); swsusp_free(); @@ -763,7 +774,6 @@ static int swsusp_alloc(void) } nr_copy_pages_check = nr_copy_pages; - pagedir_order_check = pagedir_order; return 0; } @@ -780,7 +790,7 @@ static int suspend_prepare_image(void) drain_local_pages(); count_data_pages(); - printk("swsusp: Need to copy %u pages\n",nr_copy_pages); + printk("swsusp: Need to copy %u pages\n", nr_copy_pages); error = swsusp_alloc(); if (error) @@ -867,7 +877,6 @@ int swsusp_suspend(void) asmlinkage int swsusp_restore(void) { BUG_ON (nr_copy_pages_check != nr_copy_pages); - BUG_ON (pagedir_order_check != pagedir_order); /* Even mappings of "global" things (vmalloc) need to be fixed */ __flush_tlb_global(); _