diff options
author | Fan Du <fan.du@intel.com> | 2019-06-19 13:33:42 +0800 |
---|---|---|
committer | Fengguang Wu <fengguang.wu@intel.com> | 2019-06-27 11:33:41 +0800 |
commit | d9c2d4fb343f1d18bab1dcdf4ab6f92c474256dd (patch) | |
tree | 9db83524c3570bf16663013a3abe5db98e6923d8 | |
parent | 249155c20f9b0754bc1b932a33344cfb4e0c2101 (diff) | |
download | linux-fan-mapcount.tar.gz |
mm: Introduce pagecache page access counterfan-mapcount
Motivation
==========
To get overall statistics of page access patterns,
the ultimate method is to scan the page table
Accessed (A) bit.
But that only works for pages mapped in a page table.
For an unmapped pagecache page, the PageReferenced bit
could be queried at intervals; the precision depends
on the scan frequency. This could incur more
CPU consumption, as well as require a sophisticated code design.
What is really needed is a page access counter,
either in hardware form or as a software implementation.
This patch introduces the latter: a software
implementation of a page access counter.
Howto
=====
Allocating additional space for an access counter
would be costly and hard to maintain.
Instead, we can reuse mapcount in the page structure, because
it is only valid for pages mapped in a page table.
For an unmapped pagecache page, it's free!
In detail, introduce another PG_Mapped flag
to denote when mapcount is valid to show how many
times the page has been mapped in page table.
But when PG_Mapped is not set, store access
counter there.
Every buffered read/write looks up the pagecache,
so pagecache_get_page is a perfect place to add
the profiling code. Moreover, for fine-grained
control of the access counter, FGP_WRITE/FGP_ACCESSED
could be used to multiplex the counter usage.
e.g.
Additional code could set option to track read
access by FGP_ACCESSED, or respectively FGP_WRITE
to track write access.
Potential user
=============
a. A future in-kernel file LRU scan based implementation
could be simplified greatly.
b. Can create user level interface to export the access
counter.
Test
===
a. Boot/reboot ok
b. Needs more thorough testing.
Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | include/linux/page-flags.h | 5 | ||||
-rw-r--r-- | include/trace/events/mmflags.h | 3 | ||||
-rw-r--r-- | mm/filemap.c | 21 | ||||
-rw-r--r-- | mm/page_alloc.c | 3 | ||||
-rw-r--r-- | mm/rmap.c | 11 | ||||
-rw-r--r-- | mm/util.c | 2 |
7 files changed, 42 insertions, 5 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index dd0b5f4e1e45a..f025682b5f71b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -703,7 +703,7 @@ static inline int page_mapcount(struct page *page) if (unlikely(PageCompound(page))) return __page_mapcount(page); - return atomic_read(&page->_mapcount) + 1; + return PageMapped(page) ? atomic_read(&page->_mapcount) + 1 : 0; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 9f8712a4b1a5b..e34b4e45bcbae 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -131,6 +131,7 @@ enum pageflags { PG_young, PG_idle, #endif + PG_mapped, /* Indicate page cache page is mapped in page table */ __NR_PAGEFLAGS, /* Filesystems */ @@ -324,6 +325,10 @@ __PAGEFLAG(Slab, slab, PF_NO_TAIL) __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ + +PAGEFLAG(Mapped, mapped, PF_HEAD) __CLEARPAGEFLAG(Mapped, mapped, PF_HEAD) + TESTCLEARFLAG(Mapped, mapped, PF_HEAD) + /* Xen */ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index a1675d43777e8..c665d76e58cef 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -100,7 +100,8 @@ {1UL << PG_mappedtodisk, "mappedtodisk" }, \ {1UL << PG_reclaim, "reclaim" }, \ {1UL << PG_swapbacked, "swapbacked" }, \ - {1UL << PG_unevictable, "unevictable" } \ + {1UL << PG_unevictable, "unevictable" }, \ + {1UL << PG_mapped, "mapped" } \ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ diff --git a/mm/filemap.c b/mm/filemap.c index df2006ba0cfa5..b72f444dec716 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -249,6 +249,8 @@ static void page_cache_free_page(struct address_space *mapping, page_ref_sub(page, HPAGE_PMD_NR); 
VM_BUG_ON_PAGE(page_count(page) <= 0, page); } else { + if (!PageMapped(page)) + page_mapcount_reset(page); put_page(page); } } @@ -1632,7 +1634,24 @@ repeat: if (fgp_flags & FGP_ACCESSED) mark_page_accessed(page); - + /* + * w/o PageMapped set, mapcount indicates reference counter. + * From now on, we have software page access counter for + * unmapped pagecache page. We can expand more ways by + * leveraging this counter: + * a. Scan file LRU list + * b. Export interface to user space + * + * The access pattern of mapped pagecache page will be + * handled w/ method of anon pages. + * + * TODO: + * - Need more thinking to account for read and write sperately. + * - Opt-in a knob to multiplex the mapcount, i.e. profile + * read or profile write. + */ + if(!PageMapped(page)) + atomic_inc(&page->_mapcount); no_page: if (!page && (fgp_flags & FGP_CREAT)) { int err; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d66bc8abe0afd..4814ab4febe7e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -991,7 +991,8 @@ done_merging: static inline bool page_expected_state(struct page *page, unsigned long check_flags) { - if (unlikely(atomic_read(&page->_mapcount) != -1)) + if (unlikely(PageMapped(page) && + atomic_read(&page->_mapcount) != -1)) return false; if (unlikely((unsigned long)page->mapping | diff --git a/mm/rmap.c b/mm/rmap.c index e5dfe2ae6b0d5..51842dfef1d87 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1202,6 +1202,17 @@ void page_add_file_rmap(struct page *page, bool compound) if (PageMlocked(page)) clear_page_mlock(compound_head(page)); } + + /* + * Reset mapcount in case it carries invalid mapcount + * which used by page cache refernece accounting. + */ + if (!PageMapped(page)) { + /* Mark page cache page as mapped in page table. 
*/ + SetPageMapped(page); + page_mapcount_reset(page); + } + if (!atomic_inc_and_test(&page->_mapcount)) goto out; } diff --git a/mm/util.c b/mm/util.c index 9834c4ab7d8e8..21936e97bb65d 100644 --- a/mm/util.c +++ b/mm/util.c @@ -487,7 +487,7 @@ bool page_mapped(struct page *page) int i; if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; + return PageMapped(page) ? (atomic_read(&page->_mapcount) >= 0): false; page = compound_head(page); if (atomic_read(compound_mapcount_ptr(page)) >= 0) return true; |