This changes the fadvise(FADV_DONTNEED) operation to start async writeout of any dirty pages in the file. The thinking is that if the application doesn't want to use those pages in the future, we may as well get IO underway against them so they can be freed up on the next call to fadvise(). The POSIX spec does not go into any detail as to whether this is the right or wrong behaviour. This provides a nice way for applications which are writing streaming data (the main users of fadvise) to keep the amount of dirty pagecache under control without having to resort to system-wide VM tuning. It also provides an "async fsync()". If the application passes in a length of zero, fadvise will start async writeout of the pages, but will not invalidate any of the file's pagecache. include/linux/fs.h | 1 + mm/fadvise.c | 2 ++ mm/filemap.c | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff -puN include/linux/fs.h~fadvise-flush-data include/linux/fs.h --- 25/include/linux/fs.h~fadvise-flush-data 2003-04-02 22:51:22.000000000 -0800 +++ 25-akpm/include/linux/fs.h 2003-04-02 23:33:09.000000000 -0800 @@ -1112,6 +1112,7 @@ unsigned long invalidate_inode_pages(str extern void invalidate_inode_pages2(struct address_space *mapping); extern void write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); +extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); extern void sync_supers(void); extern void sync_filesystems(int wait); diff -puN mm/fadvise.c~fadvise-flush-data mm/fadvise.c --- 25/mm/fadvise.c~fadvise-flush-data 2003-04-02 22:51:22.000000000 -0800 +++ 25-akpm/mm/fadvise.c 2003-04-02 22:51:22.000000000 -0800 @@ -61,6 +61,8 @@ long sys_fadvise64(int fd, loff_t offset ret = 0; break; case POSIX_FADV_DONTNEED: + if (!bdi_write_congested(mapping->backing_dev_info)) + filemap_flush(mapping); invalidate_mapping_pages(mapping, offset >> PAGE_CACHE_SHIFT, (len >> PAGE_CACHE_SHIFT) + 
1); break; diff -puN mm/filemap.c~fadvise-flush-data mm/filemap.c --- 25/mm/filemap.c~fadvise-flush-data 2003-04-02 22:51:22.000000000 -0800 +++ 25-akpm/mm/filemap.c 2003-04-02 22:51:22.000000000 -0800 @@ -122,11 +122,11 @@ static inline int sync_page(struct page * if a dirty page/buffer is encountered, it must be waited upon, and not just * skipped over. */ -int filemap_fdatawrite(struct address_space *mapping) +static int __filemap_fdatawrite(struct address_space *mapping, int sync_mode) { int ret; struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, + .sync_mode = sync_mode, .nr_to_write = mapping->nrpages * 2, }; @@ -140,6 +140,20 @@ int filemap_fdatawrite(struct address_sp return ret; } +int filemap_fdatawrite(struct address_space *mapping) +{ + return __filemap_fdatawrite(mapping, WB_SYNC_ALL); +} + +/* + * This is a mostly non-blocking flush. Not suitable for data-integrity + * purposes. + */ +int filemap_flush(struct address_space *mapping) +{ + return __filemap_fdatawrite(mapping, WB_SYNC_NONE); +} + /** * filemap_fdatawait - walk the list of locked pages of the given address * space and wait for all of them. _