[PATCH v2 1/7] mm/compaction: split freepages without holding the zone lock

From: Joonsoo Kim <[hidden email]>

We don't need to split freepages while holding the zone lock. Doing so only
causes more contention on the zone lock, which is not desirable.
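
In short, the isolation path now only records the buddy order under the zone
lock, and the actual mapping/splitting moves to map_pages(), which runs with
the lock already dropped. A condensed sketch of the new flow (paraphrasing the
hunks below, not the exact code):

	/* In isolate_freepages_block(), still under the zone lock: */
	order = page_order(page);                 /* valid only while locked */
	isolated = __isolate_free_page(page, order);
	set_page_private(page, order);            /* remember the order for later */
	list_add_tail(&page->lru, freelist);

	/* Later, in map_pages(), without the zone lock: */
	order = page_private(page);
	set_page_private(page, 0);
	set_page_refcounted(page);
	arch_alloc_page(page, order);
	kernel_map_pages(page, 1 << order, 1);
	kasan_alloc_pages(page, order);
	if (order)
		split_page(page, order);          /* break into order-0 pages */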

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 include/linux/mm.h |  1 -
 mm/compaction.c    | 42 ++++++++++++++++++++++++++++++------------
 mm/page_alloc.c    | 27 ---------------------------
 3 files changed, 30 insertions(+), 40 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a00ec81..1a1782c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -537,7 +537,6 @@ void __put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
-int split_free_page(struct page *page);
 
 /*
  * Compound pages have a destructor function.  Provide a
diff --git a/mm/compaction.c b/mm/compaction.c
index 1427366..8e013eb 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -65,13 +65,31 @@ static unsigned long release_freepages(struct list_head *freelist)
 
 static void map_pages(struct list_head *list)
 {
- struct page *page;
+ unsigned int i, order, nr_pages;
+ struct page *page, *next;
+ LIST_HEAD(tmp_list);
+
+ list_for_each_entry_safe(page, next, list, lru) {
+ list_del(&page->lru);
+
+ order = page_private(page);
+ nr_pages = 1 << order;
+ set_page_private(page, 0);
+ set_page_refcounted(page);
+
+ arch_alloc_page(page, order);
+ kernel_map_pages(page, nr_pages, 1);
+ kasan_alloc_pages(page, order);
+ if (order)
+ split_page(page, order);
 
- list_for_each_entry(page, list, lru) {
- arch_alloc_page(page, 0);
- kernel_map_pages(page, 1, 1);
- kasan_alloc_pages(page, 0);
+ for (i = 0; i < nr_pages; i++) {
+ list_add(&page->lru, &tmp_list);
+ page++;
+ }
  }
+
+ list_splice(&tmp_list, list);
 }
 
 static inline bool migrate_async_suitable(int migratetype)
@@ -368,12 +386,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
  unsigned long flags = 0;
  bool locked = false;
  unsigned long blockpfn = *start_pfn;
+ unsigned int order;
 
  cursor = pfn_to_page(blockpfn);
 
  /* Isolate free pages. */
  for (; blockpfn < end_pfn; blockpfn++, cursor++) {
- int isolated, i;
+ int isolated;
  struct page *page = cursor;
 
  /*
@@ -439,13 +458,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
  goto isolate_fail;
  }
 
- /* Found a free page, break it into order-0 pages */
- isolated = split_free_page(page);
+ /* Found a free page, will break it into order-0 pages */
+ order = page_order(page);
+ isolated = __isolate_free_page(page, page_order(page));
+ set_page_private(page, order);
  total_isolated += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
+ list_add_tail(&page->lru, freelist);
 
  /* If a page was split, advance to the end of it */
  if (isolated) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d27e8b9..5134f46 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2525,33 +2525,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
- */
-int split_free_page(struct page *page)
-{
- unsigned int order;
- int nr_pages;
-
- order = page_order(page);
-
- nr_pages = __isolate_free_page(page, order);
- if (!nr_pages)
- return 0;
-
- /* Split into individual pages */
- set_page_refcounted(page);
- split_page(page, order);
- return nr_pages;
-}
-
-/*
  * Update NUMA hit/miss statistics
  *
  * Must be called with interrupts disabled.
--
1.9.1

[PATCH v2 5/7] tools/vm/page_owner: increase temporary buffer size

From: Joonsoo Kim <[hidden email]>

Page owner will be changed to store a deeper stacktrace, so the current
temporary buffer size isn't enough. Increase it.

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 tools/vm/page_owner_sort.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index 77147b4..f1c055f 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -79,12 +79,12 @@ static void add_list(char *buf, int len)
  }
 }
 
-#define BUF_SIZE 1024
+#define BUF_SIZE (128 * 1024)
 
 int main(int argc, char **argv)
 {
  FILE *fin, *fout;
- char buf[BUF_SIZE];
+ char *buf;
  int ret, i, count;
  struct block_list *list2;
  struct stat st;
@@ -107,6 +107,11 @@ int main(int argc, char **argv)
  max_size = st.st_size / 100; /* hack ... */
 
  list = malloc(max_size * sizeof(*list));
+ buf = malloc(BUF_SIZE);
+ if (!list || !buf) {
+ printf("Out of memory\n");
+ exit(1);
+ }
 
  for ( ; ; ) {
  ret = read_block(buf, BUF_SIZE, fin);
--
1.9.1

[PATCH v2 6/7] mm/page_owner: use stackdepot to store stacktrace

From: Joonsoo Kim <[hidden email]>

Currently, we store each page's allocation stacktrace in the corresponding
page_ext structure, and this requires a lot of memory. It causes the problem
that a memory-tight system doesn't work well when page_owner is enabled.
Moreover, even with this large memory consumption, we cannot get a full
stacktrace because we allocate the memory at boot time and maintain only
8 stacktrace slots to balance memory consumption. We could increase the
number of slots, but that would make the system unusable or change its behaviour.

To solve the problem, this patch uses stackdepot to store the stacktrace.
It obviously saves memory, but there is a drawback: stackdepot could fail.

stackdepot allocates memory at runtime, so it could fail if the system does
not have enough memory. But most allocation stacks are generated very early,
when there is still plenty of memory, so failure would not happen easily.
And one failure only means that we miss a single page's allocation
stacktrace, so it would not be a big problem. In this patch, when a memory
allocation failure happens, we store a special stacktrace handle in the page
whose stacktrace we failed to save. With it, the user can still estimate
memory usage properly even if a failure happens.
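
A condensed sketch of that fallback (paraphrasing save_stack() and
__set_page_owner() in the diff below):

	handle = depot_save_stack(&trace, gfp_mask);
	if (!handle)                       /* stackdepot couldn't allocate */
		handle = failure_handle;   /* pre-registered "failed" stack */
	page_ext->handle = handle;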

The memory savings look as follows (4GB memory system with page_owner enabled):

static allocation:
92274688 bytes -> 25165824 bytes

dynamic allocation after kernel build:
0 bytes -> 327680 bytes

total:
92274688 bytes -> 25493504 bytes

72% reduction in total.
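
For reference, these numbers are consistent with the page_ext layout, assuming
a 64-bit kernel with 4KB pages and only the page_owner fields compiled into
struct page_ext (treat the per-entry sizes as an estimate):

	4GB / 4KB = 1048576 page_ext entries

	before: 8 (flags) + 4 (order) + 4 (gfp_mask) + 4 (nr_entries)
	        + 4 (last_migrate_reason) + 8 * 8 (trace_entries) = 88 bytes
	        88 * 1048576 = 92274688 bytes
	after:  8 (flags) + 4 (order) + 4 (gfp_mask)
	        + 4 (last_migrate_reason) + 4 (handle) = 24 bytes
	        24 * 1048576 = 25165824 bytes

	1 - 25493504 / 92274688 ~= 0.72, i.e. the 72% above.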

Note that the implementation looks more complex than one might expect because
there is a recursion issue. stackdepot uses the page allocator, and page_owner
is called at page allocation, so using stackdepot in page_owner could re-enter
the page allocator and then page_owner again. That is recursion. To detect and
avoid it, recursion is checked whenever we obtain a stacktrace, and the
page_owner record is set to dummy information if recursion is found. The dummy
information means that this page was allocated for the page_owner feature
itself (such as for stackdepot), which is understandable behaviour for the user.
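
The detection itself is small (condensed from check_recursive_alloc() in the
diff below): save_stack()'s call site should appear at most once in the trace
it just captured, so seeing it twice means stackdepot's own allocation
re-entered page_owner.

	for (i = 0, count = 0; i < trace.nr_entries; i++)
		if (trace.entries[i] == _RET_IP_ && ++count == 2)
			return dummy_handle;   /* recursion: don't call depot again */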

v2:
o calculate memory saving including dynamic allocation after the kernel build
o change the maximum number of stacktrace entries due to possible stack overflow

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 include/linux/page_ext.h |   4 +-
 lib/Kconfig.debug        |   1 +
 mm/page_owner.c          | 138 ++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 122 insertions(+), 21 deletions(-)

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index e1fe7cf..03f2a3e 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
 
 struct pglist_data;
 struct page_ext_operations {
@@ -44,9 +45,8 @@ struct page_ext {
 #ifdef CONFIG_PAGE_OWNER
  unsigned int order;
  gfp_t gfp_mask;
- unsigned int nr_entries;
  int last_migrate_reason;
- unsigned long trace_entries[8];
+ depot_stack_handle_t handle;
 #endif
 };
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 930bf8e..fc37c66 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -248,6 +248,7 @@ config PAGE_OWNER
  depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
  select DEBUG_FS
  select STACKTRACE
+ select STACKDEPOT
  select PAGE_EXTENSION
  help
   This keeps track of what call chain is the owner of a page, may
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 499ad26..587dcca 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -7,11 +7,18 @@
 #include <linux/page_owner.h>
 #include <linux/jump_label.h>
 #include <linux/migrate.h>
+#include <linux/stackdepot.h>
+
 #include "internal.h"
 
+#define PAGE_OWNER_STACK_DEPTH (16)
+
 static bool page_owner_disabled = true;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
+static depot_stack_handle_t dummy_handle;
+static depot_stack_handle_t failure_handle;
+
 static void init_early_allocated_pages(void);
 
 static int early_page_owner_param(char *buf)
@@ -34,11 +41,41 @@ static bool need_page_owner(void)
  return true;
 }
 
+static noinline void register_dummy_stack(void)
+{
+ unsigned long entries[4];
+ struct stack_trace dummy;
+
+ dummy.nr_entries = 0;
+ dummy.max_entries = ARRAY_SIZE(entries);
+ dummy.entries = &entries[0];
+ dummy.skip = 0;
+
+ save_stack_trace(&dummy);
+ dummy_handle = depot_save_stack(&dummy, GFP_KERNEL);
+}
+
+static noinline void register_failure_stack(void)
+{
+ unsigned long entries[4];
+ struct stack_trace failure;
+
+ failure.nr_entries = 0;
+ failure.max_entries = ARRAY_SIZE(entries);
+ failure.entries = &entries[0];
+ failure.skip = 0;
+
+ save_stack_trace(&failure);
+ failure_handle = depot_save_stack(&failure, GFP_KERNEL);
+}
+
 static void init_page_owner(void)
 {
  if (page_owner_disabled)
  return;
 
+ register_dummy_stack();
+ register_failure_stack();
  static_branch_enable(&page_owner_inited);
  init_early_allocated_pages();
 }
@@ -61,25 +98,66 @@ void __reset_page_owner(struct page *page, unsigned int order)
  }
 }
 
-void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
+static inline bool check_recursive_alloc(struct stack_trace *trace,
+ unsigned long ip)
 {
- struct page_ext *page_ext = lookup_page_ext(page);
+ int i, count;
+
+ if (!trace->nr_entries)
+ return false;
+
+ for (i = 0, count = 0; i < trace->nr_entries; i++) {
+ if (trace->entries[i] == ip && ++count == 2)
+ return true;
+ }
 
+ return false;
+}
+
+static noinline depot_stack_handle_t save_stack(gfp_t flags)
+{
+ unsigned long entries[PAGE_OWNER_STACK_DEPTH];
  struct stack_trace trace = {
  .nr_entries = 0,
- .max_entries = ARRAY_SIZE(page_ext->trace_entries),
- .entries = &page_ext->trace_entries[0],
- .skip = 3,
+ .entries = entries,
+ .max_entries = PAGE_OWNER_STACK_DEPTH,
+ .skip = 0
  };
+ depot_stack_handle_t handle;
+
+ save_stack_trace(&trace);
+ if (trace.nr_entries != 0 &&
+    trace.entries[trace.nr_entries-1] == ULONG_MAX)
+ trace.nr_entries--;
+
+ /*
+ * We need to check recursion here because our request to stackdepot
+ * could trigger memory allocation to save new entry. New memory
+ * allocation would reach here and call depot_save_stack() again
+ * if we don't catch it. There is still not enough memory in stackdepot
+ * so it would try to allocate memory again and loop forever.
+ */
+ if (check_recursive_alloc(&trace, _RET_IP_))
+ return dummy_handle;
+
+ handle = depot_save_stack(&trace, flags);
+ if (!handle)
+ handle = failure_handle;
+
+ return handle;
+}
+
+noinline void __set_page_owner(struct page *page, unsigned int order,
+ gfp_t gfp_mask)
+{
+ struct page_ext *page_ext = lookup_page_ext(page);
 
  if (unlikely(!page_ext))
  return;
 
- save_stack_trace(&trace);
-
+ page_ext->handle = save_stack(gfp_mask);
  page_ext->order = order;
  page_ext->gfp_mask = gfp_mask;
- page_ext->nr_entries = trace.nr_entries;
  page_ext->last_migrate_reason = -1;
 
  __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
@@ -111,7 +189,6 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 {
  struct page_ext *old_ext = lookup_page_ext(oldpage);
  struct page_ext *new_ext = lookup_page_ext(newpage);
- int i;
 
  if (unlikely(!old_ext || !new_ext))
  return;
@@ -119,10 +196,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
  new_ext->order = old_ext->order;
  new_ext->gfp_mask = old_ext->gfp_mask;
  new_ext->last_migrate_reason = old_ext->last_migrate_reason;
- new_ext->nr_entries = old_ext->nr_entries;
-
- for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++)
- new_ext->trace_entries[i] = old_ext->trace_entries[i];
+ new_ext->handle = old_ext->handle;
 
  /*
  * We don't clear the bit on the oldpage as it's going to be freed
@@ -138,14 +212,18 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 
 static ssize_t
 print_page_owner(char __user *buf, size_t count, unsigned long pfn,
- struct page *page, struct page_ext *page_ext)
+ struct page *page, struct page_ext *page_ext,
+ depot_stack_handle_t handle)
 {
  int ret;
  int pageblock_mt, page_mt;
  char *kbuf;
+ unsigned long entries[PAGE_OWNER_STACK_DEPTH];
  struct stack_trace trace = {
- .nr_entries = page_ext->nr_entries,
- .entries = &page_ext->trace_entries[0],
+ .nr_entries = 0,
+ .entries = entries,
+ .max_entries = PAGE_OWNER_STACK_DEPTH,
+ .skip = 0
  };
 
  kbuf = kmalloc(count, GFP_KERNEL);
@@ -174,6 +252,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
  if (ret >= count)
  goto err;
 
+ depot_fetch_stack(handle, &trace);
  ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
  if (ret >= count)
  goto err;
@@ -204,10 +283,14 @@ err:
 void __dump_page_owner(struct page *page)
 {
  struct page_ext *page_ext = lookup_page_ext(page);
+ unsigned long entries[PAGE_OWNER_STACK_DEPTH];
  struct stack_trace trace = {
- .nr_entries = page_ext->nr_entries,
- .entries = &page_ext->trace_entries[0],
+ .nr_entries = 0,
+ .entries = entries,
+ .max_entries = PAGE_OWNER_STACK_DEPTH,
+ .skip = 0
  };
+ depot_stack_handle_t handle;
  gfp_t gfp_mask = page_ext->gfp_mask;
  int mt = gfpflags_to_migratetype(gfp_mask);
 
@@ -221,6 +304,13 @@ void __dump_page_owner(struct page *page)
  return;
  }
 
+ handle = READ_ONCE(page_ext->handle);
+ if (!handle) {
+ pr_alert("page_owner info is not active (free page?)\n");
+ return;
+ }
+
+ depot_fetch_stack(handle, &trace);
  pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
  page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
  print_stack_trace(&trace, 0);
@@ -236,6 +326,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  unsigned long pfn;
  struct page *page;
  struct page_ext *page_ext;
+ depot_stack_handle_t handle;
 
  if (!static_branch_unlikely(&page_owner_inited))
  return -EINVAL;
@@ -284,10 +375,19 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
  continue;
 
+ /*
+ * Access to page_ext->handle isn't synchronous so we should
+ * be careful to access it.
+ */
+ handle = READ_ONCE(page_ext->handle);
+ if (!handle)
+ continue;
+
  /* Record the next PFN to read in the file offset */
  *ppos = (pfn - min_low_pfn) + 1;
 
- return print_page_owner(buf, count, pfn, page, page_ext);
+ return print_page_owner(buf, count, pfn, page,
+ page_ext, handle);
  }
 
  return 0;
--
1.9.1

[PATCH v2 3/7] mm/page_owner: copy last_migrate_reason in copy_page_owner()

From: Joonsoo Kim <[hidden email]>

Currently, copy_page_owner() doesn't copy all of the owner information. It
skips last_migrate_reason because copy_page_owner() is used for migration and
that field will be set properly soon afterwards. But a following patch will
use copy_page_owner() elsewhere, and this skip would leave the allocated page
with an uninitialized last_migrate_reason. To prevent that, this patch also
copies last_migrate_reason in copy_page_owner().

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 mm/page_owner.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/page_owner.c b/mm/page_owner.c
index c6cda3e..73e202f 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -118,6 +118,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 
  new_ext->order = old_ext->order;
  new_ext->gfp_mask = old_ext->gfp_mask;
+ new_ext->last_migrate_reason = old_ext->last_migrate_reason;
  new_ext->nr_entries = old_ext->nr_entries;
 
  for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++)
--
1.9.1

[PATCH v2 7/7] mm/page_alloc: introduce post allocation processing on page allocator

From: Joonsoo Kim <[hidden email]>

This patch is motivated by Hugh's and Vlastimil's concern [1].

There are two ways to get a freepage from the allocator. One is using the
normal memory allocation API, and the other is __isolate_free_page(), which is
used internally for compaction and pageblock isolation. The latter is rather
tricky since it doesn't do all of the post-allocation processing done by the
normal API.

One problematic case I already know of is that a poisoned page would not be
checked if it is allocated by __isolate_free_page(). Perhaps there are more.

We could add more debug logic for allocated pages in the future, and this
separation would then cause more problems. I'd like to fix the situation now.
The solution is simple: this patch commonizes the post-allocation logic for
newly allocated pages and uses it at all sites. This will solve the problem.

[1] http://marc.info/?i=alpine.LSU.2.11.1604270029350.7066%40eggly.anvils%3E
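
Concretely, after this patch every page handed out by the allocator runs
through the same post_alloc_hook(), regardless of the path (see the hunks
below):

	/*
	 * prep_new_page()              -> post_alloc_hook(page, order, gfp_flags)
	 * compaction map_pages()       -> post_alloc_hook(page, order, __GFP_MOVABLE)
	 * unset_migratetype_isolate()  -> post_alloc_hook(page, order, __GFP_MOVABLE)
	 *
	 * post_alloc_hook() covers set_page_private(0), set_page_refcounted(),
	 * arch_alloc_page(), kernel_map_pages(), kernel_poison_pages(),
	 * kasan_alloc_pages() and set_page_owner().
	 */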

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 mm/compaction.c     |  8 +-------
 mm/internal.h       |  2 ++
 mm/page_alloc.c     | 22 +++++++++++++---------
 mm/page_isolation.c |  4 +---
 4 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 6043ef8..e15d350 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -75,14 +75,8 @@ static void map_pages(struct list_head *list)
 
  order = page_private(page);
  nr_pages = 1 << order;
- set_page_private(page, 0);
- set_page_refcounted(page);
 
- arch_alloc_page(page, order);
- kernel_map_pages(page, nr_pages, 1);
- kasan_alloc_pages(page, order);
-
- set_page_owner(page, order, __GFP_MOVABLE);
+ post_alloc_hook(page, order, __GFP_MOVABLE);
  if (order)
  split_page(page, order);
 
diff --git a/mm/internal.h b/mm/internal.h
index b6ead95..420bbe3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -153,6 +153,8 @@ extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
  unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
+extern void post_alloc_hook(struct page *page, unsigned int order,
+ gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 616ada9..baa5999 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1722,6 +1722,18 @@ static bool check_new_pages(struct page *page, unsigned int order)
  return false;
 }
 
+void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags)
+{
+ set_page_private(page, 0);
+ set_page_refcounted(page);
+
+ arch_alloc_page(page, order);
+ kernel_map_pages(page, 1 << order, 1);
+ kernel_poison_pages(page, 1 << order, 1);
+ kasan_alloc_pages(page, order);
+ set_page_owner(page, order, gfp_flags);
+}
+
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
  unsigned int alloc_flags)
 {
@@ -1734,13 +1746,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
  poisoned &= page_is_poisoned(p);
  }
 
- set_page_private(page, 0);
- set_page_refcounted(page);
-
- arch_alloc_page(page, order);
- kernel_map_pages(page, 1 << order, 1);
- kernel_poison_pages(page, 1 << order, 1);
- kasan_alloc_pages(page, order);
+ post_alloc_hook(page, order, gfp_flags);
 
  if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
  for (i = 0; i < (1 << order); i++)
@@ -1749,8 +1755,6 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
  if (order && (gfp_flags & __GFP_COMP))
  prep_compound_page(page, order);
 
- set_page_owner(page, order, gfp_flags);
-
  /*
  * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
  * allocate the page. The expectation is that the caller is taking
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 927f5ee..4639163 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -128,9 +128,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 out:
  spin_unlock_irqrestore(&zone->lock, flags);
  if (isolated_page) {
- kernel_map_pages(page, (1 << order), 1);
- set_page_refcounted(page);
- set_page_owner(page, order, __GFP_MOVABLE);
+ post_alloc_hook(page, order, __GFP_MOVABLE);
  __free_pages(isolated_page, order);
  }
 }
--
1.9.1

[PATCH v2 2/7] mm/page_owner: initialize page owner without holding the zone lock

From: Joonsoo Kim <[hidden email]>

It's not necessary to initialize page_owner while holding the zone lock.
Doing so causes more contention on the zone lock, although that's not a big
problem since page_owner is just a debug feature. Still, it is better than
before, so do it. This is also a preparation step for using stackdepot in the
page_owner feature: stackdepot allocates new pages when it has no reserved
space left, and holding the zone lock in that case would cause a deadlock.
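
Roughly, the deadlock the last sentence refers to would look like this (a
sketch of the lock recursion, not a reproduced trace):

	/*
	 * spin_lock_irqsave(&zone->lock)      caller of __isolate_free_page()
	 *   set_page_owner()
	 *     depot_save_stack()              (once the stackdepot patch lands)
	 *       alloc_pages()                 stackdepot needs a new pool page
	 *         ...
	 *           spin_lock(&zone->lock)    same zone lock, already held
	 */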

Signed-off-by: Joonsoo Kim <[hidden email]>
---
 mm/compaction.c     | 3 +++
 mm/page_alloc.c     | 2 --
 mm/page_isolation.c | 9 ++++++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 8e013eb..6043ef8 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -20,6 +20,7 @@
 #include <linux/kasan.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/page_owner.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -80,6 +81,8 @@ static void map_pages(struct list_head *list)
  arch_alloc_page(page, order);
  kernel_map_pages(page, nr_pages, 1);
  kasan_alloc_pages(page, order);
+
+ set_page_owner(page, order, __GFP_MOVABLE);
  if (order)
  split_page(page, order);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5134f46..1b1ca57 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2507,8 +2507,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
  zone->free_area[order].nr_free--;
  rmv_page_order(page);
 
- set_page_owner(page, order, __GFP_MOVABLE);
-
  /* Set the pageblock if the isolated page is at least a pageblock */
  if (order >= pageblock_order - 1) {
  struct page *endpage = page + (1 << order) - 1;
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 612122b..927f5ee 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -7,6 +7,7 @@
 #include <linux/pageblock-flags.h>
 #include <linux/memory.h>
 #include <linux/hugetlb.h>
+#include <linux/page_owner.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -108,8 +109,6 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
  if (pfn_valid_within(page_to_pfn(buddy)) &&
     !is_migrate_isolate_page(buddy)) {
  __isolate_free_page(page, order);
- kernel_map_pages(page, (1 << order), 1);
- set_page_refcounted(page);
  isolated_page = page;
  }
  }
@@ -128,8 +127,12 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
  zone->nr_isolate_pageblock--;
 out:
  spin_unlock_irqrestore(&zone->lock, flags);
- if (isolated_page)
+ if (isolated_page) {
+ kernel_map_pages(page, (1 << order), 1);
+ set_page_refcounted(page);
+ set_page_owner(page, order, __GFP_MOVABLE);
  __free_pages(isolated_page, order);
+ }
 }
 
 static inline struct page *
--
1.9.1

[PATCH v2 4/7] mm/page_owner: introduce split_page_owner and replace manual handling

From: Joonsoo Kim <[hidden email]>

split_page() calls set_page_owner() to set up page_owner for each page. But it
has a drawback: the head page and the tail pages get different stacktraces
because the callsites of set_page_owner() are slightly different. To avoid
this problem, this patch copies the head page's page_owner to the other pages.
It needs to introduce a new function, split_page_owner(), but it also removes
another function, get_page_owner_gfp(), so it looks like a reasonable trade-off.
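
In other words (condensed from the __split_page_owner() hunk below), the tail
pages simply inherit the head page's record instead of capturing a fresh
stacktrace at split time:

	page_ext->order = 0;                       /* head page becomes order-0 */
	for (i = 1; i < (1 << order); i++)
		__copy_page_owner(page, page + i); /* tails share the head's stack */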

Acked-by: Vlastimil Babka <[hidden email]>
Signed-off-by: Joonsoo Kim <[hidden email]>
---
 include/linux/page_owner.h | 12 +++++-------
 mm/page_alloc.c            |  8 ++------
 mm/page_owner.c            | 14 +++++++-------
 3 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 46f1b93..30583ab 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -10,7 +10,7 @@ extern struct page_ext_operations page_owner_ops;
 extern void __reset_page_owner(struct page *page, unsigned int order);
 extern void __set_page_owner(struct page *page,
  unsigned int order, gfp_t gfp_mask);
-extern gfp_t __get_page_owner_gfp(struct page *page);
+extern void __split_page_owner(struct page *page, unsigned int order);
 extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
 extern void __dump_page_owner(struct page *page);
@@ -28,12 +28,10 @@ static inline void set_page_owner(struct page *page,
  __set_page_owner(page, order, gfp_mask);
 }
 
-static inline gfp_t get_page_owner_gfp(struct page *page)
+static inline void split_page_owner(struct page *page, unsigned int order)
 {
  if (static_branch_unlikely(&page_owner_inited))
- return __get_page_owner_gfp(page);
- else
- return 0;
+ __split_page_owner(page, order);
 }
 static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
 {
@@ -58,9 +56,9 @@ static inline void set_page_owner(struct page *page,
  unsigned int order, gfp_t gfp_mask)
 {
 }
-static inline gfp_t get_page_owner_gfp(struct page *page)
+static inline void split_page_owner(struct page *page,
+ unsigned int order)
 {
- return 0;
 }
 static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1b1ca57..616ada9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2459,7 +2459,6 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
 void split_page(struct page *page, unsigned int order)
 {
  int i;
- gfp_t gfp_mask;
 
  VM_BUG_ON_PAGE(PageCompound(page), page);
  VM_BUG_ON_PAGE(!page_count(page), page);
@@ -2473,12 +2472,9 @@ void split_page(struct page *page, unsigned int order)
  split_page(virt_to_page(page[0].shadow), order);
 #endif
 
- gfp_mask = get_page_owner_gfp(page);
- set_page_owner(page, 0, gfp_mask);
- for (i = 1; i < (1 << order); i++) {
+ for (i = 1; i < (1 << order); i++)
  set_page_refcounted(page + i);
- set_page_owner(page + i, 0, gfp_mask);
- }
+ split_page_owner(page, order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 73e202f..499ad26 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -94,17 +94,17 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
  page_ext->last_migrate_reason = reason;
 }
 
-gfp_t __get_page_owner_gfp(struct page *page)
+void __split_page_owner(struct page *page, unsigned int order)
 {
+ int i;
  struct page_ext *page_ext = lookup_page_ext(page);
+
  if (unlikely(!page_ext))
- /*
- * The caller just returns 0 if no valid gfp
- * So return 0 here too.
- */
- return 0;
+ return;
 
- return page_ext->gfp_mask;
+ page_ext->order = 0;
+ for (i = 1; i < (1 << order); i++)
+ __copy_page_owner(page, page + i);
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
--
1.9.1