diff --git a/config/spl-build.m4 b/config/spl-build.m4 index d0cf86d9ab..1013e1f4ce 100644 --- a/config/spl-build.m4 +++ b/config/spl-build.m4 @@ -752,7 +752,11 @@ AC_DEFUN([SPL_AC_KALLSYMS_LOOKUP_NAME], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_GET_VMALLOC_INFO], [ SPL_CHECK_SYMBOL_EXPORT( @@ -764,7 +768,11 @@ AC_DEFUN([SPL_AC_GET_VMALLOC_INFO], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [ SPL_CHECK_SYMBOL_EXPORT( @@ -776,7 +784,11 @@ AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [ SPL_CHECK_SYMBOL_EXPORT( @@ -788,7 +800,11 @@ AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_NEXT_ZONE], [ SPL_CHECK_SYMBOL_EXPORT( @@ -800,7 +816,11 @@ AC_DEFUN([SPL_AC_NEXT_ZONE], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_GET_ZONE_COUNTS], [ SPL_CHECK_SYMBOL_EXPORT( @@ -810,3 +830,26 @@ AC_DEFUN([SPL_AC_GET_ZONE_COUNTS], [ [get_zone_counts() is available])], []) ]) + +dnl # +dnl # 2.6.21 API change, +dnl # Public global zone stats now include free/inactive/active page +dnl # counts. This replaced the priviate get_zone_counts() interface. +dnl # +AC_DEFUN([SPL_AC_ZONE_STAT_ITEM_FIA], [ + AC_MSG_CHECKING([whether free/inactive/active page state is available]) + SPL_LINUX_TRY_COMPILE([ + #include + ],[ + enum zone_stat_item i1, i2, i3; + i1 = NR_FREE_PAGES; + i2 = NR_INACTIVE; + i3 = NR_ACTIVE; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ZONE_STAT_ITEM_FIA, 1, + [free/inactive/active page state is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/configure b/configure index 0467b602d4..f3deee9c88 100755 --- a/configure +++ b/configure @@ -20872,6 +20872,74 @@ _ACEOF + echo "$as_me:$LINENO: checking whether free/inactive/active page state is available" >&5 +echo $ECHO_N "checking whether free/inactive/active page state is available... $ECHO_C" >&6 + + +cat >conftest.c <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + + #include + +int +main (void) +{ + + enum zone_stat_item i1, i2, i3; + i1 = NR_FREE_PAGES; + i2 = NR_INACTIVE; + i3 = NR_ACTIVE; + + ; + return 0; +} + +_ACEOF + + + rm -Rf build && mkdir -p build + echo "obj-m := conftest.o" >build/Makefile + if { ac_try='cp conftest.c build && make modules CC="$CC" LINUXINCLUDE="-Iinclude -Iinclude2 -I$LINUX/include -include include/linux/autoconf.h" -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } >/dev/null && { ac_try='test -s build/conftest.o' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ZONE_STAT_ITEM_FIA 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + + +fi + + rm -Rf build + + + + ac_config_files="$ac_config_files Makefile lib/Makefile cmd/Makefile module/Makefile module/spl/Makefile module/splat/Makefile include/Makefile scripts/Makefile spl.spec" diff --git a/configure.ac b/configure.ac index 623d54dd58..bdec31c4bf 100644 --- a/configure.ac +++ b/configure.ac @@ -74,6 +74,7 @@ SPL_AC_FIRST_ONLINE_PGDAT SPL_AC_NEXT_ONLINE_PGDAT SPL_AC_NEXT_ZONE SPL_AC_GET_ZONE_COUNTS +SPL_AC_ZONE_STAT_ITEM_FIA AC_CONFIG_FILES([ Makefile diff --git a/include/sys/vmsystm.h b/include/sys/vmsystm.h index 861e51164c..123005e5e0 100644 --- a/include/sys/vmsystm.h +++ b/include/sys/vmsystm.h @@ -115,13 +115,24 @@ extern next_zone_t next_zone_fn; #endif /* HAVE_NEXT_ZONE */ /* Source linux/mm/vmstat.c */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS typedef void (*get_zone_counts_t)(unsigned long *, unsigned long *, unsigned long *); extern get_zone_counts_t get_zone_counts_fn; -#define get_zone_counts(a,i,f) get_zone_counts_fn(a,i,f) -#endif /* HAVE_GET_ZONE_COUNTS */ +# define get_zone_counts(a,i,f) get_zone_counts_fn(a,i,f) +extern unsigned long spl_global_page_state(int); +/* Defines designed to simulate enum but large enough to ensure no overlap */ +# define NR_FREE_PAGES 0x8001 +# define NR_INACTIVE 0x8002 +# define NR_ACTIVE 0x8003 +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#else +#define spl_global_page_state(item) global_page_state(item) +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ #define xcopyin(from, to, size) copy_from_user(to, from, size) #define xcopyout(from, to, size) copy_to_user(to, from, size) diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 944300bb49..6723dcd081 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -99,22 +99,47 @@ next_zone_t next_zone_fn = NULL; EXPORT_SYMBOL(next_zone_fn); #endif /* HAVE_NEXT_ZONE */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS get_zone_counts_t get_zone_counts_fn = NULL; EXPORT_SYMBOL(get_zone_counts_fn); -#endif /* HAVE_GET_ZONE_COUNTS */ -pgcnt_t -spl_kmem_availrmem(void) +unsigned long +spl_global_page_state(int item) { unsigned long active; unsigned long inactive; unsigned long free; - get_zone_counts(&active, &inactive, &free); + if (item == NR_FREE_PAGES) { + get_zone_counts(&active, &inactive, &free); + return free; + } + if (item == NR_INACTIVE) { + get_zone_counts(&active, &inactive, &free); + return inactive; + } + + if (item == NR_ACTIVE) { + get_zone_counts(&active, &inactive, &free); + return active; + } + + return global_page_state((enum zone_stat_item)item); +} +EXPORT_SYMBOL(spl_global_page_state); +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ + +pgcnt_t +spl_kmem_availrmem(void) +{ /* The amount of easily available memory */ - return free + inactive; + return (spl_global_page_state(NR_FREE_PAGES) + + spl_global_page_state(NR_INACTIVE)); } EXPORT_SYMBOL(spl_kmem_availrmem); @@ -1773,37 +1798,51 @@ spl_kmem_init_kallsyms_lookup(void) #ifndef HAVE_GET_VMALLOC_INFO get_vmalloc_info_fn = (get_vmalloc_info_t) spl_kallsyms_lookup_name("get_vmalloc_info"); - if (!get_vmalloc_info_fn) + if (!get_vmalloc_info_fn) { + printk(KERN_ERR "Error: Unknown symbol get_vmalloc_info\n"); return -EFAULT; + } #endif /* HAVE_GET_VMALLOC_INFO */ #ifndef HAVE_FIRST_ONLINE_PGDAT first_online_pgdat_fn = (first_online_pgdat_t) spl_kallsyms_lookup_name("first_online_pgdat"); - if (!first_online_pgdat_fn) + if (!first_online_pgdat_fn) { + printk(KERN_ERR "Error: Unknown symbol first_online_pgdat\n"); return -EFAULT; + } #endif /* HAVE_FIRST_ONLINE_PGDAT */ #ifndef HAVE_NEXT_ONLINE_PGDAT next_online_pgdat_fn = (next_online_pgdat_t) spl_kallsyms_lookup_name("next_online_pgdat"); - if (!next_online_pgdat_fn) + if (!next_online_pgdat_fn) { + printk(KERN_ERR "Error: Unknown symbol next_online_pgdat\n"); return -EFAULT; + } #endif /* HAVE_NEXT_ONLINE_PGDAT */ #ifndef HAVE_NEXT_ZONE next_zone_fn = (next_zone_t) spl_kallsyms_lookup_name("next_zone"); - if (!next_zone_fn) + if (!next_zone_fn) { + printk(KERN_ERR "Error: Unknown symbol next_zone\n"); return -EFAULT; + } #endif /* HAVE_NEXT_ZONE */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS get_zone_counts_fn = (get_zone_counts_t) spl_kallsyms_lookup_name("get_zone_counts"); - if (!get_zone_counts_fn) + if (!get_zone_counts_fn) { + printk(KERN_ERR "Error: Unknown symbol get_zone_counts\n"); return -EFAULT; -#endif /* HAVE_GET_ZONE_COUNTS */ + } +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ /* * It is now safe to initialize the global tunings which rely on diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c index 35718e2f84..f12cd34b38 100644 --- a/module/splat/splat-kmem.c +++ b/module/splat/splat-kmem.c @@ -74,6 +74,10 @@ #define SPLAT_KMEM_TEST11_NAME "slab_overcommit" #define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test" +#define SPLAT_KMEM_TEST12_ID 0x010c +#define SPLAT_KMEM_TEST12_NAME "vmem_size" +#define SPLAT_KMEM_TEST12_DESC "Memory zone test" + #define SPLAT_KMEM_ALLOC_COUNT 10 #define SPLAT_VMEM_ALLOC_COUNT 10 @@ -652,7 +656,7 @@ splat_kmem_cache_thread_test(struct file *file, void *arg, char *name, splat_kmem_cache_test_constructor, splat_kmem_cache_test_destructor, splat_kmem_cache_test_reclaim, - kcp, NULL, KMC_VMEM); + kcp, NULL, KMC_KMEM); if (!kcp->kcp_cache) { splat_vprint(file, name, "Unable to create '%s'\n", cache_name); rc = -ENOMEM; @@ -973,9 +977,8 @@ splat_kmem_test9(struct file *file, void *arg) static int splat_kmem_test10(struct file *file, void *arg) { - uint64_t size, alloc, free_mem, rc = 0; + uint64_t size, alloc, rc = 0; - free_mem = nr_free_pages() * PAGE_SIZE; for (size = 16; size <= 1024*1024; size *= 2) { splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", @@ -985,8 +988,9 @@ splat_kmem_test10(struct file *file, void *arg) for (alloc = 1; alloc <= 1024; alloc *= 2) { - /* Skip tests which exceed free memory */ - if (size * alloc * SPLAT_KMEM_THREADS > free_mem / 2) + /* Skip tests which exceed available memory. We + * leverage availrmem here for some extra testing */ + if (size * alloc * SPLAT_KMEM_THREADS > availrmem / 2) continue; rc = splat_kmem_cache_thread_test(file, arg, @@ -1014,12 +1018,12 @@ splat_kmem_test11(struct file *file, void *arg) { uint64_t size, alloc, rc; - size = 1024*1024; - alloc = ((4 * num_physpages * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; + size = 256*1024; + alloc = ((4 * physmem * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; - splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", + splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "name", "time (sec)\tslabs \tobjs \thash\n"); - splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", + splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "", " \ttot/max/calc\ttot/max/calc\n"); rc = splat_kmem_cache_thread_test(file, arg, @@ -1028,6 +1032,81 @@ splat_kmem_test11(struct file *file, void *arg) return rc; } +/* + * Check vmem_size() behavior by acquiring the alloc/free/total vmem + * space, then allocate a known buffer size from vmem space. We can + * then check that vmem_size() values were updated properly with in + * a fairly small tolerence. The tolerance is important because we + * are not the only vmem consumer on the system. Other unrelated + * allocations might occur during the small test window. The vmem + * allocation itself may also add in a little extra private space to + * the buffer. Finally, verify total space always remains unchanged. + */ +static int +splat_kmem_test12(struct file *file, void *arg) +{ + ssize_t alloc1, free1, total1; + ssize_t alloc2, free2, total2; + int size = 8*1024*1024; + void *ptr; + + alloc1 = vmem_size(NULL, VMEM_ALLOC); + free1 = vmem_size(NULL, VMEM_FREE); + total1 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%d free=%d " + "total=%d\n", (int)alloc1, (int)free1, (int)total1); + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Alloc %d bytes\n", size); + ptr = vmem_alloc(size, KM_SLEEP); + if (!ptr) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed to alloc %d bytes\n", size); + return -ENOMEM; + } + + alloc2 = vmem_size(NULL, VMEM_ALLOC); + free2 = vmem_size(NULL, VMEM_FREE); + total2 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%d free=%d " + "total=%d\n", (int)alloc2, (int)free2, (int)total2); + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Free %d bytes\n", size); + vmem_free(ptr, size); + if (alloc2 < (alloc1 + size - (size / 100)) || + alloc2 > (alloc1 + size + (size / 100))) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_ALLOC size: %d != %d+%d (+/- 1%%)\n", + (int)alloc2, (int)alloc1, size); + return -ERANGE; + } + + if (free2 < (free1 - size - (size / 100)) || + free2 > (free1 - size + (size / 100))) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_FREE size: %d != %d-%d (+/- 1%%)\n", + (int)free2, (int)free1, size); + return -ERANGE; + } + + if (total1 != total2) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_ALLOC | VMEM_FREE not constant: " + "%d != %d\n", (int)total2, (int)total1); + return -ERANGE; + } + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "VMEM_ALLOC within tolerance: ~%d%% (%d/%d)\n", + (int)(((alloc1 + size) - alloc2) * 100 / size), + (int)((alloc1 + size) - alloc2), size); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "VMEM_FREE within tolerance: ~%d%% (%d/%d)\n", + (int)(((free1 - size) - free2) * 100 / size), + (int)((free1 - size) - free2), size); + + return 0; +} + splat_subsystem_t * splat_kmem_init(void) { @@ -1067,6 +1146,8 @@ splat_kmem_init(void) SPLAT_KMEM_TEST10_ID, splat_kmem_test10); SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC, SPLAT_KMEM_TEST11_ID, splat_kmem_test11); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST12_NAME, SPLAT_KMEM_TEST12_DESC, + SPLAT_KMEM_TEST12_ID, splat_kmem_test12); return sub; } @@ -1075,6 +1156,7 @@ void splat_kmem_fini(splat_subsystem_t *sub) { ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST12_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID); diff --git a/spl_config.h.in b/spl_config.h.in index f2895054d2..4d11efb3c0 100644 --- a/spl_config.h.in +++ b/spl_config.h.in @@ -132,6 +132,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H +/* free/inactive/active page state is available */ +#undef HAVE_ZONE_STAT_ITEM_FIA + /* Name of package */ #undef PACKAGE