Mercurial > hg > release > icedtea6-1.10
view patches/openjdk/7034464-hugepage.patch @ 2545:2aa0bd4d0654
S7037939, S7034464: Import Andrew Haley's HugePage support patches
author | Deepak Bhole <dbhole@redhat.com> |
---|---|
date | Thu, 05 Jan 2012 10:37:17 -0500 |
parents | |
children |
line wrap: on
line source
# HG changeset patch # User iveresov # Date 1303344724 25200 # Node ID 139667d9836ae218ab06a74d5813a5a700a076c9 # Parent 49a67202bc671d23d719c9e14752d80295a9e8f6 7034464: Support transparent large pages on Linux Summary: Support transparent huge pages on Linux available since 2.6.38 Reviewed-by: iveresov, ysr Contributed-by: aph@redhat.com diff -u -r openjdk.patched/hotspot/src/os/linux/vm/globals_linux.hpp openjdk/hotspot/src/os/linux/vm/globals_linux.hpp --- openjdk.patched/hotspot/src/os/linux/vm/globals_linux.hpp 2011-03-16 02:30:16.000000000 +0000 +++ openjdk/hotspot/src/os/linux/vm/globals_linux.hpp 2011-05-04 11:57:15.083470027 +0100 @@ -29,13 +29,19 @@ // Defines Linux specific flags. They are not available on other platforms. // #define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ - product(bool, UseOprofile, false, \ - "enable support for Oprofile profiler") \ - \ - product(bool, UseLinuxPosixThreadCPUClocks, true, \ - "enable fast Linux Posix clocks where available") -// NB: The default value of UseLinuxPosixThreadCPUClocks may be -// overridden in Arguments::parse_each_vm_init_arg. + product(bool, UseOprofile, false, \ + "enable support for Oprofile profiler") \ + \ + product(bool, UseLinuxPosixThreadCPUClocks, true, \ + "enable fast Linux Posix clocks where available") \ +/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \ + overridden in Arguments::parse_each_vm_init_arg. */ \ + \ + product(bool, UseHugeTLBFS, false, \ + "Use MAP_HUGETLB for large pages") \ + \ + product(bool, UseSHM, false, \ + "Use SYSV shared memory for large pages") // // Defines Linux-specific default values. The flags are available on all diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-03 14:32:54.285722656 +0100 +++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-09 14:31:51.358502774 +0100 @@ -2495,16 +2495,40 @@ return res != (uintptr_t) MAP_FAILED; } +// Define MAP_HUGETLB here so we can build HotSpot on old systems. +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif + +// Define MADV_HUGEPAGE here so we can build HotSpot on old systems. +#ifndef MADV_HUGEPAGE +#define MADV_HUGEPAGE 14 +#endif + bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, bool exec) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + uintptr_t res = + (uintptr_t) ::mmap(addr, size, prot, + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + return res != (uintptr_t) MAP_FAILED; + } + return commit_memory(addr, size, exec); } -void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { } +void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + // We don't check the return value: madvise(MADV_HUGEPAGE) may not + // be supported or the memory may already be backed by huge pages. + ::madvise(addr, bytes, MADV_HUGEPAGE); + } +} void os::free_memory(char *addr, size_t bytes) { - ::mmap(addr, bytes, PROT_READ | PROT_WRITE, - MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); + ::madvise(addr, bytes, MADV_DONTNEED); } void os::numa_make_global(char *addr, size_t bytes) { @@ -2870,7 +2894,16 @@ static size_t _large_page_size = 0; bool os::large_page_init() { - if (!UseLargePages) return false; + if (!UseLargePages) { + UseHugeTLBFS = false; + UseSHM = false; + return false; + } + + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) { + // Our user has not expressed a preference, so we'll try both. + UseHugeTLBFS = UseSHM = true; + } if (LargePageSizeInBytes) { _large_page_size = LargePageSizeInBytes; @@ -2915,6 +2948,9 @@ } } + // print a warning if any large page related flag is specified on command line + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); + const size_t default_page_size = (size_t)Linux::page_size(); if (_large_page_size > default_page_size) { _page_sizes[0] = _large_page_size; @@ -2922,6 +2958,14 @@ _page_sizes[2] = 0; } + UseHugeTLBFS = UseHugeTLBFS && + Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size); + + if (UseHugeTLBFS) + UseSHM = false; + + UseLargePages = UseHugeTLBFS || UseSHM; + // Large page support is available on 2.6 or newer kernel, some vendors // (e.g. Redhat) have backported it to their 2.4 based distributions. // We optimistically assume the support is available. If later it turns out @@ -2936,7 +2980,7 @@ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. - assert(UseLargePages, "only for large pages"); + assert(UseLargePages && UseSHM, "only for SHM large pages"); key_t key = IPC_PRIVATE; char *addr; @@ -3003,19 +3047,19 @@ return _large_page_size; } -// Linux does not support anonymous mmap with large page memory. The only way -// to reserve large page memory without file backing is through SysV shared -// memory API. The entire memory region is committed and pinned upfront. -// Hopefully this will change in the future... +// HugeTLBFS allows application to commit large page memory on demand; +// with SysV SHM the entire memory region must be allocated as shared +// memory. bool os::can_commit_large_page_memory() { - return false; + return UseHugeTLBFS; } bool os::can_execute_large_page_memory() { - return false; + return UseHugeTLBFS; } // Reserve memory at an arbitrary address, only if that area is +// Reserve memory at an arbitrary address, only if that area is // available (and not reserved for something else). char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) { @@ -5009,6 +5053,43 @@ // JSR166 // ------------------------------------------------------- +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, + -1, 0); + + if (p != (void *) -1) { + // We don't know if this really is a huge page or not. + FILE *fp = fopen("/proc/self/maps", "r"); + if (fp) { + while (!feof(fp)) { + char chars[257]; + long x = 0; + if (fgets(chars, sizeof(chars), fp)) { + if (sscanf(chars, "%lx-%*lx", &x) == 1 + && x == (long)p) { + if (strstr (chars, "hugepage")) { + result = true; + break; + } + } + } + } + fclose(fp); + } + munmap (p, page_size); + if (result) + return true; + } + + if (warn) { + warning("HugeTLBFS is not supported by the operating system."); + } + + return result; +} + /* * The solaris and linux implementations of park/unpark are fairly * conservative for now, but can be improved. They currently use a diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-03 14:32:53.972649764 +0100 +++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-04 11:57:15.085470494 +0100 @@ -117,6 +117,10 @@ # include <inttypes.h> # include <sys/ioctl.h> +#if __x86_64__ +#include <asm/vsyscall.h> +#endif + #define MAX_PATH (2 * K) // for timer info max values which include all bits @@ -2491,16 +2495,40 @@ return res != (uintptr_t) MAP_FAILED; } +// Define MAP_HUGETLB here so we can build HotSpot on old systems. +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif + +// Define MADV_HUGEPAGE here so we can build HotSpot on old systems. +#ifndef MADV_HUGEPAGE +#define MADV_HUGEPAGE 14 +#endif + bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, bool exec) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + uintptr_t res = + (uintptr_t) ::mmap(addr, size, prot, + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + return res != (uintptr_t) MAP_FAILED; + } + return commit_memory(addr, size, exec); } -void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { } +void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + // We don't check the return value: madvise(MADV_HUGEPAGE) may not + // be supported or the memory may already be backed by huge pages. + ::madvise(addr, bytes, MADV_HUGEPAGE); + } +} void os::free_memory(char *addr, size_t bytes) { - ::mmap(addr, bytes, PROT_READ | PROT_WRITE, - MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); + ::madvise(addr, bytes, MADV_DONTNEED); } void os::numa_make_global(char *addr, size_t bytes) { @@ -2544,6 +2572,21 @@ return end; } +static int sched_getcpu_syscall(void) { + unsigned int cpu; + int retval = -1; + +#if __x86_64__ + typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache); + vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu); + retval = vgetcpu(&cpu, NULL, NULL); +#elif __i386__ + retval = syscall(SYS_getcpu, &cpu, NULL, NULL); +#endif + + return (retval == -1) ? retval : cpu; +} + extern "C" void numa_warn(int number, char *where, ...) { } extern "C" void numa_error(char *where) { } @@ -2565,6 +2608,10 @@ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, dlsym(RTLD_DEFAULT, "sched_getcpu"))); + // If it's not, try a direct syscall. + if (sched_getcpu() == -1) + set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, (void*)&sched_getcpu_syscall)); + if (sched_getcpu() != -1) { // Does it work? void *handle = dlopen("libnuma.so.1", RTLD_LAZY); if (handle != NULL) { @@ -2847,7 +2894,16 @@ static size_t _large_page_size = 0; bool os::large_page_init() { - if (!UseLargePages) return false; + if (!UseLargePages) { + UseHugeTLBFS = false; + UseSHM = false; + return false; + } + + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) { + // Our user has not expressed a preference, so we'll try both. + UseHugeTLBFS = UseSHM = true; + } if (LargePageSizeInBytes) { _large_page_size = LargePageSizeInBytes; @@ -2892,6 +2948,9 @@ } } + // print a warning if any large page related flag is specified on command line + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); + const size_t default_page_size = (size_t)Linux::page_size(); if (_large_page_size > default_page_size) { _page_sizes[0] = _large_page_size; @@ -2899,6 +2958,14 @@ _page_sizes[2] = 0; } + UseHugeTLBFS = UseHugeTLBFS && + Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size); + + if (UseHugeTLBFS) + UseSHM = false; + + UseLargePages = UseHugeTLBFS || UseSHM; + // Large page support is available on 2.6 or newer kernel, some vendors // (e.g. Redhat) have backported it to their 2.4 based distributions. // We optimistically assume the support is available. If later it turns out @@ -2913,7 +2980,7 @@ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. - assert(UseLargePages, "only for large pages"); + assert(UseLargePages && UseSHM, "only for SHM large pages"); key_t key = IPC_PRIVATE; char *addr; @@ -2980,19 +3047,19 @@ return _large_page_size; } -// Linux does not support anonymous mmap with large page memory. The only way -// to reserve large page memory without file backing is through SysV shared -// memory API. The entire memory region is committed and pinned upfront. -// Hopefully this will change in the future... +// HugeTLBFS allows application to commit large page memory on demand; +// with SysV SHM the entire memory region must be allocated as shared +// memory. bool os::can_commit_large_page_memory() { - return false; + return UseHugeTLBFS; } bool os::can_execute_large_page_memory() { - return false; + return UseHugeTLBFS; } // Reserve memory at an arbitrary address, only if that area is +// Reserve memory at an arbitrary address, only if that area is // available (and not reserved for something else). char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) { @@ -4081,6 +4148,23 @@ UseNUMA = false; } } + // With SHM large pages we cannot uncommit a page, so there's not way + // we can make the adaptive lgrp chunk resizing work. If the user specified + // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and + // disable adaptive resizing. + if (UseNUMA && UseLargePages && UseSHM) { + if (!FLAG_IS_DEFAULT(UseNUMA)) { + if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) { + UseLargePages = false; + } else { + warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing"); + UseAdaptiveSizePolicy = false; + UseAdaptiveNUMAChunkSizing = false; + } + } else { + UseNUMA = false; + } + } if (!UseNUMA && ForceNUMA) { UseNUMA = true; } @@ -4986,6 +5070,43 @@ // JSR166 // ------------------------------------------------------- +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, + -1, 0); + + if (p != (void *) -1) { + // We don't know if this really is a huge page or not. + FILE *fp = fopen("/proc/self/maps", "r"); + if (fp) { + while (!feof(fp)) { + char chars[257]; + long x = 0; + if (fgets(chars, sizeof(chars), fp)) { + if (sscanf(chars, "%lx-%*lx", &x) == 1 + && x == (long)p) { + if (strstr (chars, "hugepage")) { + result = true; + break; + } + } +<+ } + } + fclose(fp); + } + munmap (p, page_size); + if (result) + return true; + } + + if (warn) { + warning("HugeTLBFS is not supported by the operating system."); + } + + return result; +} + /* * The solaris and linux implementations of park/unpark are fairly * conservative for now, but can be improved. They currently use a diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp 2011-03-16 02:30:16.000000000 +0000 +++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp 2011-05-04 11:57:15.088471194 +0100 @@ -86,6 +86,9 @@ static void rebuild_cpu_to_node_map(); static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; } + + static bool hugetlbfs_sanity_check(bool warn, size_t page_size); + public: static void init_thread_fpu_state(); static int get_fpu_control_word();