Mercurial > hg > icedtea8-forest > hotspot
changeset 10794:fd3484fadbe3
8240124: Better VM Interning
Reviewed-by: mbalao, andrew
author | vkempik |
---|---|
date | Wed, 23 Sep 2020 15:18:53 +0300 |
parents | cb1e375e88a9 |
children | 8adf45218add |
files | src/share/vm/classfile/altHashing.cpp src/share/vm/classfile/altHashing.hpp src/share/vm/classfile/symbolTable.cpp src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp src/share/vm/memory/filemap.cpp src/share/vm/oops/oop.cpp src/share/vm/oops/symbol.cpp |
diffstat | 8 files changed, 314 insertions(+), 256 deletions(-) [+] |
line wrap: on
line diff
--- a/src/share/vm/classfile/altHashing.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/classfile/altHashing.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,12 +22,29 @@ * */ +/* + * halfsiphash code adapted from reference implementation + * (https://github.com/veorq/SipHash/blob/master/halfsiphash.c) + * which is distributed with the following copyright: + * + * SipHash reference C implementation + * + * Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> + * + * To the extent possible under law, the author(s) have dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. This software is distributed without any warranty. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with this software. If not, see + * <http://creativecommons.org/publicdomain/zero/1.0/>. + */ + #include "precompiled.hpp" #include "classfile/altHashing.hpp" -#include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" #include "oops/markOop.hpp" -#include "runtime/thread.hpp" +#include "runtime/os.hpp" // Get the hash code of the classes mirror if it exists, otherwise just // return a random number, which is one of the possible hash code used for @@ -39,266 +56,284 @@ } // Seed value used for each alternative hash calculated. -juint AltHashing::compute_seed() { - jlong nanos = os::javaTimeNanos(); - jlong now = os::javaTimeMillis(); - int SEED_MATERIAL[8] = { - (int) object_hash(SystemDictionary::String_klass()), - (int) object_hash(SystemDictionary::System_klass()), - (int) os::random(), // current thread isn't a java thread - (int) (((julong)nanos) >> 32), - (int) nanos, - (int) (((julong)now) >> 32), - (int) now, - (int) (os::javaTimeNanos() >> 2) +uint64_t AltHashing::compute_seed() { + uint64_t nanos = os::javaTimeNanos(); + uint64_t now = os::javaTimeMillis(); + uint32_t SEED_MATERIAL[8] = { + (uint32_t) object_hash(SystemDictionary::String_klass()), + (uint32_t) object_hash(SystemDictionary::System_klass()), + (uint32_t) os::random(), // current thread isn't a java thread + (uint32_t) (((uint64_t)nanos) >> 32), + (uint32_t) nanos, + (uint32_t) (((uint64_t)now) >> 32), + (uint32_t) now, + (uint32_t) (os::javaTimeNanos() >> 2) }; - return murmur3_32(SEED_MATERIAL, 8); + return halfsiphash_64(SEED_MATERIAL, 8); +} + +// utility function copied from java/lang/Integer +static uint32_t Integer_rotateLeft(uint32_t i, int distance) { + return (i << distance) | (i >> (32 - distance)); } +static void halfsiphash_rounds(uint32_t v[4], int rounds) { + while (rounds-- > 0) { + v[0] += v[1]; + v[1] = Integer_rotateLeft(v[1], 5); + v[1] ^= v[0]; + v[0] = Integer_rotateLeft(v[0], 16); + v[2] += v[3]; + v[3] = Integer_rotateLeft(v[3], 8); + v[3] ^= v[2]; + v[0] += v[3]; + v[3] = Integer_rotateLeft(v[3], 7); + v[3] ^= v[0]; + v[2] += v[1]; + v[1] = Integer_rotateLeft(v[1], 13); + v[1] ^= v[2]; + v[2] = Integer_rotateLeft(v[2], 16); + } + } -// Murmur3 hashing for Symbol -juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) { - juint h1 = seed; +static void halfsiphash_adddata(uint32_t v[4], uint32_t newdata, int rounds) { + v[3] ^= newdata; + halfsiphash_rounds(v, rounds); + v[0] ^= newdata; +} + +static void halfsiphash_init32(uint32_t v[4], uint64_t seed) { + v[0] = seed & 0xffffffff; + v[1] = seed >> 32; + v[2] = 0x6c796765 ^ v[0]; + v[3] = 0x74656462 ^ v[1]; +} + +static void halfsiphash_init64(uint32_t v[4], uint64_t seed) { + halfsiphash_init32(v, seed); + v[1] ^= 0xee; +} + +static uint64_t halfsiphash_finish64(uint32_t v[4], int rounds) { + uint64_t rv; + v[2] ^= 0xee; + halfsiphash_rounds(v, rounds); + rv = v[1] ^ v[3]; + v[1] ^= 0xdd; + halfsiphash_rounds(v, rounds); + rv |= (uint64_t)(v[1] ^ v[3]) << 32; + return rv; +} + +// HalfSipHash-2-4 (64-bit output) for Symbols +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const int8_t* data, int len) { + uint32_t v[4]; + uint32_t newdata; + int off = 0; int count = len; - int offset = 0; + halfsiphash_init64(v, seed); // body while (count >= 4) { - juint k1 = (data[offset] & 0x0FF) - | (data[offset + 1] & 0x0FF) << 8 - | (data[offset + 2] & 0x0FF) << 16 - | data[offset + 3] << 24; + // Avoid sign extension with 0x0ff + newdata = (data[off] & 0x0FF) + | (data[off + 1] & 0x0FF) << 8 + | (data[off + 2] & 0x0FF) << 16 + | data[off + 3] << 24; count -= 4; - offset += 4; + off += 4; - k1 *= 0xcc9e2d51; - k1 = Integer_rotateLeft(k1, 15); - k1 *= 0x1b873593; - - h1 ^= k1; - h1 = Integer_rotateLeft(h1, 13); - h1 = h1 * 5 + 0xe6546b64; + halfsiphash_adddata(v, newdata, 2); } // tail + newdata = ((uint32_t)len) << 24; // (Byte.SIZE / Byte.SIZE); if (count > 0) { - juint k1 = 0; - switch (count) { case 3: - k1 ^= (data[offset + 2] & 0xff) << 16; + newdata |= (data[off + 2] & 0x0ff) << 16; // fall through case 2: - k1 ^= (data[offset + 1] & 0xff) << 8; + newdata |= (data[off + 1] & 0x0ff) << 8; // fall through case 1: - k1 ^= (data[offset] & 0xff); + newdata |= (data[off] & 0x0ff); // fall through - default: - k1 *= 0xcc9e2d51; - k1 = Integer_rotateLeft(k1, 15); - k1 *= 0x1b873593; - h1 ^= k1; } } - // finalization - h1 ^= len; + halfsiphash_adddata(v, newdata, 2); - // finalization mix force all bits of a hash block to avalanche - h1 ^= h1 >> 16; - h1 *= 0x85ebca6b; - h1 ^= h1 >> 13; - h1 *= 0xc2b2ae35; - h1 ^= h1 >> 16; - - return h1; + // finalization + return halfsiphash_finish64(v, 4); } -// Murmur3 hashing for Strings -juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) { - juint h1 = seed; - +// HalfSipHash-2-4 (64-bit output) for Strings +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint16_t* data, int len) { + uint32_t v[4]; + uint32_t newdata; int off = 0; int count = len; + halfsiphash_init64(v, seed); + // body while (count >= 2) { - jchar d1 = data[off++] & 0xFFFF; - jchar d2 = data[off++]; - juint k1 = (d1 | d2 << 16); + uint16_t d1 = data[off++] & 0x0FFFF; + uint16_t d2 = data[off++]; + newdata = (d1 | d2 << 16); count -= 2; - k1 *= 0xcc9e2d51; - k1 = Integer_rotateLeft(k1, 15); - k1 *= 0x1b873593; - - h1 ^= k1; - h1 = Integer_rotateLeft(h1, 13); - h1 = h1 * 5 + 0xe6546b64; + halfsiphash_adddata(v, newdata, 2); } // tail - + newdata = ((uint32_t)len * 2) << 24; // (Character.SIZE / Byte.SIZE); if (count > 0) { - juint k1 = (juint)data[off]; - - k1 *= 0xcc9e2d51; - k1 = Integer_rotateLeft(k1, 15); - k1 *= 0x1b873593; - h1 ^= k1; + newdata |= (uint32_t)data[off]; } + halfsiphash_adddata(v, newdata, 2); // finalization - h1 ^= len * 2; // (Character.SIZE / Byte.SIZE); - - // finalization mix force all bits of a hash block to avalanche - h1 ^= h1 >> 16; - h1 *= 0x85ebca6b; - h1 ^= h1 >> 13; - h1 *= 0xc2b2ae35; - h1 ^= h1 >> 16; - - return h1; + return halfsiphash_finish64(v, 4); } -// Hash used for the seed. -juint AltHashing::murmur3_32(juint seed, const int* data, int len) { - juint h1 = seed; +// HalfSipHash-2-4 (64-bit output) for integers (used to create seed) +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint32_t* data, int len) { + uint32_t v[4]; int off = 0; int end = len; + halfsiphash_init64(v, seed); + // body while (off < end) { - juint k1 = (juint)data[off++]; - - k1 *= 0xcc9e2d51; - k1 = Integer_rotateLeft(k1, 15); - k1 *= 0x1b873593; - - h1 ^= k1; - h1 = Integer_rotateLeft(h1, 13); - h1 = h1 * 5 + 0xe6546b64; + halfsiphash_adddata(v, (uint32_t)data[off++], 2); } // tail (always empty, as body is always 32-bit chunks) // finalization - - h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE); - - // finalization mix force all bits of a hash block to avalanche - h1 ^= h1 >> 16; - h1 *= 0x85ebca6b; - h1 ^= h1 >> 13; - h1 *= 0xc2b2ae35; - h1 ^= h1 >> 16; - - return h1; + halfsiphash_adddata(v, ((uint32_t)len * 4) << 24, 2); // (Integer.SIZE / Byte.SIZE); + return halfsiphash_finish64(v, 4); } -juint AltHashing::murmur3_32(const int* data, int len) { - return murmur3_32(0, data, len); +// HalfSipHash-2-4 (64-bit output) for integers (used to create seed) +uint64_t AltHashing::halfsiphash_64(const uint32_t* data, int len) { + return halfsiphash_64((uint64_t)0, data, len); } #ifndef PRODUCT -// Overloaded versions for internal test. -juint AltHashing::murmur3_32(const jbyte* data, int len) { - return murmur3_32(0, data, len); -} + void AltHashing::testHalfsiphash_64_ByteArray() { + // printf("testHalfsiphash_64_CharArray\n"); + const int factor = 4; -juint AltHashing::murmur3_32(const jchar* data, int len) { - return murmur3_32(0, data, len); -} + int8_t vector[256]; + int8_t hashes[factor * 256]; + + for (int i = 0; i < 256; i++) { + vector[i] = (int8_t) i; + } -// Internal test for alternate hashing. Translated from JDK version -// test/sun/misc/Hashing.java -static const jbyte ONE_BYTE[] = { (jbyte) 0x80}; -static const jbyte TWO_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81}; -static const jchar ONE_CHAR[] = { (jchar) 0x8180}; -static const jbyte THREE_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82}; -static const jbyte FOUR_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83}; -static const jchar TWO_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382}; -static const jint ONE_INT[] = { (jint) 0x83828180}; -static const jbyte SIX_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85}; -static const jchar THREE_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382, (jchar) 0x8584}; -static const jbyte EIGHT_BYTE[] = { - (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, - (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85, - (jbyte) 0x86, (jbyte) 0x87}; -static const jchar FOUR_CHAR[] = { - (jchar) 0x8180, (jchar) 0x8382, - (jchar) 0x8584, (jchar) 0x8786}; + // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} + for (int i = 0; i < 256; i++) { + uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i); + hashes[i * factor] = (int8_t) hash; + hashes[i * factor + 1] = (int8_t)(hash >> 8); + hashes[i * factor + 2] = (int8_t)(hash >> 16); + hashes[i * factor + 3] = (int8_t)(hash >> 24); + } + + // hash to get const result. + uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256); -static const jint TWO_INT[] = { (jint) 0x83828180, (jint) 0x87868584}; + // Value found using reference implementation for the hashes array. + // halfsiphash((const uint8_t*)hashes, factor*256, (const uint8_t *)&k, + // (uint8_t*)&reference, 8); -static const juint MURMUR3_32_X86_CHECK_VALUE = 0xB0F57EE3; + static const uint64_t HALFSIPHASH_64_BYTE_CHECK_VALUE = 0x15a7911e30917ee8; -void AltHashing::testMurmur3_32_ByteArray() { - // printf("testMurmur3_32_ByteArray\n"); - - jbyte vector[256]; - jbyte hashes[4 * 256]; - - for (int i = 0; i < 256; i++) { - vector[i] = (jbyte) i; + assert (HALFSIPHASH_64_BYTE_CHECK_VALUE == final_hash, + err_msg( + "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT, + HALFSIPHASH_64_BYTE_CHECK_VALUE, + final_hash)); } - // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} - for (int i = 0; i < 256; i++) { - juint hash = murmur3_32(256 - i, vector, i); - hashes[i * 4] = (jbyte) hash; - hashes[i * 4 + 1] = (jbyte)(hash >> 8); - hashes[i * 4 + 2] = (jbyte)(hash >> 16); - hashes[i * 4 + 3] = (jbyte)(hash >> 24); + void AltHashing::testHalfsiphash_64_CharArray() { + // printf("testHalfsiphash_64_CharArray\n"); + const int factor = 2; + + uint16_t vector[256]; + uint16_t hashes[factor * 256]; + + for (int i = 0; i < 256; i++) { + vector[i] = (uint16_t) i; + } + + // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} + for (int i = 0; i < 256; i++) { + uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i); + hashes[i * factor] = (uint16_t) hash; + hashes[i * factor + 1] = (uint16_t)(hash >> 16); + } + + // hash to get const result. + uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256); + + // Value found using reference implementation for the hashes array. + // halfsiphash((const uint8_t*)hashes, 2*factor*256, (const uint8_t *)&k, + // (uint8_t*)&reference, 8); + static const uint64_t HALFSIPHASH_64_CHAR_CHECK_VALUE = 0xf392d8a6a9e24103; + + assert(HALFSIPHASH_64_CHAR_CHECK_VALUE == final_hash, + err_msg( + "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT, + HALFSIPHASH_64_CHAR_CHECK_VALUE, + final_hash)); } - // hash to get const result. - juint final_hash = murmur3_32(hashes, 4*256); - - assert (MURMUR3_32_X86_CHECK_VALUE == final_hash, - err_msg( - "Calculated hash result not as expected. Expected %08X got %08X\n", - MURMUR3_32_X86_CHECK_VALUE, - final_hash)); -} + // Test against sample hashes published with the reference implementation: + // https://github.com/veorq/SipHash + void AltHashing::testHalfsiphash_64_FromReference() { + // printf("testHalfsiphash_64_FromReference\n"); -void AltHashing::testEquivalentHashes() { - juint jbytes, jchars, ints; - - // printf("testEquivalentHashes\n"); - - jbytes = murmur3_32(TWO_BYTE, 2); - jchars = murmur3_32(ONE_CHAR, 1); - assert (jbytes == jchars, - err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars)); + const uint64_t seed = 0x0706050403020100; + const uint64_t results[16] = { + 0xc83cb8b9591f8d21, 0xa12ee55b178ae7d5, + 0x8c85e4bc20e8feed, 0x99c7f5ae9f1fc77b, + 0xb5f37b5fd2aa3673, 0xdba7ee6f0a2bf51b, + 0xf1a63fae45107470, 0xb516001efb5f922d, + 0x6c6211d8469d7028, 0xdc7642ec407ad686, + 0x4caec8671cc8385b, 0x5ab1dc27adf3301e, + 0x3e3ea94bc0a8eaa9, 0xe150f598795a4402, + 0x1d5ff142f992a4a1, 0x60e426bf902876d6 + }; + uint32_t vector[16]; - jbytes = murmur3_32(FOUR_BYTE, 4); - jchars = murmur3_32(TWO_CHAR, 2); - ints = murmur3_32(ONE_INT, 1); - assert ((jbytes == jchars) && (jbytes == ints), - err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints)); - - jbytes = murmur3_32(SIX_BYTE, 6); - jchars = murmur3_32(THREE_CHAR, 3); - assert (jbytes == jchars, - err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars)); + for (int i = 0; i < 16; i++) + vector[i] = 0x03020100 + i * 0x04040404; - jbytes = murmur3_32(EIGHT_BYTE, 8); - jchars = murmur3_32(FOUR_CHAR, 4); - ints = murmur3_32(TWO_INT, 2); - assert ((jbytes == jchars) && (jbytes == ints), - err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints)); -} + for (int i = 0; i < 16; i++) { + uint64_t hash = AltHashing::halfsiphash_64(seed, vector, i); + assert(results[i] == hash, + err_msg( + "Calculated hash result not as expected. Round %d: " + "Expected " UINT64_FORMAT_X " got " UINT64_FORMAT_X "\n", + i, + results[i], + hash)); + } + } -// Returns true if the alternate hashcode is correct void AltHashing::test_alt_hash() { - testMurmur3_32_ByteArray(); - testEquivalentHashes(); + testHalfsiphash_64_ByteArray(); + testHalfsiphash_64_CharArray(); + testHalfsiphash_64_FromReference(); } #endif // PRODUCT
--- a/src/share/vm/classfile/altHashing.hpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/classfile/altHashing.hpp Wed Sep 23 15:18:53 2020 +0300 @@ -26,37 +26,30 @@ #define SHARE_VM_CLASSFILE_ALTHASHING_HPP #include "prims/jni.h" -#include "classfile/symbolTable.hpp" +#include "memory/allocation.hpp" /** - * Hashing utilities. - * - * Implementation of Murmur3 hashing. - * This code was translated from src/share/classes/sun/misc/Hashing.java - * code in the JDK. + * Implementation of alternate more secure hashing. */ class AltHashing : AllStatic { - // utility function copied from java/lang/Integer - static juint Integer_rotateLeft(juint i, int distance) { - return (i << distance) | (i >> (32-distance)); - } - static juint murmur3_32(const int* data, int len); - static juint murmur3_32(juint seed, const int* data, int len); + // For the seed computation + static uint64_t halfsiphash_64(const uint32_t* data, int len); + static uint64_t halfsiphash_64(uint64_t seed, const uint32_t* data, int len); + #ifndef PRODUCT + // Hashing functions used for internal testing + static void testHalfsiphash_64_ByteArray(); + static void testHalfsiphash_64_CharArray(); + static void testHalfsiphash_64_FromReference(); + #endif // PRODUCT + public: + static uint64_t compute_seed(); -#ifndef PRODUCT - // Hashing functions used for internal testing - static juint murmur3_32(const jbyte* data, int len); - static juint murmur3_32(const jchar* data, int len); - static void testMurmur3_32_ByteArray(); - static void testEquivalentHashes(); -#endif // PRODUCT - - public: - static juint compute_seed(); - static juint murmur3_32(juint seed, const jbyte* data, int len); - static juint murmur3_32(juint seed, const jchar* data, int len); + // For Symbols + static uint64_t halfsiphash_64(uint64_t seed, const int8_t* data, int len); + // For Strings + static uint64_t halfsiphash_64(uint64_t seed, const uint16_t* data, int len); NOT_PRODUCT(static void test_alt_hash();) }; #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
--- a/src/share/vm/classfile/symbolTable.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/classfile/symbolTable.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -224,7 +224,7 @@ // Pick hashing algorithm. unsigned int SymbolTable::hash_symbol(const char* s, int len) { return use_alternate_hashcode() ? - AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : + AltHashing::halfsiphash_64(seed(), (const int8_t*)s, len) : java_lang_String::hash_code(s, len); } @@ -650,7 +650,7 @@ // Pick hashing algorithm unsigned int StringTable::hash_string(const jchar* s, int len) { - return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : + return use_alternate_hashcode() ? AltHashing::halfsiphash_64(seed(), s, len) : java_lang_String::hash_code(s, len); }
--- a/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -150,7 +150,7 @@ assert(worker_id < _nlists, "Invalid worker id"); entry->set_obj(NULL); - entry->set_hash(0); + entry->set_java_hash(0); if (_cached[worker_id].length() < _max_list_length) { // Cache is not full @@ -215,7 +215,7 @@ uintx G1StringDedupTable::_resize_count = 0; uintx G1StringDedupTable::_rehash_count = 0; -G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) : +G1StringDedupTable::G1StringDedupTable(size_t size, uint64_t hash_seed) : _size(size), _entries(0), _grow_threshold((uintx)(size * _grow_load_factor)), @@ -237,10 +237,14 @@ _table = new G1StringDedupTable(_min_size); } -void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) { +void G1StringDedupTable::add(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list) { G1StringDedupEntry* entry = _entry_cache->alloc(); entry->set_obj(value); - entry->set_hash(hash); + if (use_java_hash()) { + entry->set_java_hash((unsigned int)hash); + } else { + entry->set_alt_hash(hash); + } entry->set_next(*list); *list = entry; _entries++; @@ -255,7 +259,7 @@ void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) { G1StringDedupEntry* entry = *pentry; *pentry = entry->next(); - unsigned int hash = entry->hash(); + uint64_t hash = use_java_hash() ? entry->java_hash() : entry->alt_hash(); size_t index = dest->hash_to_index(hash); G1StringDedupEntry** list = dest->bucket(index); entry->set_next(*list); @@ -270,10 +274,10 @@ value1->length() * sizeof(jchar))))); } -typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash, +typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list, uintx &count) { for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) { - if (entry->hash() == hash) { + if ((use_java_hash() ? entry->java_hash() : entry->alt_hash()) == hash) { typeArrayOop existing_value = entry->obj(); if (equals(value, existing_value)) { // Match found @@ -287,7 +291,7 @@ return NULL; } -typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) { +typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, uint64_t hash) { size_t index = hash_to_index(hash); G1StringDedupEntry** list = bucket(index); uintx count = 0; @@ -311,20 +315,25 @@ return existing_value; } -unsigned int G1StringDedupTable::hash_code(typeArrayOop value) { +unsigned int G1StringDedupTable::java_hash_code(typeArrayOop value) { + assert(use_java_hash(), "Should not use java hash code"); unsigned int hash; int length = value->length(); const jchar* data = (jchar*)value->base(T_CHAR); - if (use_java_hash()) { - hash = java_lang_String::hash_code(data, length); - } else { - hash = AltHashing::murmur3_32(_table->_hash_seed, data, length); - } + hash = java_lang_String::hash_code(data, length); return hash; } +uint64_t G1StringDedupTable::alt_hash_code(typeArrayOop value) { + assert(!use_java_hash(), "Should not use alt hash code"); + + int length = value->length(); + const jbyte* data = (jbyte*)value->base(T_BYTE); + return AltHashing::halfsiphash_64(_table->_hash_seed, (const int8_t*)data, length); +} + void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) { assert(java_lang_String::is_instance(java_string), "Must be a string"); No_Safepoint_Verifier nsv; @@ -338,7 +347,7 @@ return; } - unsigned int hash = 0; + uint64_t hash = 0; if (use_java_hash()) { // Get hash code from cache @@ -347,7 +356,7 @@ if (hash == 0) { // Compute hash - hash = hash_code(value); + hash = alt_hash_code(value); stat.inc_hashed(); } @@ -501,8 +510,9 @@ // destination partitions. finish_rehash() will do a single // threaded transfer of all entries. typeArrayOop value = (typeArrayOop)*p; - unsigned int hash = hash_code(value); - (*entry)->set_hash(hash); + assert(!use_java_hash(), "Should not be using Java hash"); + uint64_t hash = alt_hash_code(value); + (*entry)->set_alt_hash(hash); } // Move to next entry @@ -565,8 +575,14 @@ guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap"); guarantee(!value->is_forwarded(), "Object must not be forwarded"); guarantee(value->is_typeArray(), "Object must be a typeArrayOop"); - unsigned int hash = hash_code(value); - guarantee((*entry)->hash() == hash, "Table entry has inorrect hash"); + uint64_t hash; + if (use_java_hash()) { + hash = (*entry)->java_hash(); + guarantee(java_hash_code(value) == hash, "Table entry has incorrect hash"); + } else { + hash = (*entry)->alt_hash(); + guarantee(alt_hash_code(value) == hash, "Table entry has incorrect hash"); + } guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index"); entry = (*entry)->next_addr(); } @@ -581,7 +597,10 @@ G1StringDedupEntry** entry2 = (*entry1)->next_addr(); while (*entry2 != NULL) { typeArrayOop value2 = (*entry2)->obj(); - guarantee(!equals(value1, value2), "Table entries must not have identical arrays"); + guarantee(value1 != value2, "Table entries must not have the same array"); + if (use_java_hash()) { + guarantee(!equals(value1, value2), "Table entries must not have identical arrays"); + } entry2 = (*entry2)->next_addr(); } entry1 = (*entry1)->next_addr(); @@ -600,7 +619,7 @@ " [Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT "]\n" " [Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT "]\n" " [Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")]\n" - " [Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: 0x%x]\n" + " [Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: " UINT64_FORMAT_X "]\n" " [Age Threshold: " UINTX_FORMAT "]", G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)), _table->_size, _min_size, _max_size,
--- a/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp Wed Sep 23 15:18:53 2020 +0300 @@ -38,8 +38,10 @@ class G1StringDedupEntry : public CHeapObj<mtGC> { private: G1StringDedupEntry* _next; - unsigned int _hash; - typeArrayOop _obj; + uint64_t _hash; + typeArrayOop _obj; + + static const uint64_t java_hash_mask = ((uint64_t)1 << 32) - 1; public: G1StringDedupEntry() : @@ -60,11 +62,19 @@ _next = next; } - unsigned int hash() { + unsigned int java_hash() { + return (unsigned int)(_hash & java_hash_mask); + } + + void set_java_hash(unsigned int hash) { + _hash = hash; + } + + uint64_t alt_hash() { return _hash; } - void set_hash(unsigned int hash) { + void set_alt_hash(uint64_t hash) { _hash = hash; } @@ -119,8 +129,8 @@ // The hash seed also dictates which hash function to use. A // zero hash seed means we will use the Java compatible hash // function (which doesn't use a seed), and a non-zero hash - // seed means we use the murmur3 hash function. - jint _hash_seed; + // seed means we use the alternate and better hash function. + uint64_t _hash_seed; // Constants governing table resize/rehash/cache. static const size_t _min_size; @@ -137,7 +147,7 @@ static uintx _resize_count; static uintx _rehash_count; - G1StringDedupTable(size_t size, jint hash_seed = 0); + G1StringDedupTable(size_t size, uint64_t hash_seed = 0); ~G1StringDedupTable(); // Returns the hash bucket at the given index. @@ -146,12 +156,12 @@ } // Returns the hash bucket index for the given hash code. - size_t hash_to_index(unsigned int hash) { + size_t hash_to_index(uint64_t hash) { return (size_t)hash & (_size - 1); } // Adds a new table entry to the given hash bucket. - void add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list); + void add(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list); // Removes the given table entry from the table. void remove(G1StringDedupEntry** pentry, uint worker_id); @@ -161,15 +171,15 @@ // Returns an existing character array in the given hash bucket, or NULL // if no matching character array exists. - typeArrayOop lookup(typeArrayOop value, unsigned int hash, + typeArrayOop lookup(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list, uintx &count); // Returns an existing character array in the table, or inserts a new // table entry if no matching character array exists. - typeArrayOop lookup_or_add_inner(typeArrayOop value, unsigned int hash); + typeArrayOop lookup_or_add_inner(typeArrayOop value, uint64_t hash); // Thread safe lookup or add of table entry - static typeArrayOop lookup_or_add(typeArrayOop value, unsigned int hash) { + static typeArrayOop lookup_or_add(typeArrayOop value, uint64_t hash) { // Protect the table from concurrent access. Also note that this lock // acts as a fence for _table, which could have been replaced by a new // instance if the table was resized or rehashed. @@ -187,7 +197,8 @@ // Computes the hash code for the given character array, using the // currently active hash function and hash seed. - static unsigned int hash_code(typeArrayOop value); + static unsigned int java_hash_code(typeArrayOop value); + static uint64_t alt_hash_code(typeArrayOop value); static uintx unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t partition_begin,
--- a/src/share/vm/memory/filemap.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/memory/filemap.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -125,13 +125,13 @@ } else { // Get the hash value. Use a static seed because the hash needs to return the same // value over multiple jvm invocations. - unsigned int hash = AltHashing::murmur3_32(8191, (const jbyte*)vm_version, version_len); + uint64_t hash = AltHashing::halfsiphash_64(8191, (const int8_t*)vm_version, version_len); - // Truncate the ident, saving room for the 8 hex character hash value. - strncpy(header_version, vm_version, JVM_IDENT_MAX-9); + // Truncate the ident, saving room for the 16 hex character hash value. + strncpy(header_version, vm_version, JVM_IDENT_MAX-17); - // Append the hash code as eight hex digits. - sprintf(&header_version[JVM_IDENT_MAX-9], "%08x", hash); + // Append the hash code as 16 hex digits. + sprintf(&header_version[JVM_IDENT_MAX-17], "%016" PRIx64, hash); header_version[JVM_IDENT_MAX-1] = 0; // Null terminate. } }
--- a/src/share/vm/oops/oop.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/oops/oop.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -111,7 +111,7 @@ jchar* chars = java_lang_String::as_unicode_string(this, length, THREAD); if (chars != NULL) { // Use alternate hashing algorithm on the string - return AltHashing::murmur3_32(seed, chars, length); + return AltHashing::halfsiphash_64(seed, chars, length); } else { vm_exit_out_of_memory(length, OOM_MALLOC_ERROR, "unable to create Unicode strings for String table rehash"); return 0;
--- a/src/share/vm/oops/symbol.cpp Sat Sep 12 00:09:03 2020 +0300 +++ b/src/share/vm/oops/symbol.cpp Wed Sep 23 15:18:53 2020 +0300 @@ -210,7 +210,7 @@ unsigned int Symbol::new_hash(juint seed) { ResourceMark rm; // Use alternate hashing algorithm on this symbol. - return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length()); + return AltHashing::halfsiphash_64(seed, (const int8_t*)as_C_string(), utf8_length()); } void Symbol::increment_refcount() {