view src/cpu/aarch64/vm/immediate_aarch64.cpp @ 10905:f57189b7648d

8257192: Integrate AArch64 JIT port into 8u 7009641: Don't fail VM when CodeCache is full 8073108: [AArch64] Use x86 and SPARC CPU instructions for GHASH acceleration 8130309: Need to bailout cleanly if creation of stubs fails when codecache is out of space (AArch64 changes) 8131779: AARCH64: add Montgomery multiply intrinsic 8132875: AArch64: Fix error introduced into AArch64 CodeCache by commit for 8130309 8135018: AARCH64: Missing memory barriers for CMS collector 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64 8148328: aarch64: redundant lsr instructions in stub code. 8148783: aarch64: SEGV running SpecJBB2013 8148948: aarch64: generate_copy_longs calls align() incorrectly 8149080: AArch64: Recognise disjoint array copy in stub code 8149365: aarch64: memory copy does not prefetch on backwards copy 8149907: aarch64: use load/store pair instructions in call_stub 8150038: aarch64: make use of CBZ and CBNZ when comparing narrow pointer with zero 8150045: arraycopy causes segfaults in SATB during garbage collection 8150082: aarch64: optimise small array copy 8150229: aarch64: pipeline class for several instructions is not set correctly 8150313: aarch64: optimise array copy using SIMD instructions 8150394: aarch64: add support for 8.1 LSE CAS instructions 8150652: Remove unused code in AArch64 back end 8151340: aarch64: prefetch the destination word for write prior to ldxr/stxr loops. 8151502: optimize pd_disjoint_words and pd_conjoint_words 8151775: aarch64: add support for 8.1 LSE atomic operations 8152537: aarch64: Make use of CBZ and CBNZ when comparing unsigned values with zero. 
8152840: aarch64: improve _unsafe_arraycopy stub routine 8153172: aarch64: hotspot crashes after the 8.1 LSE patch is merged 8153713: aarch64: improve short array clearing using store pair 8153797: aarch64: Add Arrays.fill stub code 8154413: AArch64: Better byte behaviour 8154537: AArch64: some integer rotate instructions are never emitted 8154739: AArch64: TemplateTable::fast_xaccess loads in wrong mode 8155015: Aarch64: bad assert in spill generation code 8155100: AArch64: Relax alignment requirement for byte_map_base 8155612: Aarch64: vector nodes need to support misaligned offset 8155617: aarch64: ClearArray does not use DC ZVA 8155627: Enable SA on AArch64 8155653: TestVectorUnalignedOffset.java not pushed with 8155612 8156731: aarch64: java/util/Arrays/Correct.java fails due to _generic_arraycopy stub routine 8157841: aarch64: prefetch ignores cache line size 8157906: aarch64: some more integer rotate instructions are never emitted 8158913: aarch64: SEGV running Spark terasort 8159052: aarch64: optimise unaligned copies in pd_disjoint_words and pd_conjoint_words 8159063: aarch64: optimise unaligned array copy long 8160748: [AArch64] Inconsistent types for ideal_reg 8161072: AArch64: jtreg compiler/uncommontrap/TestDeoptOOM failure 8161190: AArch64: Fix overflow in immediate cmp instruction 8164113: AArch64: follow-up the fix for 8161598 8165673: AArch64: Fix JNI floating point argument handling 8167200: AArch64: Broken stack pointer adjustment in interpreter 8167421: AArch64: in one core system, fatal error: Illegal threadstate encountered 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt 8168699: Validate special case invocations [AArch64 support] 8168888: Port 8160591: Improve internal array handling to AArch64. 
8170100: AArch64: Crash in C1-compiled code accessing References 8170188: jtreg test compiler/types/TestMeetIncompatibleInterfaceArrays.java causes JVM crash 8170873: PPC64/aarch64: Poor StrictMath performance due to non-optimized compilation 8171537: aarch64: compiler/c1/Test6849574.java generates guarantee failure in C1 8172881: AArch64: assertion failure: the int pressure is incorrect 8173472: AArch64: C1 comparisons with null only use 32-bit instructions 8176100: [AArch64] [REDO][REDO] G1 Needs pre barrier on dereference of weak JNI handles 8177661: Correct ad rule output register types from iRegX to iRegXNoSp 8179954: AArch64: C1 and C2 volatile accesses are not sequentially consistent 8182581: aarch64: fix for crash caused by earlyret of compiled method 8183925: [AArch64] Decouple crash protection from watcher thread 8186325: AArch64: jtreg test hotspot/test/gc/g1/TestJNIWeakG1/TestJNIWeakG1.java SEGV 8187224: aarch64: some inconsistency between aarch64_ad.m4 and aarch64.ad 8189170: [AArch64] Add option to disable stack overflow checking in primordial thread for use with JNI_CreateJavaJVM 8193133: Assertion failure because 0xDEADDEAD can be in-heap 8195685: AArch64 port of 8174962: Better interface invocations 8195859: AArch64: vtableStubs gtest fails after 8174962 8196136: AArch64: Correct register use in patch for JDK-8194686 8196221: AArch64: Mistake in committed patch for JDK-8195859 8199712: [AArch64] Flight Recorder 8203481: Incorrect constraint for unextended_sp in frame:safe_for_sender 8203699: java/lang/invoke/SpecialInterfaceCall fails with SIGILL on aarch64 8205421: AARCH64: StubCodeMark should be placed after alignment 8206163: AArch64: incorrect code generation for StoreCM 8207345: Trampoline generation code reads from uninitialized memory 8207838: AArch64: Float registers incorrectly restored in JNI call 8209413: AArch64: NPE in clhsdb jstack command 8209414: [AArch64] method handle invocation does not respect JVMTI interp_only mode 8209415: Fix 
JVMTI test failure HS202 8209420: Track membars for volatile accesses so they can be properly optimized 8209835: Aarch64: elide barriers on all volatile operations 8210425: [AArch64] sharedRuntimeTrig/sharedRuntimeTrans compiled without optimization 8211064: [AArch64] Interpreter and c1 don't correctly handle jboolean results in native calls 8211233: MemBarNode::trailing_membar() and MemBarNode::leading_membar() need to handle dying subgraphs better 8213134: AArch64: vector shift failed with MaxVectorSize=8 8213419: [AArch64] C2 may hang in MulLNode::Ideal()/MulINode::Ideal() with gcc 8.2.1 8214857: "bad trailing membar" assert failure at memnode.cpp:3220 8215951: AArch64: jtreg test vmTestbase/nsk/jvmti/PopFrame/popframe005 segfaults 8215961: jdk/jfr/event/os/TestCPUInformation.java fails on AArch64 8216350: AArch64: monitor unlock fast path not called 8216989: CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier() does not check for zero length on AARCH64 8217368: AArch64: C2 recursive stack locking optimisation not triggered 8218185: aarch64: missing LoadStore barrier in TemplateTable::putfield_or_static 8219011: Implement MacroAssembler::warn method on AArch64 8219635: aarch64: missing LoadStore barrier in TemplateTable::fast_storefield 8221220: AArch64: Add StoreStore membar explicitly for Volatile Writes in TemplateTable 8221658: aarch64: add necessary predicate for ubfx patterns 8224671: AArch64: mauve System.arraycopy test failure 8224828: aarch64: rflags is not correct after safepoint poll 8224851: AArch64: fix warnings and errors with Clang and GCC 8.3 8224880: AArch64: java/javac error with AllocatePrefetchDistance 8228400: Remove built-in AArch64 simulator 8228406: Superfluous change in chaitin.hpp 8228593: Revert explicit JDK 7 support additions 8228716: Revert InstanceKlass::print_on debug additions 8228718: Revert incorrect backport of JDK-8129757 to 8-aarch64 8228725: AArch64: Purge method call format support 8228747: Revert "unused" 
attribute from test_arraycopy_func 8228767: Revert ResourceMark additions 8228770: Revert development hsdis changes 8229123: Revert build fixes for aarch64/zero 8229124: Revert disassembler.cpp changes 8229145: Revert TemplateTable::bytecode() visibility change 8233839: aarch64: missing memory barrier in NewObjectArrayStub and NewTypeArrayStub 8237512: AArch64: aarch64TestHook leaks a BufferBlob 8246482: Build failures with +JFR -PCH 8247979: aarch64: missing side effect of killing flags for clearArray_reg_reg 8248219: aarch64: missing memory barrier in fast_storefield and fast_accessfield Reviewed-by: shade, aph
author andrew
date Mon, 01 Feb 2021 03:48:36 +0000
parents
children f79e943d15a7
line wrap: on
line source

/*
 * Copyright (c) 2013, Red Hat Inc.
 * All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <stdlib.h>
#include "immediate_aarch64.hpp"

// there are at most 2^13 possible logical immediate encodings
// (the table index is the 13 bit value N:immr:imms); however, some
// combinations of immr and imms are invalid
static const unsigned  LI_TABLE_SIZE = (1 << 13);

// number of entries of InverseLITable that are actually in use,
// i.e. the number of encodings that expanded successfully
static int li_table_entry_count;

// for forward lookup we just use a direct array lookup
// and assume that the client has supplied a valid encoding
// table[encoding] = immediate
static u_int64_t LITable[LI_TABLE_SIZE];

// for reverse lookup we need a sparse map so we store a table of
// immediate and encoding pairs sorted by immediate value

struct li_pair {
  u_int64_t immediate;  // expanded immediate value (the sort/search key)
  u_int32_t encoding;   // index into LITable that expands to immediate
};

// only the first li_table_entry_count entries are populated
static struct li_pair InverseLITable[LI_TABLE_SIZE];

// comparator to sort entries in the inverse table
int compare_immediate_pair(const void *i1, const void *i2)
{
  struct li_pair *li1 = (struct li_pair *)i1;
  struct li_pair *li2 = (struct li_pair *)i2;
  if (li1->immediate < li2->immediate) {
    return -1;
  }
  if (li1->immediate > li2->immediate) {
    return 1;
  }
  return 0;
}

// helper functions used by expandLogicalImmediate

// returns a value whose N least significant bits are set and whose
// other bits are zero, i.e. for i = 1, ... N result<i-1> = 1
//
// N <= 0 yields 0 and N >= 64 yields all ones. The arithmetic is done
// on an explicitly 64 bit operand so that the result does not depend
// on the width of unsigned long (which is only 32 bits on LLP64
// platforms) and so that no shift by 64 or more is ever performed.
static inline u_int64_t ones(int N)
{
  if (N <= 0) {
    return 0;
  }
  if (N >= 64) {
    return ~(u_int64_t)0;
  }
  return ((u_int64_t)1 << N) - 1;
}

/*
 * bit twiddling helpers for instruction decode
 */

// 32 bit mask with bits [hi,...,lo] set
//
// Expects 0 <= lo <= hi <= 31. The full-width case (hi = 31, lo = 0,
// which is also the default) is handled explicitly because shifting
// a 32 bit value by 32 is undefined; an empty range (hi < lo) yields
// 0. All shifts are performed on unsigned operands.
static inline u_int32_t mask32(int hi = 31, int lo = 0)
{
  int nbits = (hi + 1) - lo;
  if (nbits <= 0) {
    return 0;
  }
  u_int32_t field = (nbits >= 32) ? ~(u_int32_t)0
                                  : (((u_int32_t)1 << nbits) - 1);
  return field << lo;
}

// 64 bit mask with bits [hi,...,lo] set
//
// Expects 0 <= lo <= hi <= 63. The full-width case (hi = 63, lo = 0,
// which is also the default) is handled explicitly because shifting
// a 64 bit value by 64 is undefined; an empty range (hi < lo) yields
// 0. The arithmetic uses an explicitly 64 bit unsigned operand so it
// does not depend on the width of long.
static inline u_int64_t mask64(int hi = 63, int lo = 0)
{
  int nbits = (hi + 1) - lo;
  if (nbits <= 0) {
    return 0;
  }
  u_int64_t field = (nbits >= 64) ? ~(u_int64_t)0
                                  : (((u_int64_t)1 << nbits) - 1);
  return field << lo;
}

// keep bits [hi,...,lo] of val in place and clear all other bits
static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0)
{
  const u_int32_t field = mask32(hi, lo);
  return field & val;
}

// keep bits [hi,...,lo] of val in place and clear all other bits
// n.b. hi defaults to 31 here, not 63, so a call that relies on the
// defaults only keeps the low 32 bits
static inline u_int64_t pick64(u_int64_t val, int hi = 31, int lo = 0)
{
  const u_int64_t field = mask64(hi, lo);
  return field & val;
}

// extract bits [hi,...,lo] of val as a value starting at bit 0
static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0)
{
  const u_int32_t in_place = pick32(val, hi, lo);
  return in_place >> lo;
}

// extract bits [hi,...,lo] of val as a value starting at bit 0
static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0)
{
  const u_int64_t in_place = pick64(val, hi, lo);
  return in_place >> lo;
}

// result<0> to val<N>
static inline u_int64_t pickbit(u_int64_t val, int N)
{
  return pickbits64(val, N, N);
}

static inline u_int32_t uimm(u_int32_t val, int hi, int lo)
{
  return pickbits32(val, hi, lo);
}

// SPEC bits(M*N) Replicate(bits(M) x, integer N);
//
// Builds a value by concatenating count copies of the low nbits bits
// of bits; any higher bits of bits are ignored. count == 0 yields 0.
// nbits is expected to be in the range 0 .. 64.
//
// nbits may be 64, in which case the chunk mask is all ones and the
// accumulator must not be shifted: shifting a 64 bit value by 64 is
// undefined. Conceptually the previous contents are shifted out
// completely, so the accumulator is simply cleared instead.

u_int64_t replicate(u_int64_t bits, int nbits, int count)
{
  const int full_width = (nbits >= 64);
  const u_int64_t mask = full_width ? ~(u_int64_t)0
                                    : (((u_int64_t)1 << nbits) - 1);
  const u_int64_t chunk = bits & mask;

  u_int64_t result = 0;
  for (int i = 0; i < count ; i++) {
    // make room for the next copy, then append it
    result = full_width ? 0 : (result << nbits);
    result |= chunk;
  }
  return result;
}

// this function writes the supplied bimm reference and returns a
// boolean to indicate success (1) or fail (0) because an illegal
// encoding must be treated as an UNALLOC instruction
//
// bimm is only written on success; on failure it is left untouched.

// construct the 64 bit immediate value for a logical immediate
// operation from the immN, immr and imms fields of its encoding
//
// NOTE(review): the len/levels/S/R/diff and tmask/wmask computation
// appears to follow the DecodeBitMasks pseudocode of the ARM
// architecture manual -- confirm against the spec before changing it.
int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
                            u_int32_t imms, u_int64_t &bimm)
{
  int len;                  // ought to be <= 6
  u_int32_t levels;         // 6 bits
  u_int32_t tmask_and;      // 6 bits
  u_int32_t wmask_and;      // 6 bits
  u_int32_t tmask_or;       // 6 bits
  u_int32_t wmask_or;       // 6 bits
  u_int64_t imm64;          // 64 bits
  u_int64_t tmask, wmask;   // 64 bits
  u_int32_t S, R, diff;     // 6 bit fields; diff also carries a borrow in bit 6

  if (immN == 1) {
    len = 6; // looks like 7 given the spec above but this cannot be!
  } else {
    // len is the position of the highest set bit of ~imms, looking
    // only at bits 5 down to 1; it stays 0 if none of them is set
    len = 0;
    u_int32_t val = (~imms & 0x3f);
    for (int i = 5; i > 0; i--) {
      if (val & (1 << i)) {
        len = i;
        break;
      }
    }
    if (len < 1) {
      return 0;
    }
    // len2 is the position of the highest set bit of immr, again
    // looking only at bits 5 down to 1 (a clear bit in val2 is a set
    // bit in immr). An encoding is rejected when immr has a bit set
    // at position len or above.
    int len2 = 0;                   // ought to be < len
    u_int32_t val2 = (~immr & 0x3f);
    for (int i = 5; i > 0; i--) {
      if (!(val2 & (1 << i))) {
        len2 = i;
        break;
      }
    }
    if (len2 >= len) {
      return 0;
    }
  }

  // mask with the low len bits set
  levels = (1 << len) - 1;

  // an imms whose low len bits are all ones is not a valid encoding
  if ((imms & levels) == levels) {
    return 0;
  }

  S = imms & levels;
  R = immr & levels;

  // 6 bit arithmetic! S and R are both at most 63, so after the
  // unsigned subtraction bit 6 of diff is set exactly when S < R;
  // it is tested as a borrow flag at the end of the function
  diff = S - R;
  tmask_and = (diff | ~levels) & 0x3f;
  tmask_or = (diff & levels) & 0x3f;
  tmask = 0xffffffffffffffffULL;

  // fold in one bit of tmask_and/tmask_or per iteration: bit i
  // controls a pattern that is 2 * nbits wide (nbits = 2^i) and is
  // replicated across all 64 bits
  for (int i = 0; i < 6; i++) {
    int nbits = 1 << i;
    u_int64_t and_bit = pickbit(tmask_and, i);
    u_int64_t or_bit = pickbit(tmask_or, i);
    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
    u_int64_t and_bits_top = (and_bits_sub << nbits) | ones(nbits);
    u_int64_t or_bits_top = (0 << nbits) | or_bits_sub;

    tmask = ((tmask
              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
  }

  wmask_and = (immr | ~levels) & 0x3f;
  wmask_or = (immr & levels) & 0x3f;

  wmask = 0;

  // same scheme as for tmask, but the controlled half of each
  // 2 * nbits wide pattern is the other one (the and-bits go in the
  // low half and the or-bits in the high half)
  for (int i = 0; i < 6; i++) {
    int nbits = 1 << i;
    u_int64_t and_bit = pickbit(wmask_and, i);
    u_int64_t or_bit = pickbit(wmask_or, i);
    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
    u_int64_t and_bits_top = (ones(nbits) << nbits) | and_bits_sub;
    u_int64_t or_bits_top = (or_bits_sub << nbits) | 0;

    wmask = ((wmask
              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
  }

  // combine the two masks: intersection when the subtraction
  // borrowed (S < R), union otherwise
  if (diff & (1U << 6)) {
    imm64 = tmask & wmask;
  } else {
    imm64 = tmask | wmask;
  }


  bimm = imm64;
  return 1;
}

// constructor to initialise the lookup tables

static void initLITables() __attribute__ ((constructor));
static void initLITables()
{
  li_table_entry_count = 0;
  for (unsigned index = 0; index < LI_TABLE_SIZE; index++) {
    u_int32_t N = uimm(index, 12, 12);
    u_int32_t immr = uimm(index, 11, 6);
    u_int32_t imms = uimm(index, 5, 0);
    if (expandLogicalImmediate(N, immr, imms, LITable[index])) {
      InverseLITable[li_table_entry_count].immediate = LITable[index];
      InverseLITable[li_table_entry_count].encoding = index;
      li_table_entry_count++;
    }
  }
  // now sort the inverse table
  qsort(InverseLITable, li_table_entry_count,
        sizeof(InverseLITable[0]), compare_immediate_pair);
}

// public APIs provided for logical immediate lookup and reverse lookup

u_int64_t logical_immediate_for_encoding(u_int32_t encoding)
{
  return LITable[encoding];
}

u_int32_t encoding_for_logical_immediate(u_int64_t immediate)
{
  struct li_pair pair;
  struct li_pair *result;

  pair.immediate = immediate;

  result = (struct li_pair *)
    bsearch(&pair, InverseLITable, li_table_entry_count,
            sizeof(InverseLITable[0]), compare_immediate_pair);

  if (result) {
    return result->encoding;
  }

  return 0xffffffff;
}

// floating point immediates are encoded in 8 bits
// fpimm[7] = sign bit
// fpimm[6:4] = signed exponent
// fpimm[3:0] = fraction (assuming leading 1)
// i.e. F = s * 1.f * 2^(e - b)
//
// Returns the raw bit pattern of the decoded value: the IEEE double
// pattern when is_dp is non-zero, otherwise the IEEE single pattern
// zero-extended to 64 bits. The single precision result is built
// from a 32 bit pattern explicitly so that the upper 32 bits of the
// return value are always zero rather than whatever happened to be
// in the unwritten half of a shared 64 bit union.

u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp)
{
  const u_int32_t s = (imm8 >> 7) & 0x1;
  const u_int32_t e = (imm8 >> 4) & 0x7;
  const u_int32_t f = imm8 & 0xf;

  // the fp value is s * n/16 * 2^r where n is 16+f; every step below
  // is exact in single precision
  float fpval = (16.0f + (float)f) / 16.0f;

  // n.b. exponent is signed: e = 0..3 scales by 2^(e+1),
  // e = 4..7 scales by 2^-(7-e)
  if (e < 4) {
    for (u_int32_t i = 0; i <= e; i++) {
      fpval *= 2.0f;
    }
  } else {
    for (u_int32_t i = e; i < 7; i++) {
      fpval /= 2.0f;
    }
  }

  if (s) {
    fpval = -fpval;
  }

  // reinterpret the value as its bit pattern via a union of the
  // matching width
  if (is_dp) {
    union {
      double dpval;
      u_int64_t bits;
    } dp;
    dp.dpval = (double)fpval;
    return dp.bits;
  }

  union {
    float spval;
    u_int32_t bits;
  } sp;
  sp.spval = fpval;
  return (u_int64_t)sp.bits;
}

// given a float which is of the form
//
//     s * n/16 * 2^r
//
// where n is 16+f and imm1:s, imm4:f, simm3:r
// return the imm8 result [s:r:f]
//
// No check is made that the float really has this form: only the
// sign bit, the bottom three exponent bits and the top four fraction
// bits are examined, all other bits are ignored.
u_int32_t encoding_for_fp_immediate(float immediate)
{
  // view the float as its 32 bit pattern
  union {
    float as_float;
    u_int32_t as_bits;
  } pun;
  pun.as_float = immediate;
  const u_int32_t raw = pun.as_bits;

  // sign is bit 31
  const u_int32_t sign_field = (raw >> 31) & 0x1;
  // the exponent is bits 30-23 but only its bottom 3 bits (25-23)
  // are used; the remaining exponent bits are not validated here
  const u_int32_t exp_field = (raw >> 23) & 0x7;
  // the fraction is bits 22-0 of which only the top 4 (22-19) are kept
  const u_int32_t frac_field = (raw >> 19) & 0xf;

  return (sign_field << 7) | (exp_field << 4) | frac_field;
}