Mercurial > hg > icedtea8-forest > hotspot
changeset 8679:2ee4407fe4e4 icedtea-3.0.0pre06
Merge aarch64 port to jdk8u60-b24
line wrap: on
line diff
--- a/.hgtags Wed Sep 30 16:43:15 2015 +0100 +++ b/.hgtags Fri Oct 02 04:37:30 2015 +0100 @@ -586,6 +586,9 @@ 6b93bf9ea3ea57ed0fe53cfedb2f9ab912c324e5 jdk8u40-b12 521e269ae1daa9df1cb0835b97aa76bdf340fcb2 hs25.40-b17 86307d47790785398d0695acc361bccaefe25f94 jdk8u40-b13 +b280f4f4f11916e202aaa4d458630d4c26b59e2a jdk8u40-b12-aarch64 +26fc60dd5da8d3f1554fb8f2553f050839a539c6 jdk8u40-b12-aarch64-1262 +d7c03eb8b2c2bc4d34438699f07609ba4c4bca5c jdk8u40-b12-aarch64-1263 4d5dc0d0f8799fafa1135d51d85edd4edd566501 hs25.40-b18 b8ca8ec1daea70f7c0d519e866f9f147ec247055 jdk8u40-b14 eb16b24e2eba9bdf04a9b377bebc2db9f713ff5e jdk8u40-b15 @@ -696,3 +699,6 @@ 878cb0df27c22c6b1e9f4add1eb3da3edc8ab51d jdk8u60-b22 0e4094950cd312c8f95c7f37336606323fe049fe jdk8u60-b23 d89ceecf1bad55e1aee2932b8895d60fc64c15db hs25.60-b23 +fb157d537278cda4150740e27bb57cd8694e15bf jdk8u60-b24 +11098f828fb815a467e77729f2055d6b1575ad3e arch64-jdk8u60-b24 +8ec803e97a0d578eaeaf8375ee295a5928eb546f aarch64-jdk8u60-b24.2
--- a/agent/make/Makefile Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/make/Makefile Fri Oct 02 04:37:30 2015 +0100 @@ -58,11 +58,13 @@ sun.jvm.hotspot.debugger.dummy \ sun.jvm.hotspot.debugger.linux \ sun.jvm.hotspot.debugger.linux.amd64 \ +sun.jvm.hotspot.debugger.linux.aarch64 \ sun.jvm.hotspot.debugger.linux.x86 \ sun.jvm.hotspot.debugger.posix \ sun.jvm.hotspot.debugger.posix.elf \ sun.jvm.hotspot.debugger.proc \ sun.jvm.hotspot.debugger.proc.amd64 \ +sun.jvm.hotspot.debugger.proc.aarch64 \ sun.jvm.hotspot.debugger.proc.sparc \ sun.jvm.hotspot.debugger.proc.x86 \ sun.jvm.hotspot.debugger.remote \ @@ -88,11 +90,13 @@ sun.jvm.hotspot.prims \ sun.jvm.hotspot.runtime \ sun.jvm.hotspot.runtime.amd64 \ +sun.jvm.hotspot.runtime.aarch64 \ sun.jvm.hotspot.runtime.bsd \ sun.jvm.hotspot.runtime.bsd_amd64 \ sun.jvm.hotspot.runtime.bsd_x86 \ sun.jvm.hotspot.runtime.linux \ sun.jvm.hotspot.runtime.linux_amd64 \ +sun.jvm.hotspot.runtime.linux_aarch64 \ sun.jvm.hotspot.runtime.linux_sparc \ sun.jvm.hotspot.runtime.linux_x86 \ sun.jvm.hotspot.runtime.posix \ @@ -143,12 +147,13 @@ sun/jvm/hotspot/debugger/dummy/*.java \ sun/jvm/hotspot/debugger/linux/*.java \ sun/jvm/hotspot/debugger/linux/x86/*.java \ +sun/jvm/hotspot/debugger/linux/aarch64/*.java \ sun/jvm/hotspot/debugger/posix/*.java \ sun/jvm/hotspot/debugger/posix/elf/*.java \ sun/jvm/hotspot/debugger/proc/*.java \ -sun/jvm/hotspot/debugger/proc/amd64/*.java \ sun/jvm/hotspot/debugger/proc/sparc/*.java \ sun/jvm/hotspot/debugger/proc/x86/*.java \ +sun/jvm/hotspot/debugger/proc/aarch64/*.java \ sun/jvm/hotspot/debugger/remote/*.java \ sun/jvm/hotspot/debugger/remote/amd64/*.java \ sun/jvm/hotspot/debugger/remote/sparc/*.java \ @@ -169,11 +174,13 @@ sun/jvm/hotspot/prims/*.java \ sun/jvm/hotspot/runtime/*.java \ sun/jvm/hotspot/runtime/amd64/*.java \ +sun/jvm/hotspot/runtime/aarch64/*.java \ sun/jvm/hotspot/runtime/bsd/*.java \ sun/jvm/hotspot/runtime/bsd_amd64/*.java \ sun/jvm/hotspot/runtime/bsd_x86/*.java \ 
sun/jvm/hotspot/runtime/linux/*.java \ sun/jvm/hotspot/runtime/linux_amd64/*.java \ +sun/jvm/hotspot/runtime/linux_aarch64/*.java \ sun/jvm/hotspot/runtime/linux_sparc/*.java \ sun/jvm/hotspot/runtime/linux_x86/*.java \ sun/jvm/hotspot/runtime/posix/*.java \
--- a/agent/src/os/linux/LinuxDebuggerLocal.c Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/os/linux/LinuxDebuggerLocal.c Fri Oct 02 04:37:30 2015 +0100 @@ -49,6 +49,10 @@ #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" #endif +#ifdef aarch64 +#include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -353,7 +357,7 @@ #define NPRGREG sun_jvm_hotspot_debugger_amd64_AMD64ThreadContext_NPRGREG #endif #ifdef aarch64 -#define NPRGREG 32 +#define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG #endif #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG @@ -462,6 +466,13 @@ #define REG_INDEX(reg) sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_##reg + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(PC)] = gregs.pc; + } #endif /* aarch64 */
--- a/agent/src/os/linux/Makefile Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/os/linux/Makefile Fri Oct 02 04:37:30 2015 +0100 @@ -53,14 +53,15 @@ $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.debugger.x86.X86ThreadContext \ sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ - sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ + sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(GCC) $(CFLAGS) $< -o $@ $(ARCH)/sadis.o: ../../share/native/sadis.c $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.asm.Disassembler $(GCC) $(CFLAGS) $< -o $@ - + $(ARCH)/%.o: %.c $(GCC) $(CFLAGS) $< -o $@
--- a/agent/src/os/linux/libproc.h Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/os/linux/libproc.h Fri Oct 02 04:37:30 2015 +0100 @@ -40,10 +40,6 @@ #include "asm/ptrace.h" #endif -#if defined(aarch64) -#include "asm/ptrace.h" -#endif - /************************************************************************************ 0. This is very minimal subset of Solaris libproc just enough for current application.
--- a/agent/src/share/classes/sun/jvm/hotspot/HSDB.java Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/HSDB.java Fri Oct 02 04:37:30 2015 +0100 @@ -985,19 +985,15 @@ curFrame.getFP(), anno)); } else { - if (VM.getVM().getCPU().equals("x86") || VM.getVM().getCPU().equals("amd64")) { - // For C2, which has null frame pointers on x86/amd64 - CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); - Address sp = curFrame.getSP(); - if (Assert.ASSERTS_ENABLED) { - Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size"); - } - annoPanel.addAnnotation(new Annotation(sp, - sp.addOffsetTo(cb.getFrameSize()), - anno)); - } else { - Assert.that(VM.getVM().getCPU().equals("ia64"), "only ia64 should reach here"); + // For C2, which has null frame pointers on x86/amd64/aarch64 + CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); + Address sp = curFrame.getSP(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size"); } + annoPanel.addAnnotation(new Annotation(sp, + sp.addOffsetTo(cb.getFrameSize()), + anno)); } // Add interpreter frame annotations
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionAARCH64.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionAARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + public boolean isLP64() { + return true; + } + + public boolean isBigEndian() { + return false; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/aarch64/AARCH64ThreadContext.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on aarch64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class AARCH64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/AARCH64. + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. 
However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int FP = 29; + public static final int LR = 30; + public static final int SP = 31; + public static final int PC = 32; + + public static final int NPRGREG = 33; + + private long[] data; + + public AARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation 
to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java Fri Oct 02 04:37:30 2015 +0100 @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +34,8 @@ import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.utilities.*; @@ -106,6 +109,13 @@ Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); if (pc == null) return null; return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); + } else if (cpu.equals("aarch64")) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64CFrame.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +final public class LinuxAARCH64CFrame extends BasicCFrame { + public LinuxAARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxAARCH64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64ThreadContext.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxAARCH64ThreadContext extends AARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxAARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java Fri Oct 02 04:37:30 2015 +0100 @@ -31,9 +31,11 @@ import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.proc.amd64.*; +import sun.jvm.hotspot.debugger.proc.aarch64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.x86.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.utilities.*; @@ -86,6 +88,10 @@ threadFactory = new ProcAMD64ThreadFactory(this); pcRegIndex = AMD64ThreadContext.RIP; fpRegIndex = AMD64ThreadContext.RBP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; } else { try { Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64Thread.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcAARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcAARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. 
+ this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcAARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcAARCH64ThreadContext context = new ProcAARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcAARCH64Thread)) { + return false; + } + + return (((ProcAARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadContext.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadContext extends AARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcAARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadFactory.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcAARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcAARCH64Thread(debugger, id); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64Thread.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteAARCH64Thread extends RemoteThread { + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteAARCH64ThreadContext context = new RemoteAARCH64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadContext.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadContext extends AARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadFactory.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteAARCH64Thread(debugger, id); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java Fri Oct 02 04:37:30 2015 +0100 @@ -35,6 +35,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; import sun.jvm.hotspot.utilities.*; @@ -87,6 +88,8 @@ access = new LinuxAMD64JavaThreadPDAccess(); } else if (cpu.equals("sparc")) { access = new LinuxSPARCJavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64CurrentFrameGuess.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; + +/** <P> Should be able to be used on all aarch64 platforms we support + (Linux/aarch64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is an AARCH64ThreadContext; output is SP, FP, + and PC for an AARCH64Frame. Instantiation of the AARCH64Frame is + left to the caller, since we may need to subclass AARCH64Frame to + support signal handler frames on Unix platforms. 
</P> + + <P> Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at. </P> */ + +public class AARCH64CurrentFrameGuess { + private AARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") + != null; + + public AARCH64CurrentFrameGuess(AARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. 
Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new AARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from AARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from AARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. 
+ + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + setValues(sp, fp, null); + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct AARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64Frame.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,555 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the aarch64 family of CPUs. 
*/ + +public class AARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -8; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(29); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + 
INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private AARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, 
unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + AARCH64Frame frame = new AARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof AARCH64Frame)) { + return false; + } + + AARCH64Frame other = (AARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + AARCH64RegisterMap map = (AARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. 
The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new AARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + AARCH64JavaCallWrapper jcw = (AARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + AARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. 
The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. 
+ + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new AARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(AARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated AARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. 
+ updateMapWithSavedLink(map, savedFPAddr); + } + + return new AARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new 
BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new AARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64JavaCallWrapper.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class AARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public AARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64RegisterMap.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +/** RegisterMap subclass for AArch64. It keeps no platform-dependent (PD) state: every PD hook below is an empty method and getLocationPD always returns null. */ public class AARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public AARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + /** Copy constructor; forwards the source map to the superclass. Used by clone(). */ protected AARCH64RegisterMap(RegisterMap map) { + super(map); + } + + /** Returns a new AARCH64RegisterMap built from this map through the protected copy constructor. */ public Object clone() { + AARCH64RegisterMap retval = new AARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_aarch64/LinuxAARCH64JavaThreadPDAccess.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_aarch64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Linux/AArch64 implementation of JavaThreadPDAccess: reads the last Java frame pointer from a JavaThread's frame anchor, builds AARCH64Frame and AARCH64RegisterMap objects, and resolves the debugger ThreadProxy for a JavaThread address. */ public class LinuxAARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm (NOTE(review): the constant is passed to guesser.run() in getCurrentFrameGuess, so "unneeded" may be stale - confirm) + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + /* Defer the type-database lookups until the VM notifies its initialized-observers. */ VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + /** Caches the field accessors used below: JavaThread "_osthread", JavaFrameAnchor "_last_Java_fp" and OSThread "_thread_id". */ private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + /** Returns _last_Java_fp read at addr plus the offset of JavaThread's anchor field. */ public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + /** Always returns null in this implementation. */ public Address getLastJavaPC(Address addr) { + return null; + } + + /** Always returns null in this implementation. */ public Address getBaseOfStackPointer(Address addr) { + return null; + } + + /** Builds an AARCH64Frame from the thread's last Java SP and FP; returns null when the thread reports no last Java FP. The addr argument is not used. */ public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new AARCH64Frame(thread.getLastJavaSP(), fp); + } + + /** Creates a fresh AARCH64RegisterMap for the thread. */ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new AARCH64RegisterMap(thread, updateMap); +
} + + /** Runs AARCH64CurrentFrameGuess on the thread's register context. Returns null when run() reports failure; otherwise a frame from the guessed SP/FP, including the PC when the guesser supplies one. */ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + AARCH64CurrentFrameGuess guesser = new AARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new AARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new AARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + /** Prints the ThreadProxy resolved for addr to tty. */ public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + /** Prints "Thread id: " followed by the thread proxy for threadAddr. */ public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + /** Returns the SP register of the thread's AARCH64ThreadContext as an address. */ public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(AARCH64ThreadContext.SP); + } + + /** Resolves the debugger ThreadProxy for the JavaThread at addr through the address of its OSThread's _thread_id field. */ public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. + // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java Wed Sep 30 16:43:15 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java Fri Oct 02 04:37:30 2015 +0100 @@ -63,7 +63,9 @@ return "sparc"; } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64")) { return cpu; - } else { + } else {if (cpu.equals("aarch64")) { + return cpu; + } else try { Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); AltPlatformInfo api = (AltPlatformInfo)pic.newInstance();
--- a/make/defs.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/defs.make Fri Oct 02 04:37:30 2015 +0100 @@ -322,7 +322,7 @@ endif endif - LP64_ARCH = sparcv9 amd64 ia64 ppc64 aarch64 zero + LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero endif # Required make macro settings for all platforms
--- a/make/linux/makefiles/defs.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/linux/makefiles/defs.make Fri Oct 02 04:37:30 2015 +0100 @@ -305,6 +305,8 @@ $(EXPORT_LIB_DIR)/sa-jdi.jar ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
--- a/make/linux/makefiles/gcc.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/linux/makefiles/gcc.make Fri Oct 02 04:37:30 2015 +0100 @@ -345,7 +345,7 @@ DEBUG_CFLAGS/amd64 = -g DEBUG_CFLAGS/aarch64 = -g DEBUG_CFLAGS/ppc64 = -g - DEBUG_CFLAGS/zero = -g + DEBUG_CFLAGS/zero = -g DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH)) ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),) ifeq ($(USE_CLANG), true) @@ -361,7 +361,7 @@ FASTDEBUG_CFLAGS/amd64 = -g FASTDEBUG_CFLAGS/aarch64 = -g FASTDEBUG_CFLAGS/ppc64 = -g - FASTDEBUG_CFLAGS/zero = -g + FASTDEBUG_CFLAGS/zero = -g FASTDEBUG_CFLAGS += $(FASTDEBUG_CFLAGS/$(BUILDARCH)) ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),) ifeq ($(USE_CLANG), true) @@ -376,7 +376,7 @@ OPT_CFLAGS/amd64 = -g OPT_CFLAGS/aarch64 = -g OPT_CFLAGS/ppc64 = -g - OPT_CFLAGS/zero = -g + OPT_CFLAGS/zero = -g OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH)) ifeq ($(OPT_CFLAGS/$(BUILDARCH)),) ifeq ($(USE_CLANG), true)
--- a/make/linux/makefiles/sa.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/linux/makefiles/sa.make Fri Oct 02 04:37:30 2015 +0100 @@ -62,8 +62,7 @@ all: if [ -d $(AGENT_DIR) -a "$(SRCARCH)" != "ia64" \ - -a "$(SRCARCH)" != "zero" \ - -a "$(SRCARCH)" != "aarch64" ] ; then \ + -a "$(SRCARCH)" != "zero" ] ; then \ $(MAKE) -f sa.make $(GENERATED)/sa-jdi.jar; \ fi @@ -109,6 +108,7 @@ $(QUIETLY) $(REMOTE) $(RUN.JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler
--- a/make/linux/makefiles/saproc.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/linux/makefiles/saproc.make Fri Oct 02 04:37:30 2015 +0100 @@ -63,7 +63,7 @@ # also, we don't build SA on Itanium or zero. ifneq ($(wildcard $(AGENT_DIR)),) -ifneq ($(filter-out ia64 zero aarch64,$(SRCARCH)),) +ifneq ($(filter-out ia64 zero,$(SRCARCH)),) BUILDLIBSAPROC = $(LIBSAPROC) endif endif
--- a/make/linux/makefiles/vm.make Wed Sep 30 16:43:15 2015 +0100 +++ b/make/linux/makefiles/vm.make Fri Oct 02 04:37:30 2015 +0100 @@ -295,7 +295,7 @@ mapfile : $(MAPFILE) vm.def mapfile_ext rm -f $@ awk '{ if ($$0 ~ "INSERT VTABLE SYMBOLS HERE") \ - { system ("cat mapfile_ext"); system ("cat vm.def"); } \ + { system ("cat mapfile_ext"); system ("cat vm.def"); } \ else \ { print $$0 } \ }' > $@ < $(MAPFILE)
--- a/make/sa.files Wed Sep 30 16:43:15 2015 +0100 +++ b/make/sa.files Fri Oct 02 04:37:30 2015 +0100 @@ -43,6 +43,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/compiler/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/x86/*.java \ @@ -52,17 +53,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/x86/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/win32/coff/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/windbg/*.java \ @@ -83,11 +87,13 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/prims/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/aarch64/*.java \ 
$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \
--- a/src/cpu/aarch64/vm/aarch64.ad Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/aarch64.ad Fri Oct 02 04:37:30 2015 +0100 @@ -162,70 +162,165 @@ // the platform ABI treats v8-v15 as callee save). float registers // v16-v31 are SOC as per the platform spec - reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); - reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); - reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); - reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); - reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); - reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); - reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); - reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); - reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); - reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); - reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); - reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); - reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); - reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); - reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); - reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); - reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() ); - reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() ); - reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() ); - reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() ); - reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() ); - reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next()); - reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() ); - reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next()); - reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() ); - reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next()); - reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() ); - reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next()); - reg_def V14 ( SOC, SOE, Op_RegF, 
14, v14->as_VMReg() ); - reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next()); - reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() ); - reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next()); - reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); - reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next()); - reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); - reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next()); - reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); - reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next()); - reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); - reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next()); - reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); - reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next()); - reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); - reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next()); - reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); - reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next()); - reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); - reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next()); - reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); - reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next()); - reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); - reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next()); - reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); - reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next()); - reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); - reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next()); - reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); - reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next()); - reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); - reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next()); - reg_def V30 ( SOC, SOC, Op_RegF, 30, 
v30->as_VMReg() ); - reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next()); - reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); - reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next()); + reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); + reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); + reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); + reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); + + reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); + reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); + reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); + reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); + + reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); + reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); + reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); + reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); + + reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); + reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); + reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); + reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); + + reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); + reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); + reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); + reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); + + reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); + reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); + reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); + reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); + + reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); + reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); + reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); + reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); + + reg_def V7 
( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); + reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); + reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); + reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); + + reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); + reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); + reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); + reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); + + reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); + reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); + reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); + reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); + + reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); + reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); + reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); + reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); + + reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); + reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); + reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); + reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); + + reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); + reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); + reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); + reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); + + reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); + reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); + reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); + reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); + + reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); + reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); + reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); + reg_def 
V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); + + reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); + reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); + reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); + reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); + + reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); + reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); + reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); + reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); + + reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); + reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); + reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); + reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); + + reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); + reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); + reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); + reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); + + reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); + reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); + reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); + reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); + + reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); + reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); + reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); + reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); + + reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); + reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); + reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); + reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); + + reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); + reg_def V22_H( SOC, SOC, Op_RegF, 22, 
v22->as_VMReg()->next() ); + reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); + reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); + + reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); + reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); + reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); + reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); + + reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); + reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); + reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); + reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); + + reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); + reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); + reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); + reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); + + reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); + reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); + reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); + reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); + + reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); + reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); + reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); + reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); + + reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); + reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); + reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); + reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); + + reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); + reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); + reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); + reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); + + reg_def 
V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); + reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); + reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); + reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); + + reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); + reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); + reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); + reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); // ---------------------------- // Special Registers @@ -292,42 +387,42 @@ alloc_class chunk1( // no save - V16, V16_H, - V17, V17_H, - V18, V18_H, - V19, V19_H, - V20, V20_H, - V21, V21_H, - V22, V22_H, - V23, V23_H, - V24, V24_H, - V25, V25_H, - V26, V26_H, - V27, V27_H, - V28, V28_H, - V29, V29_H, - V30, V30_H, - V31, V31_H, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K, // arg registers - V0, V0_H, - V1, V1_H, - V2, V2_H, - V3, V3_H, - V4, V4_H, - V5, V5_H, - V6, V6_H, - V7, V7_H, + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, // non-volatiles - V8, V8_H, - V9, V9_H, - V10, V10_H, - V11, V11_H, - V12, V12_H, - V13, V13_H, - V14, V14_H, - V15, V15_H, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, ); alloc_class chunk2(RFLAGS); @@ -381,6 +476,9 @@ // Singleton 
class for R2 int register reg_class int_r2_reg(R2); +// Singleton class for R3 int register +reg_class int_r3_reg(R3); + // Singleton class for R4 int register reg_class int_r4_reg(R4); @@ -447,7 +545,7 @@ R26 /* R27, */ // heapbase /* R28, */ // thread - /* R29, */ // fp + R29, // fp /* R30, */ // lr /* R31 */ // sp ); @@ -481,7 +579,7 @@ R26, R26_H, /* R27, R27_H, */ // heapbase /* R28, R28_H, */ // thread - /* R29, R29_H, */ // fp + R29, R29_H, // fp /* R30, R30_H, */ // lr /* R31, R31_H */ // sp ); @@ -698,6 +796,98 @@ V31, V31_H ); +// Class for all 64bit vector registers +reg_class vectord_reg( + V0, V0_H, + V1, V1_H, + V2, V2_H, + V3, V3_H, + V4, V4_H, + V5, V5_H, + V6, V6_H, + V7, V7_H, + V8, V8_H, + V9, V9_H, + V10, V10_H, + V11, V11_H, + V12, V12_H, + V13, V13_H, + V14, V14_H, + V15, V15_H, + V16, V16_H, + V17, V17_H, + V18, V18_H, + V19, V19_H, + V20, V20_H, + V21, V21_H, + V22, V22_H, + V23, V23_H, + V24, V24_H, + V25, V25_H, + V26, V26_H, + V27, V27_H, + V28, V28_H, + V29, V29_H, + V30, V30_H, + V31, V31_H +); + +// Class for all 128bit vector registers +reg_class vectorx_reg( + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K +); + +// Class for 128 bit register 
v0 +reg_class v0_reg( + V0, V0_H +); + +// Class for 128 bit register v1 +reg_class v1_reg( + V1, V1_H +); + +// Class for 128 bit register v2 +reg_class v2_reg( + V2, V2_H +); + +// Class for 128 bit register v3 +reg_class v3_reg( + V3, V3_H +); + // Singleton class for condition codes reg_class int_flags(RFLAGS); @@ -772,62 +962,10 @@ } }; - bool followed_by_ordered_store(const Node *barrier); - bool preceded_by_ordered_load(const Node *barrier); - %} source %{ - // AArch64 has load acquire and store release instructions which we - // use for ordered memory accesses, e.g. for volatiles. The ideal - // graph generator also inserts memory barriers around volatile - // accesses, and we don't want to generate both barriers and acq/rel - // instructions. So, when we emit a MemBarAcquire we look back in - // the ideal graph for an ordered load and only emit the barrier if - // we don't find one. - -bool preceded_by_ordered_load(const Node *barrier) { - Node *x = barrier->lookup(TypeFunc::Parms); - - if (! x) - return false; - - if (x->is_DecodeNarrowPtr()) - x = x->in(1); - - if (x->is_Load()) - return ! x->as_Load()->is_unordered(); - - return false; -} - -bool followed_by_ordered_store(const Node *barrier) { - - // Find following mem node. - // - Node *mem_proj = NULL; - for (DUIterator_Fast imax, i = barrier->fast_outs(imax); i < imax; i++) { - mem_proj = barrier->fast_out(i); // Throw out-of-bounds if proj not found - assert(mem_proj->is_Proj(), "only projections here"); - ProjNode *proj = mem_proj->as_Proj(); - if (proj->_con == TypeFunc::Memory && - !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only - break; - } - assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken"); - - // Search behind Proj. - for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) { - Node *x = mem_proj->fast_out(j); - if (x->is_Store() && ! 
x->as_Store()->is_unordered()) { - return true; - } - } - - return false; -} - #define __ _masm. // advance declaratuons for helper functions to convert register @@ -1107,7 +1245,7 @@ } // we have 32 float register * 2 halves - if (reg < 60 + 64) { + if (reg < 60 + 128) { return rc_float; } @@ -1143,258 +1281,128 @@ return 0; // Self copy, no move. } - switch (src_lo_rc) { - case rc_int: - if (dst_lo_rc == rc_int) { // gpr --> gpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ mov(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("mov %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ movw(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("movw %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); + bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && + (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); + if (cbuf) { + MacroAssembler _masm(cbuf); + assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack->stack + assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset"); + if (ireg == Op_VecD) { + __ unspill(rscratch1, true, src_offset); + __ spill(rscratch1, true, dst_offset); + } else { + __ spill_copy128(src_offset, dst_offset); } - } - } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && 
dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ T8B : __ T16B, + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(dst_lo)); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(src_lo)); } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } - } else { // gpr --> stack spill - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ str(as_Register(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("str %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strw(as_Register(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strw %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } + ShouldNotReachHere(); } } - return 4; - case rc_float: - if (dst_lo_rc == rc_int) { // fpr 
--> gpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); + } else if (cbuf) { + MacroAssembler _masm(cbuf); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy + if (is64) { + __ mov(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + MacroAssembler _masm(cbuf); + __ movw(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); } - } - } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); + } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy + if (is64) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + __ 
fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); } - } - } else { // fpr --> stack spill - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strd %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strs %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } + } else { // gpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); } - } - return 4; - case rc_stack: - int src_offset = ra_->reg2offset(src_lo); - if (dst_lo_rc == rc_int) { // stack --> gpr load - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldr(as_Register(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldr %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); + break; + case rc_float: + if (dst_lo_rc == rc_int) { // fpr --> gpr copy + if (is64) { + __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrw(as_Register(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldr %s, [sp, 
%d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } - } - return 4; - } else if (dst_lo_rc == rc_float) { // stack --> fpr load - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldrd %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); + } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy + if (cbuf) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldrs %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } + } else { // fpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + is64 ? 
__ D : __ S, dst_offset); } - return 4; - } else { // stack --> stack copy - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldr(rscratch1, Address(sp, src_offset)); - __ str(rscratch1, Address(sp, dst_offset)); - } else if (st) { - st->print("ldr rscratch1, [sp, %d]\t# mem-mem spill", - src_offset); - st->print("\n\t"); - st->print("str rscratch1, [sp, %d]", - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrw(rscratch1, Address(sp, src_offset)); - __ strw(rscratch1, Address(sp, dst_offset)); - } else if (st) { - st->print("ldrw rscratch1, [sp, %d]\t# mem-mem spill", - src_offset); - st->print("\n\t"); - st->print("strw rscratch1, [sp, %d]", - dst_offset); - } + break; + case rc_stack: + if (dst_lo_rc == rc_int) { // stack --> gpr load + __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); + } else if (dst_lo_rc == rc_float) { // stack --> fpr load + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + is64 ? 
__ D : __ S, src_offset); + } else { // stack --> stack copy + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ unspill(rscratch1, is64, src_offset); + __ spill(rscratch1, is64, dst_offset); } - return 8; + break; + default: + assert(false, "bad rc_class for spill"); + ShouldNotReachHere(); } } - assert(false," bad rc_class for spill "); - Unimplemented(); + if (st) { + st->print("spill "); + if (src_lo_rc == rc_stack) { + st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo)); + } else { + st->print("%s -> ", Matcher::regName[src_lo]); + } + if (dst_lo_rc == rc_stack) { + st->print("[sp, #%d]", ra_->reg2offset(dst_lo)); + } else { + st->print("%s", Matcher::regName[dst_lo]); + } + if (bottom_type()->isa_vect() != NULL) { + st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); + } else { + st->print("\t# spill size = %d", is64 ? 64:32); + } + } + return 0; } @@ -1413,7 +1421,7 @@ } uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { - return implementation(NULL, ra_, true, NULL); + return MachNode::size(ra_); } //============================================================================= @@ -1567,8 +1575,12 @@ // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - // TODO fixme - return 0; + int size = MIN2(16,(int)MaxVectorSize); + // Minimum 2 values in vector + if (size < 2*type2aelembytes(bt)) size = 0; + // But never < 4 + if (size < 4) size = 0; + return size; } // Limits on vector size (number of elements) loaded into vector. @@ -1576,22 +1588,24 @@ return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - int max_size = max_vector_size(bt); - // Min size which can be loaded into vector is 4 bytes. - int size = (type2aelembytes(bt) == 1) ? 4 : 2; - return MIN2(size,max_size); +// For the moment limit the vector size to 8 bytes + int size = 8 / type2aelembytes(bt); + if (size < 2) size = 2; + return size; } // Vector ideal reg. 
const int Matcher::vector_ideal_reg(int len) { - // TODO fixme - return Op_RegD; + switch(len) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; } -// Only lowest bits of xmm reg are used for vector shift count. const int Matcher::vector_shift_count_ideal_reg(int size) { - // TODO fixme - return Op_RegL; + return Op_VecX; } // AES support not yet implemented @@ -1601,9 +1615,7 @@ // x86 supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { - // TODO fixme - // return !AlignVector; // can be changed by flag - return false; + return !AlignVector; // can be changed by flag } // false => size gets scaled to BytesPerLong, ok. @@ -1746,7 +1758,7 @@ } const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return RegMask(); + return FP_REG_mask(); } // helper for encoding java_to_runtime calls on sim @@ -1802,6 +1814,8 @@ typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); +typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + MacroAssembler::SIMD_RegVariant T, const Address &adr); // Used for all non-volatile memory accesses. 
The use of // $mem->opcode() to discover whether this pattern uses sign-extended @@ -1820,6 +1834,8 @@ case INDINDEXSCALEDI2L: case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: + case INDINDEXOFFSETI2L: + case INDINDEXOFFSETI2LN: scale = Address::sxtw(size); break; default: @@ -1867,6 +1883,18 @@ } } + static void loadStore(MacroAssembler masm, mem_vector_insn insn, + FloatRegister reg, MacroAssembler::SIMD_RegVariant T, + int opcode, Register base, int index, int size, int disp) + { + if (index == -1) { + (masm.*insn)(reg, T, Address(base, disp)); + } else { + assert(disp == 0, "unsupported address mode"); + (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size))); + } + } + %} @@ -1998,6 +2026,24 @@ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_strb(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(), @@ -2066,6 +2112,24 @@ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ + FloatRegister 
src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvD(vecD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvQ(vecX src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + // END Non-volatile memory access // this encoding writes the address of the first instruction in the @@ -2135,16 +2199,22 @@ enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlrb); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlrh); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlrw); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} @@ -2235,6 +2305,8 @@ } MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlr); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{ @@ -2245,6 +2317,8 @@ } MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, 
$mem$$disp, rscratch1, stlrw); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{ @@ -2255,6 +2329,8 @@ } MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlr); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); %} // synchronized read/update encodings @@ -2413,16 +2489,13 @@ int disp = $mem$$disp; if (index == -1) { __ prfm(Address(base, disp), PLDL1KEEP); - __ nop(); } else { Register index_reg = as_Register(index); if (disp == 0) { - // __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP); - __ nop(); + __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP); } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP); - __ nop(); } } %} @@ -2440,11 +2513,9 @@ Register index_reg = as_Register(index); if (disp == 0) { __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP); - __ nop(); } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP); - __ nop(); } } %} @@ -2457,7 +2528,6 @@ int disp = $mem$$disp; if (index == -1) { __ prfm(Address(base, disp), PSTL1STRM); - __ nop(); } else { Register index_reg = as_Register(index); if (disp == 0) { @@ -2466,7 +2536,6 @@ } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM); - __ nop(); } } %} @@ -2979,7 +3048,8 @@ __ ldxr(tmp, oop); __ cmp(tmp, disp_hdr); __ br(Assembler::NE, cas_failed); - __ stxr(tmp, box, oop); + // use stlxr to ensure update is immediately visible + __ stlxr(tmp, box, oop); __ cbzw(tmp, cont); __ b(retry_load); } @@ -3028,7 +3098,8 @@ __ ldxr(rscratch1, tmp); __ cmp(disp_hdr, rscratch1); __ br(Assembler::NE, fail); - __ stxr(rscratch1, rthread, tmp); + // use stlxr to ensure update is immediately visible + __ 
stlxr(rscratch1, rthread, tmp); __ cbnzw(rscratch1, retry_load); __ bind(fail); } @@ -3116,7 +3187,8 @@ __ ldxr(tmp, oop); __ cmp(box, tmp); __ br(Assembler::NE, cas_failed); - __ stxr(tmp, disp_hdr, oop); + // use stlxr to ensure update is immediately visible + __ stlxr(tmp, disp_hdr, oop); __ cbzw(tmp, cont); __ b(retry_load); } @@ -3392,6 +3464,16 @@ interface(CONST_INTER); %} +operand immI_le_4() +%{ + predicate(n->get_int() <= 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_31() %{ predicate(n->get_int() == 31); @@ -4116,6 +4198,18 @@ interface(REG_INTER); %} +// Register R3 only +operand iRegI_R3() +%{ + constraint(ALLOC_IN_RC(int_r3_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + + // Register R2 only operand iRegI_R4() %{ @@ -4185,6 +4279,62 @@ interface(REG_INTER); %} +operand vecD() +%{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vecX() +%{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V0() +%{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V1() +%{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V2() +%{ + constraint(ALLOC_IN_RC(v2_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V3() +%{ + constraint(ALLOC_IN_RC(v3_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of signed compare instructions // note that on AArch64 we also use this register as the output for @@ -4311,6 +4461,20 @@ %} %} +operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (ConvI2L ireg)) 
off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -4371,7 +4535,7 @@ %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); - op_cost(INSN_COST); + op_cost(0); format %{ "[$reg, $off]" %} interface(MEMORY_INTER) %{ base($reg); @@ -4441,6 +4605,21 @@ %} %} +operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) %{ predicate(Universe::narrow_oop_shift() == 0); @@ -4692,6 +4871,7 @@ interface(REG_INTER) %} +opclass vmem(indirect, indIndex, indOffI, indOffL); //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify @@ -4703,8 +4883,10 @@ // memory is used to define read/write location for load/store // instruction defs. 
we can turn a memory op into an Address -opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, - indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); +opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, + indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); + + // iRegIorL2I is used for src inputs in rules for 32 bit int (I) // iRegIorL2I is used for src inputs in rules for 32 bit int (I) @@ -4720,7 +4902,6 @@ // the result of the l2i as an iRegI input. That's a shame since the // movw is actually redundant but its not too costly. - opclass iRegIorL2I(iRegI, iRegL2I); //----------PIPELINE----------------------------------------------------------- @@ -4731,17 +4912,14 @@ attributes %{ // ARM instructions are of fixed length fixed_size_instructions; // Fixed size instructions TODO does - // TODO does this relate to how many instructions can be scheduled - // at once? 
just guess 8 for now - max_instructions_per_bundle = 8; // Up to 8 instructions per bundle + max_instructions_per_bundle = 2; // A53 = 2, A57 = 4 // ARM instructions come in 32-bit word units instruction_unit_size = 4; // An instruction is 4 bytes long - // TODO identify correct cache line size just guess 64 for now instruction_fetch_unit_size = 64; // The processor fetches one line instruction_fetch_units = 1; // of 64 bytes // List of nop instructions - //nops( MachNop ); + nops( MachNop ); %} // We don't use an actual pipeline model so don't care about resources @@ -4751,21 +4929,386 @@ //----------RESOURCES---------------------------------------------------------- // Resources are the functional units available to the machine -resources( D0, D1, D2, DECODE = D0 | D1 | D2, - MS0, MS1, MS2, MEM = MS0 | MS1 | MS2, - BR, FPU, - ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2); +resources( INS0, INS1, INS01 = INS0 | INS1, + ALU0, ALU1, ALU = ALU0 | ALU1, + MAC, + DIV, + BRANCH, + LDST, + NEON_FP); //----------PIPELINE DESCRIPTION----------------------------------------------- // Pipeline Description specifies the stages in the machine's pipeline -// Generic P2/P3 pipeline -pipe_desc(S0, S1, S2, S3, S4, S5); +pipe_desc(ISS, EX1, EX2, WR); //----------PIPELINE CLASSES--------------------------------------------------- // Pipeline Classes describe the stages in which input and output are // referenced by the hardware pipeline. +//------- Integer ALU operations -------------------------- + +// Integer ALU reg-reg operation +// Operands needed in EX1, result generated in EX2 +// Eg. ADD x0, x1, x2 +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : EX1(read); + INS01 : ISS; // Dual issue as instruction 0 or 1 + ALU : EX2; +%} + +// Integer ALU reg-reg operation with constant shift +// Shifted register must be available in LATE_ISS instead of EX1 +// Eg. 
ADD x0, x1, x2, LSL #2 +pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg operation with constant shift +// Eg. LSL x0, x1, #shift +pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg-reg operation with variable shift +// Both operands must be available in LATE_ISS instead of EX1 +// Result is available in EX1 instead of EX2 +// Eg. LSLV x0, x1, x2 +pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX1(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX1; +%} + +// Integer ALU reg-reg operation with extract +// As for _vshift above, but result generated in EX2 +// Eg. EXTR x0, x1, x2, #N +pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + src2 : ISS(read); + INS1 : ISS; // Can only dual issue as Instruction 1 + ALU : EX1; +%} + +// Integer ALU reg operation +// Eg. NEG x0, x1 +pipe_class ialu_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : EX2(write); + src : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg mmediate operation +// Eg. ADD x0, x1, #N +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU immediate operation (no source operands) +// Eg. MOV x0, #N +pipe_class ialu_imm(iRegI dst) +%{ + single_instruction; + dst : EX1(write); + INS01 : ISS; + ALU : EX1; +%} + +//------- Compare operation ------------------------------- + +// Compare reg-reg +// Eg. 
CMP x0, x1 +pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + op2 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Compare reg-reg +// Eg. CMP x0, #N +pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +//------- Conditional instructions ------------------------ + +// Conditional no operands +// Eg. CSINC x0, zr, zr, <cond> +pipe_class icond_none(iRegI dst, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 2 operand +// EG. CSEL X0, X1, X2, <cond> +pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src1 : EX1(read); + src2 : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 2 operand +// EG. CSEL X0, X1, X2, <cond> +pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +//------- Multiply pipeline operations -------------------- + +// Multiply reg-reg +// Eg. MUL w0, w1, w2 +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD w0, w1, w2, w3 +pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Eg. 
MUL w0, w1, w2 +pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD w0, w1, w2, w3 +pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +//------- Divide pipeline operations -------------------- + +// Eg. SDIV w0, w1, w2 +pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(8); // Maximum latency for 32 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +// Eg. SDIV x0, x1, x2 +pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(16); // Maximum latency for 64 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +//------- Load pipeline operations ------------------------ + +// Load - prefetch +// Eg. PFRM <mem> +pipe_class iload_prefetch(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, mem +// Eg. LDR x0, <mem> +pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ + single_instruction; + dst : WR(write); + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, reg +// Eg. LDR x0, [sp, x1] +pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : WR(write); + src : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Store pipeline operations ----------------------- + +// Store - zr, mem +// Eg. 
STR zr, <mem> +pipe_class istore_mem(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, mem +// Eg. STR x0, <mem> +pipe_class istore_reg_mem(iRegI src, memory mem) +%{ + single_instruction; + mem : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, reg +// Eg. STR x0, [sp, x1] +pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Store pipeline operations ----------------------- + +// Branch +pipe_class pipe_branch() +%{ + single_instruction; + INS01 : ISS; + BRANCH : EX1; +%} + +// Conditional branch +pipe_class pipe_branch_cond(rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +// Compare & Branch +// EG. CBZ/CBNZ +pipe_class pipe_cmp_branch(iRegI op1) +%{ + single_instruction; + op1 : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +//------- Synchronisation operations ---------------------- + +// Any operation requiring serialization. +// EG. DMB/Atomic Ops/Load Acquire/Str Release +pipe_class pipe_serial() +%{ + single_instruction; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + +// Generic big/slow expanded idiom - also serialized +pipe_class pipe_slow() +%{ + instruction_count(10); + multiple_bundles; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + // Empty pipeline class pipe_class pipe_class_empty() %{ @@ -4787,13 +5330,6 @@ fixed_latency(16); %} -// Pipeline class for traps. -pipe_class pipe_class_trap() -%{ - single_instruction; - fixed_latency(100); -%} - // Pipeline class for memory operations. pipe_class pipe_class_memory() %{ @@ -4810,7 +5346,7 @@ // Define the class for the Nop node. 
define %{ - MachNop = pipe_class_default; + MachNop = pipe_class_empty; %} %} @@ -4844,168 +5380,168 @@ instruct loadB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadB mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsbw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit signed) into long instruct loadB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadB mem))); - predicate(n->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit unsigned) instruct loadUB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUB mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit unsigned) into long instruct loadUB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUB mem))); - predicate(n->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Short (16 bit signed) instruct loadS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadS mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrshw $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrshw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Short (16 bit signed) into long instruct 
loadS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadS mem))); - predicate(n->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrsh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Char (16 bit unsigned) instruct loadUS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUS mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Short/Char (16 bit unsigned) into long instruct loadUS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUS mem))); - predicate(n->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit signed) instruct loadI(iRegINoSp dst, memory mem) %{ match(Set dst (LoadI mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit signed) into long instruct loadI2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadI mem))); - predicate(n->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrsw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit unsigned) into long instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) %{ match(Set dst (AndL 
(ConvI2L (LoadI mem)) mask)); - predicate(n->in(1)->in(1)->as_Load()->is_unordered()); + // predicate(n->in(1)->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Long (64 bit signed) instruct loadL(iRegLNoSp dst, memory mem) %{ match(Set dst (LoadL mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Range @@ -5018,70 +5554,70 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Pointer instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Compressed Pointer instruct loadN(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadN mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Klass Pointer instruct loadKlass(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadKlass mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# class" %} ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Narrow Klass Pointer instruct loadNKlass(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadNKlass mem)); - 
predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed class ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Float instruct loadF(vRegF dst, memory mem) %{ match(Set dst (LoadF mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrs $dst, $mem\t# float" %} @@ -5095,7 +5631,7 @@ instruct loadD(vRegD dst, memory mem) %{ match(Set dst (LoadD mem)); - predicate(n->as_Load()->is_unordered()); + // predicate(n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrd $dst, $mem\t# double" %} @@ -5116,7 +5652,7 @@ ins_encode( aarch64_enc_movw_imm(dst, src) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Long Constant @@ -5129,7 +5665,7 @@ ins_encode( aarch64_enc_mov_imm(dst, src) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Pointer Constant @@ -5145,7 +5681,7 @@ ins_encode(aarch64_enc_mov_p(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Null Pointer Constant @@ -5159,7 +5695,7 @@ ins_encode(aarch64_enc_mov_p0(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Pointer Constant One @@ -5173,7 +5709,7 @@ ins_encode(aarch64_enc_mov_p1(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Poll Page Constant @@ -5187,7 +5723,7 @@ ins_encode(aarch64_enc_mov_poll_page(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Byte Map Base Constant @@ -5201,7 +5737,7 @@ ins_encode(aarch64_enc_mov_byte_map_base(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Narrow Pointer Constant @@ -5215,7 +5751,7 @@ ins_encode(aarch64_enc_mov_n(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Narrow Null Pointer Constant @@ -5229,7 +5765,7 @@ 
ins_encode(aarch64_enc_mov_n0(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Narrow Klass Constant @@ -5243,7 +5779,7 @@ ins_encode(aarch64_enc_mov_nk(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Packed Float Constant @@ -5319,62 +5855,62 @@ ins_encode(aarch64_enc_strb0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Byte -instruct storeB(iRegI src, memory mem) +instruct storeB(iRegIorL2I src, memory mem) %{ match(Set mem (StoreB mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strb $src, $mem\t# byte" %} ins_encode(aarch64_enc_strb(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeimmB0(immI0 zero, memory mem) %{ match(Set mem (StoreB mem zero)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strb zr, $mem\t# byte" %} ins_encode(aarch64_enc_strb0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Char/Short -instruct storeC(iRegI src, memory mem) +instruct storeC(iRegIorL2I src, memory mem) %{ match(Set mem (StoreC mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strh $src, $mem\t# short" %} ins_encode(aarch64_enc_strh(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeimmC0(immI0 zero, memory mem) %{ match(Set mem (StoreC mem zero)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strh zr, $mem\t# short" %} ins_encode(aarch64_enc_strh0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Integer @@ -5382,83 +5918,83 @@ instruct storeI(iRegIorL2I src, memory mem) %{ match(Set mem(StoreI mem src)); - predicate(n->as_Store()->is_unordered()); +// 
predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# int" %} ins_encode(aarch64_enc_strw(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeimmI0(immI0 zero, memory mem) %{ match(Set mem(StoreI mem zero)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# int" %} ins_encode(aarch64_enc_strw0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Long (64 bit signed) instruct storeL(iRegL src, memory mem) %{ match(Set mem (StoreL mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str $src, $mem\t# int" %} ins_encode(aarch64_enc_str(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // Store Long (64 bit signed) instruct storeimmL0(immL0 zero, memory mem) %{ match(Set mem (StoreL mem zero)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str zr, $mem\t# int" %} ins_encode(aarch64_enc_str0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Pointer instruct storeP(iRegP src, memory mem) %{ match(Set mem (StoreP mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str $src, $mem\t# ptr" %} ins_encode(aarch64_enc_str(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // Store Pointer instruct storeimmP0(immP0 zero, memory mem) %{ match(Set mem (StoreP mem zero)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str zr, $mem\t# ptr" %} ins_encode(aarch64_enc_str0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Save last Java PC to thread anchor @@ -5488,7 +6024,7 @@ 
ins_encode(aarch64_enc_save_pc()); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} instruct storeLastJavaPC_with_retaddr(thread_anchor_pc mem, immP_M2 dummy_m2) @@ -5503,43 +6039,44 @@ ins_encode(aarch64_enc_save_pc()); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Compressed Pointer instruct storeN(iRegN src, memory mem) %{ match(Set mem (StoreN mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_strw(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && - Universe::narrow_klass_base() == NULL && - n->as_Store()->is_unordered()); + Universe::narrow_klass_base() == NULL// && + // n->as_Store()->is_unordered() + ); ins_cost(INSN_COST); format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %} ins_encode(aarch64_enc_strw(heapbase, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // Store Float instruct storeF(vRegF src, memory mem) %{ match(Set mem (StoreF mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strs $src, $mem\t# float" %} @@ -5556,7 +6093,7 @@ instruct storeD(vRegD src, memory mem) %{ match(Set mem (StoreD mem src)); - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strd $src, $mem\t# double" %} @@ -5569,7 +6106,7 @@ // Store Compressed Klass Pointer instruct storeNKlass(iRegN src, memory mem) %{ - predicate(n->as_Store()->is_unordered()); +// predicate(n->as_Store()->is_unordered()); match(Set mem (StoreNKlass mem src)); ins_cost(INSN_COST); @@ -5577,7 +6114,7 @@ ins_encode(aarch64_enc_strw(src, mem)); - 
ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // TODO @@ -5594,7 +6131,7 @@ ins_encode( aarch64_enc_prefetchr(mem) ); - ins_pipe(pipe_class_memory); + ins_pipe(iload_prefetch); %} instruct prefetchw( memory mem ) %{ @@ -5605,7 +6142,7 @@ ins_encode( aarch64_enc_prefetchw(mem) ); - ins_pipe(pipe_class_memory); + ins_pipe(iload_prefetch); %} instruct prefetchnta( memory mem ) %{ @@ -5616,376 +6153,70 @@ ins_encode( aarch64_enc_prefetchnta(mem) ); - ins_pipe(pipe_class_memory); -%} - -// ---------------- volatile loads and stores ---------------- - -// Load Byte (8 bit signed) -instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadB mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarsb $dst, $mem\t# byte" %} - - ins_encode(aarch64_enc_ldarsb(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Byte (8 bit signed) into long -instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (ConvI2L (LoadB mem))); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarsb $dst, $mem\t# byte" %} - - ins_encode(aarch64_enc_ldarsb(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Byte (8 bit unsigned) -instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadUB mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarb $dst, $mem\t# byte" %} - - ins_encode(aarch64_enc_ldarb(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Byte (8 bit unsigned) into long -instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (ConvI2L (LoadUB mem))); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarb $dst, $mem\t# byte" %} - - ins_encode(aarch64_enc_ldarb(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Short (16 bit signed) -instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadS mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarshw $dst, $mem\t# short" 
%} - - ins_encode(aarch64_enc_ldarshw(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadUS mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarhw $dst, $mem\t# short" %} - - ins_encode(aarch64_enc_ldarhw(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Short/Char (16 bit unsigned) into long -instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (ConvI2L (LoadUS mem))); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarh $dst, $mem\t# short" %} - - ins_encode(aarch64_enc_ldarh(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Short/Char (16 bit signed) into long -instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (ConvI2L (LoadS mem))); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarh $dst, $mem\t# short" %} - - ins_encode(aarch64_enc_ldarsh(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Integer (32 bit signed) -instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadI mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarw $dst, $mem\t# int" %} - - ins_encode(aarch64_enc_ldarw(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Integer (32 bit unsigned) into long -instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask) -%{ - match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarw $dst, $mem\t# int" %} - - ins_encode(aarch64_enc_ldarw(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Long (64 bit signed) -instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadL mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldar $dst, $mem\t# int" %} - - ins_encode(aarch64_enc_ldar(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Pointer -instruct loadP_volatile(iRegPNoSp dst, 
/* sync_memory*/indirect mem) -%{ - match(Set dst (LoadP mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldar $dst, $mem\t# ptr" %} - - ins_encode(aarch64_enc_ldar(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Compressed Pointer -instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadN mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldarw $dst, $mem\t# compressed ptr" %} - - ins_encode(aarch64_enc_ldarw(dst, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Load Float -instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadF mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldars $dst, $mem\t# float" %} - - ins_encode( aarch64_enc_fldars(dst, mem) ); - - ins_pipe(pipe_class_memory); -%} - -// Load Double -instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem) -%{ - match(Set dst (LoadD mem)); - - ins_cost(VOLATILE_REF_COST); - format %{ "ldard $dst, $mem\t# double" %} - - ins_encode( aarch64_enc_fldard(dst, mem) ); - - ins_pipe(pipe_class_memory); -%} - -// Store Byte -instruct storeB_volatile(iRegI src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreB mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrb $src, $mem\t# byte" %} - - ins_encode(aarch64_enc_stlrb(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Char/Short -instruct storeC_volatile(iRegI src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreC mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrh $src, $mem\t# short" %} - - ins_encode(aarch64_enc_stlrh(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Integer - -instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem) -%{ - match(Set mem(StoreI mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrw $src, $mem\t# int" %} - - ins_encode(aarch64_enc_stlrw(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Long (64 bit signed) -instruct storeL_volatile(iRegL 
src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreL mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlr $src, $mem\t# int" %} - - ins_encode(aarch64_enc_stlr(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Pointer -instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreP mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlr $src, $mem\t# ptr" %} - - ins_encode(aarch64_enc_stlr(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Compressed Pointer -instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreN mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrw $src, $mem\t# compressed ptr" %} - - ins_encode(aarch64_enc_stlrw(src, mem)); - - ins_pipe(pipe_class_memory); -%} - -// Store Float -instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreF mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrs $src, $mem\t# float" %} - - ins_encode( aarch64_enc_fstlrs(src, mem) ); - - ins_pipe(pipe_class_memory); -%} - -// TODO -// implement storeImmF0 and storeFImmPacked - -// Store Double -instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem) -%{ - match(Set mem (StoreD mem src)); - - ins_cost(VOLATILE_REF_COST); - format %{ "stlrd $src, $mem\t# double" %} - - ins_encode( aarch64_enc_fstlrd(src, mem) ); - - ins_pipe(pipe_class_memory); -%} - -// ---------------- end of volatile loads and stores ---------------- + ins_pipe(iload_prefetch); +%} // ============================================================================ // BSWAP Instructions -instruct bytes_reverse_int(iRegINoSp dst) %{ - match(Set dst (ReverseBytesI dst)); - - ins_cost(INSN_COST); - format %{ "revw $dst, $dst" %} - - ins_encode %{ - __ revw(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_long(iRegLNoSp dst) %{ - match(Set dst (ReverseBytesL dst)); - 
- ins_cost(INSN_COST); - format %{ "rev $dst, $dst" %} - - ins_encode %{ - __ rev(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_unsigned_short(iRegINoSp dst) %{ - match(Set dst (ReverseBytesUS dst)); - - ins_cost(INSN_COST); - format %{ "rev16w $dst, $dst" %} - - ins_encode %{ - __ rev16w(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_short(iRegINoSp dst) %{ - match(Set dst (ReverseBytesS dst)); - - ins_cost(INSN_COST); - format %{ "rev16w $dst, $dst\n\t" +instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesI src)); + + ins_cost(INSN_COST); + format %{ "revw $dst, $src" %} + + ins_encode %{ + __ revw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); + + ins_cost(INSN_COST); + format %{ "rev $dst, $src" %} + + ins_encode %{ + __ rev(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesUS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src" %} + + ins_encode %{ + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src\n\t" "sbfmw $dst, $dst, #0, #15" %} ins_encode %{ - __ rev16w(as_Register($dst$$reg), as_Register($dst$$reg)); + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U); %} - ins_pipe( pipe_class_default ); + ins_pipe(ialu_reg); %} // ============================================================================ // Zero Count Instructions 
-instruct countLeadingZerosI(iRegI dst, iRegI src) %{ +instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (CountLeadingZerosI src)); ins_cost(INSN_COST); @@ -5994,10 +6225,10 @@ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe( pipe_class_default ); -%} - -instruct countLeadingZerosL(iRegI dst, iRegL src) %{ + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{ match(Set dst (CountLeadingZerosL src)); ins_cost(INSN_COST); @@ -6006,10 +6237,10 @@ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe( pipe_class_default ); -%} - -instruct countTrailingZerosI(iRegI dst, iRegI src) %{ + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (CountTrailingZerosI src)); ins_cost(INSN_COST * 2); @@ -6020,10 +6251,10 @@ __ clzw(as_Register($dst$$reg), as_Register($dst$$reg)); %} - ins_pipe( pipe_class_default ); -%} - -instruct countTrailingZerosL(iRegI dst, iRegL src) %{ + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{ match(Set dst (CountTrailingZerosL src)); ins_cost(INSN_COST * 2); @@ -6034,7 +6265,97 @@ __ clz(as_Register($dst$$reg), as_Register($dst$$reg)); %} - ins_pipe( pipe_class_default ); + ins_pipe(ialu_reg); +%} + +//---------- Population Count Instructions ------------------------------------- +// + +instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "movw $src, $src\n\t" + "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ 
addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrs $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL (LoadL mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrd $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + 
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); %} // ============================================================================ @@ -6049,21 +6370,7 @@ ins_encode %{ __ membar(Assembler::LoadLoad|Assembler::LoadStore); %} - ins_pipe(pipe_class_memory); -%} - -instruct unnecessary_membar_acquire() %{ - predicate(preceded_by_ordered_load(n)); - match(MemBarAcquire); - ins_cost(0); - - format %{ "membar_acquire (elided)" %} - - ins_encode %{ - __ block_comment("membar_acquire (elided)"); - %} - - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_acquire() %{ @@ -6076,7 +6383,7 @@ __ membar(Assembler::LoadLoad|Assembler::LoadStore); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} @@ -6090,7 +6397,7 @@ __ membar(Assembler::LoadLoad|Assembler::LoadStore); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct store_fence() %{ @@ -6102,20 +6409,7 @@ ins_encode %{ __ membar(Assembler::LoadStore|Assembler::StoreStore); %} - ins_pipe(pipe_class_memory); -%} - -instruct unnecessary_membar_release() %{ - match(MemBarRelease); - predicate(followed_by_ordered_store(n)); - ins_cost(0); - - format %{ "membar_release (elided)" %} - - ins_encode %{ - __ block_comment("membar_release (elided)"); - %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_release() %{ @@ -6127,7 +6421,7 @@ ins_encode %{ __ membar(Assembler::LoadStore|Assembler::StoreStore); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_storestore() %{ @@ -6139,7 +6433,7 @@ ins_encode %{ __ membar(Assembler::StoreStore); %} - 
ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_release_lock() %{ @@ -6152,7 +6446,7 @@ __ membar(Assembler::LoadStore|Assembler::StoreStore); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_volatile() %{ @@ -6163,9 +6457,9 @@ ins_encode %{ __ membar(Assembler::StoreLoad); - %} - - ins_pipe(pipe_class_memory); + %} + + ins_pipe(pipe_serial); %} // ============================================================================ @@ -6183,7 +6477,7 @@ } %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct castP2X(iRegLNoSp dst, iRegP src) %{ @@ -6198,7 +6492,7 @@ } %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Convert oop into int for vectors alignment masking @@ -6211,7 +6505,7 @@ __ movw($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Convert compressed oop into int for vectors alignment masking @@ -6227,7 +6521,7 @@ __ movw($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} @@ -6243,7 +6537,7 @@ Register d = $dst$$Register; __ encode_heap_oop(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ @@ -6254,7 +6548,7 @@ ins_encode %{ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ @@ -6268,7 +6562,7 @@ Register d = $dst$$Register; __ decode_heap_oop(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ @@ -6282,7 +6576,7 @@ Register d = $dst$$Register; __ decode_heap_oop_not_null(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // n.b. 
AArch64 implementations of encode_klass_not_null and @@ -6301,7 +6595,7 @@ __ encode_klass_not_null(dst_reg, src_reg); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ @@ -6320,7 +6614,7 @@ } %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct checkCastPP(iRegPNoSp dst) @@ -6392,7 +6686,7 @@ ins_encode(aarch64_enc_ldaxr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // Conditional-store of the updated heap-top. @@ -6417,7 +6711,7 @@ ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // this has to be implemented as a CAS @@ -6434,7 +6728,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} // this has to be implemented as a CAS @@ -6451,7 +6745,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher @@ -6471,7 +6765,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ @@ -6488,7 +6782,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -6505,7 +6799,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ @@ -6522,7 +6816,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + 
ins_pipe(pipe_slow); %} @@ -6532,7 +6826,7 @@ ins_encode %{ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{ @@ -6541,7 +6835,7 @@ ins_encode %{ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{ @@ -6550,7 +6844,7 @@ ins_encode %{ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{ @@ -6559,7 +6853,7 @@ ins_encode %{ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} @@ -6570,7 +6864,7 @@ ins_encode %{ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{ @@ -6581,7 +6875,7 @@ ins_encode %{ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{ @@ -6591,7 +6885,7 @@ ins_encode %{ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{ @@ -6602,7 +6896,7 @@ ins_encode %{ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ @@ -6612,7 +6906,7 @@ ins_encode %{ __ atomic_addw($newval$$Register, $incr$$Register, 
as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{ @@ -6623,7 +6917,7 @@ ins_encode %{ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{ @@ -6633,7 +6927,7 @@ ins_encode %{ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{ @@ -6644,7 +6938,7 @@ ins_encode %{ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // ============================================================================ @@ -6660,7 +6954,7 @@ // which throws a ShouldNotHappen. So, we have to provide two flavours // of each rule, one for a cmpOp and a second for a cmpOpU (sigh). 
-instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegI src1, iRegI src2) %{ +instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2))); ins_cost(INSN_COST * 2); @@ -6673,10 +6967,10 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src1, iRegI src2) %{ + ins_pipe(icond_reg_reg); +%} + +instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2))); ins_cost(INSN_COST * 2); @@ -6689,7 +6983,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero @@ -6701,68 +6995,68 @@ // we ought only to be able to cull one of these variants as the ideal // transforms ought always to order the zero consistently (to left/right?) 
-instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp cr) (Binary zero src2))); +instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, $src2, zr $cmp\t# signed, int" %} + format %{ "cselw $dst, $src, zr $cmp\t# signed, int" %} ins_encode %{ __ cselw(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, $src2, zr $cmp\t# unsigned, int" %} + format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int" %} ins_encode %{ __ cselw(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegI src1, immI0 zero) %{ - match(Set dst (CMoveI (Binary cmp cr) (Binary src1 zero))); + ins_pipe(icond_reg); +%} + +instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, zr, $src1 $cmp\t# signed, int" %} + format %{ "cselw $dst, zr, $src $cmp\t# signed, int" %} ins_encode %{ __ cselw(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct 
cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src1, immI0 zero) %{ - match(Set dst (CMoveI (Binary cmp cr) (Binary src1 zero))); + ins_pipe(icond_reg); +%} + +instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, zr, $src1 $cmp\t# unsigned, int" %} + format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int" %} ins_encode %{ __ cselw(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} // special case for creating a boolean 0 or 1 @@ -6786,7 +7080,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_none); %} instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{ @@ -6805,7 +7099,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_none); %} instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6821,7 +7115,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6837,73 +7131,73 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero -instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, immL0 zero) %{ - match(Set dst (CMoveL (Binary cmp cr) (Binary src1 zero))); +instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, zr, $src1 $cmp\t# signed, long" %} + format %{ "csel $dst, zr, $src $cmp\t# signed, long" %} 
ins_encode %{ __ csel(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, immL0 zero) %{ - match(Set dst (CMoveL (Binary cmp cr) (Binary src1 zero))); + ins_pipe(icond_reg); +%} + +instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, zr, $src1 $cmp\t# unsigned, long" %} + format %{ "csel $dst, zr, $src $cmp\t# unsigned, long" %} ins_encode %{ __ csel(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, $src2, zr $cmp\t# signed, long" %} + format %{ "csel $dst, $src, zr $cmp\t# signed, long" %} ins_encode %{ __ csel(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, $src2, zr $cmp\t# unsigned, long" %} + format %{ "csel $dst, $src, zr $cmp\t# unsigned, long" %} 
ins_encode %{ __ csel(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{ @@ -6919,7 +7213,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{ @@ -6935,73 +7229,73 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero -instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, immP0 zero) %{ - match(Set dst (CMoveP (Binary cmp cr) (Binary src1 zero))); +instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, zr, $src1 $cmp\t# signed, ptr" %} + format %{ "csel $dst, zr, $src $cmp\t# signed, ptr" %} ins_encode %{ __ csel(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, immP0 zero) %{ - match(Set dst (CMoveP (Binary cmp cr) (Binary src1 zero))); + ins_pipe(icond_reg); +%} + +instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, zr, $src1 $cmp\t# unsigned, ptr" %} + format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr" %} ins_encode %{ __ csel(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct 
cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, $src2, zr $cmp\t# signed, ptr" %} + format %{ "csel $dst, $src, zr $cmp\t# signed, ptr" %} ins_encode %{ __ csel(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "csel $dst, $src2, zr $cmp\t# unsigned, ptr" %} + format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr" %} ins_encode %{ __ csel(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ @@ -7017,7 +7311,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ @@ -7033,73 +7327,73 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero -instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, immN0 zero) %{ - match(Set dst (CMoveN (Binary cmp cr) (Binary src1 zero))); +instruct cmovN_reg_zero(cmpOp cmp, 
rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, zr, $src1 $cmp\t# signed, compressed ptr" %} + format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr" %} ins_encode %{ __ cselw(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, immN0 zero) %{ - match(Set dst (CMoveN (Binary cmp cr) (Binary src1 zero))); + ins_pipe(icond_reg); +%} + +instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src zero))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, zr, $src1 $cmp\t# unsigned, compressed ptr" %} + format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr" %} ins_encode %{ __ cselw(as_Register($dst$$reg), zr, - as_Register($src1$$reg), + as_Register($src$$reg), (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp cr) (Binary zero src2))); + ins_pipe(icond_reg); +%} + +instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, $src2, zr $cmp\t# signed, compressed ptr" %} + format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr" %} ins_encode %{ __ cselw(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); -%} - -instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp cr) (Binary zero src2))); + 
ins_pipe(icond_reg); +%} + +instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary zero src))); ins_cost(INSN_COST * 2); - format %{ "cselw $dst, $src2, zr $cmp\t# unsigned, compressed ptr" %} + format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr" %} ins_encode %{ __ cselw(as_Register($dst$$reg), - as_Register($src2$$reg), + as_Register($src$$reg), zr, (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1, vRegF src2) @@ -7198,10 +7492,10 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); -%} - -instruct addI_reg_imm(iRegINoSp dst, iRegI src1, immIAddSub src2) %{ + ins_pipe(ialu_reg_reg); +%} + +instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{ match(Set dst (AddI src1 src2)); ins_cost(INSN_COST); @@ -7212,7 +7506,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{ @@ -7226,7 +7520,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Pointer Addition @@ -7242,13 +7536,13 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ match(Set dst (AddP src1 (ConvI2L src2))); - ins_cost(INSN_COST); + ins_cost(1.9 * INSN_COST); format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %} ins_encode %{ @@ -7257,7 +7551,7 @@ as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{ @@ -7272,7 +7566,7 @@ Address::lsl($scale$$constant))); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); 
%} instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{ @@ -7287,7 +7581,7 @@ Address::sxtw($scale$$constant))); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{ @@ -7302,7 +7596,7 @@ $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Pointer Immediate Addition @@ -7319,7 +7613,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Addition @@ -7336,7 +7630,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // No constant pool entries requiredLong Immediate Addition. @@ -7351,7 +7645,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Integer Subtraction @@ -7367,7 +7661,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // Immediate Subtraction @@ -7382,7 +7676,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Subtraction @@ -7399,7 +7693,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // No constant pool entries requiredLong Immediate Subtraction. 
@@ -7414,7 +7708,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Integer Negation (special case for sub) @@ -7426,11 +7720,11 @@ format %{ "negw $dst, $src\t# int" %} ins_encode %{ - __ negsw(as_Register($dst$$reg), - as_Register($src$$reg)); - %} - - ins_pipe(pipe_class_default); + __ negw(as_Register($dst$$reg), + as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); %} // Long Negation @@ -7446,7 +7740,7 @@ as_Register($src$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Integer Multiply @@ -7463,7 +7757,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imul_reg_reg); %} instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ @@ -7478,7 +7772,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imul_reg_reg); %} // Long Multiply @@ -7495,7 +7789,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmul_reg_reg); %} instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) @@ -7511,7 +7805,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmul_reg_reg); %} // Combined Integer Multiply & Add/Sub @@ -7529,7 +7823,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imac_reg_reg); %} instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{ @@ -7545,7 +7839,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imac_reg_reg); %} // Combined Long Multiply & Add/Sub @@ -7563,7 +7857,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmac_reg_reg); %} instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{ @@ -7579,7 +7873,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmac_reg_reg); %} // Integer Divide @@ -7591,20 +7885,20 @@ format %{ "sdivw $dst, $src1, $src2" %} ins_encode(aarch64_enc_divw(dst, src1, 
src2)); - ins_pipe(pipe_class_default); -%} - -instruct signExtract(iRegINoSp dst, iRegI src, immI_31 div1, immI_31 div2) %{ - match(Set dst (URShiftI (RShiftI src div1) div2)); - ins_cost(INSN_COST); - format %{ "lsrw $dst, $src, $div1" %} - ins_encode %{ - __ lsrw(as_Register($dst$$reg), as_Register($src$$reg), 31); - %} - ins_pipe(pipe_class_default); -%} - -instruct div2Round(iRegINoSp dst, iRegI src, immI_31 div1, immI_31 div2) %{ + ins_pipe(idiv_reg_reg); +%} + +instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ + match(Set dst (URShiftI (RShiftI src1 div1) div2)); + ins_cost(INSN_COST); + format %{ "lsrw $dst, $src1, $div1" %} + ins_encode %{ + __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} + ins_pipe(ialu_reg_shift); +%} + +instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{ match(Set dst (AddI src (URShiftI (RShiftI src div1) div2))); ins_cost(INSN_COST); format %{ "addw $dst, $src, LSR $div1" %} @@ -7615,7 +7909,7 @@ as_Register($src$$reg), Assembler::LSR, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Long Divide @@ -7627,17 +7921,17 @@ format %{ "sdiv $dst, $src1, $src2" %} ins_encode(aarch64_enc_div(dst, src1, src2)); - ins_pipe(pipe_class_default); -%} - -instruct signExtractL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{ - match(Set dst (URShiftL (RShiftL src div1) div2)); - ins_cost(INSN_COST); - format %{ "lsr $dst, $src, $div1" %} - ins_encode %{ - __ lsr(as_Register($dst$$reg), as_Register($src$$reg), 63); - %} - ins_pipe(pipe_class_default); + ins_pipe(ldiv_reg_reg); +%} + +instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{ + match(Set dst (URShiftL (RShiftL src1 div1) div2)); + ins_cost(INSN_COST); + format %{ "lsr $dst, $src1, $div1" %} + ins_encode %{ + __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} + ins_pipe(ialu_reg_shift); %} instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 
div1, immL_63 div2) %{ @@ -7651,7 +7945,7 @@ as_Register($src$$reg), Assembler::LSR, 63); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Integer Remainder @@ -7664,7 +7958,7 @@ "msubw($dst, rscratch1, $src2, $src1" %} ins_encode(aarch64_enc_modw(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(idiv_reg_reg); %} // Long Remainder @@ -7677,7 +7971,7 @@ "msub($dst, rscratch1, $src2, $src1" %} ins_encode(aarch64_enc_mod(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ldiv_reg_reg); %} // Integer Shifts @@ -7695,7 +7989,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Left Immediate @@ -7711,7 +8005,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Logical Register @@ -7727,7 +8021,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Logical Immediate @@ -7743,7 +8037,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Arithmetic Register @@ -7759,7 +8053,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Arithmetic Immediate @@ -7775,7 +8069,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Combined Int Mask and Right Shift (using UBFM) @@ -7796,7 +8090,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Left Immediate @@ -7812,7 +8106,7 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Logical Register @@ -7828,7 +8122,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Logical Immediate @@ -7844,7 +8138,23 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + 
ins_pipe(ialu_reg_shift); +%} + +// A special-case pattern for card table stores. +instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ + match(Set dst (URShiftL (CastP2X src1) src2)); + + ins_cost(INSN_COST); + format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %} + + ins_encode %{ + __ lsr(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); %} // Shift Right Arithmetic Register @@ -7860,7 +8170,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Arithmetic Immediate @@ -7876,7 +8186,7 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // BEGIN This section of the file is automatically generated. Do not edit -------------- @@ -7895,10 +8205,10 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct regI_not_reg(iRegINoSp dst, - iRegI src1, immI_M1 m1, + iRegIorL2I src1, immI_M1 m1, rFlagsReg cr) %{ match(Set dst (XorI src1 m1)); ins_cost(INSN_COST); @@ -7911,24 +8221,24 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct AndI_reg_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, immI_M1 m1, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, rFlagsReg cr) %{ match(Set dst (AndI src1 (XorI src2 m1))); ins_cost(INSN_COST); - format %{ "bic $dst, $src1, $src2" %} - - ins_encode %{ - __ bic(as_Register($dst$$reg), + format %{ "bicw $dst, $src1, $src2" %} + + ins_encode %{ + __ bicw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AndL_reg_not_reg(iRegLNoSp dst, @@ -7945,24 +8255,24 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct OrI_reg_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, immI_M1 m1, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, rFlagsReg cr) 
%{ match(Set dst (OrI src1 (XorI src2 m1))); ins_cost(INSN_COST); - format %{ "orn $dst, $src1, $src2" %} - - ins_encode %{ - __ orn(as_Register($dst$$reg), + format %{ "ornw $dst, $src1, $src2" %} + + ins_encode %{ + __ ornw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct OrL_reg_not_reg(iRegLNoSp dst, @@ -7979,24 +8289,24 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct XorI_reg_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, immI_M1 m1, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, rFlagsReg cr) %{ match(Set dst (XorI m1 (XorI src2 src1))); ins_cost(INSN_COST); - format %{ "eon $dst, $src1, $src2" %} - - ins_encode %{ - __ eon(as_Register($dst$$reg), + format %{ "eonw $dst, $src1, $src2" %} + + ins_encode %{ + __ eonw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct XorL_reg_not_reg(iRegLNoSp dst, @@ -8013,11 +8323,11 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AndI_reg_URShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8031,7 +8341,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, @@ -8049,11 +8359,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_RShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8067,7 +8377,7 @@ 
$src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, @@ -8085,11 +8395,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_LShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8103,7 +8413,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, @@ -8121,11 +8431,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_URShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1))); ins_cost(1.9 * INSN_COST); @@ -8139,7 +8449,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, @@ -8157,11 +8467,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_RShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1))); ins_cost(1.9 * INSN_COST); @@ -8175,7 +8485,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, @@ -8193,11 +8503,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_LShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst 
(XorI src4 (XorI(LShiftI src2 src3) src1))); ins_cost(1.9 * INSN_COST); @@ -8211,7 +8521,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, @@ -8229,11 +8539,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_URShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8247,7 +8557,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, @@ -8265,11 +8575,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_RShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8283,7 +8593,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, @@ -8301,11 +8611,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_LShift_not_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, immI_M1 src4, rFlagsReg cr) %{ match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4))); ins_cost(1.9 * INSN_COST); @@ -8319,7 +8629,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, @@ -8337,11 +8647,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_URShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I 
src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AndI src1 (URShiftI src2 src3))); @@ -8356,7 +8666,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_URShift_reg(iRegLNoSp dst, @@ -8375,11 +8685,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_RShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AndI src1 (RShiftI src2 src3))); @@ -8394,7 +8704,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_RShift_reg(iRegLNoSp dst, @@ -8413,11 +8723,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_LShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AndI src1 (LShiftI src2 src3))); @@ -8432,7 +8742,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_LShift_reg(iRegLNoSp dst, @@ -8451,11 +8761,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_URShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (XorI src1 (URShiftI src2 src3))); @@ -8470,7 +8780,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_URShift_reg(iRegLNoSp dst, @@ -8489,11 +8799,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_RShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (XorI src1 (RShiftI src2 src3))); @@ -8508,7 +8818,7 @@ $src3$$constant & 0x3f); 
%} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_RShift_reg(iRegLNoSp dst, @@ -8527,11 +8837,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_LShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (XorI src1 (LShiftI src2 src3))); @@ -8546,7 +8856,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_LShift_reg(iRegLNoSp dst, @@ -8565,11 +8875,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_URShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (OrI src1 (URShiftI src2 src3))); @@ -8584,7 +8894,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_URShift_reg(iRegLNoSp dst, @@ -8603,11 +8913,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_RShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (OrI src1 (RShiftI src2 src3))); @@ -8622,7 +8932,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_RShift_reg(iRegLNoSp dst, @@ -8641,11 +8951,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_LShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (OrI src1 (LShiftI src2 src3))); @@ -8660,7 +8970,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_LShift_reg(iRegLNoSp dst, @@ -8679,11 +8989,11 @@ $src3$$constant & 
0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_URShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AddI src1 (URShiftI src2 src3))); @@ -8698,7 +9008,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddL_reg_URShift_reg(iRegLNoSp dst, @@ -8717,11 +9027,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_RShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AddI src1 (RShiftI src2 src3))); @@ -8736,7 +9046,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddL_reg_RShift_reg(iRegLNoSp dst, @@ -8755,11 +9065,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_LShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (AddI src1 (LShiftI src2 src3))); @@ -8774,7 +9084,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddL_reg_LShift_reg(iRegLNoSp dst, @@ -8793,11 +9103,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_URShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (SubI src1 (URShiftI src2 src3))); @@ -8812,7 +9122,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_URShift_reg(iRegLNoSp dst, @@ -8831,11 +9141,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_RShift_reg(iRegINoSp dst, - iRegI src1, iRegI 
src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (SubI src1 (RShiftI src2 src3))); @@ -8850,7 +9160,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_RShift_reg(iRegLNoSp dst, @@ -8869,11 +9179,11 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_LShift_reg(iRegINoSp dst, - iRegI src1, iRegI src2, + iRegIorL2I src1, iRegIorL2I src2, immI src3, rFlagsReg cr) %{ match(Set dst (SubI src1 (LShiftI src2 src3))); @@ -8888,7 +9198,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_LShift_reg(iRegLNoSp dst, @@ -8907,7 +9217,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} @@ -8932,12 +9242,12 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. // This idiom is used by the compiler for the i2b bytecode etc. -instruct sbfmwI(iRegINoSp dst, iRegI src, immI lshift_count, immI rshift_count) +instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count) %{ match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count)); // Make sure we are not going to exceed what sbfmw can do. @@ -8955,7 +9265,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. @@ -8978,12 +9288,12 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. // This idiom is used by the compiler for the i2b bytecode etc. -instruct ubfmwI(iRegINoSp dst, iRegI src, immI lshift_count, immI rshift_count) +instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count) %{ match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count)); // Make sure we are not going to exceed what ubfmw can do. 
@@ -9001,11 +9311,11 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Bitfield extract with shift & mask -instruct ubfxwI(iRegINoSp dst, iRegI src, immI rshift, immI_bitmask mask) +instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask) %{ match(Set dst (AndI (URShiftI src rshift) mask)); @@ -9018,7 +9328,7 @@ __ ubfxw(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask) %{ @@ -9033,7 +9343,7 @@ __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // We can use ubfx when extending an And with a mask when we know mask @@ -9051,7 +9361,7 @@ __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Rotations @@ -9068,10 +9378,10 @@ __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 63); %} - ins_pipe(pipe_class_default); -%} - -instruct extrOrI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr) + ins_pipe(ialu_reg_reg_extr); +%} + +instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr) %{ match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift))); predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31)); @@ -9083,7 +9393,7 @@ __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr) @@ -9098,10 +9408,10 @@ __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 63); %} - 
ins_pipe(pipe_class_default); -%} - -instruct extrAddI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr) + ins_pipe(ialu_reg_reg_extr); +%} + +instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr) %{ match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift))); predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31)); @@ -9113,13 +9423,13 @@ __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} // rol expander -instruct rolL_rReg(iRegL dst, iRegL src, iRegI shift, rFlagsReg cr) +instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -9130,12 +9440,12 @@ __ rorv(as_Register($dst$$reg), as_Register($src$$reg), rscratch1); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // rol expander -instruct rolI_rReg(iRegI dst, iRegI src, iRegI shift, rFlagsReg cr) +instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -9146,10 +9456,10 @@ __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), rscratch1); %} - ins_pipe(pipe_class_default); -%} - -instruct rolL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) + ins_pipe(ialu_reg_reg_vshift); +%} + +instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) %{ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift)))); @@ -9158,7 +9468,7 @@ %} %} -instruct rolL_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) +instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift)))); @@ -9167,7 +9477,7 @@ %} %} -instruct rolI_rReg_Var_C_32(iRegL dst, iRegL src, iRegI 
shift, immI_32 c_32, rFlagsReg cr) +instruct rolI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr) %{ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift)))); @@ -9176,7 +9486,7 @@ %} %} -instruct rolI_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) +instruct rolI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift)))); @@ -9187,7 +9497,7 @@ // ror expander -instruct rorL_rReg(iRegL dst, iRegL src, iRegI shift, rFlagsReg cr) +instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -9197,12 +9507,12 @@ __ rorv(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // ror expander -instruct rorI_rReg(iRegI dst, iRegI src, iRegI shift, rFlagsReg cr) +instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -9212,10 +9522,10 @@ __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); -%} - -instruct rorL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) + ins_pipe(ialu_reg_reg_vshift); +%} + +instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) %{ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift)))); @@ -9224,7 +9534,7 @@ %} %} -instruct rorL_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) +instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift)))); @@ -9233,7 +9543,7 @@ %} %} -instruct rorI_rReg_Var_C_32(iRegL dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr) +instruct rorI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI 
shift, immI_32 c_32, rFlagsReg cr) %{ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift)))); @@ -9242,7 +9552,7 @@ %} %} -instruct rorI_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) +instruct rorI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift)))); @@ -9263,7 +9573,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}; instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) @@ -9276,11 +9586,11 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}; -instruct AddExtI_sxth(iRegINoSp dst, iRegI src1, iRegI src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr) +instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr) %{ match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift))); ins_cost(INSN_COST); @@ -9290,10 +9600,10 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth); %} - ins_pipe(pipe_class_default); -%} - -instruct AddExtI_sxtb(iRegINoSp dst, iRegI src1, iRegI src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{ match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift))); ins_cost(INSN_COST); @@ -9303,10 +9613,10 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb); %} - ins_pipe(pipe_class_default); -%} - -instruct AddExtI_uxtb(iRegINoSp dst, iRegI src1, iRegI src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_uxtb(iRegINoSp dst, 
iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{ match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift))); ins_cost(INSN_COST); @@ -9316,7 +9626,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr) @@ -9329,7 +9639,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr) @@ -9342,7 +9652,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) @@ -9355,7 +9665,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) @@ -9368,11 +9678,11 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); -%} - - -instruct AddExtI_uxtb_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_255 mask, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + + +instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) %{ match(Set dst (AddI src1 (AndI src2 mask))); ins_cost(INSN_COST); @@ -9382,10 +9692,10 @@ __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); -%} - -instruct AddExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI 
src2, immI_65535 mask, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) %{ match(Set dst (AddI src1 (AndI src2 mask))); ins_cost(INSN_COST); @@ -9395,7 +9705,7 @@ __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) @@ -9408,7 +9718,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) @@ -9421,7 +9731,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) @@ -9434,10 +9744,10 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw); %} - ins_pipe(pipe_class_default); -%} - -instruct SubExtI_uxtb_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_255 mask, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) %{ match(Set dst (SubI src1 (AndI src2 mask))); ins_cost(INSN_COST); @@ -9447,10 +9757,10 @@ __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); -%} - -instruct SubExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_65535 mask, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) %{ match(Set dst (SubI src1 (AndI src2 mask))); ins_cost(INSN_COST); @@ 
-9460,7 +9770,7 @@ __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) @@ -9473,7 +9783,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) @@ -9486,7 +9796,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) @@ -9499,12 +9809,11 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // END This section of the file is automatically generated. 
Do not edit -------------- - // ============================================================================ // Floating Point Arithmetic Instructions @@ -9861,7 +10170,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{ @@ -9876,7 +10185,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Or Instructions @@ -9893,7 +10202,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ @@ -9908,7 +10217,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Xor Instructions @@ -9925,7 +10234,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ @@ -9940,7 +10249,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Logical Instructions @@ -9958,7 +10267,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{ @@ -9973,7 +10282,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Or Instructions @@ -9990,7 +10299,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ @@ -10005,7 +10314,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Xor Instructions @@ -10022,7 +10331,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ @@ -10037,7 
+10346,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) @@ -10049,11 +10358,11 @@ ins_encode %{ __ sbfm($dst$$Register, $src$$Register, 0, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // this pattern occurs in bigmath arithmetic -instruct convUI2L_reg_reg(iRegLNoSp dst, iRegI src, immL_32bits mask) +instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L src) mask)); @@ -10063,7 +10372,7 @@ __ ubfm($dst$$Register, $src$$Register, 0, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ @@ -10076,10 +10385,10 @@ __ movw(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe(pipe_class_default); -%} - -instruct convI2B(iRegINoSp dst, iRegI src, rFlagsReg cr) + ins_pipe(ialu_reg); +%} + +instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ match(Set dst (Conv2B src)); effect(KILL cr); @@ -10094,7 +10403,7 @@ __ cset(as_Register($dst$$reg), Assembler::NE); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr) @@ -10112,7 +10421,7 @@ __ cset(as_Register($dst$$reg), Assembler::NE); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct convD2F_reg(vRegF dst, vRegD src) %{ @@ -10167,7 +10476,7 @@ ins_pipe(pipe_class_default); %} -instruct convI2F_reg_reg(vRegF dst, iRegI src) %{ +instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{ match(Set dst (ConvI2F src)); ins_cost(INSN_COST * 5); @@ -10219,7 +10528,7 @@ ins_pipe(pipe_class_default); %} -instruct convI2D_reg_reg(vRegD dst, iRegI src) %{ +instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{ match(Set dst (ConvI2D src)); ins_cost(INSN_COST * 5); @@ -10261,7 +10570,7 @@ __ ldrw($dst$$Register, Address(sp, $src$$disp)); %} - ins_pipe(pipe_class_memory); + 
ins_pipe(iload_reg_reg); %} @@ -10297,7 +10606,7 @@ __ ldr($dst$$Register, Address(sp, $src$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_reg); %} @@ -10351,7 +10660,7 @@ __ strw($src$$Register, Address(sp, $dst$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_reg); %} @@ -10387,7 +10696,7 @@ __ str($src$$Register, Address(sp, $dst$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_reg); %} @@ -10482,7 +10791,7 @@ // ============================================================================ // Overflow Math Instructions -instruct overflowAddI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) +instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) %{ match(Set cr (OverflowAddI op1 op2)); @@ -10492,10 +10801,10 @@ __ cmnw($op1$$Register, $op2$$Register); %} - ins_pipe(pipe_class_default); -%} - -instruct overflowAddI_reg_imm(rFlagsReg cr, iRegI op1, immIAddSub op2) + ins_pipe(icmp_reg_reg); +%} + +instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) %{ match(Set cr (OverflowAddI op1 op2)); @@ -10505,7 +10814,7 @@ __ cmnw($op1$$Register, $op2$$constant); %} - ins_pipe(pipe_class_default); + ins_pipe(icmp_reg_imm); %} instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) @@ -10518,7 +10827,7 @@ __ cmn($op1$$Register, $op2$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(icmp_reg_reg); %} instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2) @@ -10531,10 +10840,10 @@ __ cmn($op1$$Register, $op2$$constant); %} - ins_pipe(pipe_class_default); -%} - -instruct overflowSubI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) + ins_pipe(icmp_reg_imm); +%} + +instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) %{ match(Set cr (OverflowSubI op1 op2)); @@ -10544,10 +10853,10 @@ __ cmpw($op1$$Register, $op2$$Register); %} - ins_pipe(pipe_class_default); -%} - -instruct overflowSubI_reg_imm(rFlagsReg cr, iRegI op1, immIAddSub op2) + 
ins_pipe(icmp_reg_reg); +%} + +instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) %{ match(Set cr (OverflowSubI op1 op2)); @@ -10557,7 +10866,7 @@ __ cmpw($op1$$Register, $op2$$constant); %} - ins_pipe(pipe_class_default); + ins_pipe(icmp_reg_imm); %} instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) @@ -10570,7 +10879,7 @@ __ cmp($op1$$Register, $op2$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(icmp_reg_reg); %} instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2) @@ -10583,36 +10892,36 @@ __ cmp($op1$$Register, $op2$$constant); %} - ins_pipe(pipe_class_default); -%} - -instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegI op2) -%{ - match(Set cr (OverflowSubI zero op2)); - - format %{ "cmpw zr, $op2\t# overflow check int" %} - ins_cost(INSN_COST); - ins_encode %{ - __ cmpw(zr, $op2$$Register); - %} - - ins_pipe(pipe_class_default); -%} - -instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op2) -%{ - match(Set cr (OverflowSubL zero op2)); - - format %{ "cmp zr, $op2\t# overflow check long" %} - ins_cost(INSN_COST); - ins_encode %{ - __ cmp(zr, $op2$$Register); - %} - - ins_pipe(pipe_class_default); -%} - -instruct overflowMulI_reg(rFlagsReg cr, iRegI op1, iRegI op2) + ins_pipe(icmp_reg_imm); +%} + +instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1) +%{ + match(Set cr (OverflowSubI zero op1)); + + format %{ "cmpw zr, $op1\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmpw(zr, $op1$$Register); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1) +%{ + match(Set cr (OverflowSubL zero op1)); + + format %{ "cmp zr, $op1\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmp(zr, $op1$$Register); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) %{ match(Set cr (OverflowMulI op1 op2)); @@ -10630,10 +10939,10 @@ __ 
cmpw(rscratch1, 1); // 0x80000000 - 1 => VS %} - ins_pipe(pipe_class_default); -%} - -instruct overflowMulI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, rFlagsReg cr) + ins_pipe(pipe_slow); +%} + +instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr) %{ match(If cmp (OverflowMulI op1 op2)); predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow @@ -10652,7 +10961,7 @@ __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_serial); %} instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2) @@ -10675,7 +10984,7 @@ __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_slow); %} instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr) @@ -10699,7 +11008,7 @@ __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_serial); %} // ============================================================================ @@ -10716,7 +11025,7 @@ ins_encode(aarch64_enc_cmpw(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero) @@ -10730,7 +11039,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2) @@ -10744,7 +11053,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2) @@ -10758,7 +11067,7 @@ ins_encode(aarch64_enc_cmpw_imm(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} // Unsigned compare Instructions; really, same as signed compare @@ -10776,7 +11085,7 @@ ins_encode(aarch64_enc_cmpw(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} 
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero) @@ -10790,7 +11099,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2) @@ -10804,7 +11113,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2) @@ -10818,7 +11127,7 @@ ins_encode(aarch64_enc_cmpw_imm(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) @@ -10832,7 +11141,7 @@ ins_encode(aarch64_enc_cmp(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero) @@ -10846,7 +11155,7 @@ ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2) @@ -10860,7 +11169,7 @@ ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2) @@ -10874,7 +11183,7 @@ ins_encode(aarch64_enc_cmp_imm(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2) @@ -10888,7 +11197,7 @@ ins_encode(aarch64_enc_cmpp(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2) @@ -10902,7 +11211,7 @@ ins_encode(aarch64_enc_cmpn(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero) @@ -10916,7 +11225,7 @@ ins_encode(aarch64_enc_testp(op1)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero) @@ -10930,7 +11239,7 @@ 
ins_encode(aarch64_enc_testn(op1)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} // FP comparisons @@ -10965,6 +11274,7 @@ ins_pipe(pipe_class_compare); %} +// FROM HERE instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2) %{ @@ -11102,7 +11412,30 @@ %} -instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q, rFlagsReg cr) +// Manifest a CmpL result in an integer register. +// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0) +instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags) +%{ + match(Set dst (CmpL3 src1 src2)); + effect(KILL flags); + + ins_cost(INSN_COST * 6); + format %{ + "cmp $src1, $src2" + "csetw $dst, ne" + "cnegw $dst, lt" + %} + // format %{ "CmpL3 $dst, $src1, $src2" %} + ins_encode %{ + __ cmp($src1$$Register, $src2$$Register); + __ csetw($dst$$Register, Assembler::NE); + __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr) %{ match(Set dst (CmpLTMask p q)); effect(KILL cr); @@ -11120,10 +11453,10 @@ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); %} - ins_pipe(pipe_class_default); -%} - -instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegI src, immI0 zero, rFlagsReg cr) + ins_pipe(ialu_reg_reg); +%} + +instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{ match(Set dst (CmpLTMask src zero)); effect(KILL cr); @@ -11136,7 +11469,7 @@ __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // ============================================================================ @@ -11164,8 +11497,9 @@ Assembler::LT); %} - ins_pipe(pipe_class_compare); -%} + ins_pipe(ialu_reg_reg); +%} +// FROM HERE instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr) %{ @@ -11189,7 +11523,7 @@ Assembler::GT); %} - ins_pipe(pipe_class_compare); + ins_pipe(ialu_reg_reg); %} 
// ============================================================================ @@ -11207,7 +11541,7 @@ ins_encode(aarch64_enc_b(lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // Conditional Near Branch @@ -11228,7 +11562,7 @@ ins_encode(aarch64_enc_br_con(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch_cond); %} // Conditional Near Branch Unsigned @@ -11249,14 +11583,14 @@ ins_encode(aarch64_enc_br_conU(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch_cond); %} // Make use of CBZ and CBNZ. These instructions, as well as being // shorter than (cmp; branch), have the additional benefit of not // killing the flags. -instruct cmpI_imm0_branch(cmpOp cmp, iRegI op1, immI0 op2, label labl, rFlagsReg cr) %{ +instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{ match(If cmp (CmpI op1 op2)); predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne || n->in(1)->as_Bool()->_test._test == BoolTest::eq); @@ -11272,7 +11606,7 @@ else __ cbnzw($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{ @@ -11291,7 +11625,7 @@ else __ cbnz($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{ @@ -11310,7 +11644,7 @@ else __ cbnz($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} // Conditional Far Branch @@ -11331,7 +11665,7 @@ ins_encode(aarch64_enc_br_con(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // counted loop end branch near Unsigned @@ -11348,7 +11682,7 @@ ins_encode(aarch64_enc_br_conU(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // counted loop end branch far @@ -11370,7 +11704,7 @@ ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2)); - 
ins_pipe(pipe_class_compare); + ins_pipe(pipe_serial); %} instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) @@ -11383,7 +11717,7 @@ ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2)); - ins_pipe(pipe_class_compare); + ins_pipe(pipe_serial); %} @@ -11403,7 +11737,7 @@ ins_encode %{ __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); %} @@ -11430,6 +11764,8 @@ ins_pipe(pipe_class_call); %} +// TO HERE + // Call Java Static Instruction (method handle version) instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save) @@ -11563,7 +11899,7 @@ ins_encode( /*empty*/ ); - ins_pipe(pipe_class_default); + ins_pipe(pipe_class_empty); %} // Rethrow exception: The exception oop will come in the first @@ -11590,7 +11926,7 @@ ins_encode( aarch64_enc_ret() ); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // Die now. @@ -11662,6 +11998,44 @@ ins_pipe(pipe_class_memory); %} +instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2, + iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + -1, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, + immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2, + iRegI tmp3, iRegI tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + 
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) %{ @@ -11677,6 +12051,39 @@ ins_pipe(pipe_class_memory); %} +instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, + iRegP_R10 tmp, rFlagsReg cr) +%{ + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr); + + format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} + ins_encode %{ + __ char_arrays_equals($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +// encode char[] to byte[] in ISO_8859_1 +instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, + vRegD_V0 Vtmp1, vRegD_V1 Vtmp2, + vRegD_V2 Vtmp3, vRegD_V3 Vtmp4, + iRegI_R0 result, rFlagsReg cr) +%{ + match(Set result (EncodeISOArray src (Binary dst len))); + effect(USE_KILL src, USE_KILL dst, USE_KILL len, + KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); + + format %{ "Encode array $src,$dst,$len -> $result" %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister, + $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister); + %} + ins_pipe( pipe_class_memory ); +%} + // ============================================================================ // This name is KNOWN by the ADLC and cannot be changed. 
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type @@ -11696,7 +12103,1363 @@ ins_pipe(pipe_class_empty); %} - +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vector (32 bits) +instruct loadV4(vecD dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} + ins_encode( aarch64_enc_ldrvS(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Load vector (64 bits) +instruct loadV8(vecD dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} + ins_encode( aarch64_enc_ldrvD(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Load Vector (128 bits) +instruct loadV16(vecX dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} + ins_encode( aarch64_enc_ldrvQ(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (32 bits) +instruct storeV4(vecD src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strs $mem,$src\t# vector (32 bits)" %} + ins_encode( aarch64_enc_strvS(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (64 bits) +instruct storeV8(vecD src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strd $mem,$src\t# vector (64 bits)" %} + ins_encode( aarch64_enc_strvD(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (128 bits) +instruct storeV16(vecX src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format 
%{ "strq $mem,$src\t# vector (128 bits)" %} + ins_encode( aarch64_enc_strvQ(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +instruct replicate8B(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate16B(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate8B_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate16B_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(16B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4S(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate8S(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst 
(ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4S_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate8S_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2I(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4I(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2I_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(2I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4I_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4); + 
match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2L(vecX dst, iRegL src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2L_zero(vecX dst, immI0 zero) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector(4I)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2F(vecD dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4F(vecX dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2D(vecX dst, vRegD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ====================VECTOR 
ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +instruct vadd8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), 
__ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// 
--------------------------------- SUB -------------------------------------- + +instruct vsub8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + 
as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// 
--------------------------------- MUL -------------------------------------- + +instruct vmul4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + 
ins_pipe(pipe_class_default); +%} + +instruct vmul4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- DIV -------------------------------------- + +instruct vdiv2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vdiv4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vdiv2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + 
ins_pipe(pipe_class_default); +%} + +// --------------------------------- AND -------------------------------------- + +instruct vand8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vand16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- OR --------------------------------------- + +// Vector OR for 4- and 8-byte vectors (vecD operands, T8B arrangement); the format string names the emitted orr instruction. +instruct vor8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + ins_cost(INSN_COST); + format %{ "orr $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vor16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + ins_cost(INSN_COST); + format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct vxor8B(vecD dst, vecD src1, vecD src2) +%{ + 
predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + ins_cost(INSN_COST); + format %{ "xor $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vxor16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + ins_cost(INSN_COST); + format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ------------------------------ Shift --------------------------------------- + +instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{ + match(Set dst (LShiftCntV cnt)); + format %{ "dup $dst, $cnt\t# shift count (vecX)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount +instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{ + match(Set dst (RShiftCntV cnt)); + format %{ "dup $dst, $cnt\t# shift count (vecX)\n\tneg $dst, $dst\t T16B" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); + __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + 
ins_pipe(pipe_class_default); +%} + +instruct vsll16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (16B)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (16B)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + 
ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + 
match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (8H)" %} + ins_encode %{ + __ 
ushl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = 
(int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4S)" 
%} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (4S)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2S, + 
as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2D)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (2D)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + 
as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 63); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 63); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 63); + %} + ins_pipe(pipe_class_default); +%} //----------PEEPHOLE RULES----------------------------------------------------- // These must follow all instruction definitions as they use the names
--- a/src/cpu/aarch64/vm/aarch64_ad.m4 Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/aarch64_ad.m4 Fri Oct 02 04:37:30 2015 +0100 @@ -1,9 +1,36 @@ +dnl Copyright (c) 2014, Red Hat Inc. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. +dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +dnl +dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic +dnl and shift patterns used in aarch64.ad. +dnl // BEGIN This section of the file is automatically generated. 
Do not edit -------------- - +dnl +define(`ORL2I', `ifelse($1,I,orL2I)') +dnl define(`BASE_SHIFT_INSN', ` instruct $2$1_reg_$4_reg(iReg$1NoSp dst, - iReg$1 src1, iReg$1 src2, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI src3, rFlagsReg cr) %{ match(Set dst ($2$1 src1 ($4$1 src2 src3))); @@ -15,15 +42,15 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::$5, - $src3$$constant & 0x3f); + $src3$$constant & ifelse($1,I,0x1f,0x3f)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %}')dnl define(`BASE_INVERTED_INSN', ` instruct $2$1_reg_not_reg(iReg$1NoSp dst, - iReg$1 src1, iReg$1 src2, imm$1_M1 m1, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_M1 m1, rFlagsReg cr) %{ dnl This ifelse is because hotspot reassociates (xor (xor ..)..) dnl into this canonical form. @@ -40,12 +67,12 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}')dnl define(`INVERTED_SHIFT_INSN', ` instruct $2$1_reg_$4_not_reg(iReg$1NoSp dst, - iReg$1 src1, iReg$1 src2, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI src3, imm$1_M1 src4, rFlagsReg cr) %{ dnl This ifelse is because hotspot reassociates (xor (xor ..)..) dnl into this canonical form. 
@@ -60,14 +87,14 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::$5, - $src3$$constant & 0x3f); + $src3$$constant & ifelse($1,I,0x1f,0x3f)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %}')dnl define(`NOT_INSN', `instruct reg$1_not_reg(iReg$1NoSp dst, - iReg$1 src1, imm$1_M1 m1, + iReg$1`'ORL2I($1) src1, imm$1_M1 m1, rFlagsReg cr) %{ match(Set dst (Xor$1 src1 m1)); ins_cost(INSN_COST); @@ -80,7 +107,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %}')dnl dnl define(`BOTH_SHIFT_INSNS', @@ -88,7 +115,7 @@ BASE_SHIFT_INSN(L, $1, $2, $3, $4)')dnl dnl define(`BOTH_INVERTED_INSNS', -`BASE_INVERTED_INSN(I, $1, $2, $3, $4) +`BASE_INVERTED_INSN(I, $1, $2w, $3, $4) BASE_INVERTED_INSN(L, $1, $2, $3, $4)')dnl dnl define(`BOTH_INVERTED_SHIFT_INSNS', @@ -124,7 +151,7 @@ define(`BFM_INSN',` // Shift Left followed by Shift Right. // This idiom is used by the compiler for the i2b bytecode etc. -instruct $4$1(iReg$1NoSp dst, iReg$1 src, immI lshift_count, immI rshift_count) +instruct $4$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI lshift_count, immI rshift_count) %{ match(Set dst EXTEND($1, $3, src, lshift_count, rshift_count)); // Make sure we are not going to exceed what $4 can do. 
@@ -138,11 +165,11 @@ int s = $2 - lshift; int r = (rshift - lshift) & $2; __ $4(as_Register($dst$$reg), - as_Register($src$$reg), - r, s); + as_Register($src$$reg), + r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %}') BFM_INSN(L, 63, RShift, sbfm) BFM_INSN(I, 31, RShift, sbfmw) @@ -151,7 +178,7 @@ dnl // Bitfield extract with shift & mask define(`BFX_INSN', -`instruct $3$1(iReg$1NoSp dst, iReg$1 src, immI rshift, imm$1_bitmask mask) +`instruct $3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI rshift, imm$1_bitmask mask) %{ match(Set dst (And$1 ($2$1 src rshift) mask)); @@ -162,9 +189,9 @@ long mask = $mask$$constant; int width = exact_log2(mask+1); __ $3(as_Register($dst$$reg), - as_Register($src$$reg), rshift, width); + as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %}') BFX_INSN(I,URShift,ubfxw) BFX_INSN(L,URShift,ubfx) @@ -182,15 +209,15 @@ long mask = $mask$$constant; int width = exact_log2(mask+1); __ ubfx(as_Register($dst$$reg), - as_Register($src$$reg), rshift, width); + as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Rotations define(`EXTRACT_INSN', -`instruct extr$3$1(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, immI lshift, immI rshift, rFlagsReg cr) +`instruct extr$3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI lshift, immI rshift, rFlagsReg cr) %{ match(Set dst ($3$1 (LShift$1 src1 lshift) (URShift$1 src2 rshift))); predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & $2)); @@ -202,7 +229,7 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & $2); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} ')dnl EXTRACT_INSN(L, 63, Or, extr) @@ -212,7 +239,7 @@ define(`ROL_EXPAND', ` // $2 expander -instruct $2$1_rReg(iReg$1 dst, iReg$1 src, iRegI shift, rFlagsReg cr) +instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 
src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -221,14 +248,14 @@ ins_encode %{ __ subw(rscratch1, zr, as_Register($shift$$reg)); __ $3(as_Register($dst$$reg), as_Register($src$$reg), - rscratch1); + rscratch1); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %}')dnl define(`ROR_EXPAND', ` // $2 expander -instruct $2$1_rReg(iReg$1 dst, iReg$1 src, iRegI shift, rFlagsReg cr) +instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 src, iRegI shift, rFlagsReg cr) %{ effect(DEF dst, USE src, USE shift); @@ -236,12 +263,12 @@ ins_cost(INSN_COST); ins_encode %{ __ $3(as_Register($dst$$reg), as_Register($src$$reg), - as_Register($shift$$reg)); + as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %}')dnl define(ROL_INSN, ` -instruct $3$1_rReg_Var_C$2(iRegL dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr) +instruct $3$1_rReg_Var_C$2(iRegLNoSp dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr) %{ match(Set dst (Or$1 (LShift$1 src shift) (URShift$1 src (SubI c$2 shift)))); @@ -250,7 +277,7 @@ %} %}')dnl define(ROR_INSN, ` -instruct $3$1_rReg_Var_C$2(iRegL dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr) +instruct $3$1_rReg_Var_C$2(iRegLNoSp dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr) %{ match(Set dst (Or$1 (URShift$1 src shift) (LShift$1 src (SubI c$2 shift)))); @@ -274,7 +301,7 @@ // Add/subtract (extended) dnl ADD_SUB_EXTENDED(mode, size, add node, shift node, insn, shift type, wordsize define(`ADD_SUB_CONV', ` -instruct $3Ext$1(iReg$2NoSp dst, iReg$2 src1, iReg$1orL2I src2, rFlagsReg cr) +instruct $3Ext$1(iReg$2NoSp dst, iReg$2`'ORL2I($2) src1, iReg$1`'ORL2I($1) src2, rFlagsReg cr) %{ match(Set dst ($3$2 src1 (ConvI2L src2))); ins_cost(INSN_COST); @@ -284,13 +311,13 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$5); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}')dnl ADD_SUB_CONV(I,L,Add,add,sxtw); 
ADD_SUB_CONV(I,L,Sub,sub,sxtw); dnl define(`ADD_SUB_EXTENDED', ` -instruct $3Ext$1_$6(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, immI_`'eval($7-$2) lshift, immI_`'eval($7-$2) rshift, rFlagsReg cr) +instruct $3Ext$1_$6(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI_`'eval($7-$2) lshift, immI_`'eval($7-$2) rshift, rFlagsReg cr) %{ match(Set dst ($3$1 src1 EXTEND($1, $4, src2, lshift, rshift))); ins_cost(INSN_COST); @@ -300,7 +327,7 @@ __ $5(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$6); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}') ADD_SUB_EXTENDED(I,16,Add,RShift,add,sxth,32) ADD_SUB_EXTENDED(I,8,Add,RShift,add,sxtb,32) @@ -312,7 +339,7 @@ dnl dnl ADD_SUB_ZERO_EXTEND(mode, size, add node, insn, shift type) define(`ADD_SUB_ZERO_EXTEND', ` -instruct $3Ext$1_$5_and(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, imm$1_$2 mask, rFlagsReg cr) +instruct $3Ext$1_$5_and(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_$2 mask, rFlagsReg cr) %{ match(Set dst ($3$1 src1 (And$1 src2 mask))); ins_cost(INSN_COST); @@ -322,7 +349,7 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$5); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}') dnl ADD_SUB_ZERO_EXTEND(I,255,Add,addw,uxtb)
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -278,14 +278,14 @@ // LoadStoreExclusiveOp __ stxr(r20, r21, r2); // stxr w20, x21, [x2] - __ stlxr(r7, r29, r7); // stlxr w7, x29, [x7] + __ stlxr(r5, r29, r7); // stlxr w5, x29, [x7] __ ldxr(r5, r16); // ldxr x5, [x16] __ ldaxr(r27, r29); // ldaxr x27, [x29] __ stlr(r0, r29); // stlr x0, [x29] __ ldar(r21, r28); // ldar x21, [x28] // LoadStoreExclusiveOp - __ stxrw(r24, r24, r7); // stxr w24, w24, [x7] + __ stxrw(r21, r24, r7); // stxr w21, w24, [x7] __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28] __ ldxrw(r21, r6); // ldxr w21, [x6] __ ldaxrw(r15, r30); // ldaxr w15, [x30] @@ -312,11 +312,11 @@ __ ldxp(r8, r2, r19); // ldxp x8, x2, [x19] __ ldaxp(r7, r19, r14); // ldaxp x7, x19, [x14] __ stxp(r8, r27, r28, r5); // stxp w8, x27, x28, [x5] - __ stlxp(r6, r8, r14, r6); // stlxp w6, x8, x14, [x6] + __ stlxp(r5, r8, r14, r6); // stlxp w5, x8, x14, [x6] // LoadStoreExclusiveOp __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] - __ ldaxpw(r14, r14, r15); // ldaxp w14, w14, [x15] + __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15] __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10] __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18] @@ -785,12 +785,12 @@ 24c: d61f0040 br x2 250: d63f00a0 blr x5 254: c8147c55 stxr w20, x21, [x2] - 258: c807fcfd stlxr w7, x29, [x7] + 258: c805fcfd stlxr w5, x29, [x7] 25c: c85f7e05 ldxr x5, [x16] 260: c85fffbb ldaxr x27, [x29] 264: c89fffa0 stlr x0, [x29] 268: c8dfff95 ldar x21, [x28] - 26c: 88187cf8 stxr w24, w24, [x7] + 26c: 88157cf8 stxr w21, w24, [x7] 270: 8815ff9a stlxr w21, w26, [x28] 274: 885f7cd5 ldxr w21, [x6] 278: 885fffcf ldaxr w15, [x30] @@ -811,9 +811,9 @@ 2b4: c87f0a68 ldxp x8, x2, [x19] 2b8: c87fcdc7 ldaxp x7, x19, [x14] 2bc: c82870bb stxp w8, x27, x28, [x5] - 2c0: c826b8c8 stlxp w6, x8, x14, [x6] + 2c0: c825b8c8 stlxp w5, x8, x14, [x6] 2c4: 887f12d9 
ldxp w25, w4, [x22] - 2c8: 887fb9ee ldaxp w14, w14, [x15] + 2c8: 887fb9ed ldaxp w13, w14, [x15] 2cc: 8834215a stxp w20, w26, w8, [x10] 2d0: 8837ca52 stlxp w23, w18, w18, [x18] 2d4: f806317e str x30, [x11,#99] @@ -1101,14 +1101,14 @@ 0xd4063721, 0xd4035082, 0xd400bfe3, 0xd4282fc0, 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, - 0xd63f00a0, 0xc8147c55, 0xc807fcfd, 0xc85f7e05, - 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88187cf8, + 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, + 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8, 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, - 0xc826b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a, + 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a, 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3,
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -467,6 +467,11 @@ case base_plus_offset: { unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } unsigned mask = (1 << size) - 1; if (_offset < 0 || _offset & mask) { @@ -487,6 +492,11 @@ i->rf(_index, 16); i->f(_ext.option(), 15, 13); unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } if (size == 0) // It's a byte i->f(_ext.shift() >= 0, 12); else { @@ -1086,11 +1096,13 @@ #define INSN4(NAME, sz, op, o0) /* Four registers */ \ void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ } #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ + guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ } @@ -1102,6 +1114,7 @@ #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ void NAME(Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rt1 != Rt2, "unpredictable instruction"); \ load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ } @@ -1187,6 +1200,7 @@ INSN(ldrs, 0b00, 1); INSN(ldrd, 0b01, 1); + INSN(ldrq, 0x10, 1); #undef INSN @@ -1248,6 +1262,8 @@ INSN(ldps, 0b00, 0b101, 1, 1, false); INSN(stpd, 0b01, 0b101, 1, 0, false); INSN(ldpd, 0b01, 0b101, 1, 1, false); + INSN(stpq, 0b10, 0b101, 1, 0, false); + INSN(ldpq, 0b10, 0b101, 1, 1, false); #undef INSN @@ -1320,6 +1336,8 @@ INSN(strs, 0b10, 0b00); INSN(ldrd, 0b11, 0b01); INSN(ldrs, 0b10, 0b01); + INSN(strq, 0b00, 0b10); + 
INSN(ldrq, 0x00, 0b11); #undef INSN @@ -1454,7 +1472,7 @@ f(op, 31, 29); f(0b11010000, 28, 21); f(0b000000, 15, 10); - rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); + zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); } #define INSN(NAME, op) \ @@ -1873,9 +1891,18 @@ }; enum SIMD_RegVariant { - S32, D64, Q128 + B, H, S, D, Q }; +#define INSN(NAME, op) \ + void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \ + ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \ + } \ + + INSN(ldr, 1); + INSN(str, 0); + +#undef INSN private: @@ -1982,14 +2009,120 @@ rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ } - INSN(eor, 0b101110001); - INSN(orr, 0b001110101); + INSN(eor, 0b101110001); + INSN(orr, 0b001110101); INSN(andr, 0b001110001); - INSN(bic, 0b001110011); - INSN(bif, 0b101110111); - INSN(bit, 0b101110101); - INSN(bsl, 0b101110011); - INSN(orn, 0b001110111); + INSN(bic, 0b001110011); + INSN(bif, 0b101110111); + INSN(bit, 0b101110101); + INSN(bsl, 0b101110011); + INSN(orn, 0b001110111); + +#undef INSN + +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ + f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10); \ + rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(addv, 0, 0b100001); + INSN(subv, 1, 0b100001); + INSN(mulv, 0, 0b100111); + INSN(sshl, 0, 0b010001); + INSN(ushl, 1, 0b010001); + +#undef INSN + +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ + f((int)T >> 1, 23, 22), f(opc2, 21, 10); \ + rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(absr, 0, 0b100000101110); + INSN(negr, 1, 0b100000101110); + INSN(notr, 1, 0b100000010110); + INSN(addv, 0, 0b110001101110); + INSN(cls, 0, 0b100000010010); + INSN(clz, 1, 0b100000010010); + INSN(cnt, 0, 0b100000010110); + +#undef INSN + +#define 
INSN(NAME, op0, cmode0) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \ + unsigned cmode = cmode0; \ + unsigned op = op0; \ + starti; \ + assert(lsl == 0 || \ + ((T == T4H || T == T8H) && lsl == 8) || \ + ((T == T2S || T == T4S) && ((lsl >> 3) < 4)), "invalid shift"); \ + cmode |= lsl >> 2; \ + if (T == T4H || T == T8H) cmode |= 0b1000; \ + if (!(T == T4H || T == T8H || T == T2S || T == T4S)) { \ + assert(op == 0 && cmode0 == 0, "must be MOVI"); \ + cmode = 0b1110; \ + if (T == T1D || T == T2D) op = 1; \ + } \ + f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \ + f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \ + rf(Vd, 0); \ + } + + INSN(movi, 0, 0); + INSN(orri, 0, 1); + INSN(mvni, 1, 0); + INSN(bici, 1, 1); + +#undef INSN + +#define INSN(NAME, op1, op2, op3) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ + f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \ + f(T==T2D ? 
1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(fadd, 0, 0, 0b110101); + INSN(fdiv, 1, 0, 0b111111); + INSN(fmul, 1, 0, 0b110111); + INSN(fsub, 0, 1, 0b110101); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T4S, "arrangement must be T4S"); \ + f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(sha1c, 0b000000); + INSN(sha1m, 0b001000); + INSN(sha1p, 0b000100); + INSN(sha1su0, 0b001100); + INSN(sha256h2, 0b010100); + INSN(sha256h, 0b010000); + INSN(sha256su1, 0b011000); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + assert(T == T4S, "arrangement must be T4S"); \ + f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(sha1h, 0b000010); + INSN(sha1su1, 0b000110); + INSN(sha256su0, 0b001010); #undef INSN @@ -2006,19 +2139,40 @@ #undef INSN - void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){ + void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) { + starti; + assert(T != Q, "invalid register variant"); + f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15); + f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); + } + + void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { starti; - /* The encodings for the immh:immb fields (bits 22:16) are - * 0001 xxx 8B/16B, shift = xxx - * 001x xxx 4H/8H, shift = xxxx - * 01xx xxx 2S/4S, shift = xxxxx - * 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) - */ - assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); - f(0, 31), f(T & 1, 30), f(0b0011110, 29, 23), f((1 << ((T>>1)+3))|shift, 22, 16); - f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0); + f(0, 31), f(T==D ? 
1:0, 30), f(0b001110000, 29, 21); + f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10); + rf(Vn, 5), rf(Rd, 0); } +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ + starti; \ + /* The encodings for the immh:immb fields (bits 22:16) are \ + * 0001 xxx 8B/16B, shift = xxx \ + * 001x xxx 4H/8H, shift = xxxx \ + * 01xx xxx 2S/4S, shift = xxxxx \ + * 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \ + */ \ + assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \ + f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \ + f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(shl, 0, 0b010101); + INSN(sshr, 0, 0b000001); + INSN(ushr, 1, 0b000001); + +#undef INSN + void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { starti; /* The encodings for the immh:immb fields (bits 22:16) are @@ -2073,6 +2227,15 @@ pmull(Vd, Ta, Vn, Vm, Tb); } + void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) { + starti; + int size_b = (int)Tb >> 1; + int size_a = (int)Ta >> 1; + assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier"); + f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22); + f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0); + } + void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { starti; @@ -2082,6 +2245,23 @@ rf(Vn, 5), rf(Vd, 0); } + void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs) + { + starti; + assert(T != T1D, "reserved encoding"); + f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21); + f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), rf(Xs, 5), rf(Vd, 0); + } + + void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0) + { + starti; + assert(T != T1D, "reserved encoding"); + f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21); + f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); + f(0b000001, 15, 
10), rf(Vn, 5), rf(Vd, 0); + } + // CRC32 instructions #define INSN(NAME, sf, sz) \ void NAME(Register Rd, Register Rn, Register Rm) { \
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -237,7 +237,7 @@ // build frame ciMethod* m = compilation()->method(); - __ build_frame(initial_frame_size_in_bytes()); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); // OSR buffer is // @@ -354,7 +354,7 @@ // This specifies the rsp decrement needed to build the frame -int LIR_Assembler::initial_frame_size_in_bytes() { +int LIR_Assembler::initial_frame_size_in_bytes() const { // if rounding, must let FrameMap know! // The frame_map records size in slots (32bit word) @@ -518,6 +518,7 @@ __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset())); __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub)); __ blrt(rscratch1, 1, 0, 1); + __ maybe_isb(); __ pop(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1 __ mov(rscratch1, r0); __ pop(0x3, sp); // r0 & r1 @@ -557,9 +558,10 @@ assert(os::is_poll_address(polling_page), "should be"); unsigned long off; __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off); + assert(off == 0, "must be"); add_debug_info_for_branch(info); // This isn't just debug info: // it's the oop map - __ ldrw(zr, Address(rscratch1, off)); + __ read_polling_page(rscratch1, relocInfo::poll_type); } else { poll_for_safepoint(relocInfo::poll_type, info); } @@ -659,6 +661,11 @@ } } break; + case T_ADDRESS: + { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } case T_INT: case T_FLOAT: { @@ -899,7 +906,7 @@ if (type == T_ARRAY || type == T_OBJECT) { __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); __ verify_oop(dest->as_register()); - } else if (type == T_METADATA || type == T_DOUBLE) { + } else if (type == T_METADATA) { __ ldr(dest->as_register(), 
frame_map()->address_for_slot(src->single_stack_ix())); } else { __ ldrw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); @@ -952,8 +959,9 @@ } void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + LIR_Opr temp; - if (type == T_LONG) + if (type == T_LONG || type == T_DOUBLE) temp = FrameMap::rscratch1_long_opr; else temp = FrameMap::rscratch1_opr; @@ -2814,7 +2822,6 @@ __ stop("unexpected profiling mismatch"); __ bind(ok); - __ pop(tmp); } #endif // first time here. Set profile type. @@ -2913,6 +2920,7 @@ if (info != NULL) { add_call_info_here(info); } + __ maybe_isb(); } void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { @@ -3000,6 +3008,7 @@ void LIR_Assembler::peephole(LIR_List *lir) { +#if 0 if (tableswitch_count >= max_tableswitches) return; @@ -3124,6 +3133,7 @@ next_state: ; } +#endif } void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
--- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -1089,7 +1089,7 @@ #endif CodeEmitInfo* info = state_for(x, x->state()); LIR_Opr reg = result_register_for(x->type()); - new_instance(reg, x->klass(), + new_instance(reg, x->klass(), x->is_unresolved(), FrameMap::r2_oop_opr, FrameMap::r5_oop_opr, FrameMap::r4_oop_opr,
--- a/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -29,1218 +29,6 @@ #include "c1/c1_LinearScan.hpp" #include "utilities/bitMap.inline.hpp" - -//---------------------------------------------------------------------- -// Allocation of FPU stack slots (Intel x86 only) -//---------------------------------------------------------------------- - void LinearScan::allocate_fpu_stack() { - // First compute which FPU registers are live at the start of each basic block - // (To minimize the amount of work we have to do if we have to merge FPU stacks) - if (ComputeExactFPURegisterUsage) { - Interval* intervals_in_register, *intervals_in_memory; - create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL); - - // ignore memory intervals by overwriting intervals_in_memory - // the dummy interval is needed to enforce the walker to walk until the given id: - // without it, the walker stops when the unhandled-list is empty -> live information - // beyond this point would be incorrect. - Interval* dummy_interval = new Interval(any_reg); - dummy_interval->add_range(max_jint - 2, max_jint - 1); - dummy_interval->set_next(Interval::end()); - intervals_in_memory = dummy_interval; - - IntervalWalker iw(this, intervals_in_register, intervals_in_memory); - - const int num_blocks = block_count(); - for (int i = 0; i < num_blocks; i++) { - BlockBegin* b = block_at(i); - - // register usage is only needed for merging stacks -> compute only - // when more than one predecessor. - // the block must not have any spill moves at the beginning (checked by assertions) - // spill moves would use intervals that are marked as handled and so the usage bit - // would been set incorrectly - - // NOTE: the check for number_of_preds > 1 is necessary. A block with only one - // predecessor may have spill moves at the begin of the block. 
- // If an interval ends at the current instruction id, it is not possible - // to decide if the register is live or not at the block begin -> the - // register information would be incorrect. - if (b->number_of_preds() > 1) { - int id = b->first_lir_instruction_id(); - BitMap regs(FrameMap::nof_fpu_regs); - regs.clear(); - - iw.walk_to(id); // walk after the first instruction (always a label) of the block - assert(iw.current_position() == id, "did not walk completely to id"); - - // Only consider FPU values in registers - Interval* interval = iw.active_first(fixedKind); - while (interval != Interval::end()) { - int reg = interval->assigned_reg(); - assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register"); - assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)"); - assert(interval->from() <= id && id < interval->to(), "interval out of range"); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print(); - } -#endif - - regs.set_bit(reg - pd_first_fpu_reg); - interval = interval->next(); - } - - b->set_fpu_register_usage(regs); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->print_cr(""); - } -#endif - } - } - } - -#ifndef TARGET_ARCH_aarch64 - FpuStackAllocator alloc(ir()->compilation(), this); - _fpu_stack_allocator = &alloc; - alloc.allocate(); - _fpu_stack_allocator = NULL; -#endif -} - - -FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator) - : _compilation(compilation) - , _lir(NULL) - , _pos(-1) - , _allocator(allocator) - , _sim(compilation) - , _temp_sim(compilation) -{} - -void FpuStackAllocator::allocate() { - int num_blocks = allocator()->block_count(); - for (int i = 0; i < num_blocks; i++) { - // Set up to process block - BlockBegin* block = allocator()->block_at(i); - 
intArray* fpu_stack_state = block->fpu_stack_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- Begin of new Block %d -------", block->block_id()); - } -#endif - - assert(fpu_stack_state != NULL || - block->end()->as_Base() != NULL || - block->is_set(BlockBegin::exception_entry_flag), - "FPU stack state must be present due to linear-scan order for FPU stack allocation"); - // note: exception handler entries always start with an empty fpu stack - // because stack merging would be too complicated - - if (fpu_stack_state != NULL) { - sim()->read_state(fpu_stack_state); - } else { - sim()->clear(); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Reading FPU state for block %d:", block->block_id()); - sim()->print(); - tty->cr(); - } -#endif - - allocate_block(block); - CHECK_BAILOUT(); - } -} - -void FpuStackAllocator::allocate_block(BlockBegin* block) { - bool processed_merge = false; - LIR_OpList* insts = block->lir()->instructions_list(); - set_lir(block->lir()); - set_pos(0); - - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - _debug_information_computed = false; - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - LIR_OpBranch* branch = op->as_OpBranch(); - LIR_Op1* op1 = op->as_Op1(); - LIR_Op2* op2 = op->as_Op2(); - LIR_OpCall* opCall = op->as_OpCall(); - - if (branch != NULL && branch->block() != NULL) { - if (!processed_merge) { - // propagate stack at first branch to a successor - processed_merge = true; - bool required_merge = merge_fpu_stack_with_successors(block); - - assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches"); - } - - } else if (op1 != NULL) { - handle_op1(op1); - } else if (op2 != NULL) { - handle_op2(op2); - } else if (opCall != NULL) { - handle_opCall(opCall); - } - - 
compute_debug_information(op); - - set_pos(1 + pos()); - } - - // Propagate stack when block does not end with branch - if (!processed_merge) { - merge_fpu_stack_with_successors(block); - } -} - - -void FpuStackAllocator::compute_debug_information(LIR_Op* op) { - if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) { - visitor.visit(op); - - // exception handling - if (allocator()->compilation()->has_exception_handlers()) { - XHandlers* xhandlers = visitor.all_xhandler(); - int n = xhandlers->length(); - for (int k = 0; k < n; k++) { - allocate_exception_handler(xhandlers->handler_at(k)); - } - } else { - assert(visitor.all_xhandler()->length() == 0, "missed exception handler"); - } - - // compute debug information - int n = visitor.info_count(); - assert(n > 0, "should not visit operation otherwise"); - - for (int j = 0; j < n; j++) { - CodeEmitInfo* info = visitor.info_at(j); - // Compute debug information - allocator()->compute_debug_info(info, op->id()); - } - } - _debug_information_computed = true; -} - -void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) { - if (!sim()->is_empty()) { - LIR_List* old_lir = lir(); - int old_pos = pos(); - intArray* old_state = sim()->write_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- begin of exception handler -------"); - } -#endif - - if (xhandler->entry_code() == NULL) { - // need entry code to clear FPU stack - LIR_List* entry_code = new LIR_List(_compilation); - entry_code->jump(xhandler->entry_block()); - xhandler->set_entry_code(entry_code); - } - - LIR_OpList* insts = xhandler->entry_code()->instructions_list(); - set_lir(xhandler->entry_code()); - set_pos(0); - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - switch (op->code()) { - case lir_move: - 
assert(op->as_Op1() != NULL, "must be LIR_Op1"); - assert(pos() != insts->length() - 1, "must not be last operation"); - - handle_op1((LIR_Op1*)op); - break; - - case lir_branch: - assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch"); - assert(pos() == insts->length() - 1, "must be last operation"); - - // remove all remaining dead registers from FPU stack - clear_fpu_stack(LIR_OprFact::illegalOpr); - break; - - default: - // other operations not allowed in exception entry code - ShouldNotReachHere(); - } - - set_pos(pos() + 1); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- end of exception handler -------"); - } -#endif - - set_lir(old_lir); - set_pos(old_pos); - sim()->read_state(old_state); - } -} - - -int FpuStackAllocator::fpu_num(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo(); -} - -int FpuStackAllocator::tos_offset(LIR_Opr opr) { - return sim()->offset_from_tos(fpu_num(opr)); -} - - -LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - - int stack_offset = tos_offset(opr); - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), "shouldn't call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - -LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top"); - - int stack_offset = 0; - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), "shouldn't 
call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - - - -void FpuStackAllocator::insert_op(LIR_Op* op) { - lir()->insert_before(pos(), op); - set_pos(1 + pos()); -} - - -void FpuStackAllocator::insert_exchange(int offset) { - if (offset > 0) { - LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fxch_op); - sim()->swap(offset); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", sim()->get_slot(0)); sim()->print(); tty->cr(); - } -#endif - - } -} - -void FpuStackAllocator::insert_exchange(LIR_Opr opr) { - insert_exchange(tos_offset(opr)); -} - - -void FpuStackAllocator::insert_free(int offset) { - // move stack slot to the top of stack and then pop it - insert_exchange(offset); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - insert_op(fpop); - sim()->pop(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted pop New state: "); sim()->print(); tty->cr(); - } -#endif -} - - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) { - if (sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) { - if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) { - int offset = tos_offset(from); - LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fld); - - sim()->push(fpu_num(to)); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted copy (%d -> %d) New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr(); - } -#endif -} - -void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) { - sim()->rename(fpu_num(from), fpu_num(to)); -} - -void FpuStackAllocator::do_push(LIR_Opr 
opr) { - sim()->push(fpu_num(opr)); -} - -void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - if (opr->is_last_use()) { - op->set_fpu_pop_count(1); - sim()->pop(); - } -} - -void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - op->set_fpu_pop_count(1); - sim()->pop(); -} - -void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) { - int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0); - while (sim()->stack_size() > result_stack_size) { - assert(!sim()->slot_is_empty(0), "not allowed"); - - if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) { - insert_free(0); - } else { - // move "preserve" to bottom of stack so that all other stack slots can be popped - insert_exchange(sim()->stack_size() - 1); - } - } + // No FPU stack on AArch64 } - - -void FpuStackAllocator::handle_op1(LIR_Op1* op1) { - LIR_Opr in = op1->in_opr(); - LIR_Opr res = op1->result_opr(); - - LIR_Opr new_in = in; // new operands relative to the actual fpu stack top - LIR_Opr new_res = res; - - // Note: this switch is processed for all LIR_Op1, regardless if they have FPU-arguments, - // so checks for is_float_kind() are necessary inside the cases - switch (op1->code()) { - - case lir_return: { - // FPU-Stack must only contain the (optional) fpu return value. 
- // All remaining dead values are popped from the stack - // If the input operand is a fpu-register, it is exchanged to the bottom of the stack - - clear_fpu_stack(in); - if (in->is_fpu_register() && !in->is_xmm_register()) { - new_in = to_fpu_stack_top(in); - } - - break; - } - - case lir_move: { - if (in->is_fpu_register() && !in->is_xmm_register()) { - if (res->is_xmm_register()) { - // move from fpu register to xmm register (necessary for operations that - // are not available in the SSE instruction set) - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from fpu-register to fpu-register: - // * input and result register equal: - // nothing to do - // * input register is last use: - // rename the input register to result register -> input register - // not present on fpu-stack afterwards - // * input register not last use: - // duplicate input register to result register to preserve input - // - // Note: The LIR-Assembler does not produce any code for fpu register moves, - // so input and result stack index must be equal - - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - } - new_in = to_fpu_stack(res); - new_res = new_in; - - } else { - // move from fpu-register to memory - // input operand must be on top of stack - - insert_exchange(in); - - // create debug information here because afterwards the register may have been popped - compute_debug_information(op1); - - new_in = to_fpu_stack_top(in); - pop_if_last_use(op1, in); - } - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from memory/constant to fpu register - // result is pushed on the stack - - insert_free_if_dead(res); - - // create debug information before register is pushed - compute_debug_information(op1); - 
- do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - } - - case lir_neg: { - if (in->is_fpu_register() && !in->is_xmm_register()) { - assert(res->is_fpu_register() && !res->is_xmm_register(), "must be"); - assert(in->is_last_use(), "old value gets destroyed"); - - insert_free_if_dead(res, in); - insert_exchange(in); - new_in = to_fpu_stack_top(in); - - do_rename(in, res); - new_res = to_fpu_stack_top(res); - } - break; - } - - case lir_convert: { - Bytecodes::Code bc = op1->as_OpConvert()->bytecode(); - switch (bc) { - case Bytecodes::_d2f: - case Bytecodes::_f2d: - assert(res->is_fpu_register(), "must be"); - assert(in->is_fpu_register(), "must be"); - - if (!in->is_xmm_register() && !res->is_xmm_register()) { - // this is quite the same as a move from fpu-register to fpu-register - // Note: input and result operands must have different types - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - new_in = to_fpu_stack(in); - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - new_in = to_fpu_stack(in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - new_in = to_fpu_stack_top(in, true); - } - new_res = to_fpu_stack(res); - } - - break; - - case Bytecodes::_i2f: - case Bytecodes::_l2f: - case Bytecodes::_i2d: - case Bytecodes::_l2d: - assert(res->is_fpu_register(), "must be"); - if (!res->is_xmm_register()) { - insert_free_if_dead(res); - do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - - case Bytecodes::_f2i: - case Bytecodes::_d2i: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - - // TODO: update registes of stub - } - break; - - case Bytecodes::_f2l: - case Bytecodes::_d2l: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - } - break; - - case Bytecodes::_i2l: - case Bytecodes::_l2i: - 
case Bytecodes::_i2b: - case Bytecodes::_i2c: - case Bytecodes::_i2s: - // no fpu operands - break; - - default: - ShouldNotReachHere(); - } - break; - } - - case lir_roundfp: { - assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register"); - assert(res->is_stack(), "result must be on stack"); - - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_if_last_use(op1, in); - break; - } - - default: { - assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation"); - } - } - - op1->set_in_opr(new_in); - op1->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_op2(LIR_Op2* op2) { - LIR_Opr left = op2->in_opr1(); - if (!left->is_float_kind()) { - return; - } - if (left->is_xmm_register()) { - return; - } - - LIR_Opr right = op2->in_opr2(); - LIR_Opr res = op2->result_opr(); - LIR_Opr new_left = left; // new operands relative to the actual fpu stack top - LIR_Opr new_right = right; - LIR_Opr new_res = res; - - assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers"); - - switch (op2->code()) { - case lir_cmp: - case lir_cmp_fd2i: - case lir_ucmp_fd2i: - case lir_assert: { - assert(left->is_fpu_register(), "invalid LIR"); - assert(right->is_fpu_register(), "invalid LIR"); - - // the left-hand side must be on top of stack. 
- // the right-hand side is never popped, even if is_last_use is set - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - pop_if_last_use(op2, left); - break; - } - - case lir_mul_strictfp: - case lir_div_strictfp: { - assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot"); - insert_free_if_dead(op2->tmp1_opr()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - // fall-through: continue with the normal handling of lir_mul and lir_div - } - case lir_add: - case lir_sub: - case lir_mul: - case lir_div: { - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // either the left-hand or the right-hand side must be on top of stack - // (if right is not a register, left must be on top) - if (!right->is_fpu_register()) { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - } else { - // no exchange necessary if right is alredy on top of stack - if (tos_offset(right) == 0) { - new_left = to_fpu_stack(left); - new_right = to_fpu_stack_top(right); - } else { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - } - - if (right->is_last_use()) { - op2->set_fpu_pop_count(1); - - if (tos_offset(right) == 0) { - sim()->pop(); - } else { - // if left is on top of stack, the result is placed in the stack - // slot of right, so a renaming from right to res is necessary - assert(tos_offset(left) == 0, "must be"); - sim()->pop(); - do_rename(right, res); - } - } - } - new_res = to_fpu_stack(res); - - break; - } - - case lir_rem: { - assert(left->is_fpu_register(), "must be"); - assert(right->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // Must bring both operands to top of stack with following operand ordering: - // * fpu stack before rem: ... 
right left - // * fpu stack after rem: ... left - if (tos_offset(right) != 1) { - insert_exchange(right); - insert_exchange(1); - } - insert_exchange(left); - assert(tos_offset(right) == 1, "check"); - assert(tos_offset(left) == 0, "check"); - - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - - op2->set_fpu_pop_count(1); - sim()->pop(); - do_rename(right, res); - - new_res = to_fpu_stack_top(res); - break; - } - - case lir_abs: - case lir_sqrt: { - // Right argument appears to be unused - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_last_use(), "old value gets destroyed"); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - break; - } - - case lir_log: - case lir_log10: { - // log and log10 need one temporary fpu stack slot, so - // there is one temporary registers stored in temp of the - // operation. the stack allocator must guarantee that the stack - // slots are really free, otherwise there might be a stack - // overflow. - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(op2->tmp1_opr()->is_fpu_register(), "must be"); - - insert_free_if_dead(op2->tmp1_opr()); - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - break; - } - - - case lir_tan: - case lir_sin: - case lir_cos: - case lir_exp: { - // sin, cos and exp need two temporary fpu stack slots, so there are two temporary - // registers (stored in right and temp of the operation). 
- // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - // assert(left->is_last_use(), "old value gets destroyed"); - assert(right->is_fpu_register(), "right is used as the first temporary register"); - assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); - - insert_free_if_dead(right); - insert_free_if_dead(op2->tmp1_opr()); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); - break; - } - - case lir_pow: { - // pow needs two temporary fpu stack slots, so there are two temporary - // registers (stored in tmp1 and tmp2 of the operation). - // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. 
- assert(left->is_fpu_register(), "must be"); - assert(right->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - - assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register"); - assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers"); - - insert_free_if_dead(op2->tmp1_opr()); - insert_free_if_dead(op2->tmp2_opr()); - - // Must bring both operands to top of stack with following operand ordering: - // * fpu stack before pow: ... right left - // * fpu stack after pow: ... 
left - - insert_free_if_dead(res, right); - - if (tos_offset(right) != 1) { - insert_exchange(right); - insert_exchange(1); - } - insert_exchange(left); - assert(tos_offset(right) == 1, "check"); - assert(tos_offset(left) == 0, "check"); - - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); - - sim()->pop(); - - do_rename(right, res); - - new_res = to_fpu_stack_top(res); - break; - } - - default: { - assert(false, "missed a fpu-operation"); - } - } - - op2->set_in_opr1(new_left); - op2->set_in_opr2(new_right); - op2->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) { - LIR_Opr res = opCall->result_opr(); - - // clear fpu-stack before call - // it may contain dead values that could not have been remved by previous operations - clear_fpu_stack(LIR_OprFact::illegalOpr); - assert(sim()->is_empty(), "fpu stack must be empty now"); - - // compute debug information before (possible) fpu result is pushed - compute_debug_information(opCall); - - if (res->is_fpu_register() && !res->is_xmm_register()) { - do_push(res); - opCall->set_result_opr(to_fpu_stack_top(res)); - } -} - -#ifndef PRODUCT -void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) { - switch (op->code()) { - case lir_24bit_FPU: - case lir_reset_FPU: - case lir_ffree: - assert(false, "operations not allowed in lir. 
If one of these operations is needed, check if they have fpu operands"); - break; - - case lir_fpop_raw: - case lir_fxch: - case lir_fld: - assert(false, "operations only inserted by FpuStackAllocator"); - break; - } -} -#endif - - -void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) { - LIR_Op1* move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset()); - - instrs->instructions_list()->push(move); - - cur_sim->push(reg); - move->set_result_opr(to_fpu_stack(move->result_opr())); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Added new register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) { - assert(slot > 0, "no exchange necessary"); - - LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot)); - instrs->instructions_list()->push(fxch); - cur_sim->swap(slot); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) { - int reg = cur_sim->get_slot(0); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - instrs->instructions_list()->push(fpop); - cur_sim->pop(reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Removed register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) { - int reg = cur_sim->get_slot(change_slot); - - for (int slot = start_slot; slot >= 0; slot--) { - int new_reg = sux_sim->get_slot(slot); - - if (!cur_sim->contains(new_reg)) { - cur_sim->set_slot(change_slot, new_reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Renamed register %d to %d New state: ", reg, new_reg); 
cur_sim->print(); tty->cr(); - } - #endif - - return true; - } - } - return false; -} - - -void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); sux_sim->print(); tty->cr(); - } - - int slot; - for (slot = 0; slot < cur_sim->stack_size(); slot++) { - assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm"); - } - for (slot = 0; slot < sux_sim->stack_size(); slot++) { - assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm"); - } -#endif - - // size difference between cur and sux that must be resolved by adding or removing values form the stack - int size_diff = cur_sim->stack_size() - sux_sim->stack_size(); - - if (!ComputeExactFPURegisterUsage) { - // add slots that are currently free, but used in successor - // When the exact FPU register usage is computed, the stack does - // not contain dead values at merging -> no values must be added - - int sux_slot = sux_sim->stack_size() - 1; - while (size_diff < 0) { - assert(sux_slot >= 0, "slot out of bounds -> error in algorithm"); - - int reg = sux_sim->get_slot(sux_slot); - if (!cur_sim->contains(reg)) { - merge_insert_add(instrs, cur_sim, reg); - size_diff++; - - if (sux_slot + size_diff != 0) { - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - } - } - sux_slot--; - } - } - - assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now"); - assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be"); - - // stack merge algorithm: - // 1) as long as the current stack top is not in the right location (that meens - // it should not be on the stack top), exchange it into the right location - // 2) if the stack top is right, but the remaining stack is not ordered correctly, - // the stack top is exchanged away to get another value on top -> - // 
now step 1) can be continued - // the stack can also contain unused items -> these items are removed from stack - - int finished_slot = sux_sim->stack_size() - 1; - while (finished_slot >= 0 || size_diff > 0) { - while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) { - int reg = cur_sim->get_slot(0); - if (sux_sim->contains(reg)) { - int sux_slot = sux_sim->offset_from_tos(reg); - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - - } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) { - assert(size_diff > 0, "must be"); - - merge_insert_pop(instrs, cur_sim); - size_diff--; - } - assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed"); - } - - while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) { - finished_slot--; - } - - if (finished_slot >= 0) { - int reg = cur_sim->get_slot(finished_slot); - - if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) { - assert(sux_sim->contains(reg) || size_diff > 0, "must be"); - merge_insert_xchg(instrs, cur_sim, finished_slot); - } - assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed"); - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); sux_sim->print(); tty->cr(); - tty->cr(); - } -#endif - assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now"); -} - - -void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - } -#endif - - int slot = 0; - while (slot < cur_sim->stack_size()) { - int reg = cur_sim->get_slot(slot); - if 
(!live_fpu_regs.at(reg)) { - if (slot != 0) { - merge_insert_xchg(instrs, cur_sim, slot); - } - merge_insert_pop(instrs, cur_sim); - } else { - slot++; - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - tty->cr(); - } - - // check if fpu stack only contains live registers - for (unsigned int i = 0; i < live_fpu_regs.size(); i++) { - if (live_fpu_regs.at(i) != cur_sim->contains(i)) { - tty->print_cr("mismatch between required and actual stack content"); - break; - } - } -#endif -} - - -bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:", - block->block_id(), pos()); - sim()->print(); - tty->cr(); - } -#endif - - bool changed = false; - int number_of_sux = block->number_of_sux(); - - if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) { - // The successor has at least two incoming edges, so a stack merge will be necessary - // If this block is the first predecessor, cleanup the current stack and propagate it - // If this block is not the first predecessor, a stack merge will be necessary - - BlockBegin* sux = block->sux_at(0); - intArray* state = sux->fpu_stack_state(); - LIR_List* instrs = new LIR_List(_compilation); - - if (state != NULL) { - // Merge with a successors that already has a FPU stack state - // the block must only have one successor because critical edges must been split - FpuStackSim* cur_sim = sim(); - FpuStackSim* sux_sim = temp_sim(); - sux_sim->read_state(state); - - merge_fpu_stack(instrs, cur_sim, sux_sim); - - } else { - // propagate current FPU stack state to successor without state - // clean up stack first so that there are no dead values on the stack - if (ComputeExactFPURegisterUsage) { - FpuStackSim* cur_sim = sim(); - BitMap 
live_fpu_regs = block->sux_at(0)->fpu_register_usage(); - assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage"); - - merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs); - } - - intArray* state = sim()->write_state(); - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id()); - sim()->print(); tty->cr(); - } - sux->set_fpu_stack_state(state); - } - - if (instrs->instructions_list()->length() > 0) { - lir()->insert_before(pos(), instrs); - set_pos(instrs->instructions_list()->length() + pos()); - changed = true; - } - - } else { - // Propagate unmodified Stack to successors where a stack merge is not necessary - intArray* state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - -#ifdef ASSERT - for (int j = 0; j < sux->number_of_preds(); j++) { - assert(block == sux->pred_at(j), "all critical edges must be broken"); - } - - // check if new state is same - if (sux->fpu_stack_state() != NULL) { - intArray* sux_state = sux->fpu_stack_state(); - assert(state->length() == sux_state->length(), "overwriting existing stack state"); - for (int j = 0; j < state->length(); j++) { - assert(state->at(j) == sux_state->at(j), "overwriting existing stack state"); - } - } -#endif -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d", sux->block_id()); - sim()->print(); tty->cr(); - } -#endif - - sux->set_fpu_stack_state(state); - } - } - -#ifndef PRODUCT - // assertions that FPU stack state conforms to all successors' states - intArray* cur_state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - intArray* sux_state = sux->fpu_stack_state(); - - assert(sux_state != NULL, "no fpu state"); - assert(cur_state->length() == sux_state->length(), "incorrect length"); - for (int i = 0; i < cur_state->length(); i++) { - assert(cur_state->at(i) == sux_state->at(i), 
"element not equal"); - } - } -#endif - - return changed; -}
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -50,21 +50,14 @@ if (unordered_result < 0) { // we want -1 for unordered or less than, 0 for equal and 1 for // greater than. - mov(result, (u_int64_t)-1L); - // for FP LT tests less than or unordered - br(Assembler::LT, done); - // install 0 for EQ otherwise 1 - csinc(result, zr, zr, Assembler::EQ); + cset(result, NE); // Not equal or unordered + cneg(result, result, LT); // Less than or unordered } else { // we want -1 for less than, 0 for equal and 1 for unordered or // greater than. - mov(result, 1L); - // for FP HI tests greater than or unordered - br(Assembler::HI, done); - // install 0 for EQ otherwise ~0 - csinv(result, zr, zr, Assembler::EQ); + cset(result, NE); // Not equal or unordered + cneg(result, result, LO); // Less than } - bind(done); } int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { @@ -409,15 +402,16 @@ } -void C1_MacroAssembler::build_frame(int framesize) { +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a B, BL, NOP, BKPT, // SVC, HVC, or SMC. Make it a NOP. nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). - generate_stack_overflow_check(framesize); + generate_stack_overflow_check(bang_size_in_bytes); MacroAssembler::build_frame(framesize + 2 * wordSize); if (NotifySimulator) { notify(Assembler::method_entry);
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -27,6 +27,8 @@ #ifndef CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP #define CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP +using MacroAssembler::build_frame; + // C1_MacroAssembler contains high-level macros for C1 private:
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -80,6 +80,7 @@ pop(r0, sp); #endif reset_last_Java_frame(true, true); + maybe_isb(); // check for pending exceptions { Label L; @@ -376,7 +377,7 @@ case handle_exception_nofpu_id: case handle_exception_id: // At this point all registers MAY be live. - oop_map = save_live_registers(sasm, id == handle_exception_nofpu_id); + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); break; case handle_exception_from_callee_id: { // At this point all registers except exception oop (r0) and @@ -440,7 +441,7 @@ case handle_exception_nofpu_id: case handle_exception_id: // Restore the registers that were saved at the beginning. - restore_live_registers(sasm, id == handle_exception_nofpu_id); + restore_live_registers(sasm, id != handle_exception_nofpu_id); break; case handle_exception_from_callee_id: // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP @@ -569,6 +570,7 @@ } #endif __ reset_last_Java_frame(true, false); + __ maybe_isb(); // check for pending exceptions { Label L;
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -47,34 +47,6 @@ return is_icholder_entry(call->destination()); } -//----------------------------------------------------------------------------- -// High-level access to an inline cache. Guaranteed to be MT-safe. - -CompiledIC::CompiledIC(nmethod* nm, NativeCall* call) - : _ic_call(call) -{ - address ic_call = call->instruction_address(); - - assert(ic_call != NULL, "ic_call address must be set"); - assert(nm != NULL, "must pass nmethod"); - assert(nm->contains(ic_call), "must be in nmethod"); - - // Search for the ic_call at the given address. - RelocIterator iter(nm, ic_call, ic_call+1); - bool ret = iter.next(); - assert(ret == true, "relocInfo must exist at this address"); - assert(iter.addr() == ic_call, "must find ic_call"); - if (iter.type() == relocInfo::virtual_call_type) { - virtual_call_Relocation* r = iter.virtual_call_reloc(); - _is_optimized = false; - _value = nativeMovConstReg_at(r->cached_value()); - } else { - assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call"); - _is_optimized = true; - _value = NULL; - } -} - // ---------------------------------------------------------------------------- #define __ _masm. @@ -106,15 +78,13 @@ #undef __ int CompiledStaticCall::to_interp_stub_size() { - // count a mov mem --> to 4 movz/k and a branch - return 6 * NativeInstruction::instruction_size; + // count a mov mem --> to 3 movz/k and a branch + return 4 * NativeInstruction::instruction_size; } // Relocation entries for call stub, compiled java to interpreter. int CompiledStaticCall::reloc_to_interp_stub() { - // TODO fixme - // return a large number - return 5; + return 4; // 3 in emit_to_interp_stub + 1 in emit_call } void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { @@ -130,18 +100,18 @@ // Creation also verifies the object. 
NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); - NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), "a) MT-unsafe modification of inline cache"); - assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + assert(method_holder->data() == 0 || jump->jump_destination() == entry, "b) MT-unsafe modification of inline cache"); - +#endif // Update stub. method_holder->set_data((intptr_t)callee()); - method_holder->flush(); - jump->set_jump_destination(entry); - + NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); + ICache::invalidate_range(stub, to_interp_stub_size()); // Update jump to call. set_destination_mt_safe(stub); } @@ -153,9 +123,7 @@ assert(stub != NULL, "stub not found"); // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); - NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); method_holder->set_data(0); - jump->set_jump_destination((address)-1); } //-----------------------------------------------------------------------------
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/frame_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -124,7 +124,9 @@ } intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; address sender_pc = NULL; + intptr_t* saved_fp = NULL; if (is_interpreted_frame()) { // fp must be safe @@ -133,7 +135,12 @@ } sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; } else { // must be some sort of compiled/runtime frame @@ -145,7 +152,10 @@ } sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); } @@ -156,7 +166,6 @@ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp // is really a frame pointer. 
- intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset); bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); if (!saved_fp_safe) { @@ -165,7 +174,7 @@ // construct the potential sender - frame sender(sender_sp, saved_fp, sender_pc); + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); return sender.is_interpreted_frame_valid(thread); @@ -194,7 +203,6 @@ // Could be the call_stub if (StubRoutines::returns_to_call_stub(sender_pc)) { - intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset); bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); if (!saved_fp_safe) { @@ -203,7 +211,7 @@ // construct the potential sender - frame sender(sender_sp, saved_fp, sender_pc); + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); // Validate the JavaCallWrapper an entry frame must have address jcw = (address)sender.entry_frame_call_wrapper(); @@ -573,8 +581,11 @@ if (!m->is_valid_method()) return false; // stack frames shouldn't be much larger than max_stack elements - - if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" sp which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { return false; } @@ -807,7 +818,7 @@ unsigned long bcx, unsigned long thread) { RegisterMap map((JavaThread*)thread, false); if (!reg_map) { - reg_map = (RegisterMap*)new char[sizeof map]; + reg_map = (RegisterMap*)os::malloc(sizeof map, mtNone); } memcpy(reg_map, &map, sizeof map); { @@ -827,3 +838,10 @@ Method* m = (Method*)p[frame::interpreter_frame_method_offset]; printbc(m, bcx); } + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in 
debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif
--- a/src/cpu/aarch64/vm/frame_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/frame_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -188,6 +188,8 @@ frame(intptr_t* sp, intptr_t* fp); + void init(intptr_t* sp, intptr_t* fp, address pc); + // accessors for the instance variables // Note: not necessarily the real 'frame pointer' (see real_fp) intptr_t* fp() const { return _fp; }
--- a/src/cpu/aarch64/vm/frame_aarch64.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/frame_aarch64.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -42,17 +42,11 @@ _deopt_state = unknown; } -static int spin; +//static int spin; -inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { intptr_t a = intptr_t(sp); intptr_t b = intptr_t(fp); -#ifndef PRODUCT - if (fp) - if (sp > fp || (fp - sp > 0x100000)) - for(;;) - asm("nop"); -#endif _sp = sp; _unextended_sp = sp; _fp = fp; @@ -70,15 +64,13 @@ } } +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { intptr_t a = intptr_t(sp); intptr_t b = intptr_t(fp); -#ifndef PRODUCT - if (fp) - if (sp > fp || (fp - sp > 0x100000)) - for(;;) - asm("nop"); -#endif _sp = sp; _unextended_sp = unextended_sp; _fp = fp; @@ -100,12 +92,6 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) { intptr_t a = intptr_t(sp); intptr_t b = intptr_t(fp); -#ifndef PRODUCT - if (fp) - if (sp > fp || (fp - sp > 0x100000)) - for(;;) - asm("nop"); -#endif _sp = sp; _unextended_sp = sp; _fp = fp;
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -65,10 +65,12 @@ define_pd_global(intx, PreInflateSpin, 10); define_pd_global(bool, RewriteBytecodes, true); -define_pd_global(bool, RewriteFrequentPairs, false); +define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, true); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread @@ -95,11 +97,13 @@ "constant pool is close to instructions") \ \ notproduct(bool, UseAcqRelForVolatileFields, false, \ - "Use acquire and release insns for volatile fields") + "Use acquire and release insns for volatile fields") \ + \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ // Don't attempt to use Neon on builtin sim until builtin sim supports it #define UseNeon false -#define UseCRC32 false #else #define UseBuiltinSim false
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -612,6 +612,7 @@ Label done; const Register swap_reg = r0; + const Register tmp = c_rarg2; const Register obj_reg = c_rarg3; // Will contain the oop const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); @@ -625,7 +626,7 @@ ldr(obj_reg, Address(lock_reg, obj_offset)); if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case); + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); } // Load (object->mark() | 1) into swap_reg @@ -644,7 +645,7 @@ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); bind(fast); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); b(done); bind(fail); } else { @@ -672,7 +673,7 @@ if (PrintBiasedLockingStatistics) { br(Assembler::NE, slow_case); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); } br(Assembler::EQ, done); @@ -852,9 +853,10 @@ // jcc(Assembler::negative, L); // addptr(data, (int32_t) DataLayout::counter_increment); // so we do this + ldr(rscratch1, addr); subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment); Label L; - br(Assembler::CS, L); // skip store if counter overflow + br(Assembler::LO, L); // skip store if counter overflow str(rscratch1, addr); bind(L); } else { @@ -1314,7 +1316,7 @@ // case_array_offset_in_bytes() movw(reg2, in_bytes(MultiBranchData::per_case_size())); movw(rscratch1, in_bytes(MultiBranchData::case_array_offset())); - maddw(index, index, reg2, rscratch1); + Assembler::maddw(index, index, reg2, rscratch1); // Update the case count increment_mdp_data_at(mdp,
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -40,7 +40,11 @@ protected: protected: + using MacroAssembler::call_VM_leaf_base; + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + virtual void call_VM_leaf_base(address entry_point, int number_of_arguments);
--- a/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -42,25 +42,28 @@ void clear(void) { // clearing _last_Java_sp must be first _last_Java_sp = NULL; - // fence? + OrderAccess::release(); _last_Java_fp = NULL; _last_Java_pc = NULL; } void copy(JavaFrameAnchor* src) { - // In order to make sure the transition state is valid for "this" - // We must clear _last_Java_sp before copying the rest of the new data - // - // Hack Alert: Temporary bugfix for 4717480/4721647 - // To act like previous version (pd_cache_state) don't NULL _last_Java_sp - // unless the value is changing - // - if (_last_Java_sp != src->_last_Java_sp) - _last_Java_sp = NULL; - + // n.b. the writes to fp and pc do not require any preceding + // release(). when copying into the thread anchor, which only + // happens under ~JavaCallWrapper(), sp will have been NULLed by a + // call to zap() and the NULL write will have been published by a + // fence in the state transition to in_vm. contrariwise, when + // copying into the wrapper anchor, which only happens under + // JavaCallWrapper(), there is no ordering requirement at all + // since that object is thread local until the subsequent entry + // into java. JavaCallWrapper() call clear() after copy() thus + // ensuring that all 3 writes are visible() before the wrapper is + // accessible to other threads. _last_Java_fp = src->_last_Java_fp; _last_Java_pc = src->_last_Java_pc; - // Must be last so profiler will always see valid frame if has_last_frame() is true + // Must be last so profiler will always see valid frame if + // has_last_frame() is true + OrderAccess::release(); _last_Java_sp = src->_last_Java_sp; } @@ -79,10 +82,14 @@ public: - void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + // n.b. 
set_last_Java_sp and set_last_Java_fp are never called + // (which is good because they would need a preceding or following + // call to OrderAccess::release() to make sure the writes are + // visible in the correct order). +void set_last_Java_sp(intptr_t* sp) { assert(false, "should not be called"); _last_Java_sp = sp; } intptr_t* last_Java_fp(void) { return _last_Java_fp; } // Assert (last_Java_sp == NULL || fp == NULL) - void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + void set_last_Java_fp(intptr_t* fp) { assert(false, "should not be called"); _last_Java_fp = fp; } #endif // CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP
--- a/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -127,10 +127,15 @@ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; default: ShouldNotReachHere(); } - // tail call - __ lea(rscratch1, ExternalAddress(slow_case_addr)); - __ br(rscratch1); + { + __ enter(); + __ lea(rscratch1, ExternalAddress(slow_case_addr)); + __ blr(rscratch1); + __ maybe_isb(); + __ leave(); + __ ret(lr); + } __ flush (); return fast_entry;
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -1,3 +1,4 @@ +/* /* * Copyright (c) 2013, Red Hat Inc. * Copyright (c) 1997, 2012, Oracle and/or its affiliates. @@ -64,7 +65,10 @@ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -void MacroAssembler::pd_patch_instruction(address branch, address target) { +// Patch any kind of instruction; there may be several instructions. +// Return the total length (in bytes) of the instructions. +int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + int instructions = 1; assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant"); long offset = (target - branch) >> 2; unsigned insn = *(unsigned*)branch; @@ -118,12 +122,14 @@ Instruction_aarch64::patch(branch + sizeof (unsigned), 21, 10, offset_lo >> size); guarantee(((dest >> size) << size) == dest, "misaligned target"); + instructions = 2; } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 && Instruction_aarch64::extract(insn, 4, 0) == Instruction_aarch64::extract(insn2, 4, 0)) { // add (immediate) Instruction_aarch64::patch(branch + sizeof (unsigned), 21, 10, offset_lo); + instructions = 2; } else { assert((jbyte *)target == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base || @@ -146,6 +152,7 @@ Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff); Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff); assert(pd_call_destination(branch) == target, "should be"); + instructions = 3; } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && Instruction_aarch64::extract(insn, 4, 0) == 0b11111) { // nothing to do @@ -153,19 +160,33 @@ } else { ShouldNotReachHere(); } + return instructions * NativeInstruction::instruction_size; } -void MacroAssembler::patch_oop(address insn_addr, address o) { +int 
MacroAssembler::patch_oop(address insn_addr, address o) { + int instructions; unsigned insn = *(unsigned*)insn_addr; + assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); + + // OOPs are either narrow (32 bits) or wide (48 bits). We encode + // narrow OOPs by setting the upper 16 bits in the first + // instruction. if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) { - // Move narrow constant - assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); - narrowOop n = oopDesc::encode_heap_oop((oop)o); - Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16); - Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); + // Move narrow OOP + narrowOop n = oopDesc::encode_heap_oop((oop)o); + Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16); + Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); + instructions = 2; } else { - pd_patch_instruction(insn_addr, o); + // Move wide OOP + assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch"); + uintptr_t dest = (uintptr_t)o; + Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff); + Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff); + Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff); + instructions = 3; } + return instructions * NativeInstruction::instruction_size; } address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { @@ -347,11 +368,7 @@ if (PrintBiasedLockingStatistics && counters == NULL) counters = BiasedLocking::counters(); - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - tmp_reg = rscratch2; - } - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1); + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); Address mark_addr (obj_reg, 
oopDesc::mark_offset_in_bytes()); Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); @@ -381,7 +398,7 @@ if (counters != NULL) { Label around; cbnz(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2); b(done); bind(around); } else { @@ -434,7 +451,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -460,7 +477,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -488,7 +505,7 @@ // removing the bias bit from the object's header. if (counters != NULL) { atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - rscratch1); + rscratch1, rscratch2); } bind(nope); } @@ -735,15 +752,29 @@ while (offset() % modulus != 0) nop(); } -// these are meant to be no-ops overridden by InterpreterMacroAssembler - -void MacroAssembler::check_and_handle_earlyret(Register java_thread) { Unimplemented(); } - -void MacroAssembler::check_and_handle_popframe(Register java_thread) { Unimplemented(); } +// these are no-ops overridden by InterpreterMacroAssembler + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { } + RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, - int offset) { Unimplemented(); return RegisterOrConstant(r0); } + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + + // load indirectly to solve generation ordering problem + ldr(tmp, ExternalAddress((address) delayed_value_addr)); + + if 
(offset != 0) + add(tmp, tmp, offset); + + return RegisterOrConstant(tmp); +} + void MacroAssembler:: notify(int type) { if (type == bytecode_start) { @@ -1046,7 +1077,7 @@ // We will consult the secondary-super array. ldr(r5, secondary_supers_addr); // Load the array length. (Positive movl does right thing on LP64.) - ldr(r2, Address(r5, Array<Klass*>::length_offset_in_bytes())); + ldrw(r2, Address(r5, Array<Klass*>::length_offset_in_bytes())); // Skip to start of data. add(r5, r5, Array<Klass*>::base_offset_in_bytes()); @@ -1181,6 +1212,7 @@ bind(*retaddr); ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize))); + maybe_isb(); } void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { @@ -1285,6 +1317,52 @@ movk(r, imm64 & 0xffff, 32); } +// Macro to mov replicated immediate to vector register. +// Vd will get the following values for different arrangements in T +// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh +// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh +// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh +// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh +// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh +// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh +// T1D/T2D: invalid +void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { + assert(T != T1D && T != T2D, "invalid arrangement"); + if (T == T8B || T == T16B) { + assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)"); + movi(Vd, T, imm32 & 0xff, 0); + return; + } + u_int32_t nimm32 = ~imm32; + if (T == T4H || T == T8H) { + assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)"); + imm32 &= 0xffff; + nimm32 &= 0xffff; + } + u_int32_t x = imm32; + int movi_cnt = 0; + int movn_cnt = 0; + while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } + x = nimm32; + while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } + if (movn_cnt < movi_cnt) imm32 = nimm32; + 
unsigned lsl = 0; + while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + mvni(Vd, T, imm32 & 0xff, lsl); + else + movi(Vd, T, imm32 & 0xff, lsl); + imm32 >>= 8; lsl += 8; + while (imm32) { + while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + bici(Vd, T, imm32 & 0xff, lsl); + else + orri(Vd, T, imm32 & 0xff, lsl); + lsl += 8; imm32 >>= 8; + } +} + void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) { #ifndef PRODUCT @@ -1466,15 +1544,15 @@ return Address(Rd); } -void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { Label retry_load; bind(retry_load); // flush and load exclusive from the memory location ldxrw(tmp, counter_addr); addw(tmp, tmp, 1); // if we store+flush with no intervening write tmp wil be zero - stxrw(tmp, tmp, counter_addr); - cbnzw(tmp, retry_load); + stxrw(tmp2, tmp, counter_addr); + cbnzw(tmp2, retry_load); } @@ -1502,7 +1580,7 @@ sdivw(result, ra, rb); } else { sdivw(scratch, ra, rb); - msubw(result, scratch, rb, ra); + Assembler::msubw(result, scratch, rb, ra); } return idivl_offset; @@ -1532,7 +1610,7 @@ sdiv(result, ra, rb); } else { sdiv(scratch, ra, rb); - msub(result, scratch, rb, ra); + Assembler::msub(result, scratch, rb, ra); } return idivq_offset; @@ -1671,7 +1749,7 @@ } } -void MacroAssembler::increment(Address dst, int value) +void MacroAssembler::incrementw(Address dst, int value) { assert(!dst.uses(rscratch1), "invalid dst for address increment"); ldrw(rscratch1, dst); @@ -1679,7 +1757,7 @@ strw(rscratch1, dst); } -void MacroAssembler::incrementw(Address dst, int value) +void MacroAssembler::increment(Address dst, int value) { assert(!dst.uses(rscratch1), "invalid dst for address increment"); ldr(rscratch1, dst); @@ -1839,6 +1917,22 @@ } } +void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) { + if 
(decrement.is_register()) { + sub(Rd, Rn, decrement.as_register()); + } else { + sub(Rd, Rn, decrement.as_constant()); + } +} + +void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + subw(Rd, Rn, decrement.as_register()); + } else { + subw(Rd, Rn, decrement.as_constant()); + } +} + void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { @@ -1928,7 +2022,7 @@ return a != b.as_register() && a != c && b.as_register() != c; } -#define ATOMIC_OP(LDXR, OP, STXR) \ +#define ATOMIC_OP(LDXR, OP, IOP, STXR) \ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \ Register result = rscratch2; \ if (prev->is_valid()) \ @@ -1938,14 +2032,15 @@ bind(retry_load); \ LDXR(result, addr); \ OP(rscratch1, result, incr); \ - STXR(rscratch1, rscratch1, addr); \ - cbnzw(rscratch1, retry_load); \ - if (prev->is_valid() && prev != result) \ - mov(prev, result); \ + STXR(rscratch2, rscratch1, addr); \ + cbnzw(rscratch2, retry_load); \ + if (prev->is_valid() && prev != result) { \ + IOP(prev, rscratch1, incr); \ + } \ } -ATOMIC_OP(ldxr, add, stxr) -ATOMIC_OP(ldxrw, addw, stxrw) +ATOMIC_OP(ldxr, add, sub, stxr) +ATOMIC_OP(ldxrw, addw, subw, stxrw) #undef ATOMIC_OP @@ -2115,6 +2210,363 @@ } /** + * Helpers for multiply_to_len(). + */ +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2) { + adds(dest_lo, dest_lo, src1); + adc(dest_hi, dest_hi, zr); + adds(dest_lo, dest_lo, src2); + adc(final_dest_hi, dest_hi, zr); +} + +// Generate an address from (r + r1 extend offset). "size" is the +// size of the operand. The result may be in rscratch2. 
+Address MacroAssembler::offsetted_address(Register r, Register r1, + Address::extend ext, int offset, int size) { + if (offset || (ext.shift() % size != 0)) { + lea(rscratch2, Address(r, r1, ext)); + return Address(rscratch2, offset); + } else { + return Address(r, r1, ext); + } +} + +Address MacroAssembler::spill_address(int size, int offset, Register tmp) +{ + assert(offset >= 0, "spill to negative address?"); + // Offset reachable ? + // Not aligned - 9 bits signed offset + // Aligned - 12 bits unsigned offset shifted + Register base = sp; + if ((offset & (size-1)) && offset >= (1<<8)) { + add(tmp, base, offset & ((1<<12)-1)); + base = tmp; + offset &= -1<<12; + } + + if (offset >= (1<<12) * size) { + add(tmp, base, offset & (((1<<12)-1)<<12)); + base = tmp; + offset &= ~(((1<<12)-1)<<12); + } + + return Address(base, offset); +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) { + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + subsw(xstart, xstart, 1); + br(Assembler::MI, L_one_x); + + lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt))); + ldr(x_xstart, Address(rscratch1)); + ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + subsw(idx, idx, 1); + br(Assembler::MI, L_first_loop_exit); + subsw(idx, idx, 1); + br(Assembler::MI, L_one_y); + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(y_idx, Address(rscratch1)); + ror(y_idx, y_idx, 32); // convert big-endian to little-endian + 
bind(L_multiply); + + // AArch64 has a multiply-accumulate instruction that we can't use + // here because it has no way to process carries, so we have to use + // separate add and adc instructions. Bah. + umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product + mul(product, x_xstart, y_idx); + adds(product, product, carry); + adc(carry, rscratch1, zr); // x_xstart * y_idx + carry -> carry:product + + subw(kdx, kdx, 2); + ror(product, product, 32); // back to big-endian + str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong)); + + b(L_first_loop); + + bind(L_one_y); + ldrw(y_idx, Address(y, 0)); + b(L_multiply); + + bind(L_one_x); + ldrw(x_xstart, Address(x, 0)); + b(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128. Unrolled inner loop. + * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { + + // jlong carry, x[], y[], z[]; + // int kdx = ystart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + lsrw(jdx, idx, 2); + + bind(L_third_loop); + + subsw(jdx, jdx, 1); + br(Assembler::MI, L_third_loop_exit); + subw(idx, idx, 4); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + + ldp(yz_idx2, yz_idx1, Address(rscratch1, 
0)); + + lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror(yz_idx2, yz_idx2, 32); + + ldp(rscratch2, rscratch1, Address(tmp6, 0)); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + + ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian + ror(rscratch2, rscratch2, 32); + + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + umulh(carry2, product_hi, yz_idx2); + + // propagate sum of both multiplications into carry:tmp4:tmp3 + adds(tmp3, tmp3, carry); + adc(tmp4, tmp4, zr); + adds(tmp3, tmp3, rscratch1); + adcs(tmp4, tmp4, tmp); + adc(carry, carry2, zr); + adds(tmp4, tmp4, rscratch2); + adc(carry, carry, zr); + + ror(tmp3, tmp3, 32); // convert little-endian to big-endian + ror(tmp4, tmp4, 32); + stp(tmp4, tmp3, Address(tmp6, 0)); + + b(L_third_loop); + bind (L_third_loop_exit); + + andw (idx, idx, 0x3); + cbz(idx, L_post_third_loop_done); + + Label L_check_1; + subsw(idx, idx, 2); + br(Assembler::MI, L_check_1); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx1, Address(rscratch1, 0)); + ror(yz_idx1, yz_idx1, 32); + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx2, Address(rscratch1, 0)); + ror(yz_idx2, yz_idx2, 32); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2); + + ror(tmp3, tmp3, 32); + str(tmp3, Address(rscratch1, 0)); + + bind (L_check_1); + + andw (idx, idx, 0x1); + subsw(idx, idx, 1); + br(Assembler::MI, L_post_third_loop_done); + ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt))); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + umulh(carry2, tmp4, product_hi); + ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + add2_with_carry(carry2, tmp3, tmp4, carry); + + 
strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt))); + extr(carry, carry2, tmp3, 32); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic. + * + * r0: x + * r1: xlen + * r2: y + * r3: ylen + * r4: z + * r5: zlen + * r10: tmp1 + * r11: tmp2 + * r12: tmp3 + * r13: tmp4 + * r14: tmp5 + * r15: tmp6 + * r16: tmp7 + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) { + + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + // First Loop. + // + // final static long LONG_MASK = 0xffffffffL; + // int xstart = xlen - 1; + // int ystart = ylen - 1; + // long carry = 0; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + // + + movw(idx, ylen); // idx = ylen; + movw(kdx, zlen); // kdx = xlen+ylen; + mov(carry, zr); // carry = 0; + + Label L_done; + + movw(xstart, xlen); + subsw(xstart, xstart, 1); + br(Assembler::MI, L_done); + + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop; + cbzw(kdx, L_second_loop); + + Label L_carry; + subw(kdx, kdx, 1); + cbzw(kdx, L_carry); + + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + lsr(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + + // Second and third (nested) loops. 
+ // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + const Register jdx = tmp1; + + bind(L_second_loop); + mov(carry, zr); // carry = 0; + movw(jdx, ylen); // j = ystart+1 + + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_done); + + str(z, Address(pre(sp, -4 * wordSize))); + + Label L_last_x; + lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_last_x); + + lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt))); + ldr(product_hi, Address(rscratch1)); + ror(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + str(ylen, Address(sp, wordSize)); + stp(x, xstart, Address(sp, 2 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ldp(z, ylen, Address(post(sp, 2 * wordSize))); + ldp(x, xlen, Address(post(sp, 2 * wordSize))); // copy old xstart -> xlen + + addw(tmp3, xlen, 1); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + subsw(tmp3, tmp3, 1); + br(Assembler::MI, L_done); + + lsr(carry, carry, 32); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + b(L_second_loop); + + // Next infrequent code is moved outside loops. + bind(L_last_x); + ldrw(product_hi, Address(x, 0)); + b(L_third_loop_prologue); + + bind(L_done); +} + +/** * Emits code to update CRC-32 with a byte value according to constants in table * * @param [in,out]crc Register containing the crc. 
@@ -2344,8 +2796,8 @@ uzp2(v21, v20, v16, T2D); eor(v20, T16B, v17, v21); - shl(v16, v28, T2D, 1); - shl(v17, v20, T2D, 1); + shl(v16, T2D, v28, 1); + shl(v17, T2D, v20, 1); eor(v0, T16B, v0, v16); eor(v1, T16B, v1, v17); @@ -2482,6 +2934,11 @@ if (Universe::narrow_klass_base() == NULL) { cmp(trial_klass, tmp, LSL, Universe::narrow_klass_shift()); return; + } else if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + // Only the bottom 32 bits matter + cmpw(trial_klass, tmp); + return; } decode_klass_not_null(tmp); } else { @@ -2666,6 +3123,12 @@ return; } + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + movw(dst, src); + return; + } + #ifdef ASSERT verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); #endif @@ -2709,6 +3172,14 @@ return; } + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + if (dst != src) + movw(dst, src); + movk(dst, (uint64_t)Universe::narrow_klass_base() >> 32, 32); + return; + } + // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. @@ -3214,8 +3685,8 @@ br(Assembler::HI, slow_case); // If heap_top hasn't been changed by some other thread, update it. 
- stlxr(rscratch1, end, rscratch1); - cbnzw(rscratch1, retry); + stlxr(rscratch2, end, rscratch1); + cbnzw(rscratch2, retry); } } @@ -3353,6 +3824,346 @@ } } +// Search for str1 in str2 and return index or -1 +void MacroAssembler::string_indexof(Register str2, Register str1, + Register cnt2, Register cnt1, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int icnt1, Register result) { + Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH; + + Register ch1 = rscratch1; + Register ch2 = rscratch2; + Register cnt1tmp = tmp1; + Register cnt2tmp = tmp2; + Register cnt1_neg = cnt1; + Register cnt2_neg = cnt2; + Register result_tmp = tmp4; + + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + +// We have two strings, a source string in str2, cnt2 and a pattern string +// in str1, cnt1. Find the 1st occurrence of pattern in source or return -1. + +// For larger pattern and source we use a simplified Boyer Moore algorithm. +// With a small pattern and source we use linear scan. + + if (icnt1 == -1) { + cmp(cnt1, 256); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 + ccmp(cnt1, 8, 0b0000, LO); // Can't handle skip >= 256 because we use + br(LO, LINEARSEARCH); // a byte array. + cmp(cnt1, cnt2, LSR, 2); // Source must be 4 * pattern for BM + br(HS, LINEARSEARCH); + } + +// The Boyer Moore algorithm is based on the description here:- +// +// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm +// +// This describes an algorithm with 2 shift rules. The 'Bad Character' rule +// and the 'Good Suffix' rule. +// +// These rules are essentially heuristics for how far we can shift the +// pattern along the search string. +// +// The implementation here uses the 'Bad Character' rule only because of the +// complexity of initialisation for the 'Good Suffix' rule. 
+// +// This is also known as the Boyer-Moore-Horspool algorithm:- +// +// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +// +// #define ASIZE 128 +// +// int bm(unsigned char *x, int m, unsigned char *y, int n) { +// int i, j; +// unsigned c; +// unsigned char bc[ASIZE]; +// +// /* Preprocessing */ +// for (i = 0; i < ASIZE; ++i) +// bc[i] = 0; +// for (i = 0; i < m - 1; ) { +// c = x[i]; +// ++i; +// if (c < ASIZE) bc[c] = i; +// } +// +// /* Searching */ +// j = 0; +// while (j <= n - m) { +// c = y[i+j]; +// if (x[m-1] == c) +// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); +// if (i < 0) return j; +// if (c < ASIZE) +// j = j - bc[y[j+m-1]] + m; +// else +// j += 1; // Advance by 1 only if char >= ASIZE +// } +// } + + if (icnt1 == -1) { + BIND(BM); + + Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP; + Label BMADV, BMMATCH, BMCHECKEND; + + Register cnt1end = tmp2; + Register str2end = cnt2; + Register skipch = tmp2; + + // Restrict ASIZE to 128 to reduce stack space/initialisation. + // The presence of chars >= ASIZE in the target string does not affect + // performance, but we must be careful not to initialise them in the stack + // array. + // The presence of chars >= ASIZE in the source string may adversely affect + // performance since we can only advance by one when we encounter one. 
+ + stp(zr, zr, pre(sp, -128)); + for (int i = 1; i < 8; i++) + stp(zr, zr, Address(sp, i*16)); + + mov(cnt1tmp, 0); + sub(cnt1end, cnt1, 1); + BIND(BCLOOP); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + cmp(ch1, 128); + add(cnt1tmp, cnt1tmp, 1); + br(HS, BCSKIP); + strb(cnt1tmp, Address(sp, ch1)); + BIND(BCSKIP); + cmp(cnt1tmp, cnt1end); + br(LT, BCLOOP); + + mov(result_tmp, str2); + + sub(cnt2, cnt2, cnt1); + add(str2end, str2, cnt2, LSL, 1); + BIND(BMLOOPSTR2); + sub(cnt1tmp, cnt1, 1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, skipch); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(LT, BMMATCH); + BIND(BMLOOPSTR1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, ch2); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(GE, BMLOOPSTR1); + BIND(BMMATCH); + sub(result_tmp, str2, result_tmp); + lsr(result, result_tmp, 1); + add(sp, sp, 128); + b(DONE); + BIND(BMADV); + add(str2, str2, 2); + b(BMCHECKEND); + BIND(BMSKIP); + cmp(skipch, 128); + br(HS, BMADV); + ldrb(ch2, Address(sp, skipch)); + add(str2, str2, cnt1, LSL, 1); + sub(str2, str2, ch2, LSL, 1); + BIND(BMCHECKEND); + cmp(str2, str2end); + br(LE, BMLOOPSTR2); + add(sp, sp, 128); + b(NOMATCH); + } + + BIND(LINEARSEARCH); + { + Label DO1, DO2, DO3; + + Register str2tmp = tmp2; + Register first = tmp3; + + if (icnt1 == -1) + { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD; + + cmp(cnt1, 4); + br(LT, DOSHORT); + + sub(cnt2, cnt2, cnt1); + sub(cnt1, cnt1, 4); + mov(result_tmp, cnt2); + + lea(str1, Address(str1, cnt1, Address::uxtw(1))); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt1_neg, zr, cnt1, LSL, 1); + sub(cnt2_neg, zr, cnt2, LSL, 1); + ldr(first, Address(str1, cnt1_neg)); + + BIND(FIRST_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + 
adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + adds(cnt1tmp, cnt1_neg, 8); + add(cnt2tmp, cnt2_neg, 8); + br(GE, LAST_WORD); + + BIND(STR1_NEXT); + ldr(ch1, Address(str1, cnt1tmp)); + ldr(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + adds(cnt1tmp, cnt1tmp, 8); + add(cnt2tmp, cnt2tmp, 8); + br(LT, STR1_NEXT); + + BIND(LAST_WORD); + ldr(ch1, Address(str1)); + sub(str2tmp, str2, cnt1_neg); // adjust to corresponding + ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2 + cmp(ch1, ch2); + br(NE, STR2_NEXT); + b(MATCH); + + BIND(DOSHORT); + cmp(cnt1, 2); + br(LT, DO1); + br(GT, DO3); + } + + if (icnt1 == 4) { + Label CH1_LOOP; + + ldr(ch1, str1); + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 2) { + Label CH1_LOOP; + + BIND(DO2); + ldrw(ch1, str1); + sub(cnt2, cnt2, 2); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + + BIND(DO3); + ldrw(first, str1); + ldrh(ch1, Address(str1, 4)); + + sub(cnt2, cnt2, 3); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(FIRST_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmpw(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + add(cnt2tmp, cnt2_neg, 4); + ldrh(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + 
b(MATCH); + } + + if (icnt1 == -1 || icnt1 == 1) { + Label CH1_LOOP, HAS_ZERO; + Label DO1_SHORT, DO1_LOOP; + + BIND(DO1); + ldrh(ch1, str1); + cmp(cnt2, 4); + br(LT, DO1_SHORT); + + orr(ch1, ch1, ch1, LSL, 16); + orr(ch1, ch1, ch1, LSL, 32); + + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + mov(tmp3, 0x0001000100010001); + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + eor(ch2, ch1, ch2); + sub(tmp1, ch2, tmp3); + orr(tmp2, ch2, 0x7fff7fff7fff7fff); + bics(tmp1, tmp1, tmp2); + br(NE, HAS_ZERO); + adds(cnt2_neg, cnt2_neg, 8); + br(LT, CH1_LOOP); + + cmp(cnt2_neg, 8); + mov(cnt2_neg, 0); + br(LT, CH1_LOOP); + b(NOMATCH); + + BIND(HAS_ZERO); + rev(tmp1, tmp1); + clz(tmp1, tmp1); + add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); + b(MATCH); + + BIND(DO1_SHORT); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + BIND(DO1_LOOP); + ldrh(ch2, Address(str2, cnt2_neg)); + cmpw(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LT, DO1_LOOP); + } + } + BIND(NOMATCH); + mov(result, -1); + b(DONE); + BIND(MATCH); + add(result, result_tmp, cnt2_neg, ASR, 1); + BIND(DONE); +} + // Compare strings. void MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, @@ -3512,3 +4323,136 @@ BLOCK_COMMENT("} string_equals"); } + +// Compare char[] arrays aligned to 4 bytes +void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1) +{ + Register cnt1 = rscratch1; + Register cnt2 = rscratch2; + Register tmp2 = rscratch2; + + Label SAME, DIFFER, NEXT, TAIL03, TAIL01; + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + BLOCK_COMMENT("char_arrays_equals {"); + + // different until proven equal + mov(result, false); + + // same array? 
+ cmp(ary1, ary2); + br(Assembler::EQ, SAME); + + // ne if either null + cbz(ary1, DIFFER); + cbz(ary2, DIFFER); + + // lengths ne? + ldrw(cnt1, Address(ary1, length_offset)); + ldrw(cnt2, Address(ary2, length_offset)); + cmp(cnt1, cnt2); + br(Assembler::NE, DIFFER); + + lea(ary1, Address(ary1, base_offset)); + lea(ary2, Address(ary2, base_offset)); + + subs(cnt1, cnt1, 4); + br(LT, TAIL03); + + BIND(NEXT); + ldr(tmp1, Address(post(ary1, 8))); + ldr(tmp2, Address(post(ary2, 8))); + subs(cnt1, cnt1, 4); + eor(tmp1, tmp1, tmp2); + cbnz(tmp1, DIFFER); + br(GE, NEXT); + + BIND(TAIL03); // 0-3 chars left, cnt1 = #chars left - 4 + tst(cnt1, 0b10); + br(EQ, TAIL01); + ldrw(tmp1, Address(post(ary1, 4))); + ldrw(tmp2, Address(post(ary2, 4))); + cmp(tmp1, tmp2); + br(NE, DIFFER); + BIND(TAIL01); // 0-1 chars left + tst(cnt1, 0b01); + br(EQ, SAME); + ldrh(tmp1, ary1); + ldrh(tmp2, ary2); + cmp(tmp1, tmp2); + br(NE, DIFFER); + + BIND(SAME); + mov(result, true); + BIND(DIFFER); // result already set + + BLOCK_COMMENT("} char_arrays_equals"); +} + +// encode char[] to byte[] in ISO_8859_1 +void MacroAssembler::encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4) +{ + Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1; + Register tmp1 = rscratch1; + + mov(result, len); // Save initial len + +#ifndef BUILTIN_SIM + subs(len, len, 32); + br(LT, LOOP_8); + +// The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions +// to convert chars to bytes. These set the 'QC' bit in the FPSR if +// any char could not fit in a byte, so clear the FPSR so we can test it. 
+ clear_fpsr(); + + BIND(NEXT_32); + ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); + uqxtn(Vtmp1, T8B, Vtmp1, T8H); // uqxtn - write bottom half + uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half + uqxtn(Vtmp2, T8B, Vtmp3, T8H); + uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2 + get_fpsr(tmp1); + cbnzw(tmp1, LOOP_8); + st1(Vtmp1, Vtmp2, T16B, post(dst, 32)); + subs(len, len, 32); + add(src, src, 64); + br(GE, NEXT_32); + + BIND(LOOP_8); + adds(len, len, 32-8); + br(LT, LOOP_1); + clear_fpsr(); // QC may be set from loop above, clear again + BIND(NEXT_8); + ld1(Vtmp1, T8H, src); + uqxtn(Vtmp1, T8B, Vtmp1, T8H); + get_fpsr(tmp1); + cbnzw(tmp1, LOOP_1); + st1(Vtmp1, T8B, post(dst, 8)); + subs(len, len, 8); + add(src, src, 16); + br(GE, NEXT_8); + + BIND(LOOP_1); + adds(len, len, 8); + br(LE, DONE); +#else + cbz(len, DONE); +#endif + BIND(NEXT_1); + ldrh(tmp1, Address(post(src, 2))); + tst(tmp1, 0xff00); + br(NE, DONE); + strb(tmp1, Address(post(dst, 1))); + subs(len, len, 1); + br(GT, NEXT_1); + + BIND(DONE); + sub(result, result, len); // Return index where we stopped +}
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -37,7 +37,9 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + public: using Assembler::mov; + using Assembler::movi; protected: @@ -106,9 +108,7 @@ // Biased locking support // lock_reg and obj_reg must be loaded up with the appropriate values. // swap_reg is killed. - // tmp_reg is optional. If it is supplied (i.e., != noreg) it will - // be killed; if not supplied, push/pop will be used internally to - // allocate a temporary (inefficient, avoid if possible). + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 // Optional slow case is for implementations (interpreter and C1) which branch to // slow case directly. Leaves condition codes set for C2's Fast_Lock node. // Returns offset of first potentially-faulting instruction for null @@ -125,10 +125,10 @@ // Helper functions for statistics gathering. // Unconditional atomic increment. 
- void atomic_incw(Register counter_addr, Register tmp); - void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + void atomic_incw(Register counter_addr, Register tmp, Register tmp2); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) { lea(tmp1, counter_addr); - atomic_incw(tmp1, tmp2); + atomic_incw(tmp1, tmp2, tmp3); } // Load Effective Address void lea(Register r, const Address &a) { @@ -168,9 +168,8 @@ // aliases defined in AARCH64 spec - template<class T> - inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); } + inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); } inline void cmp(Register Rd, unsigned imm) { subs(zr, Rd, imm); } inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); } @@ -179,11 +178,17 @@ void cset(Register Rd, Assembler::Condition cond) { csinc(Rd, zr, zr, ~cond); } - void csetw(Register Rd, Assembler::Condition cond) { csincw(Rd, zr, zr, ~cond); } + void cneg(Register Rd, Register Rn, Assembler::Condition cond) { + csneg(Rd, Rn, Rn, ~cond); + } + void cnegw(Register Rd, Register Rn, Assembler::Condition cond) { + csnegw(Rd, Rn, Rn, ~cond); + } + inline void movw(Register Rd, Register Rn) { if (Rd == sp || Rn == sp) { addw(Rd, Rn, 0U); @@ -401,6 +406,16 @@ umaddl(Rd, Rn, Rm, zr); } +#define WRAP(INSN) \ + void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \ + if (Ra != zr) nop(); \ + Assembler::INSN(Rd, Rn, Rm, Ra); \ + } + + WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw) + WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl) +#undef WRAP + // macro assembly operations needed for aarch64 // first two private routines for loading 32 bit or 64 bit constants @@ -448,6 +463,12 @@ void movptr(Register r, uintptr_t imm64); + void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); + + void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { + orr(Vd, T, Vn, Vn); + } + // macro instructions for accessing and updating floating point // status 
register // @@ -491,7 +512,10 @@ // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined. - static void pd_patch_instruction(address branch, address target); + static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction(address branch, address target) { + pd_patch_instruction_size(branch, target); + } static address pd_call_destination(address branch) { unsigned insn = *(unsigned*)branch; return target_addr_for_insn(branch, insn); @@ -500,7 +524,7 @@ static void pd_print_patched_instruction(address branch); #endif - static void patch_oop(address insn_addr, address o); + static int patch_oop(address insn_addr, address o); // The following 4 methods return the offset of the appropriate move instruction @@ -859,7 +883,7 @@ // stack grows down, caller passes positive offset assert(offset > 0, "must bang with negative offset"); mov(rscratch2, -offset); - ldr(zr, Address(sp, rscratch2)); + str(zr, Address(sp, rscratch2)); } // Writes to stack successive pages until offset reached to check for @@ -1005,6 +1029,8 @@ void add(Register Rd, Register Rn, RegisterOrConstant increment); void addw(Register Rd, Register Rn, RegisterOrConstant increment); + void sub(Register Rd, Register Rn, RegisterOrConstant decrement); + void subw(Register Rd, Register Rn, RegisterOrConstant decrement); void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); @@ -1085,6 +1111,85 @@ void string_equals(Register str1, Register str2, Register cnt, Register result, Register tmp1); + void char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1); + void encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4); + void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register 
tmp2, + Register tmp3, Register tmp4, + int int_cnt1, Register result); +private: + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2); + void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { + add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2); + } + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp7, Register product_hi); +public: + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, + Register zlen, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register tmp7); + // ISB may be needed because of a safepoint + void maybe_isb() { isb(); } + +private: + // Return the effective address r + (r1 << ext) + offset. + // Uses rscratch2. 
+ Address offsetted_address(Register r, Register r1, Address::extend ext, + int offset, int size); + +private: + // Returns an address on the stack which is reachable with a ldr/str of size + // Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + +public: + void spill(Register Rx, bool is64, int offset) { + if (is64) { + str(Rx, spill_address(8, offset)); + } else { + strw(Rx, spill_address(4, offset)); + } + } + void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + str(Vx, T, spill_address(1 << (int)T, offset)); + } + void unspill(Register Rx, bool is64, int offset) { + if (is64) { + ldr(Rx, spill_address(8, offset)); + } else { + ldrw(Rx, spill_address(4, offset)); + } + } + void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + ldr(Vx, T, spill_address(1 << (int)T, offset)); + } + void spill_copy128(int src_offset, int dst_offset, + Register tmp1=rscratch1, Register tmp2=rscratch2) { + if (src_offset < 512 && (src_offset & 7) == 0 && + dst_offset < 512 && (dst_offset & 7) == 0) { + ldp(tmp1, tmp2, Address(sp, src_offset)); + stp(tmp1, tmp2, Address(sp, dst_offset)); + } else { + unspill(tmp1, true, src_offset); + spill(tmp1, true, dst_offset); + unspill(tmp1, true, src_offset+8); + spill(tmp1, true, dst_offset+8); + } + } }; // Used by aarch64.ad to control code generation
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -37,11 +37,6 @@ #include "c1/c1_Runtime1.hpp" #endif -void NativeInstruction::wrote(int offset) { - // FIXME: Native needs ISB here -; } - - void NativeCall::verify() { ; } address NativeCall::destination() const { @@ -51,10 +46,13 @@ // Inserts a native call instruction at a given pc void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } +//------------------------------------------------------------------- + void NativeMovConstReg::verify() { // make sure code pattern is actually mov reg64, imm64 instructions } + intptr_t NativeMovConstReg::data() const { // das(uint64_t(instruction_address()),2); address addr = MacroAssembler::pd_call_destination(instruction_address()); @@ -71,6 +69,7 @@ *(intptr_t*)addr = x; } else { MacroAssembler::pd_patch_instruction(instruction_address(), (address)x); + ICache::invalidate_range(instruction_address(), instruction_size); } }; @@ -102,6 +101,7 @@ *(long*)addr = x; } else { MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x)); + ICache::invalidate_range(instruction_address(), instruction_size); } } @@ -138,8 +138,11 @@ dest = instruction_address(); MacroAssembler::pd_patch_instruction(instruction_address(), dest); + ICache::invalidate_range(instruction_address(), instruction_size); }; +//------------------------------------------------------------------- + bool NativeInstruction::is_safepoint_poll() { // a safepoint_poll is implemented in two steps as either // @@ -189,7 +192,9 @@ return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101; } -// MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie) +//------------------------------------------------------------------- + +// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie) void 
NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ptrdiff_t disp = dest - verified_entry; @@ -203,23 +208,22 @@ ICache::invalidate_range(verified_entry, instruction_size); } - void NativeGeneralJump::verify() { } - void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; ptrdiff_t disp = entry - code_pos; guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff); - *(unsigned int*)code_pos = insn; ICache::invalidate_range(code_pos, instruction_size); } // MT-safe patching of a long jump instruction. void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { - assert(nativeInstruction_at(instr_addr)->is_jump_or_nop(), + NativeGeneralJump* n_jump = (NativeGeneralJump*)instr_addr; + assert(n_jump->is_jump_or_nop(), "Aarch64 cannot replace non-jump with jump"); uint32_t instr = *(uint32_t*)code_buffer; *(uint32_t*)instr_addr = instr;
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -80,14 +80,10 @@ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } - void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } - void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } - void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } - void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } - - // This doesn't really do anything on AArch64, but it is the place where - // cache invalidation belongs, generically: - void wrote(int offset); + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } public: @@ -142,6 +138,7 @@ offset &= (1 << 26) - 1; // mask off insn part insn |= offset; set_int_at(displacement_offset, insn); + ICache::invalidate_range(instruction_address(), instruction_size); } // Similar to replace_mt_safe, but just changes the destination. The
--- a/src/cpu/aarch64/vm/register_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/register_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -62,7 +62,10 @@ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } const char* name() const; int encoding_nocheck() const { return (intptr_t)this; } - unsigned long bit(bool yes = true) const { return yes << encoding(); } + + // Return the bit which represents this register. This is intended + // to be ORed into a bitmask: for usage see class RegSet below. + unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } }; // The integer registers of the aarch64 architecture @@ -185,7 +188,7 @@ // it's optoregs. number_of_registers = (2 * RegisterImpl::number_of_registers + - 2 * FloatRegisterImpl::number_of_registers + + 4 * FloatRegisterImpl::number_of_registers + 1) // flags };
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -33,23 +33,29 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + if (verify_only) + return; + + int bytes; + switch(type()) { case relocInfo::oop_type: { oop_Relocation *reloc = (oop_Relocation *)this; if (NativeInstruction::is_ldr_literal_at(addr())) { address constptr = (address)code()->oop_addr_at(reloc->oop_index()); - MacroAssembler::pd_patch_instruction(addr(), constptr); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); assert(*(address*)constptr == x, "error in oop relocation"); } else{ - MacroAssembler::patch_oop(addr(), x); + bytes = MacroAssembler::patch_oop(addr(), x); } } break; default: - MacroAssembler::pd_patch_instruction(addr(), x); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); break; } + ICache::invalidate_range(addr(), bytes); } address Relocation::pd_call_destination(address orig_addr) {
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -318,6 +318,7 @@ __ mov(c_rarg1, lr); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); __ blrt(rscratch1, 2, 0, 0); + __ maybe_isb(); __ pop_CPU_state(); // restore sp @@ -1171,7 +1172,7 @@ __ lea(rscratch1, RuntimeAddress(dest)); __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type); __ blrt(rscratch1, rscratch2); - // __ blrt(rscratch1, gpargs, fpargs, type); + __ maybe_isb(); } } @@ -1797,6 +1798,7 @@ const Register obj_reg = r19; // Will contain the oop const Register lock_reg = r13; // Address of compiler lock object (BasicLock) const Register old_hdr = r13; // value of old header at unlock time + const Register tmp = c_rarg3; Label slow_path_lock; Label lock_done; @@ -1818,7 +1820,7 @@ __ ldr(obj_reg, Address(oop_handle_reg, 0)); if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock); + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); } // Load (object->mark() | 1) into swap_reg %r0 @@ -1868,7 +1870,8 @@ // Now set thread in native __ mov(rscratch1, _thread_in_native); - __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); { int return_type = 0; @@ -1925,7 +1928,8 @@ // Thread A is resumed to finish this native method, but doesn't block here since it // didn't see any synchronization is progress, and escapes. 
__ mov(rscratch1, _thread_in_native_trans); - __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); if(os::is_MP()) { if (UseMembar) { @@ -1974,6 +1978,7 @@ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); } __ blrt(rscratch1, 1, 0, 1); + __ maybe_isb(); // Restore any method result value restore_native_result(masm, ret_type, stack_slots); @@ -1988,7 +1993,8 @@ // change thread state __ mov(rscratch1, _thread_in_Java); - __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); __ bind(after_transition); Label reguard; @@ -2139,6 +2145,7 @@ save_native_result(masm, ret_type, stack_slots); } + __ mov(c_rarg2, rthread); __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ mov(c_rarg0, obj_reg); @@ -2147,7 +2154,7 @@ __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 2, 0, 1); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); #ifdef ASSERT { @@ -2813,6 +2820,9 @@ __ reset_last_Java_frame(false, true); + __ maybe_isb(); + __ membar(Assembler::LoadLoad | Assembler::LoadStore); + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); __ cbz(rscratch1, noException); @@ -2882,6 +2892,8 @@ oop_maps->add_gc_map( __ offset() - start, map); + __ maybe_isb(); + // r0 contains the address we are going to jump to assuming no exception got installed // clear last_Java_sp @@ -3004,6 +3016,7 @@ __ mov(c_rarg0, rthread); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, 
OptoRuntime::handle_exception_C))); __ blrt(rscratch1, 1, 0, MacroAssembler::ret_type_integral); + __ maybe_isb(); // Set an oopmap for the call site. This oopmap will only be used if we // are unwinding the stack. Hence, all locations will be dead.
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -305,7 +305,8 @@ #endif // pass parameters if any __ mov(esp, sp); - __ sub(sp, sp, os::vm_page_size()); // Move SP out of the way + __ sub(rscratch1, sp, c_rarg6, ext::uxtw, LogBytesPerWord); // Move SP out of the way + __ andr(sp, rscratch1, -2 * wordSize); BLOCK_COMMENT("pass parameters if any"); Label parameters_done; @@ -1891,7 +1892,7 @@ address start = __ pc(); __ enter(); - __ mov(rscratch1, len_reg); + __ mov(rscratch2, len_reg); __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); __ ld1(v0, __ T16B, rvec); @@ -1958,6 +1959,8 @@ __ leave(); __ ret(lr); + + return start; } // Arguments: @@ -2066,6 +2069,212 @@ return start; } + // Arguments: + // + // Inputs: + // c_rarg0 - byte[] source+offset + // c_rarg1 - int[] SHA.state + // c_rarg2 - int offset + // c_rarg3 - int limit + // + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + Label keys; + Label sha1_loop; + + // load the keys into v0..v3 + __ adr(rscratch1, keys); + __ ld4r(v0, v1, v2, v3, __ T4S, Address(rscratch1)); + // load 5 words state into v6, v7 + __ ldrq(v6, Address(state, 0)); + __ ldrs(v7, Address(state, 16)); + + + __ BIND(sha1_loop); + // load 64 bytes of data into v16..v19 + __ ld1(v16, v17, v18, v19, __ T4S, multi_block ? 
__ post(buf, 64) : buf); + __ rev32(v16, __ T16B, v16); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ rev32(v19, __ T16B, v19); + + // do the sha1 + __ addv(v4, __ T4S, v16, v0); + __ orr(v20, __ T16B, v6, v6); + + FloatRegister d0 = v16; + FloatRegister d1 = v17; + FloatRegister d2 = v18; + FloatRegister d3 = v19; + + for (int round = 0; round < 20; round++) { + FloatRegister tmp1 = (round & 1) ? v4 : v5; + FloatRegister tmp2 = (round & 1) ? v21 : v22; + FloatRegister tmp3 = round ? ((round & 1) ? v22 : v21) : v7; + FloatRegister tmp4 = (round & 1) ? v5 : v4; + FloatRegister key = (round < 4) ? v0 : ((round < 9) ? v1 : ((round < 14) ? v2 : v3)); + + if (round < 16) __ sha1su0(d0, __ T4S, d1, d2); + if (round < 19) __ addv(tmp1, __ T4S, d1, key); + __ sha1h(tmp2, __ T4S, v20); + if (round < 5) + __ sha1c(v20, __ T4S, tmp3, tmp4); + else if (round < 10 || round >= 15) + __ sha1p(v20, __ T4S, tmp3, tmp4); + else + __ sha1m(v20, __ T4S, tmp3, tmp4); + if (round < 16) __ sha1su1(d0, __ T4S, d3); + + tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1; + } + + __ addv(v7, __ T2S, v7, v21); + __ addv(v6, __ T4S, v6, v20); + + if (multi_block) { + __ add(ofs, ofs, 64); + __ cmp(ofs, limit); + __ br(Assembler::LE, sha1_loop); + __ mov(c_rarg0, ofs); // return ofs + } + + __ strq(v6, Address(state, 0)); + __ strs(v7, Address(state, 16)); + + __ ret(lr); + + __ bind(keys); + __ emit_int32(0x5a827999); + __ emit_int32(0x6ed9eba1); + __ emit_int32(0x8f1bbcdc); + __ emit_int32(0xca62c1d6); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - byte[] source+offset + // c_rarg1 - int[] SHA.state + // c_rarg2 - int offset + // c_rarg3 - int limit + // + address generate_sha256_implCompress(bool multi_block, const char *name) { + static const uint32_t round_consts[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 
0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + Label sha1_loop; + + __ stpd(v8, v9, __ pre(sp, -32)); + __ stpd(v10, v11, Address(sp, 16)); + +// dga == v0 +// dgb == v1 +// dg0 == v2 +// dg1 == v3 +// dg2 == v4 +// t0 == v6 +// t1 == v7 + + // load 16 keys to v16..v31 + __ lea(rscratch1, ExternalAddress((address)round_consts)); + __ ld1(v16, v17, v18, v19, __ T4S, __ post(rscratch1, 64)); + __ ld1(v20, v21, v22, v23, __ T4S, __ post(rscratch1, 64)); + __ ld1(v24, v25, v26, v27, __ T4S, __ post(rscratch1, 64)); + __ ld1(v28, v29, v30, v31, __ T4S, rscratch1); + + // load 8 words (256 bits) state + __ ldpq(v0, v1, state); + + __ BIND(sha1_loop); + // load 64 bytes of data into v8..v11 + __ ld1(v8, v9, v10, v11, __ T4S, multi_block ? __ post(buf, 64) : buf); + __ rev32(v8, __ T16B, v8); + __ rev32(v9, __ T16B, v9); + __ rev32(v10, __ T16B, v10); + __ rev32(v11, __ T16B, v11); + + __ addv(v6, __ T4S, v8, v16); + __ orr(v2, __ T16B, v0, v0); + __ orr(v3, __ T16B, v1, v1); + + FloatRegister d0 = v8; + FloatRegister d1 = v9; + FloatRegister d2 = v10; + FloatRegister d3 = v11; + + + for (int round = 0; round < 16; round++) { + FloatRegister tmp1 = (round & 1) ? 
v6 : v7; + FloatRegister tmp2 = (round & 1) ? v7 : v6; + FloatRegister tmp3 = (round & 1) ? v2 : v4; + FloatRegister tmp4 = (round & 1) ? v4 : v2; + + if (round < 12) __ sha256su0(d0, __ T4S, d1); + __ orr(v4, __ T16B, v2, v2); + if (round < 15) + __ addv(tmp1, __ T4S, d1, as_FloatRegister(round + 17)); + __ sha256h(v2, __ T4S, v3, tmp2); + __ sha256h2(v3, __ T4S, v4, tmp2); + if (round < 12) __ sha256su1(d0, __ T4S, d2, d3); + + tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1; + } + + __ addv(v0, __ T4S, v0, v2); + __ addv(v1, __ T4S, v1, v3); + + if (multi_block) { + __ add(ofs, ofs, 64); + __ cmp(ofs, limit); + __ br(Assembler::LE, sha1_loop); + __ mov(c_rarg0, ofs); // return ofs + } + + __ ldpd(v10, v11, Address(sp, 16)); + __ ldpd(v8, v9, __ post(sp, 32)); + + __ stpq(v0, v1, state); + + __ ret(lr); + + return start; + } + #ifndef BUILTIN_SIM // Safefetch stubs. void generate_safefetch(const char* name, int size, address* entry, @@ -2152,8 +2361,45 @@ return start; } -#undef __ -#define __ masm-> + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y lenth + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + + address start = __ pc(); + const Register x = r0; + const Register xlen = r1; + const Register y = r2; + const Register ylen = r3; + const Register z = r4; + const Register zlen = r5; + + const Register tmp1 = r10; + const Register tmp2 = r11; + const Register tmp3 = r12; + const Register tmp4 = r13; + const Register tmp5 = r14; + const Register tmp6 = r15; + const Register tmp7 = r16; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + 
+ return start; + } // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception @@ -2171,6 +2417,9 @@ // otherwise assume that stack unwinding will be initiated, so // caller saved registers were assumed volatile in the compiler. +#undef __ +#define __ masm-> + address generate_throw_exception(const char* name, address runtime_entry, Register arg1 = noreg, @@ -2234,6 +2483,7 @@ oop_maps->add_gc_map(the_pc - start, map); __ reset_last_Java_frame(true, true); + __ maybe_isb(); __ leave(); @@ -2313,6 +2563,10 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + #ifndef BUILTIN_SIM if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); @@ -2321,6 +2575,15 @@ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); } + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -199,8 +199,7 @@ // Restore machine SP __ ldr(rscratch1, Address(rmethod, Method::const_offset())); __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() - + (EnableInvokeDynamic ? 2 : 0)); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); __ ldr(rscratch2, Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); @@ -313,6 +312,7 @@ address entry = __ pc(); __ push(state); __ call_VM(noreg, runtime_entry); + __ membar(Assembler::AnyAny); __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); return entry; } @@ -670,7 +670,78 @@ // Method entry for java.lang.ref.Reference.get. address InterpreterGenerator::generate_Reference_get_entry(void) { - return NULL; +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. 
+ // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + // + // rmethod: Method* + // r13: senderSP must preserve for slow path, set SP to it on fast path + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + const Register local_0 = c_rarg0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldr(local_0, Address(esp, 0)); + __ cbz(local_0, slow_path); + + + // Load the value of the referent field. + const Address field_address(local_0, referent_offset); + __ load_heap_oop(local_0, field_address); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. 
+ __ enter(); // g1_write may call runtime + __ g1_write_barrier_pre(noreg /* obj */, + local_0 /* pre_val */, + rthread /* thread */, + rscratch2 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + // areturn + __ andr(sp, r13, -16); // done with stack + __ ret(lr); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); } /** @@ -799,7 +870,7 @@ const int page_size = os::vm_page_size(); for (int pages = start_page; pages <= StackShadowPages ; pages++) { __ sub(rscratch2, sp, pages*page_size); - __ ldr(zr, Address(rscratch2)); + __ str(zr, Address(rscratch2)); } } } @@ -1034,13 +1105,15 @@ // Change state to native __ mov(rscratch1, _thread_in_native); - __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); // load call format __ ldrw(rscratch1, Address(rmethod, Method::call_format_offset())); // Call the native method. 
__ blrt(r10, rscratch1); + __ maybe_isb(); __ get_method(rmethod); // result potentially in r0 or v0 @@ -1057,7 +1130,8 @@ // change thread state __ mov(rscratch1, _thread_in_native_trans); - __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); if (os::is_MP()) { if (UseMembar) { @@ -1098,6 +1172,7 @@ __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); __ blrt(rscratch2, 1, 0, 0); + __ maybe_isb(); __ get_method(rmethod); __ reinit_heapbase(); __ bind(Continue); @@ -1105,7 +1180,8 @@ // change thread state __ mov(rscratch1, _thread_in_Java); - __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); // reset_last_Java_frame __ reset_last_Java_frame(true, true); @@ -1543,29 +1619,18 @@ return (overhead_size + method_stack + stub_code); } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { // Note: This calculation must exactly parallel the frame setup // in AbstractInterpreterGenerator::generate_method_entry. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the - // right size, as determined by a previous call to this method. 
- // It is also guaranteed to be walkable even though it is in a skeletal state // fixed size of an interpreter frame: - int max_locals = method->max_locals() * Interpreter::stackElementWords; - int extra_locals = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset; // Our locals were accounted for by the caller (or last_frame_adjust @@ -1573,65 +1638,79 @@ // for the callee's params we only need to account for the extra // locals. int size = overhead + - (callee_locals - callee_param_count)*Interpreter::stackElementWords + - moncount * frame::interpreter_frame_monitor_size() + - tempcount* Interpreter::stackElementWords + popframe_extra_args; + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; // On AArch64 we always keep the stack pointer 16-aligned, so we // must round up here. size = round_to(size, 2); - if (interpreter_frame != NULL) { -#ifdef ASSERT - if (!EnableInvokeDynamic) - // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? - // Probably, since deoptimization doesn't work yet. 
- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); - assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); -#endif + return size; +} - interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and - // interpreter_frame_sender_sp interpreter_frame_sender_sp is - // the original sp of the caller (the unextended_sp) and - // sender_sp is fp+16 XXX - intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; #ifdef ASSERT - if (caller->is_interpreted_frame()) { - assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); - } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); #endif - interpreter_frame->interpreter_frame_set_locals(locals); - BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); - BasicObjectLock* monbot = montop - moncount; - interpreter_frame->interpreter_frame_set_monitor_end(monbot); + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and + // interpreter_frame_sender_sp interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and 
+ // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; - // Set last_sp - intptr_t* esp = (intptr_t*) monbot - - tempcount*Interpreter::stackElementWords - - popframe_extra_args; - interpreter_frame->interpreter_frame_set_last_sp(esp); +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); - // All frames but the initial (oldest) interpreter frame we fill in have - // a value for sender_sp that allows walking the stack but isn't - // truly correct. Correct the value here. - if (extra_locals != 0 && - interpreter_frame->sender_sp() == - interpreter_frame->interpreter_frame_sender_sp()) { - interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + - extra_locals); - } - *interpreter_frame->interpreter_frame_cache_addr() = - method->constants()->cache(); + // Set last_sp + intptr_t* esp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); - // interpreter_frame->obj_at_put(frame::sender_sp_offset, - // (oop)interpreter_frame->addr_at(frame::sender_sp_offset)); + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. 
+ if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); } - return size; + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); } + //----------------------------------------------------------------------------- // Exceptions @@ -1947,15 +2026,18 @@ } void TemplateInterpreterGenerator::count_bytecode() { + Register rscratch3 = r0; __ push(rscratch1); __ push(rscratch2); + __ push(rscratch3); Label L; __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); __ bind(L); __ ldxr(rscratch1, rscratch2); __ add(rscratch1, rscratch1, 1); - __ stxr(rscratch1, rscratch1, rscratch2); - __ cbnzw(rscratch1, L); + __ stxr(rscratch3, rscratch1, rscratch2); + __ cbnzw(rscratch3, L); + __ pop(rscratch3); __ pop(rscratch2); __ pop(rscratch1); }
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -508,23 +508,61 @@ { transition(vtos, itos); if (RewriteFrequentPairs) { - // TODO : check x86 code for what to do here - __ call_Unimplemented(); - } else { - locals_index(r1); - __ ldr(r0, iaddress(r1)); + Label rewrite, done; + Register bc = r4; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ cmpw(r1, Bytecodes::_iload); + __ br(Assembler::EQ, done); + + // if _fast_iload rewrite to _fast_iload2 + __ cmpw(r1, Bytecodes::_fast_iload); + __ movw(bc, Bytecodes::_fast_iload2); + __ br(Assembler::EQ, rewrite); + + // if _caload rewrite to _fast_icaload + __ cmpw(r1, Bytecodes::_caload); + __ movw(bc, Bytecodes::_fast_icaload); + __ br(Assembler::EQ, rewrite); + + // else rewrite to _fast_iload + __ movw(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, r1, false); + __ bind(done); + } + // do iload, get the local value into tos + locals_index(r1); + __ ldr(r0, iaddress(r1)); + } void TemplateTable::fast_iload2() { - __ call_Unimplemented(); + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); + __ push(itos); + locals_index(r1, 3); + __ ldr(r0, iaddress(r1)); } void TemplateTable::fast_iload() { - __ call_Unimplemented(); + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); } void TemplateTable::lload() @@ -716,7 +754,18 @@ // iload followed by caload frequent pair void TemplateTable::fast_icaload() { - __ call_Unimplemented(); + transition(vtos, itos); + // load index out of locals + 
locals_index(r2); + __ ldr(r1, iaddress(r2)); + + __ pop_ptr(r0); + + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(1))); + __ load_unsigned_short(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_CHAR))); } void TemplateTable::saload() @@ -785,7 +834,47 @@ // These bytecodes with a small amount of code are most profitable // to rewrite if (RewriteFrequentPairs) { - __ call_Unimplemented(); + Label rewrite, done; + const Register bc = r4; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ cmpw(r1, Bytecodes::Bytecodes::_getfield); + __ br(Assembler::EQ, done); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_igetfield); + __ movw(bc, Bytecodes::_fast_iaccess_0); + __ br(Assembler::EQ, rewrite); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_agetfield); + __ movw(bc, Bytecodes::_fast_aaccess_0); + __ br(Assembler::EQ, rewrite); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_fgetfield); + __ movw(bc, Bytecodes::_fast_faccess_0); + __ br(Assembler::EQ, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ movw(bc, Bytecodes::Bytecodes::_fast_aload_0); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, bc, r1, false); + + __ bind(done); } else { aload(0); } @@ 
-1568,6 +1657,12 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + __ profile_taken_branch(r0, r1); const ByteSize be_offset = MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset(); @@ -1849,6 +1944,12 @@ void TemplateTable::ret() { transition(vtos, vtos); + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + locals_index(r1); __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp __ profile_ret(r1, r2); @@ -3327,6 +3428,8 @@ // continue __ bind(done); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); } void TemplateTable::newarray() { @@ -3335,6 +3438,8 @@ __ mov(c_rarg2, r0); call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), c_rarg1, c_rarg2); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); } void TemplateTable::anewarray() { @@ -3344,6 +3449,8 @@ __ mov(c_rarg3, r0); call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), c_rarg1, c_rarg2, c_rarg3); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); } void TemplateTable::arraylength() {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -38,19 +38,30 @@ #ifndef BUILTIN_SIM #include <sys/auxv.h> #include <asm/hwcap.h> +#else +#define getauxval(hwcap) 0 +#endif #ifndef HWCAP_AES #define HWCAP_AES (1<<3) #endif +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1<<5) +#endif + +#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1<<6) +#endif + #ifndef HWCAP_CRC32 #define HWCAP_CRC32 (1<<7) #endif -#endif - int VM_Version::_cpu; int VM_Version::_model; +int VM_Version::_variant; +int VM_Version::_revision; int VM_Version::_stepping; int VM_Version::_cpuFeatures; const char* VM_Version::_features_str = ""; @@ -101,13 +112,51 @@ _supports_atomic_getset8 = true; _supports_atomic_getadd8 = true; - FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256); FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256); FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256); + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); -#ifndef BUILTIN_SIM unsigned long auxv = getauxval(AT_HWCAP); + + char buf[512]; + + strcpy(buf, "simd"); + if (auxv & HWCAP_CRC32) strcat(buf, ", crc"); + if (auxv & HWCAP_AES) strcat(buf, ", aes"); + if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); + if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); + + _features_str = strdup(buf); + _cpuFeatures = auxv; + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[128], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if (p = strchr(buf, ':')) { + long v = strtol(p+1, NULL, 0); + if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { + _cpu = v; + } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { + _variant = v; + } else if (strncmp(buf, "CPU 
part", sizeof "CPU part" - 1) == 0) { + _model = v; + } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { + _revision = v; + } + } + } + fclose(f); + } + + // Enable vendor specific features + if (_cpu == CPU_CAVIUM) _cpuFeatures |= CPU_DMB_ATOMICS; + if (_cpu == CPU_ARM) _cpuFeatures |= CPU_A53MAC; + if (FLAG_IS_DEFAULT(UseCRC32)) { UseCRC32 = (auxv & HWCAP_CRC32) != 0; } @@ -130,11 +179,60 @@ warning("UseAESIntrinsics specified, but not supported on this CPU"); } } -#endif if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; } + + if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (!UseSHA) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } else { + if (auxv & HWCAP_SHA1) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("SHA1 instruction is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + if (auxv & HWCAP_SHA2) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + if (UseSHA512Intrinsics) { + warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + UseMultiplyToLenIntrinsic = true; + } + + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + UsePopCountInstruction = true; + } + +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(OptoScheduling)) { + OptoScheduling = true; + } 
+#endif } void VM_Version::initialize() {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -35,6 +35,8 @@ protected: static int _cpu; static int _model; + static int _variant; + static int _revision; static int _stepping; static int _cpuFeatures; // features returned by the "cpuid" instruction // 0 if this instruction is not available @@ -50,7 +52,39 @@ static void assert_is_initialized() { } + enum { + CPU_ARM = 'A', + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', + CPU_AMCC = 'P', + CPU_QUALCOM = 'Q', + CPU_MARVELL = 'V', + CPU_INTEL = 'i', + } cpuFamily; + + enum { + CPU_FP = (1<<0), + CPU_ASIMD = (1<<1), + CPU_EVTSTRM = (1<<2), + CPU_AES = (1<<3), + CPU_PMULL = (1<<4), + CPU_SHA1 = (1<<5), + CPU_SHA2 = (1<<6), + CPU_CRC32 = (1<<7), + CPU_A53MAC = (1 << 30), + CPU_DMB_ATOMICS = (1 << 31), + } cpuFeatureFlags; + static const char* cpu_features() { return _features_str; } + static int cpu_family() { return _cpu; } + static int cpu_model() { return _model; } + static int cpu_variant() { return _variant; } + static int cpu_revision() { return _revision; } + static int cpu_cpuFeatures() { return _cpuFeatures; } };
--- a/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -58,7 +58,8 @@ #ifndef PRODUCT if (CountCompiledCalls) { - __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ lea(r19, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r19)); } #endif @@ -73,12 +74,14 @@ if (DebugVtables) { Label L; // check offset vs vtable length - __ ldrw(rscratch1, Address(r0, InstanceKlass::vtable_length_offset() * wordSize)); + __ ldrw(rscratch1, Address(r19, InstanceKlass::vtable_length_offset() * wordSize)); __ cmpw(rscratch1, vtable_index * vtableEntry::size()); __ br(Assembler::GT, L); + __ enter(); __ mov(r2, vtable_index); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + __ leave(); __ bind(L); } #endif // PRODUCT @@ -109,9 +112,6 @@ (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; @@ -130,7 +130,8 @@ #ifndef PRODUCT if (CountCompiledCalls) { - __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r10)); } #endif @@ -190,9 +191,6 @@ (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; @@ -200,8 +198,49 @@ int 
VtableStub::pd_code_size_limit(bool is_vtable_stub) { + int size = DebugVtables ? 216 : 0; + if (CountCompiledCalls) + size += 6 * 4; // FIXME - return 200; + if (is_vtable_stub) + size += 52; + else + size += 104; + return size; + + // In order to tune these parameters, run the JVM with VM options + // +PrintMiscellaneous and +WizardMode to see information about + // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. + // + // If Universe::narrow_klass_base is nonzero, decoding a compressed + // class can take several instructions. Run it with -Xmx31G + // -XX:+UseCompressedOops. + // + // The JVM98 app. _202_jess has a megamorphic interface call. + // The itable code looks like this: + // Decoding VtableStub itbl[1]@12 + // ldr w10, [x1,#8] + // lsl x10, x10, #3 + // ldr w11, [x10,#280] + // add x11, x10, x11, uxtx #3 + // add x11, x11, #0x1b8 + // ldr x12, [x11] + // cmp x9, x12 + // b.eq success + // loop: + // cbz x12, throw_icce + // add x11, x11, #0x10 + // ldr x12, [x11] + // cmp x9, x12 + // b.ne loop + // success: + // ldr x11, [x11,#8] + // ldr x12, [x10,x11] + // ldr x8, [x12,#72] + // br x8 + // throw_icce: + // b throw_ICCE_entry + } int VtableStub::pd_code_alignment() { return 4; }
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -2470,7 +2470,8 @@ // Slow case of monitor enter. // Inline a special case of call_VM that disallows any pending_exception. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box); + // Arguments are (oop obj, BasicLock* lock, JavaThread* thread). + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread); __ asm_assert_mem8_is_zero(thread_(pending_exception), "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
--- a/src/cpu/x86/vm/x86_64.ad Wed Sep 30 16:43:15 2015 +0100 +++ b/src/cpu/x86/vm/x86_64.ad Fri Oct 02 04:37:30 2015 +0100 @@ -3732,6 +3732,23 @@ %} %} +// Indirect Memory Plus Positive Index Register Plus Offset Operand +operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP reg (ConvI2L idx)) off); + + op_cost(10); + format %{"[$reg + $off + $idx]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale(0x0); + disp($off); + %} +%} + // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) %{ @@ -3883,6 +3900,23 @@ %} %} +// Indirect Memory Plus Positive Index Register Plus Offset Operand +operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off); + + op_cost(10); + format %{"[$reg + $off + $idx]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale(0x0); + disp($off); + %} +%} + // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) %{ @@ -4074,11 +4108,11 @@ // case of this is memory operands. 
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, - indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset, + indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset, indCompressedOopOffset, indirectNarrow, indOffset8Narrow, indOffset32Narrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, - indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow); + indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); //----------PIPELINE----------------------------------------------------------- // Rules which define the behavior of the target architectures pipeline. @@ -5112,6 +5146,17 @@ ins_pipe(ialu_reg_reg_fat); %} +instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr posidxoff" %} + opcode(0x8D); + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); + ins_pipe(ialu_reg_reg_fat); +%} + instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) %{ match(Set dst mem); @@ -5196,6 +5241,18 @@ ins_pipe(ialu_reg_reg_fat); %} +instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %} + opcode(0x8D); + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); + ins_pipe(ialu_reg_reg_fat); +%} + instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) %{ predicate(Universe::narrow_oop_shift() == 0);
--- a/src/os/linux/vm/os_linux.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os/linux/vm/os_linux.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -5953,22 +5953,6 @@ extern char** environ; -#ifndef __NR_fork -#ifdef BUILTIN_SIM -#define __NR_fork 57 -#else -#define __NR_fork IA32_ONLY(2) IA64_ONLY(not defined) AMD64_ONLY(57) AARCH64_ONLY(1079) -#endif -#endif - -#ifndef __NR_execve -#ifdef BUILTIN_SIM -#define __NR_execve 59 -#else -#define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59) AARCH64_ONLY(221) -#endif -#endif - // Run the specified command in a separate process. Return its exit value, // or -1 on failure (e.g. can't fork a new process). // Unlike system(), this function can be called from signal handler. It
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -31,6 +31,10 @@ // Implementation of class atomic +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } @@ -71,7 +75,9 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { - return __sync_lock_test_and_set (dest, exchange_value); + jint res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; } inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) @@ -111,7 +117,9 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { - return __sync_lock_test_and_set (dest, exchange_value); + intptr_t res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; } inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
--- a/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -39,4 +39,6 @@ // Used on 64 bit platforms for UseCompressedOops base address define_pd_global(uintx,HeapBaseMinAddress, 2*G); +extern __thread Thread *aarch64_currentThread; + #endif // OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP
--- a/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -31,10 +31,6 @@ #include "runtime/os.hpp" #include "vm_version_aarch64.hpp" -#define FULL_MEM_BARRIER __sync_synchronize() -#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); - // Implementation of class OrderAccess. inline void OrderAccess::loadload() { acquire(); }
--- a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -172,10 +172,14 @@ return frame(sp, fp, epc.pc()); } -// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get -// turned off by -fomit-frame-pointer, +// By default, gcc always saves frame pointer rfp on this stack. This +// may get turned off by -fomit-frame-pointer. frame os::get_sender_for_C_frame(frame* fr) { +#ifdef BUILTIN_SIM return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +#else + return frame(fr->link(), fr->link(), fr->sender_pc()); +#endif } intptr_t* _get_previous_fp() {
--- a/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -26,32 +26,6 @@ #include "runtime/threadLocalStorage.hpp" #include "runtime/thread.inline.hpp" -// Map stack pointer (%esp) to thread pointer for faster TLS access -// -// Here we use a flat table for better performance. Getting current thread -// is down to one memory access (read _sp_map[%esp>>12]) in generated code -// and two in runtime code (-fPIC code needs an extra load for _sp_map). -// -// This code assumes stack page is not shared by different threads. It works -// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). -// -// Notice that _sp_map is allocated in the bss segment, which is ZFOD -// (zero-fill-on-demand). While it reserves 4M address space upfront, -// actual memory pages are committed on demand. -// -// If an application creates and destroys a lot of threads, usually the -// stack space freed by a thread will soon get reused by new thread -// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). -// No memory page in _sp_map is wasted. -// -// However, it's still possible that we might end up populating & -// committing a large fraction of the 4M table over time, but the actual -// amount of live data in the table could be quite small. The max wastage -// is less than 4M bytes. If it becomes an issue, we could use madvise() -// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. -// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the -// physical memory page (i.e. similar to MADV_FREE on Solaris). 
- void ThreadLocalStorage::generate_code_for_get_thread() { // nothing we can do here for user-level thread } @@ -59,6 +33,9 @@ void ThreadLocalStorage::pd_init() { } +__thread Thread *aarch64_currentThread; + void ThreadLocalStorage::pd_set_thread(Thread* thread) { os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); + aarch64_currentThread = thread; }
--- a/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -29,8 +29,8 @@ public: - static Thread* thread() { - return (Thread*) os::thread_local_storage_at(thread_index()); - } + static Thread *thread() { + return aarch64_currentThread; + } #endif // OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP
--- a/src/share/vm/c1/c1_LIRGenerator.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -2102,13 +2102,6 @@ addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); } else { #if defined(X86) || defined(AARCH64) -#ifdef _LP64 - if (!index_op->is_illegal() && index_op->type() == T_INT) { - LIR_Opr tmp = new_pointer_register(); - __ convert(Bytecodes::_i2l, index_op, tmp); - index_op = tmp; - } -#endif addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); #elif defined(GENERATE_ADDRESS_IS_PREFERRED) addr = generate_address(base_op, index_op, log2_scale, 0, dst_type);
--- a/src/share/vm/code/nmethod.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/code/nmethod.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -47,12 +47,12 @@ #include "shark/sharkCompiler.hpp" #endif +PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC + #ifdef BUILTIN_SIM #include "../../../../../simulator/simulator.hpp" #endif -PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC - unsigned char nmethod::_global_unloading_clock = 0; #ifdef DTRACE_ENABLED
--- a/src/share/vm/memory/metaspace.cpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/memory/metaspace.cpp Fri Oct 02 04:37:30 2015 +0100 @@ -3018,10 +3018,50 @@ // Don't use large pages for the class space. bool large_pages = false; +#ifndef AARCH64 ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), _reserve_alignment, large_pages, requested_addr, 0); +#else // AARCH64 + ReservedSpace metaspace_rs; + + // Our compressed klass pointers may fit nicely into the lower 32 + // bits. + if ((uint64_t)requested_addr + compressed_class_space_size() < 4*G) + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); + + if (! metaspace_rs.is_reserved()) { + // Try to align metaspace so that we can decode a compressed klass + // with a single MOVK instruction. We can do this iff the + // compressed class base is a multiple of 4G. + for (char *a = (char*)align_ptr_up(requested_addr, 4*G); + a < (char*)(1024*G); + a += 4*G) { + if (UseSharedSpaces + && ! can_use_cds_with_metaspace_addr(a, cds_base)) { + // We failed to find an aligned base that will reach. Fall + // back to using our requested addr. + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); + break; + } + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + a, 0); + if (metaspace_rs.is_reserved()) + break; + } + } + +#endif // AARCH64 + if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS if (UseSharedSpaces) {
--- a/src/share/vm/memory/metaspaceShared.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/memory/metaspaceShared.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -126,10 +126,6 @@ static void print_shared_spaces(); -#if defined(BUILTIN_SIM) - static void relocate_vtbl_list(char **buffer); -#endif - static bool try_link_class(InstanceKlass* ik, TRAPS); static void link_one_shared_class(Klass* obj, TRAPS); static void check_one_shared_class(Klass* obj); @@ -137,5 +133,9 @@ static int count_class(const char* classlist_file); static void estimate_regions_size() NOT_CDS_RETURN; + +#if defined(BUILTIN_SIM) + static void relocate_vtbl_list(char **buffer); +#endif }; #endif // SHARE_VM_MEMORY_METASPACE_SHARED_HPP
--- a/src/share/vm/opto/c2_globals.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/opto/c2_globals.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -659,9 +659,6 @@ product(bool, UseMathExactIntrinsics, true, \ "Enables intrinsification of various java.lang.Math functions") \ \ - product(bool, UseMultiplyToLenIntrinsic, false, \ - "Enables intrinsification of BigInteger.multiplyToLen()") \ - \ product(bool, UseTypeSpeculation, true, \ "Speculatively propagate types from profiles") \ \
--- a/src/share/vm/runtime/globals.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/runtime/globals.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -732,6 +732,9 @@ product(bool, UseCRC32Intrinsics, false, \ "use intrinsics for java.util.zip.CRC32") \ \ + product(bool, UseMultiplyToLenIntrinsic, false, \ + "Enables intrinsification of BigInteger.multiplyToLen()") \ + \ develop(bool, TraceCallFixup, false, \ "Trace all call fixups") \ \
--- a/src/share/vm/runtime/orderAccess.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/runtime/orderAccess.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -41,6 +41,9 @@ #ifdef TARGET_OS_ARCH_linux_arm # include "orderAccess_linux_arm.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "orderAccess_linux_aarch64.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_ppc # include "orderAccess_linux_ppc.inline.hpp" #endif
--- a/src/share/vm/runtime/prefetch.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/runtime/prefetch.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -40,6 +40,9 @@ #ifdef TARGET_OS_ARCH_linux_arm # include "prefetch_linux_arm.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "prefetch_linux_aarch64.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_ppc # include "prefetch_linux_ppc.inline.hpp" #endif
--- a/src/share/vm/runtime/thread.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/runtime/thread.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -1050,7 +1050,7 @@ address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support -#ifndef PPC64 +#if !(defined(PPC64) || defined(AARCH64)) JavaThreadState thread_state() const { return _thread_state; } void set_thread_state(JavaThreadState s) { _thread_state = s; } #else
--- a/src/share/vm/runtime/thread.inline.hpp Wed Sep 30 16:43:15 2015 +0100 +++ b/src/share/vm/runtime/thread.inline.hpp Fri Oct 02 04:37:30 2015 +0100 @@ -59,7 +59,7 @@ return allocated_bytes; } -#ifdef PPC64 +#if defined(PPC64) || defined (AARCH64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); }
--- a/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java Fri Oct 02 04:37:30 2015 +0100 @@ -34,6 +34,7 @@ * -XX:CompileCommand=inline,java.math.BigInteger::multiply TestMultiplyToLen */ +import java.util.Arrays; import java.util.Random; import java.math.*; @@ -97,12 +98,36 @@ newsum = newsum.add(newres); if (!bytecompare(oldres,newres)) { + System.out.println(b1); + System.out.println(b2); + System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); + throw new Exception("Failed"); + } + } + + // Test carry propagation. Multiple carries during bignum + // multiplication are rare (especially when using 64-bit + // arithmetic) so we have to provoke them deliberately. + for (int j = 4; j <= 396; j += 4) { + byte[] bytes = new byte[j]; + Arrays.fill(bytes, (byte)255); + b1 = new BigInteger(bytes); + b2 = new BigInteger(bytes); + + oldres = base_multiply(b1,b2); + newres = new_multiply(b1,b2); + + oldsum = oldsum.add(oldres); + newsum = newsum.add(newres); + + if (!bytecompare(oldres,newres)) { System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); System.out.println(b1); System.out.println(b2); throw new Exception("Failed"); } } + if (!bytecompare(oldsum,newsum)) { System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum)); throw new Exception("Failed");
--- a/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java Fri Oct 02 04:37:30 2015 +0100 @@ -95,6 +95,19 @@ default: throw new Error("Unexpected option " + optionName); } + } else if (Platform.isAArch64()) { + switch (optionName) { + case SHAOptionsBase.USE_SHA_OPTION: + return SHAOptionsBase.SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION: + return SHAOptionsBase.SHA1_INSTRUCTION_IS_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION: + return SHAOptionsBase.SHA256_INSTRUCTION_IS_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION: + return SHAOptionsBase.SHA512_INSTRUCTION_IS_NOT_AVAILABLE; + default: + throw new Error("Unexpected option " + optionName); + } } else { throw new Error("Support for CPUs other then X86 or SPARC is not " + "implemented.");
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnSupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnSupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -34,7 +34,10 @@ */ public class TestUseSHA1IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION)).test(); } }
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU(
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnSupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnSupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -35,7 +35,10 @@ */ public class TestUseSHA256IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION)).test(); } }
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU(
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnSupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnSupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -35,7 +35,10 @@ */ public class TestUseSHA512IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION)).test(); } }
--- a/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU(
--- a/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnSupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnSupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -38,6 +38,8 @@ new GenericTestCaseForSupportedSparcCPU( SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForSupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION)).test(); } }
--- a/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -39,6 +39,8 @@ SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU(
--- a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -36,7 +36,8 @@ public GenericTestCaseForOtherCPU(String optionName) { // Execute the test case on any CPU except SPARC and X86 super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isX64, Platform::isX86)))); + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isX64, Platform::isX86))))); } @Override
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForSupportedAArch64CPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import com.oracle.java.testlibrary.ExitCode; +import com.oracle.java.testlibrary.Platform; +import com.oracle.java.testlibrary.cli.CommandLineOptionTest; +import com.oracle.java.testlibrary.cli.predicate.AndPredicate; + +/** + * Generic test case for SHA-related options targeted to AArch64 CPUs which + * support instructions required by the tested option. 
+ */ +public class GenericTestCaseForSupportedAArch64CPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForSupportedAArch64CPU(String optionName) { + super(optionName, new AndPredicate(Platform::isAArch64, + SHAOptionsBase.getPredicateForOption(optionName))); + } + + @Override + protected void verifyWarnings() throws Throwable { + // Verify that there are no warnings when option is explicitly enabled. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option could be disabled even if +UseSHA was passed to + // JVM. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true), + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + // Verify that it is possible to enable the tested option and disable + // all SHA intrinsics via -UseSHA without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + + @Override + protected void verifyOptionValues() throws Throwable { + // Verify that on supported CPU option is enabled by default. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "true"); + + // Verify that it is possible to explicitly enable the option. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "true", + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that it is possible to explicitly disable the option. 
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + // verify that option is disabled when -UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, true), + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, false)); + + // Verify that it is possible to explicitly disable the tested option + // even if +UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true), + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedAArch64CPU.java Fri Oct 02 04:37:30 2015 +0100 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import com.oracle.java.testlibrary.ExitCode; +import com.oracle.java.testlibrary.Platform; +import com.oracle.java.testlibrary.cli.CommandLineOptionTest; +import com.oracle.java.testlibrary.cli.predicate.AndPredicate; +import com.oracle.java.testlibrary.cli.predicate.NotPredicate; + +/** + * Generic test case for SHA-related options targeted to AArch64 CPUs which don't + * support instruction required by the tested option. 
+ */ +public class GenericTestCaseForUnsupportedAArch64CPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForUnsupportedAArch64CPU(String optionName) { + super(optionName, new AndPredicate(Platform::isAArch64, + new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); + } + + @Override + protected void verifyWarnings() throws Throwable { + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + } + + @Override + protected void verifyOptionValues() throws Throwable { + // Verify that option is disabled by default. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false"); + + // Verify that option is disabled even if it was explicitly enabled + // using CLI options. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)); + } +}
--- a/test/compiler/stable/StableConfiguration.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/stable/StableConfiguration.java Fri Oct 02 04:37:30 2015 +0100 @@ -41,10 +41,30 @@ System.out.println("Server Compiler: " + get()); } + // The method 'get' below returns true if the method is server compiled + // and is used by the Stable tests to determine whether methods in + // general are being server compiled or not as the -XX:+FoldStableValues + // option is only applicable to -server. + // + // On aarch64 we deoptimize when patching. This means that when the + // method is compiled as a result of -Xcomp it deoptimizes immediately. + // The result is that getMethodCompilationLevel returns 0. This means + // the method returns true based on java.vm.name. + // + // However, when the tests are run with -XX:+TieredCompilation and + // -XX:TieredStopAtLevel=1 this fails because methods will always + // be client compiled. + // + // The solution is to add a simple method 'get1' which should never be + // deoptimized and use that to determine the compilation level instead. + static void get1() { + } + // ::get() is among immediately compiled methods. static boolean get() { try { - Method m = StableConfiguration.class.getDeclaredMethod("get"); + get1(); + Method m = StableConfiguration.class.getDeclaredMethod("get1"); int level = WB.getMethodCompilationLevel(m); if (level > 0) { return (level == 4);
--- a/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java Wed Sep 30 16:43:15 2015 +0100 +++ b/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java Fri Oct 02 04:37:30 2015 +0100 @@ -59,16 +59,25 @@ }; public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, - null); + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, + null), + new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, + null)); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, - null); + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, + null), + new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, + null)); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, - null); + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, + null), + new CPUSpecificPredicate("aarch64", new String[] { "sha512" }, + null)); public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,