changeset: 6763:13031adeeef5 (tags: icedtea-2.6.22, icedtea-2.6.23pre00)

Merge jdk7u261-b02

author:    andrew
date:      Mon, 13 Apr 2020 16:44:26 +0100
parents:   589dc1af9e7b (diff), 5530d69c4b0c (current diff)
children:  83361fc516e1
files:     .hgtags make/bsd/makefiles/gcc.make make/linux/makefiles/gcc.make make/linux/makefiles/jsig.make make/linux/makefiles/saproc.make make/solaris/makefiles/gcc.make make/solaris/makefiles/jsig.make make/solaris/makefiles/saproc.make
diffstat:  495 files changed, 94932 insertions(+), 3538 deletions(-)
--- a/.hgtags	Mon Apr 13 06:13:18 2020 +0100
+++ b/.hgtags	Mon Apr 13 16:44:26 2020 +0100
@@ -50,6 +50,7 @@
 faf94d94786b621f8e13cbcc941ca69c6d967c3f jdk7-b73
 f4b900403d6e4b0af51447bd13bbe23fe3a1dac7 jdk7-b74
 d8dd291a362acb656026a9c0a9da48501505a1e7 jdk7-b75
+b4ab978ce52c41bb7e8ee86285e6c9f28122bbe1 icedtea7-1.12
 9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76
 455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77
 e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78
@@ -87,6 +88,7 @@
 07226e9eab8f74b37346b32715f829a2ef2c3188 hs18-b01
 e7e7e36ccdb5d56edd47e5744351202d38f3b7ad jdk7-b87
 4b60f23c42231f7ecd62ad1fcb6a9ca26fa57d1b jdk7-b88
+a393ff93e7e54dd94cc4211892605a32f9c77dad icedtea7-1.13
 15836273ac2494f36ef62088bc1cb6f3f011f565 jdk7-b89
 4b60f23c42231f7ecd62ad1fcb6a9ca26fa57d1b hs18-b02
 605c9707a766ff518cd841fc04f9bb4b36a3a30b jdk7-b90
@@ -160,6 +162,7 @@
 b898f0fc3cedc972d884d31a751afd75969531cf hs21-b05
 bd586e392d93b7ed7a1636dcc8da2b6a4203a102 jdk7-b136
 bd586e392d93b7ed7a1636dcc8da2b6a4203a102 hs21-b06
+591c7dc0b2ee879f87a7b5519a5388e0d81520be icedtea-1.14
 2dbcb4a4d8dace5fe78ceb563b134f1fb296cd8f jdk7-b137
 2dbcb4a4d8dace5fe78ceb563b134f1fb296cd8f hs21-b07
 0930dc920c185afbf40fed9a655290b8e5b16783 jdk7-b138
@@ -182,6 +185,7 @@
 38fa55e5e79232d48f1bb8cf27d88bc094c9375a hs21-b16
 81d815b05abb564aa1f4100ae13491c949b9a07e jdk7-b147
 81d815b05abb564aa1f4100ae13491c949b9a07e hs21-b17
+7693eb0fce1f6b484cce96c233ea20bdad8a09e0 icedtea-2.0-branchpoint
 9b0ca45cd756d538c4c30afab280a91868eee1a5 jdk7u2-b01
 0cc8a70952c368e06de2adab1f2649a408f5e577 jdk8-b01
 31e253c1da429124bb87570ab095d9bc89850d0a jdk8-b02
@@ -210,6 +214,7 @@
 3ba0bb2e7c8ddac172f5b995aae57329cdd2dafa hs22-b10
 f17fe2f4b6aacc19cbb8ee39476f2f13a1c4d3cd jdk7u2-b13
 0744602f85c6fe62255326df595785eb2b32166d jdk7u2-b21
+f8f4d3f9b16567b91bcef4caaa8417c8de8015f0 icedtea-2.1-branchpoint
 a40d238623e5b1ab1224ea6b36dc5b23d0a53880 jdk7u3-b02
 6986bfb4c82e00b938c140f2202133350e6e73f8 jdk7u3-b03
 8e6375b46717d74d4885f839b4e72d03f357a45f jdk7u3-b04
@@ -264,6 +269,7 @@
 f92a171cf0071ca6c3fa8231d7d570377f8b2f4d hs23-b16
 f92a171cf0071ca6c3fa8231d7d570377f8b2f4d hs23-b16
 931e5f39e365a0d550d79148ff87a7f9e864d2e1 hs23-b16
+a2c5354863dcb3d147b7b6f55ef514b1bfecf920 icedtea-2.2-branchpoint
 efb5f2662c96c472caa3327090268c75a86dd9c0 jdk7u4-b13
 82e719a2e6416838b4421637646cbfd7104c7716 jdk7u4-b14
 e5f7f95411fb9e837800b4152741c962118e5d7a jdk7u5-b01
@@ -302,6 +308,9 @@
 e974e15945658e574e6c344c4a7ba225f5708c10 hs23.2-b03
 f08a3a0e60c32cb0e8350e72fdc54849759096a4 jdk7u6-b12
 7a8d3cd6562170f4c262e962270f679ac503f456 hs23.2-b04
+d72dd66fdc3d52aee909f8dd8f25f62f13569ffa ppc-aix-port-b01
+1efaab66c81d0a5701cc819e67376f1b27bfea47 ppc-aix-port-b02
+b69b779a26dfc5e2333504d0c82fc998ff915499 ppc-aix-port-b03
 28746e6d615f27816f483485a53b790c7a463f0c jdk7u6-b13
 202880d633e646d4936798d0fba6efc0cab04dc8 hs23.2-b05
 6b0f178141388f5721aa5365cb542715acbf0cc7 jdk7u6-b14
@@ -311,6 +320,7 @@
 cefe884c708aa6dfd63aff45f6c698a6bc346791 jdk7u6-b16
 270a40a57b3d05ca64070208dcbb895b5b509d8e hs23.2-b08
 7a37cec9d0d44ae6ea3d26a95407e42d99af6843 jdk7u6-b17
+354cfde7db2f1fd46312d883a63c8a76d5381bab icedtea-2.3-branchpoint
 df0df4ae5af2f40b7f630c53a86e8c3d68ef5b66 jdk7u6-b18
 1257f4373a06f788bd656ae1c7a953a026a285b9 jdk7u6-b19
 a0c2fa4baeb6aad6f33dc87b676b21345794d61e hs23.2-b09
@@ -440,6 +450,7 @@
 4f7ad6299356bfd2cfb448ea4c11e8ce0fbf69f4 jdk7u12-b07
 3bb803664f3d9c831d094cbe22b4ee5757e780c8 jdk7u12-b08
 92e382c3cccc0afbc7f72fccea4f996e05b66b3e jdk7u12-b09
+6e4feb17117d21e0e4360f2d0fbc68397ed3ba80 icedtea-2.4-branchpoint
 7554f9b2bcc72204ac10ba8b08b8e648459504df hs24-b29
 181528fd1e74863a902f171a2ad46270a2fb15e0 jdk7u14-b10
 4008cf63c30133f2fac148a39903552fe7a33cea hs24-b30
@@ -496,6 +507,7 @@
 273e8afccd6ef9e10e9fe121f7b323755191f3cc jdk7u25-b32
 e3d2c238e29c421c3b5c001e400acbfb30790cfc jdk7u14-b14
 860ae068f4dff62a77c8315f0335b7e935087e86 hs24-b34
+ca298f18e21dc66c6b5235600f8b50bcc9bbaa38 ppc-aix-port-b04
 12619005c5e29be6e65f0dc9891ca19d9ffb1aaa jdk7u14-b15
 be21f8a4d42c03cafde4f616fd80ece791ba2f21 hs24-b35
 10e0043bda0878dbc85f3f280157eab592b47c91 jdk7u14-b16
@@ -590,6 +602,9 @@
 12374864c655a2cefb0d65caaacf215d5365ec5f jdk7u45-b18
 3677c8cc3c89c0fa608f485b84396e4cf755634b jdk7u45-b30
 520b7b3d9153c1407791325946b07c5c222cf0d6 jdk7u45-b31
+ae4adc1492d1c90a70bd2d139a939fc0c8329be9 jdk7u60-b00
+af1fc2868a2b919727bfbb0858449bd991bbee4a jdk7u40-b60
+cc83359f5e5eb46dd9176b0a272390b1a0a51fdc hs24.60-b01
 c373a733d5d5147f99eaa2b91d6b937c28214fc9 jdk7u45-b33
 0bcb43482f2ac5615437541ffb8dc0f79ece3148 jdk7u45-b34
 12ea8d416f105f5971c808c89dddc1006bfc4c53 jdk7u45-b35
@@ -646,6 +661,8 @@
 0025a2a965c8f21376278245c2493d8861386fba jdk7u60-b02
 fa59add77d1a8f601a695f137248462fdc68cc2f hs24.60-b05
 a59134ccb1b704b2cd05e157970d425af43e5437 hs24.60-b06
+bc178be7e9d6fcc97e09c909ffe79d96e2305218 icedtea-2.5pre01
+f30e87f16d90f1e659b935515a3fc083ab8a0156 icedtea-2.5pre02
 2c971ed884cec0a9293ccff3def696da81823225 jdk7u60-b03
 1afbeb8cb558429156d432f35e7582716053a9cb hs24.60-b07
 05fe7a87d14908eb3f21a0d29fc72cee2f996b7f jdk7u60-b04
@@ -810,13 +827,36 @@
 ff18bcebe2943527cdbc094375c38c27ec7f2442 hs24.80-b03
 1b9722b5134a8e565d8b8fe851849e034beff057 hs24.80-b04
 04d6919c44db8c9d811ef0ac4775a579f854cdfc hs24.80-b05
+882a93010fb90f928331bf31a226992755d6cfb2 icedtea-2.6pre01
 ee18e60e7e8da9f1912895af353564de0330a2b1 hs24.80-b06
+138ef7288fd40de0012a3a24839fa7cb3569ab43 icedtea-2.6pre02
+4ab69c6e4c85edf628c01c685bc12c591b9807d9 icedtea-2.6pre03
+b226be2040f971855626f5b88cb41a7d5299fea0 jdk7u60-b14
+2fd819c8b5066a480f9524d901dbd34f2cf563ad icedtea-2.6pre04
+fae3b09fe959294f7a091a6ecaae91daf1cb4f5c icedtea-2.6pre05
 05fe7a87d14908eb3f21a0d29fc72cee2f996b7f jdk7u80-b00
 e2533d62ca887078e4b952a75a75680cfb7894b9 jdk7u80-b01
+8ffb87775f56ed5c602f320d2513351298ee4778 icedtea-2.6pre07
+b517477362d1b0d4f9b567c82db85136fd14bc6e icedtea-2.6pre06
+6d5ec408f4cac2c2004bf6120403df1b18051a21 icedtea-2.6pre08
 bad107a5d096b070355c5a2d80aa50bc5576144b jdk7u80-b02
+4722cfd15c8386321c8e857951b3cb55461e858b icedtea-2.6pre09
+c8417820ac943736822e7b84518b5aca80f39593 icedtea-2.6pre10
+e13857ecc7870c28dbebca79ff36612693dac157 icedtea-2.6pre11
 9d2b485d2a58ea57ab2b3c06b2128f456ab39a38 jdk7u80-b03
+0c2099cd04cd24778c5baccc7c8a72c311ef6f84 icedtea-2.6pre12
+c6fa18ed8a01a15e1210bf44dc7075463e0a514b icedtea-2.6pre13
+1d3d9e81c8e16bfe948da9bc0756e922a3802ca4 icedtea-2.6pre14
+5ad4c09169742e076305193c1e0b8256635cf33e icedtea-2.6pre15
+7891f0e7ae10d8f636fdbf29bcfe06f43d057e5f icedtea-2.6pre16
+4d25046abb67ae570ae1dbb5e3e48e7a63d93b88 icedtea-2.6pre17
 a89267b51c40cba0b26fe84831478389723c8321 jdk7u80-b04
 00402b4ff7a90a6deba09816192e335cadfdb4f0 jdk7u80-b05
+1792bfb4a54d87ff87438413a34004a6b6004987 icedtea-2.6pre18
+8f3c9cf0636f4d40e9c3647e03c7d0ca6d1019ee icedtea-2.6pre19
+904317834a259bdddd4568b74874c2472f119a3c icedtea-2.6pre20
+1939c010fd371d22de5c1baf2583a96e8f38da44 icedtea-2.6pre21
+cb42e88f9787c8aa28662f31484d605e550c6d53 icedtea-2.6pre22
 87d4354a3ce8aafccf1f1cd9cb9d88a58731dde8 jdk7u80-b06
 d496bd71dc129828c2b5962e2072cdb591454e4a jdk7u80-b07
 5ce33a4444cf74e04c22fb11b1e1b76b68a6477a jdk7u80-b08
@@ -829,31 +869,60 @@
 27e0103f3b11f06bc3277914564ed9a1976fb3d5 jdk7u80-b30
 426e09df7eda980317d1308af15c29ef691cd471 jdk7u80-b15
 198c700d102cc2051b304fc382ac58c5d76e8d26 jdk7u80-b32
-ea2051eb6ee8be8e292711caaae05a7014466ddc jdk7u85-b00
-1c6c2bdf4321c0ece7723663341f7f1a35cac843 jdk7u85-b01
+1afefe2d5f90112e87034a4eac57fdad53fe5b9f icedtea-2.6pre23
+94f15794d5e7847a60540eacbe3e276dbe127a1a icedtea-2.6pre24
+94f15794d5e7847a60540eacbe3e276dbe127a1a icedtea-2.6.0
+501fc984fa3b3d51e1a7f1220f2de635a2b370b9 jdk7u85-b00
+3f1b4a1fe4a274cd1f89d9ec83d8018f7f4b7d01 jdk7u85-b01
+94f15794d5e7847a60540eacbe3e276dbe127a1a icedtea-2.6-branchpoint
+b19bc5aeaa099ac73ee8341e337a007180409593 icedtea-2.6.1
 e45a07be1cac074dfbde6757f64b91f0608f30fb jdk7u85-b02
+25077ae8f6d2c512e74bfb3e5c1ed511b7c650de icedtea-2.6.2pre01
+1500c88d1b61914b3fbe7dfd8c521038bd95bde3 icedtea-2.6.2pre02
 cce12560430861a962349343b61d3a9eb12c6571 jdk7u91-b00
 5eaaa63440c4416cd9c03d586f72b3be8c7c73f8 jdk7u91-b01
+f40363c111917466319901436650f22f8403b749 icedtea-2.6.2
 2f2d431ace967c9a71194e1bb46f38b35ea43512 jdk7u91-b02
+c3cde6774003850aa6c44315c9c3e4dfdac69798 icedtea-2.6.3
 b3c5ff648bcad305163b323ad15dde1b6234d501 jdk7u95-b00
+19d919ae5506a750e3a0bcc6bd176c66b7e1e65d icedtea-2.6.4
 631da593499ee184ea8efb2bc5491e0d99ac636d jdk7u99-b00
+04d7046d2d41ae18c762fbdad7e114fdd55e2282 icedtea-2.6.5
+825fa447453ad85b3a6f11f87c5147b8b78fb3ab icedtea-2.6.6pre01
 6e42747134be8a4a366d337da1fc84a847ad8381 jdk7u101-b00
+3022a3d80efd705e861b90cece4f6fdc73f6de97 icedtea-2.6.6
+b4a17ff67e26e5e64680ab68c398dbecc46ee7c4 icedtea-2.6.7pre01
 14d0f4da4a74897fc3274f8f549f41544bb4625a jdk7u111-b00
 9efa3fae3c278a7f48badec775d7f9b2d5320b6d jdk7u111-b01
+75297b84957ed87867b0863be1a895a0de76d994 icedtea-2.6.7
+6b87ff3af68f636e473e2254c8e9a8c36ca514a6 icedtea-2.6.8pre01
 17b40d99ea3665de2bfffe163b68f2dfcf675cba jdk7u121-b00
+223f6c442d4987da3508893fb35e4419b33dc7f7 icedtea-2.6.8
+1d30f5a7723eeeb8092eed80d305fe6cdf530b1f icedtea-2.6.9pre01
 95e4ea3d479ebdab9d78725776b6b11d4add6b0e jdk7u131-b00
+9fc0d63c2a741ad9567580ead69b9cbf220f968f icedtea-2.6.9
+ada095ab78a923f10b40935d6e3fa1a42dc2358a icedtea-2.6.10pre01
 be8da42894af5f7d11b4bad83e166186f01ce1b4 jdk7u141-b00
 22c5a6ca09e35b63baf51bad4cb3d8f0cf326705 jdk7u141-b01
 56ad25be7d88c2c2da562fe1e8879c8723d01da1 jdk7u141-b02
+8edec8d2933aa575e7da2656b0fc4d8b904b1997 icedtea-2.6.10
+a8044a7634d086a85ac71b8674b432424606a85c icedtea-2.6.11pre01
 75662a7ec1719b3133636d09bd078968579a55ab jdk7u151-b00
 d0c7cea0660f7a8188a7b8c1f6d1a6c8d6388fb0 jdk7u151-b01
+809ae803d8ea9fd1af5cda606931959086dde30c icedtea-2.6.11
+9777e52ab513f7912e90433ccb6d7d6ecb7e0244 icedtea-2.6.12pre01
 fc789043683d3cf424f97176bd77cf7abe5bd01a jdk7u161-b00
 2965926dc5176c075e7a68c5d82bfd0ffa91cd5e jdk7u161-b01
+7fe1098f101e3bb5a2e5ed47ab0a596f4b74847a icedtea-2.6.12
 66e04addced6cc25701d2f6c6477051279006a14 jdk7u171-b00
 d07df83bf5755c518bb0acc65cd6a33da38014b5 jdk7u171-b01
 ce3abb5889fb01808cab7489e83c1dc448743b70 jdk7u171-b02
+f96baf6b460751580465a599ed2fba0c912e4bad icedtea-2.6.13
+79d8447a461c7319969585c363649901b4c2773a icedtea-2.6.14pre01
 205c34770f355f726055a716ecc8991dd3bbd8fd jdk7u181-b00
 6865c5a6ec36e80772ec47e14f7926b92053b551 jdk7u181-b01
+56142fb6814b3fbe81d580622583829976c6d08e icedtea-2.6.14
+592120da2a2b32c188d93212dd1929064e0723f0 icedtea-2.6.15pre01
 0000000000000000000000000000000000000000 jdk7u181-b00
 82e6757505da54dc58be6653c8b3464b58f6cb62 jdk7u181-b00
 0000000000000000000000000000000000000000 jdk7u181-b01
@@ -861,23 +930,34 @@
 327ea6f9647c0ef2e872a2adbf9b9fa180dd9070 jdk7u191-b00
 b95ee63b279a56f99fbcd5983ce337018aa87861 jdk7u191-b01
 5e348b58b299cb01f17ed1c4d970efccfda096c7 jdk7u191-b02
+e200fdadc48736c9a455cc826e71cdb64c4a13dc icedtea-2.6.15
 a04d398041938b47a3eed225c71f298a3e9410f9 jdk7u201-b00
+ae6068b02261bc413ccc934b2260f1d6277e18ee icedtea-2.6.16
+65b809089544d0723a33745d91265999afa08aeb icedtea-2.6.17pre01
 58964d7eef111a6c1b361e32daeae41cbe9e7f7c jdk7u211-b00
 87b0534bbec2c217e7c4574e962b93b3c5a61193 jdk7u211-b01
 e1b1da173e19b3abe6660d601f3a5cbf116cf8f5 jdk7u211-b02
+d47e668978c8e40925d6b2120aab3f1a82f1fc96 icedtea-2.6.17
+d47e668978c8e40925d6b2120aab3f1a82f1fc96 icedtea-2.6.18pre00
 8e4bf277c1a0063bf5546dffa5e0cc333761ef4b jdk7u221-b00
 4d49c350aea9078c934ee220668fc7dab1b830ce jdk7u221-b01
 39dc424afc0e4acf27b4570f864e5e9378127777 jdk7u221-b02
+6e7512a54bc6162bb648277205c51f38971d0da6 icedtea-2.6.18
 39dc424afc0e4acf27b4570f864e5e9378127777 jdk7u221-ga
 39dc424afc0e4acf27b4570f864e5e9378127777 jdk7u231-b00
 a8f5a2d0d6a0d4f7501c0f6c1efe933532d4e627 jdk7u231-b01
+6774b8e70603686f6d1e8a6749aebf5f2112b97e icedtea-2.6.19
+6774b8e70603686f6d1e8a6749aebf5f2112b97e icedtea-2.6.20pre00
 a8f5a2d0d6a0d4f7501c0f6c1efe933532d4e627 jdk7u231-ga
 a8f5a2d0d6a0d4f7501c0f6c1efe933532d4e627 jdk7u241-b00
 c6a8482b1d38cb24fada6a4de0038a577f384cb7 jdk7u241-b01
+059e07856d09dcb1017565f5a8c21a11c25b88e6 icedtea-2.6.20
+8145b03db545c7be2a8243b785d9bf20caae706b icedtea-2.6.21pre00
 c6a8482b1d38cb24fada6a4de0038a577f384cb7 jdk7u241-ga
 c6a8482b1d38cb24fada6a4de0038a577f384cb7 jdk7u251-b00
 cea2195191744b01d089f12d0264fc572fb35690 jdk7u251-b01
 69946653069110a57f9d49e40a05147c574966ca jdk7u251-b02
+d89441a9ce987745a0342b69213758f0fe109976 icedtea-2.6.21
 69946653069110a57f9d49e40a05147c574966ca jdk7u251-ga
 69946653069110a57f9d49e40a05147c574966ca jdk7u261-b00
 7c318f4ec806e29e25a386751e55c8e138705734 jdk7u261-b01
--- a/.jcheck/conf	Mon Apr 13 06:13:18 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-project=jdk7
-bugids=dup
--- a/agent/src/os/bsd/libproc_impl.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/bsd/libproc_impl.c	Mon Apr 13 16:44:26 2020 +0100
@@ -161,7 +161,12 @@
       return NULL;
    }
 
-   strncpy(newlib->name, libname, sizeof(newlib->name));
+   if (strlen(libname) >= sizeof(newlib->name)) {
+     print_debug("libname %s too long\n", libname);
+     return NULL;
+   }
+   strcpy(newlib->name, libname);
+
    newlib->base = base;
 
    if (fd == -1) {
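
For reference, a minimal sketch (not part of the changeset) of the bounded-copy pattern this hunk introduces: strncpy() does not NUL-terminate the destination when the source fills it, so the patch rejects over-long names up front and then uses plain strcpy(). The struct and helper below are hypothetical stand-ins for lib_info and add_lib_info(), assuming a fixed-size name buffer.

#include <stdio.h>
#include <string.h>

typedef struct {
   char name[256];               /* fixed-size buffer, as in lib_info */
} lib_info_t;

/* Returns 0 on success, -1 if the name would not fit (caller gives up). */
static int set_lib_name(lib_info_t *lib, const char *libname) {
   if (strlen(libname) >= sizeof(lib->name)) {
      fprintf(stderr, "libname %s too long\n", libname);
      return -1;
   }
   strcpy(lib->name, libname);   /* safe: the length was checked above */
   return 0;
}
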
--- a/agent/src/os/linux/Makefile	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/Makefile	Mon Apr 13 16:44:26 2020 +0100
@@ -23,7 +23,12 @@
 #
 
 ARCH := $(shell if ([ `uname -m` = "ia64" ])  ; then echo ia64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi )
-GCC      = gcc
+
+ifndef BUILD_GCC
+BUILD_GCC = gcc
+endif
+
+GCC      = $(BUILD_GCC)
 
 JAVAH    = ${JAVA_HOME}/bin/javah
 
@@ -40,7 +45,7 @@
 
 LIBS     = -lthread_db
 
-CFLAGS   = -c -fPIC -g -D_GNU_SOURCE -D$(ARCH) $(INCLUDES) -D_FILE_OFFSET_BITS=64
+CFLAGS   = -c -fPIC -g -D_GNU_SOURCE -D_$(ARCH)_ $(if $(filter $(ARCH),alpha),,-D$(ARCH)) $(INCLUDES) -D_FILE_OFFSET_BITS=64
 
 LIBSA = $(ARCH)/libsaproc.so
 
@@ -73,7 +78,7 @@
 	$(GCC) -shared $(LFLAGS_LIBSA) -o $(LIBSA) $(OBJS) $(LIBS)
 
 test.o: test.c
-	$(GCC) -c -o test.o -g -D_GNU_SOURCE -D$(ARCH) $(INCLUDES) test.c
+	$(GCC) -c -o test.o -g -D_GNU_SOURCE -D_$(ARCH)_ $(if $(filter $(ARCH),alpha),,-D$(ARCH)) $(INCLUDES) test.c
 
 test: test.o
 	$(GCC) -o test test.o -L$(ARCH) -lsaproc $(LIBS)
--- a/agent/src/os/linux/elfmacros.h	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/elfmacros.h	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,7 @@
 #define ELF_NHDR        Elf64_Nhdr
 #define ELF_DYN         Elf64_Dyn
 #define ELF_ADDR        Elf64_Addr
+#define ELF_AUXV        Elf64_auxv_t
 
 #define ELF_ST_TYPE     ELF64_ST_TYPE
 
@@ -45,6 +46,7 @@
 #define ELF_NHDR        Elf32_Nhdr
 #define ELF_DYN         Elf32_Dyn
 #define ELF_ADDR        Elf32_Addr
+#define ELF_AUXV        Elf32_auxv_t
 
 #define ELF_ST_TYPE     ELF32_ST_TYPE
 
--- a/agent/src/os/linux/libproc.h	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/libproc.h	Mon Apr 13 16:44:26 2020 +0100
@@ -34,7 +34,7 @@
 #include "libproc_md.h"
 #endif
 
-#include <linux/ptrace.h>
+#include <sys/ptrace.h>
 
 /************************************************************************************
 
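A brief note on the include swap above: <sys/ptrace.h> is the C-library header that declares the userspace ptrace() wrapper the serviceability agent calls, while the kernel header <linux/ptrace.h> is known to clash with glibc's definitions on some systems. A small, hypothetical attach sketch using the libc header:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Hypothetical helper: attach to a target process and wait for it to stop. */
static int attach_to(pid_t pid) {
   if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) {
      perror("ptrace(PTRACE_ATTACH)");
      return -1;
   }
   if (waitpid(pid, NULL, 0) < 0) {
      perror("waitpid");
      return -1;
   }
   return 0;
}
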
--- a/agent/src/os/linux/libproc_impl.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/libproc_impl.c	Mon Apr 13 16:44:26 2020 +0100
@@ -162,7 +162,12 @@
       return NULL;
    }
 
-   strncpy(newlib->name, libname, sizeof(newlib->name));
+   if (strlen(libname) >= sizeof(newlib->name)) {
+     print_debug("libname %s too long\n", libname);
+     return NULL;
+   }
+   strcpy(newlib->name, libname);
+
    newlib->base = base;
 
    if (fd == -1) {
--- a/agent/src/os/linux/ps_core.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/ps_core.c	Mon Apr 13 16:44:26 2020 +0100
@@ -629,6 +629,18 @@
       if (notep->n_type == NT_PRSTATUS) {
          if (core_handle_prstatus(ph, descdata, notep->n_descsz) != true)
             return false;
+      } else if (notep->n_type == NT_AUXV) {
+        // Get first segment from entry point
+        ELF_AUXV *auxv = (ELF_AUXV *)descdata;
+        while (auxv->a_type != AT_NULL) {
+          if (auxv->a_type == AT_ENTRY) {
+            // Set entry point address to address of dynamic section.
+            // We will adjust it in read_exec_segments().
+            ph->core->dynamic_addr = auxv->a_un.a_val;
+            break;
+          }
+          auxv++;
+        }
       }
       p = descdata + ROUNDUP(notep->n_descsz, 4);
    }
@@ -811,7 +823,13 @@
 
          // from PT_DYNAMIC we want to read address of first link_map addr
          case PT_DYNAMIC: {
-            ph->core->dynamic_addr = exec_php->p_vaddr;
+            if (exec_ehdr->e_type == ET_EXEC) {
+                ph->core->dynamic_addr = exec_php->p_vaddr;
+            } else { // ET_DYN
+                // dynamic_addr holds the entry point of the executable.
+                // Thus we should subtract it.
+                ph->core->dynamic_addr += exec_php->p_vaddr - exec_ehdr->e_entry;
+            }
             print_debug("address of _DYNAMIC is 0x%lx\n", ph->core->dynamic_addr);
             break;
          }
@@ -1007,8 +1025,9 @@
       goto err;
    }
 
-   if (read_elf_header(ph->core->exec_fd, &exec_ehdr) != true || exec_ehdr.e_type != ET_EXEC) {
-      print_debug("executable file is not a valid ELF ET_EXEC file\n");
+   if (read_elf_header(ph->core->exec_fd, &exec_ehdr) != true ||
+       ((exec_ehdr.e_type != ET_EXEC) && (exec_ehdr.e_type != ET_DYN))) {
+      print_debug("executable file is not a valid ELF file\n");
       goto err;
    }
 
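Taken together, the ps_core.c hunks above let the agent match core files against position-independent (ET_DYN) executables: the NT_AUXV note supplies the run-time entry point (AT_ENTRY), and the PT_DYNAMIC address is then rebased by the difference between that value and the link-time e_entry. A compact sketch of the arithmetic (not part of the patch), assuming 64-bit ELF types and hypothetical names:

#include <elf.h>
#include <stdint.h>

/* auxv: NT_AUXV payload from the core file;
 * ehdr/dyn: ELF header and PT_DYNAMIC program header of the executable. */
static uintptr_t dynamic_section_addr(const Elf64_auxv_t *auxv,
                                      const Elf64_Ehdr *ehdr,
                                      const Elf64_Phdr *dyn) {
   uintptr_t runtime_entry = 0;
   for (; auxv->a_type != AT_NULL; auxv++) {     /* walk the aux vector */
      if (auxv->a_type == AT_ENTRY) {
         runtime_entry = auxv->a_un.a_val;       /* entry point at run time */
         break;
      }
   }
   if (ehdr->e_type == ET_EXEC) {
      return dyn->p_vaddr;                       /* fixed-address executable */
   }
   /* ET_DYN: load bias = run-time entry minus link-time entry */
   return runtime_entry + (dyn->p_vaddr - ehdr->e_entry);
}
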
--- a/agent/src/os/linux/ps_proc.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/os/linux/ps_proc.c	Mon Apr 13 16:44:26 2020 +0100
@@ -263,7 +263,7 @@
 
 static bool read_lib_info(struct ps_prochandle* ph) {
   char fname[32];
-  char buf[256];
+  char buf[PATH_MAX];
   FILE *fp = NULL;
 
   sprintf(fname, "/proc/%d/maps", ph->pid);
@@ -273,10 +273,52 @@
     return false;
   }
 
-  while(fgets_no_cr(buf, 256, fp)){
-    char * word[6];
-    int nwords = split_n_str(buf, 6, word, ' ', '\0');
-    if (nwords > 5 && find_lib(ph, word[5]) == false) {
+  while(fgets_no_cr(buf, PATH_MAX, fp)){
+    char * word[7];
+    int nwords = split_n_str(buf, 7, word, ' ', '\0');
+
+    if (nwords < 6) {
+      // not a shared library entry. ignore.
+      continue;
+    }
+
+    if (word[5][0] == '[') {
+        // not a shared library entry. ignore.
+      if (strncmp(word[5],"[stack",6) == 0) {
+        continue;
+      }
+      if (strncmp(word[5],"[heap]",6) == 0) {
+        continue;
+      }
+
+      // SA doesn't handle the VDSO
+      if (strncmp(word[5],"[vdso]",6) == 0) {
+        continue;
+      }
+      if (strncmp(word[5],"[vsyscall]",6) == 0) {
+        continue;
+      }
+    }
+
+    if (nwords > 6) {
+      // prelink altered the map file while the program was running.
+      // Entries like the one below have to be skipped:
+      //  /lib64/libc-2.15.so (deleted)
+      // The SO name in entries like the one below has to be stripped:
+      //  /lib64/libpthread-2.15.so.#prelink#.EECVts
+      char *s = strstr(word[5],".#prelink#");
+      if (s == NULL) {
+        // No prelink keyword. skip deleted library
+        print_debug("skip shared object %s deleted by prelink\n", word[5]);
+        continue;
+      }
+
+      // Fall through
+      print_debug("rectifying shared object name %s changed by prelink\n", word[5]);
+      *s = 0;
+    }
+
+    if (find_lib(ph, word[5]) == false) {
        intptr_t base;
        lib_info* lib;
 #ifdef _LP64
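
The read_lib_info() changes above grow the line buffer to PATH_MAX and then filter each /proc/<pid>/maps entry: pseudo-mappings such as [stack], [heap], [vdso] and [vsyscall] are skipped, deleted mappings are ignored, and prelink's ".#prelink#" rename suffix is stripped so that find_lib() sees the real shared-object name. A condensed sketch of that filtering (not part of the patch), with a hypothetical helper name:

#include <stddef.h>
#include <string.h>

/* path:        sixth field of a maps line
 * extra_words: non-zero when the line had more than six fields,
 *              e.g. "/lib64/libc-2.15.so (deleted)"
 * Returns the usable library path, or NULL if the entry should be ignored. */
static char *filter_maps_path(char *path, int extra_words) {
   if (path[0] == '[') {                  /* [stack], [heap], [vdso], [vsyscall] */
      return NULL;
   }
   if (extra_words) {
      char *s = strstr(path, ".#prelink#");
      if (s == NULL) {
         return NULL;                     /* deleted mapping: skip it */
      }
      *s = '\0';                          /* strip the ".#prelink#.XXXXXX" suffix */
   }
   return path;
}
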
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Mon Apr 13 06:13:18 2020 +0100
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Mon Apr 13 16:44:26 2020 +0100
@@ -44,7 +44,7 @@
     Type type      = db.lookupType("PhaseCFG");
     numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0);
     blocksField = type.getAddressField("_blocks");
-    bbsField = type.getAddressField("_bbs");
+    bbsField = type.getAddressField("_node_to_block_mapping");
     brootField = type.getAddressField("_broot");
   }
 
--- a/make/bsd/Makefile	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/bsd/Makefile	Mon Apr 13 16:44:26 2020 +0100
@@ -282,7 +282,23 @@
 	$(BUILDTREE) VARIANT=shark VARIANTARCH=$(VARIANTARCH)
 
 platform_zero: $(GAMMADIR)/make/$(OSNAME)/platform_zero.in
-	$(SED) 's/@ZERO_ARCHDEF@/$(ZERO_ARCHDEF)/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+ifeq ($(ZERO_ARCHDEF),PPC)
+  ifndef LP64
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC32/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  else
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC64/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  endif
+else
+  ifeq ($(ZERO_ARCHDEF),PPC64) 
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC64/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  else
+    ifeq ($(ZERO_ARCHDEF),PPC32) 
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC32/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+    else
+	$(SED) 's/@ZERO_ARCHDEF@/-D$(ZERO_ARCHDEF)/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+    endif
+  endif
+endif
 
 # Define INSTALL=y at command line to automatically copy JVM into JAVA_HOME
 
--- a/make/bsd/makefiles/gcc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/bsd/makefiles/gcc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -138,6 +138,20 @@
   # Ineffecient 16-byte stack re-alignment on Darwin/IA32
   ARCHFLAG/i486 += -mstackrealign
 endif
+# gcc bug http://gcc.gnu.org/PR63341 in ppc code generation requires -fno-tree-vectorize for now
+ARCHFLAG/ppc     += -fno-tree-vectorize
+ARCHFLAG/ppc64   += -fno-tree-vectorize
+ifeq ($(TYPE),ZERO)
+  ifeq ($(ZERO_ARCHDEF),PPC)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+  ifeq ($(ZERO_ARCHDEF),PPC32)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+  ifeq ($(ZERO_ARCHDEF),PPC64)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+endif
 
 CFLAGS     += $(ARCHFLAG)
 AOUT_FLAGS += $(ARCHFLAG)
--- a/make/bsd/makefiles/vm.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/bsd/makefiles/vm.make	Mon Apr 13 16:44:26 2020 +0100
@@ -187,7 +187,7 @@
 Src_Dirs/SHARK     := $(CORE_PATHS) $(SHARK_PATHS)
 Src_Dirs := $(Src_Dirs/$(TYPE))
 
-COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp chaitin\* c2_\* runtime_\*
+COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp c2_\* runtime_\*
 COMPILER1_SPECIFIC_FILES := c1_\*
 SHARK_SPECIFIC_FILES     := shark
 ZERO_SPECIFIC_FILES      := zero
--- a/make/bsd/platform_zero.in	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/bsd/platform_zero.in	Mon Apr 13 16:44:26 2020 +0100
@@ -14,4 +14,4 @@
 
 gnu_dis_arch = zero
 
-sysdefs = -D_ALLBSD_SOURCE -D_GNU_SOURCE -DCC_INTERP -DZERO -D@ZERO_ARCHDEF@ -DZERO_LIBARCH=\"@ZERO_LIBARCH@\"
+sysdefs = -D_ALLBSD_SOURCE -D_GNU_SOURCE -DCC_INTERP -DZERO @ZERO_ARCHDEF@ -DZERO_LIBARCH=\"@ZERO_LIBARCH@\"
--- a/make/defs.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/defs.make	Mon Apr 13 16:44:26 2020 +0100
@@ -270,7 +270,7 @@
 
   # Use uname output for SRCARCH, but deal with platform differences. If ARCH
   # is not explicitly listed below, it is treated as x86.
-  SRCARCH     = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 arm ppc ppc64 zero,$(ARCH)))
+  SRCARCH     = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 arm ppc ppc64 zero aarch64,$(ARCH)))
   ARCH/       = x86
   ARCH/sparc  = sparc
   ARCH/sparc64= sparc
@@ -281,6 +281,7 @@
   ARCH/ppc    = ppc
   ARCH/arm    = arm
   ARCH/zero   = zero
+  ARCH/aarch64 = aarch64
 
   # BUILDARCH is usually the same as SRCARCH, except for sparcv9
   BUILDARCH = $(SRCARCH)
@@ -303,11 +304,22 @@
       BUILDARCH = ppc
     endif
   endif
+  # special case for aarch64-x86 hybrid
+  ifeq ($(BUILDARCH), aarch64)
+  ifeq ($(BUILTIN_SIM), true)
+    VARIANTARCH = amd64
+  endif
+  endif
 
   # LIBARCH is 1:1 mapping from BUILDARCH
   LIBARCH         = $(LIBARCH/$(BUILDARCH))
   LIBARCH/i486    = i386
   LIBARCH/amd64   = amd64
+  ifeq ($(BUILTIN_SIM), true)
+    LIBARCH/aarch64 = amd64
+  else
+    LIBARCH/aarch64 = aarch64
+  endif
   LIBARCH/sparc   = sparc
   LIBARCH/sparcv9 = sparcv9
   LIBARCH/ia64    = ia64
@@ -316,7 +328,14 @@
   LIBARCH/arm     = arm
   LIBARCH/zero    = $(ZERO_LIBARCH)
 
-  LP64_ARCH = sparcv9 amd64 ia64 ppc64 zero
+  # Override LIBARCH for ppc64le
+  ifeq ($(ARCH), ppc64)
+    ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little)
+      LIBARCH = ppc64le
+    endif
+  endif
+
+  LP64_ARCH = sparcv9 amd64 aarch64 ia64 ppc64 zero
 endif
 
 # Required make macro settings for all platforms
--- a/make/linux/Makefile	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/Makefile	Mon Apr 13 16:44:26 2020 +0100
@@ -140,6 +140,17 @@
 # is used to give the build directories meaningful names.
 VARIANTARCH = $(subst i386,i486,$(ZERO_LIBARCH))
 
+# special case for aarch64-x86 hybrid
+ifeq ($(BUILTIN_SIM), true)
+  ifeq ($(BUILDARCH), aarch64)
+    # build outputs go into linux_amd64_xxx subdirs
+    VARIANTARCH = amd64
+  else
+    # build outputs go into normal linux_aaaa_xxx subdirs
+    VARIANTARCH=$(BUILDARCH)
+  endif
+endif
+
 # There is a (semi-) regular correspondence between make targets and actions:
 #
 #       Target          Tree Type       Build Dir
@@ -204,6 +215,15 @@
 SUBDIRS_ZERO      = $(addprefix $(OSNAME)_$(VARIANTARCH)_zero/,$(TARGETS))
 SUBDIRS_SHARK     = $(addprefix $(OSNAME)_$(VARIANTARCH)_shark/,$(TARGETS))
 
+# special case for aarch64-x86 hybrid
+ifeq ($(BUILTIN_SIM), true)
+SUBDIRS_C1        = $(addprefix $(OSNAME)_$(VARIANTARCH)_compiler1/,$(TARGETS))
+SUBDIRS_C2        = $(addprefix $(OSNAME)_$(VARIANTARCH)_compiler2/,$(TARGETS))
+SUBDIRS_TIERED    = $(addprefix $(OSNAME)_$(VARIANTARCH)_tiered/,$(TARGETS))
+SUBDIRS_CORE      = $(addprefix $(OSNAME)_$(VARIANTARCH)_core/,$(TARGETS))
+SUBDIR_DOCS     = $(OSNAME)_$(VARIANTARCH)_docs
+endif
+
 TARGETS_C2        = $(TARGETS)
 TARGETS_C1        = $(addsuffix 1,$(TARGETS))
 TARGETS_TIERED    = $(addsuffix tiered,$(TARGETS))
@@ -273,11 +293,33 @@
 	$(BUILDTREE) VARIANT=shark VARIANTARCH=$(VARIANTARCH)
 
 platform_zero: $(GAMMADIR)/make/$(OSNAME)/platform_zero.in
-	$(SED) 's/@ZERO_ARCHDEF@/$(ZERO_ARCHDEF)/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+ifeq ($(ZERO_ARCHDEF),PPC) 
+  ifndef LP64
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC32/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  else
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC64/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  endif
+else
+  ifeq ($(ZERO_ARCHDEF),PPC64) 
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC64/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+  else
+    ifeq ($(ZERO_ARCHDEF),PPC32) 
+	$(SED) 's/@ZERO_ARCHDEF@/-DPPC -DPPC32/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+    else
+	$(SED) 's/@ZERO_ARCHDEF@/-D$(ZERO_ARCHDEF)/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
+    endif
+  endif
+endif
 
 # Define INSTALL=y at command line to automatically copy JVM into JAVA_HOME
 
 $(TARGETS_C2):  $(SUBDIRS_C2)
+ifeq ($(BUILTIN_SIM), true)
+	cd $(OSNAME)_$(VARIANTARCH)_compiler2/$@ && $(MAKE) $(MFLAGS)
+ifdef INSTALL
+	cd $(OSNAME)_$(VARIANTARCH)_compiler2/$@ && $(MAKE) $(MFLAGS) install
+endif
+else
 	cd $(OSNAME)_$(BUILDARCH)_compiler2/$@ && $(MAKE) $(MFLAGS)
 ifeq ($(TEST_IN_BUILD),true)
 	cd $(OSNAME)_$(BUILDARCH)_compiler2/$@ && ./test_gamma
@@ -285,8 +327,15 @@
 ifdef INSTALL
 	cd $(OSNAME)_$(BUILDARCH)_compiler2/$@ && $(MAKE) $(MFLAGS) install
 endif
+endif
 
 $(TARGETS_TIERED):  $(SUBDIRS_TIERED)
+ifeq ($(BUILTIN_SIM), true)
+	cd $(OSNAME)_$(VARIANTARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS)
+ifdef INSTALL
+	cd $(OSNAME)_$(VARIANTARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS) install
+endif
+else
 	cd $(OSNAME)_$(BUILDARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS)
 ifeq ($(TEST_IN_BUILD),true)
 	cd $(OSNAME)_$(BUILDARCH)_tiered/$(patsubst %tiered,%,$@) && ./test_gamma
@@ -294,8 +343,15 @@
 ifdef INSTALL
 	cd $(OSNAME)_$(BUILDARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS) install
 endif
+endif
 
 $(TARGETS_C1):  $(SUBDIRS_C1)
+ifeq ($(BUILTIN_SIM), true)
+	cd $(OSNAME)_$(VARIANTARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS)
+ifdef INSTALL
+	cd $(OSNAME)_$(VARIANTARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS) install
+endif
+else
 	cd $(OSNAME)_$(BUILDARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS)
 ifeq ($(TEST_IN_BUILD),true)
 	cd $(OSNAME)_$(BUILDARCH)_compiler1/$(patsubst %1,%,$@) && ./test_gamma
@@ -303,8 +359,15 @@
 ifdef INSTALL
 	cd $(OSNAME)_$(BUILDARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS) install
 endif
+endif
 
 $(TARGETS_CORE):  $(SUBDIRS_CORE)
+ifeq ($(BUILTIN_SIM), true)
+	cd $(OSNAME)_$(VARIANTARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS)
+ifdef INSTALL
+	cd $(OSNAME)_$(VARIANTARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS) install
+endif
+else
 	cd $(OSNAME)_$(BUILDARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS)
 ifeq ($(TEST_IN_BUILD),true)
 	cd $(OSNAME)_$(BUILDARCH)_core/$(patsubst %core,%,$@) && ./test_gamma
@@ -312,6 +375,7 @@
 ifdef INSTALL
 	cd $(OSNAME)_$(BUILDARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS) install
 endif
+endif
 
 $(TARGETS_ZERO):  $(SUBDIRS_ZERO)
 	cd $(OSNAME)_$(VARIANTARCH)_zero/$(patsubst %zero,%,$@) && $(MAKE) $(MFLAGS)
@@ -361,7 +425,11 @@
 	rm -rf $(SUBDIR_DOCS)
 
 clean_compiler1 clean_compiler2 clean_core clean_zero clean_shark:
+ifeq ($(BUILTIN_SIM), true)
+	rm -rf $(OSNAME)_$(VARIANTARCH)_$(subst clean_,,$@)
+else
 	rm -rf $(OSNAME)_$(BUILDARCH)_$(subst clean_,,$@)
+endif
 
 clean:  clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_docs
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/linux/makefiles/aarch64.make	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,41 @@
+#
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+
+# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
+# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
+# Must also specify if CPU is little endian
+CFLAGS += -DVM_LITTLE_ENDIAN
+
+# aarch64 needs to define SAFEFETCH_STUBS but only when it is a real
+# AARCH64 build. If we are building for the simulator then we use
+# x86-specific stubs.
+
+ifeq ($(BUILTIN_SIM), true)
+  CFLAGS  += -DBUILTIN_SIM -DALLOW_OPERATOR_NEW_USAGE
+else
+  CFLAGS += -DSAFEFETCH_STUBS
+endif
+
+OPT_CFLAGS/compactingPermGenGen.o = -O1
--- a/make/linux/makefiles/adlc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/adlc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -68,7 +68,9 @@
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
+ifneq ($(COMPILER_WARNINGS_FATAL),false)
 CFLAGS_WARN = -Werror
+endif
 CFLAGS += $(CFLAGS_WARN)
 
 OBJECTNAMES = \
--- a/make/linux/makefiles/buildtree.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/buildtree.make	Mon Apr 13 16:44:26 2020 +0100
@@ -210,6 +210,8 @@
 	echo "SA_BUILD_VERSION = $(HS_BUILD_VER)"; \
 	echo "HOTSPOT_BUILD_USER = $(HOTSPOT_BUILD_USER)"; \
 	echo "HOTSPOT_VM_DISTRO = $(HOTSPOT_VM_DISTRO)"; \
+	echo "DERIVATIVE_ID = $(DERIVATIVE_ID)"; \
+	echo "DISTRIBUTION_ID = $(DISTRIBUTION_ID)"; \
 	echo "OPENJDK = $(OPENJDK)"; \
 	echo "OPENJDK_TARGET_CPU_ENDIAN = $(OPENJDK_TARGET_CPU_ENDIAN)"; \
 	echo "ZERO_BUILD = $(ZERO_BUILD)"; \
@@ -413,6 +415,7 @@
 DATA_MODE/ia64    = 64
 DATA_MODE/ppc64   = 64
 DATA_MODE/zero    = $(ARCH_DATA_MODEL)
+DATA_MODE/aarch64 = 64
 
 JAVA_FLAG/32 = -d32
 JAVA_FLAG/64 = -d64
--- a/make/linux/makefiles/defs.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/defs.make	Mon Apr 13 16:44:26 2020 +0100
@@ -106,6 +106,27 @@
   HS_ARCH          = arm
 endif
 
+# AARCH64
+ifeq ($(ARCH), aarch64)
+  ARCH_DATA_MODEL  = 64
+  MAKE_ARGS        += LP64=1
+  PLATFORM         = linux-aarch64
+  VM_PLATFORM      = linux_aarch64
+  HS_ARCH          = aarch64
+endif
+
+#AARCH64 x86 hybrid
+ifeq ($(BUILTIN_SIM), true)
+ifeq ($(ARCH), x86_64)
+  ARCH=aarch64
+  ARCH_DATA_MODEL  = 64
+  MAKE_ARGS        += LP64=1
+  PLATFORM         = linux-amd64
+  VM_PLATFORM      = linux_amd64
+  HS_ARCH          = aarch64
+endif
+endif
+
 # PPC
 ifeq ($(ARCH), ppc)
   ARCH_DATA_MODEL  = 32
@@ -263,10 +284,14 @@
 # client and server subdirectories have symbolic links to ../libjsig.so
 EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
-  else
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+  ifneq ($(ZERO_BUILD), true)
+    ifneq ($(STRIP_POLICY),no_strip)
+      ifeq ($(ZIP_DEBUGINFO_FILES),1)
+        EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
+      else
+        EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+      endif
+    endif
   endif
 endif
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
@@ -275,11 +300,13 @@
 ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
-  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-    ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
-    else
-      EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
+  ifneq ($(STRIP_POLICY),no_strip)
+    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+      ifeq ($(ZIP_DEBUGINFO_FILES),1)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
+      else
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
+      endif
     endif
   endif
 endif
@@ -287,11 +314,13 @@
 ifeq ($(JVM_VARIANT_CLIENT),true)
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
-  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-    ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
-    else
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
+  ifneq ($(STRIP_POLICY),no_strip)
+    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+      ifeq ($(ZIP_DEBUGINFO_FILES),1)
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
+      else
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
+      endif
     endif
   endif 
 endif
@@ -303,12 +332,16 @@
 ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
                         $(EXPORT_LIB_DIR)/sa-jdi.jar 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    ADD_SA_BINARIES/x86   += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
-    ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
-  else
-    ADD_SA_BINARIES/x86   += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
-    ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+  ifneq ($(ZERO_BUILD), true)
+    ifneq ($(STRIP_POLICY),no_strip)
+      ifeq ($(ZIP_DEBUGINFO_FILES),1)
+        ADD_SA_BINARIES/x86   += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+        ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+      else
+        ADD_SA_BINARIES/x86   += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+        ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+      endif
+    endif
   endif
 endif
 ADD_SA_BINARIES/ppc   = 
--- a/make/linux/makefiles/gcc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/gcc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -31,22 +31,21 @@
   # When cross-compiling the ALT_COMPILER_PATH points
   # to the cross-compilation toolset
   ifdef CROSS_COMPILE_ARCH
-    CXX = $(ALT_COMPILER_PATH)/g++
-    CC  = $(ALT_COMPILER_PATH)/gcc
-    HOSTCXX = g++
-    HOSTCC  = gcc
-    STRIP = $(ALT_COMPILER_PATH)/strip
+    CXX ?= $(ALT_COMPILER_PATH)/g++
+    CC  ?= $(ALT_COMPILER_PATH)/gcc
+    HOSTCXX ?= g++
+    HOSTCC ?= gcc
+    STRIP ?= $(ALT_COMPILER_PATH)/strip
   else
-    CXX = g++
-    CC  = gcc
-    HOSTCXX = $(CXX)
-    HOSTCC  = $(CC)
-    STRIP = strip
+    CXX ?= g++
+    CC  ?= gcc
+    HOSTCXX ?= $(CXX)
+    HOSTCC  ?= $(CC)
+    STRIP ?= strip
   endif
   AS  = $(CC) -c
 endif
 
-
 # -dumpversion in gcc-2.91 shows "egcs-2.91.66". In later version, it only
 # prints the numbers (e.g. "2.95", "3.2.1")
 CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
@@ -67,7 +66,11 @@
 # Compiler flags
 
 # position-independent code
+ifneq ($(filter parisc ppc ppc64 s390 s390x sparc sparc64 sparcv9,$(ZERO_LIBARCH)),)
 PICFLAG = -fPIC
+else
+PICFLAG = -fpic
+endif
 
 VM_PICFLAG/LIBJVM = $(PICFLAG)
 VM_PICFLAG/AOUT   =
@@ -102,6 +105,7 @@
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
 ARCHFLAG/i486    = -m32 -march=i586
 ARCHFLAG/amd64   = -m64
+ARCHFLAG/aarch64 =
 ARCHFLAG/ia64    =
 ARCHFLAG/sparc   = -m32 -mcpu=v9
 ARCHFLAG/sparcv9 = -m64 -mcpu=v9
@@ -111,6 +115,23 @@
 ARCHFLAG/ppc     =  -mcpu=powerpc
 endif
 ARCHFLAG/ppc64   =  -m64
+# gcc bug http://gcc.gnu.org/PR63341 in ppc code generation requires -fno-tree-vectorize for now
+# Fixed in GCC 5 and later
+ifeq "$(shell expr \( $(CC_VER_MAJOR) \< 5 \) )" "1"
+ARCHFLAG/ppc     += -fno-tree-vectorize
+ARCHFLAG/ppc64   += -fno-tree-vectorize
+ifeq ($(TYPE),ZERO)
+  ifeq ($(ZERO_ARCHDEF),PPC)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+  ifeq ($(ZERO_ARCHDEF),PPC32)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+  ifeq ($(ZERO_ARCHDEF),PPC64)
+    ARCHFLAG/zero += -fno-tree-vectorize
+  endif
+endif
+endif
 
 CFLAGS     += $(ARCHFLAG)
 AOUT_FLAGS += $(ARCHFLAG)
@@ -139,7 +160,9 @@
 endif
 
 # Compiler warnings are treated as errors
+ifneq ($(COMPILER_WARNINGS_FATAL),false)
 WARNINGS_ARE_ERRORS = -Werror
+endif
 
 # Except for a few acceptable ones
 # Since GCC 4.3, -Wconversion has changed its meanings to warn these implicit
@@ -174,6 +197,11 @@
 OPT_CFLAGS/mulnode.o += -O0
 endif
 
+# 8041658. Problem in GCC 4.9.0 with devirtualisation removing return statements.
+ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 9), 1)
+OPT_CFLAGS += -fno-devirtualize
+endif
+
 # Flags for generating make dependency flags.
 ifneq ("${CC_VER_MAJOR}", "2")
 DEPFLAGS = -fpch-deps -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
@@ -240,6 +268,7 @@
   # Note: The Itanium gcc compiler crashes when using -gstabs.
   DEBUG_CFLAGS/ia64  = -g
   DEBUG_CFLAGS/amd64 = -g
+  DEBUG_CFLAGS/aarch64 = -g
   DEBUG_CFLAGS/arm   = -g
   DEBUG_CFLAGS/ppc   = -g
   DEBUG_CFLAGS/ppc64 = -g
@@ -251,9 +280,11 @@
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
     FASTDEBUG_CFLAGS/ia64  = -g
     FASTDEBUG_CFLAGS/amd64 = -g
+    FASTDEBUG_CFLAGS/aarch64 = -g
     FASTDEBUG_CFLAGS/arm   = -g
     FASTDEBUG_CFLAGS/ppc   = -g
     FASTDEBUG_CFLAGS/ppc64 = -g
+    FASTDEBUG_CFLAGS/zero  = -g
     FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
     ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
       FASTDEBUG_CFLAGS += -gstabs
@@ -261,9 +292,11 @@
   
     OPT_CFLAGS/ia64  = -g
     OPT_CFLAGS/amd64 = -g
+    OPT_CFLAGS/aarch64 = -g
     OPT_CFLAGS/arm   = -g
     OPT_CFLAGS/ppc   = -g
     OPT_CFLAGS/ppc64 = -g
+    OPT_CFLAGS/zero  = -g
     OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
     ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
       OPT_CFLAGS += -gstabs
--- a/make/linux/makefiles/jsig.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/jsig.make	Mon Apr 13 16:44:26 2020 +0100
@@ -65,8 +65,10 @@
                          $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $< -ldl
 	$(QUIETLY) [ -f $(LIBJSIG_G) ] || { ln -s $@ $(LIBJSIG_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -76,10 +78,12 @@
     endif
   endif
 	[ -f $(LIBJSIG_G_DEBUGINFO) ] || { ln -s $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO); }
-  ifeq ($(ZIP_DEBUGINFO_FILES),1)
+  ifneq ($(STRIP_POLICY),no_strip)
+    ifeq ($(ZIP_DEBUGINFO_FILES),1)
 	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
 	$(RM) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
 	[ -f $(LIBJSIG_G_DIZ) ] || { ln -s $(LIBJSIG_DIZ) $(LIBJSIG_G_DIZ); }
+    endif
   endif
 endif
 
--- a/make/linux/makefiles/rules.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/rules.make	Mon Apr 13 16:44:26 2020 +0100
@@ -31,7 +31,10 @@
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
 # $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
-CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+# FIXME: $(CXXFLAGS) currently only includes preprocessor flags while
+# $(CFLAGS) includes C and C++ flags.  Ideally, there should be three
+# variables: $(CFLAGS), $(CXXFLAGS) and $(CPPFLAGS).
+CC_COMPILE       = $(CC) $(CXXFLAGS)
 CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
@@ -143,26 +146,21 @@
 
 include $(GAMMADIR)/make/altsrc.make
 
-# The non-PIC object files are only generated for 32 bit platforms.
-ifdef LP64
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
-else
-%.o: %.cpp
-	@echo Compiling $<
-	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
-endif
 
 %.o: %.s
 	@echo Assembling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(AS.S) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 
+%.o: %.S
+	@echo Assembling $<
+	$(QUIETLY) $(REMOVE_TARGET)
+	$(COMPILE.CC) $(CFLAGS) -o $@ $< $(COMPILE_DONE)
+
 %.s: %.cpp
 	@echo Generating assembly for $<
 	$(QUIETLY) $(GENASM.CXX) -o $@ $<
--- a/make/linux/makefiles/sa.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/sa.make	Mon Apr 13 16:44:26 2020 +0100
@@ -62,7 +62,8 @@
 
 all: 
 	if [ -d $(AGENT_DIR) -a "$(SRCARCH)" != "ia64" \
-             -a "$(SRCARCH)" != "zero" ] ; then \
+             -a "$(SRCARCH)" != "zero"  \
+             -a "$(SRCARCH)" != "aarch64" ] ; then \
 	   $(MAKE) -f sa.make $(GENERATED)/sa-jdi.jar; \
 	fi
 
--- a/make/linux/makefiles/saproc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/saproc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -67,7 +67,7 @@
 # also, we don't build SA on Itanium or zero.
 
 ifneq ($(wildcard $(AGENT_DIR)),)
-ifneq ($(filter-out ia64 zero,$(SRCARCH)),)
+ifneq ($(filter-out ia64 zero aarch64,$(SRCARCH)),)
   BUILDLIBSAPROC = $(LIBSAPROC)
 endif
 endif
@@ -104,8 +104,11 @@
 	           -lthread_db
 	$(QUIETLY) [ -f $(LIBSAPROC_G) ] || { ln -s $@ $(LIBSAPROC_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBSAPROC_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
+	[ -f $(LIBSAPROC_G_DEBUGINFO) ] || { ln -s $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO); }
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -114,11 +117,12 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBSAPROC_G_DEBUGINFO) ] || { ln -s $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
 	$(RM) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
 	[ -f $(LIBSAPROC_G_DIZ) ] || { ln -s $(LIBSAPROC_DIZ) $(LIBSAPROC_G_DIZ); }
+    endif
   endif
 endif
 
--- a/make/linux/makefiles/vm.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/vm.make	Mon Apr 13 16:44:26 2020 +0100
@@ -92,6 +92,10 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
+ifeq ($(BUILTIN_SIM), true)
+  HS_LIB_ARCH=-DHOTSPOT_LIB_ARCH="\"aarch64\""
+endif
+
 CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
@@ -101,6 +105,10 @@
   ${HS_LIB_ARCH}     \
   ${VM_DISTRO}
 
+ifdef DERIVATIVE_ID
+CXXFLAGS += -DDERIVATIVE_ID="\"$(DERIVATIVE_ID)\""
+endif
+
 # This is VERY important! The version define must only be supplied to vm_version.o
 # If not, ccache will not re-use the cache at all, since the version string might contain
 # a time and date.
@@ -115,6 +123,10 @@
 CFLAGS += -DINCLUDE_TRACE=1
 endif
 
+ifdef DISTRIBUTION_ID
+CXXFLAGS += -DDISTRIBUTION_ID="\"$(DISTRIBUTION_ID)\""
+endif
+
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 CFLAGS += $(CFLAGS_WARN/BYFILE)
 
@@ -131,6 +143,11 @@
 
 LIBS += -lm -ldl -lpthread
 
+ifeq ($(BUILTIN_SIM), true)
+  ARMSIM_DIR = $(shell cd $(GAMMADIR)/../../simulator && pwd)
+  LIBS += -L $(ARMSIM_DIR) -larmsim -Wl,-rpath,$(ARMSIM_DIR)
+endif
+
 # By default, link the *.o into the library, not the executable.
 LINK_INTO$(LINK_INTO) = LIBJVM
 
@@ -191,7 +208,7 @@
 Src_Dirs/SHARK     := $(CORE_PATHS) $(SHARK_PATHS)
 Src_Dirs := $(Src_Dirs/$(TYPE))
 
-COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp chaitin\* c2_\* runtime_\*
+COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp c2_\* runtime_\*
 COMPILER1_SPECIFIC_FILES := c1_\*
 SHARK_SPECIFIC_FILES     := shark
 ZERO_SPECIFIC_FILES      := zero
@@ -220,7 +237,7 @@
 # Locate all source files in the given directory, excluding files in Src_Files_EXCLUDE.
 define findsrc
 	$(notdir $(shell find $(1)/. ! -name . -prune \
-		-a \( -name \*.c -o -name \*.cpp -o -name \*.s \) \
+		-a \( -name \*.c -o -name \*.cpp -o -name \*.s -o -name \*.S \) \
 		-a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
 endef
 
@@ -232,6 +249,19 @@
 
 vm_version.o: $(filter-out vm_version.o,$(JVM_OBJ_FILES))
 
+# aarch64 simulator build has to export extra symbols to the simulator
+ifeq ($(BUILTIN_SIM), true)
+mapfile : $(MAPFILE) vm.def
+	rm -f $@
+	awk '{ if ($$0 ~ "INSERT VTABLE SYMBOLS HERE")	\
+                 { system ("cat vm.def");		\
+                   print "	# aarch64 sim support";	\
+                   print "	das1;";			\
+                   print "	bccheck;"; }		\
+               else					\
+                 { print $$0 }				\
+             }' > $@ < $(MAPFILE)
+else
 mapfile : $(MAPFILE) vm.def
 	rm -f $@
 	awk '{ if ($$0 ~ "INSERT VTABLE SYMBOLS HERE")	\
@@ -239,6 +269,7 @@
                else					\
                  { print $$0 }				\
              }' > $@ < $(MAPFILE)
+endif
 
 mapfile_reorder : mapfile $(REORDERFILE)
 	rm -f $@
@@ -253,7 +284,11 @@
   ifeq ($(ZERO_LIBARCH), ppc64)
     STATIC_CXX = false
   else
-    STATIC_CXX = true
+    ifeq ($(ZERO_LIBARCH), ppc64)
+      STATIC_CXX = false
+    else
+      STATIC_CXX = true
+    endif
   endif
 endif
 
@@ -343,22 +378,26 @@
 	}
 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DEBUGINFO)
-	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DEBUGINFO) $@
-  ifeq ($(STRIP_POLICY),all_strip)
-	$(QUIETLY) $(STRIP) $@
-  else
-    ifeq ($(STRIP_POLICY),min_strip)
-	$(QUIETLY) $(STRIP) -g $@
-    # implied else here is no stripping at all
-    endif
-  endif
-	$(QUIETLY) [ -f $(LIBJVM_G_DEBUGINFO) ] || ln -s $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-  ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	$(RM) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	[ -f $(LIBJVM_G_DIZ) ] || { ln -s $(LIBJVM_DIZ) $(LIBJVM_G_DIZ); }
-  endif
+     ifneq ($(STRIP_POLICY),no_strip)
+	     	   $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DEBUGINFO)
+		   $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DEBUGINFO) $@
+     endif
+     ifeq ($(STRIP_POLICY),all_strip)
+	     	  $(QUIETLY) $(STRIP) $@
+     else
+     ifeq ($(STRIP_POLICY),min_strip)
+	     	  $(QUIETLY) $(STRIP) -g $@
+	  # implied else here is no stripping at all
+     endif
+     endif
+     ifneq ($(STRIP_POLICY),no_strip)
+	       $(QUIETLY) [ -f $(LIBJVM_G_DEBUGINFO) ] || ln -s $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
+     ifeq ($(ZIP_DEBUGINFO_FILES),1)
+	           $(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
+		   $(RM) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
+		   [ -f $(LIBJVM_G_DIZ) ] || { ln -s $(LIBJVM_DIZ) $(LIBJVM_G_DIZ); }
+     endif
+     endif
 endif
 
 DEST_SUBDIR        = $(JDK_LIBDIR)/$(VM_SUBDIR)
--- a/make/linux/makefiles/zeroshark.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/makefiles/zeroshark.make	Mon Apr 13 16:44:26 2020 +0100
@@ -25,6 +25,38 @@
 
 # Setup common to Zero (non-Shark) and Shark versions of VM
 
+ifeq ($(ZERO_LIBARCH),arm)
+# check to see if we are building the assembler jit or just zero.
+ifeq ($(ARM32JIT),true)
+Obj_Files += asm_helper.o
+Obj_Files += cppInterpreter_arm.o
+Obj_Files += arm32JIT.o
+
+CFLAGS += -DHOTSPOT_ASM
+
+cppInterpreter_arm.o:	offsets_arm.s bytecodes_arm.s
+arm32JIT.o:		offsets_arm.s
+
+offsets_arm.s:	mkoffsets
+	@echo Generating assembler offsets
+	./mkoffsets > $@
+
+bytecodes_arm.s: bytecodes_arm.def mkbc
+	@echo Generating ARM assembler bytecode sequences
+	$(CXX_COMPILE) -E -x c++ - < $< | ./mkbc - $@ $(COMPILE_DONE)
+
+mkbc:	$(GAMMADIR)/tools/mkbc.c
+	@echo Compiling mkbc tool
+	$(CC_COMPILE) -o $@ $< $(COMPILE_DONE)
+
+mkoffsets:	asm_helper.cpp
+	@echo Compiling offset generator
+	$(QUIETLY) $(REMOVE_TARGET)
+	$(CXX_COMPILE) -DSTATIC_OFFSETS -o $@ $< $(COMPILE_DONE)
+
+endif
+endif
+
 # The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
 OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
 # The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/linux/platform_aarch64	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,15 @@
+os_family = linux
+
+arch = aarch64
+
+arch_model = aarch64
+
+os_arch = linux_aarch64
+
+os_arch_model = linux_aarch64
+
+lib_arch = aarch64
+
+compiler = gcc
+
+sysdefs = -DLINUX -D_GNU_SOURCE -DAARCH64
--- a/make/linux/platform_zero.in	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/linux/platform_zero.in	Mon Apr 13 16:44:26 2020 +0100
@@ -14,4 +14,4 @@
 
 gnu_dis_arch = zero
 
-sysdefs = -DLINUX -D_GNU_SOURCE -DCC_INTERP -DZERO -D@ZERO_ARCHDEF@ -DZERO_LIBARCH=\"@ZERO_LIBARCH@\"
+sysdefs = -DLINUX -D_GNU_SOURCE -DCC_INTERP -DZERO @ZERO_ARCHDEF@ -DZERO_LIBARCH=\"@ZERO_LIBARCH@\"
--- a/make/solaris/makefiles/adlc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/adlc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -75,8 +75,10 @@
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
-ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
-  CFLAGS_WARN = +w -errwarn
+ifneq ($(COMPILER_WARNINGS_FATAL),false)
+  ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
+    CFLAGS_WARN = +w -errwarn
+  endif
 endif
 CFLAGS += $(CFLAGS_WARN)
 
--- a/make/solaris/makefiles/dtrace.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/dtrace.make	Mon Apr 13 16:44:26 2020 +0100
@@ -115,6 +115,7 @@
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc
 	[ -f $(XLIBJVM_DB_G) ] || { ln -s $(LIBJVM_DB) $(XLIBJVM_DB_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 # An empty section header has sh_addr == 0 and sh_size == 0.
@@ -128,6 +129,7 @@
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the link name:
 	( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB) )
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -138,11 +140,13 @@
   endif
 	[ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the archived name:
 	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO) )
 	$(RM) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO)
 	[ -f $(XLIBJVM_DB_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); }
+    endif
   endif
 endif
 
@@ -153,6 +157,7 @@
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
 	[ -f $(XLIBJVM_DTRACE_G) ] || { ln -s $(LIBJVM_DTRACE) $(XLIBJVM_DTRACE_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DTRACE_DEBUGINFO)
@@ -161,6 +166,7 @@
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the link name:
 	( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE) )
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -171,11 +177,13 @@
   endif
 	[ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the archived name:
 	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO) )
 	$(RM) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO)
 	[ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); }
+    endif
   endif
 endif
 
@@ -225,12 +233,14 @@
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc
 	[ -f $(LIBJVM_DB_G) ] || { ln -s $@ $(LIBJVM_DB_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DB_DEBUGINFO)
 # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections.
 #	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DB_DEBUGINFO) $@
 	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(LIBJVM_DB_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -241,9 +251,11 @@
   endif
 	[ -f $(LIBJVM_DB_G_DEBUGINFO) ] || { ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO)
 	$(RM) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO)
 	[ -f $(LIBJVM_DB_G_DIZ) ] || { ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); }
+    endif
   endif
 endif
 
@@ -253,12 +265,14 @@
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
 	[ -f $(LIBJVM_DTRACE_G) ] || { ln -s $@ $(LIBJVM_DTRACE_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DTRACE_DEBUGINFO)
 # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections.
 #	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DTRACE_DEBUGINFO) $@
 	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(LIBJVM_DTRACE_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -269,9 +283,11 @@
   endif
 	[ -f $(LIBJVM_DTRACE_G_DEBUGINFO) ] || { ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO)
 	$(RM) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO)
 	[ -f $(LIBJVM_DTRACE_G_DIZ) ] || { ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); }
+    endif
   endif
 endif
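The hunks above wrap every debug-info extraction step (FIX_EMPTY_SEC_HDR_FLAGS, OBJCOPY --only-keep-debug, ADD_GNU_DEBUGLINK, and the ZIPEXE/RM pair) in an additional STRIP_POLICY check, so a no_strip build leaves the libraries untouched. A minimal sketch of the two ends of the policy, assuming these variables are simply passed through to the makefiles on the command line (the real build driver may set them elsewhere):

   # keep debug info in the libraries; the objcopy/zip steps are skipped
   make ENABLE_FULL_DEBUG_SYMBOLS=1 ZIP_DEBUGINFO_FILES=1 STRIP_POLICY=no_strip

   # split out the .debuginfo, zip it into a .diz, then strip the library
   make ENABLE_FULL_DEBUG_SYMBOLS=1 ZIP_DEBUGINFO_FILES=1 STRIP_POLICY=all_strip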
 
--- a/make/solaris/makefiles/gcc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/gcc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -117,7 +117,9 @@
 
 
 # Compiler warnings are treated as errors 
-WARNINGS_ARE_ERRORS = -Werror 
+ifneq ($(COMPILER_WARNINGS_FATAL),false)
+WARNINGS_ARE_ERRORS = -Werror
+endif
 # Enable these warnings. See 'info gcc' about details on these options
 ADDITIONAL_WARNINGS = -Wpointer-arith -Wconversion -Wsign-compare 
 CFLAGS_WARN/DEFAULT = $(WARNINGS_ARE_ERRORS) $(ADDITIONAL_WARNINGS) 
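With this gcc.make change -Werror remains the default and is only dropped when COMPILER_WARNINGS_FATAL is set to false. A sketch of the opt-out, assuming the variable reaches this makefile unchanged:

   # build without treating compiler warnings as errors
   make COMPILER_WARNINGS_FATAL=false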
--- a/make/solaris/makefiles/jsig.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/jsig.make	Mon Apr 13 16:44:26 2020 +0100
@@ -63,6 +63,7 @@
                          $(LFLAGS_JSIG) -o $@ $(JSIGSRCDIR)/jsig.c -ldl
 	[ -f $(LIBJSIG_G) ] || { ln -s $@ $(LIBJSIG_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 # An empty section header has sh_addr == 0 and sh_size == 0.
@@ -74,6 +75,7 @@
 # Use $(ADD_GNU_DEBUGLINK) until a fixed $(OBJCOPY) is available.
 #	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
 	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(LIBJSIG_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -84,9 +86,11 @@
   endif
 	[ -f $(LIBJSIG_G_DEBUGINFO) ] || { ln -s $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
 	$(RM) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
 	[ -f $(LIBJSIG_G_DIZ) ] || { ln -s $(LIBJSIG_DIZ) $(LIBJSIG_G_DIZ); }
+    endif
   endif
 endif
 
--- a/make/solaris/makefiles/rules.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/rules.make	Mon Apr 13 16:44:26 2020 +0100
@@ -135,20 +135,10 @@
 
 include $(GAMMADIR)/make/altsrc.make
 
-# Sun compiler for 64 bit Solaris does not support building non-PIC object files.
-ifdef LP64
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
-else
-%.o: %.cpp
-	@echo Compiling $<
-	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
-endif
 
 %.o: %.s
 	@echo Assembling $<
--- a/make/solaris/makefiles/saproc.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/saproc.make	Mon Apr 13 16:44:26 2020 +0100
@@ -114,6 +114,7 @@
 	           -ldl -ldemangle -lthread -lc
 	[ -f $(LIBSAPROC_G) ] || { ln -s $@ $(LIBSAPROC_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 # An empty section header has sh_addr == 0 and sh_size == 0.
@@ -125,6 +126,7 @@
 # Use $(ADD_GNU_DEBUGLINK) until a fixed $(OBJCOPY) is available.
 #	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
 	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(LIBSAPROC_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -135,9 +137,11 @@
   endif
 	[ -f $(LIBSAPROC_G_DEBUGINFO) ] || { ln -s $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
 	$(RM) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
 	[ -f $(LIBSAPROC_G_DIZ) ] || { ln -s $(LIBSAPROC_DIZ) $(LIBSAPROC_G_DIZ); }
+    endif
   endif
 endif
 
--- a/make/solaris/makefiles/vm.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/solaris/makefiles/vm.make	Mon Apr 13 16:44:26 2020 +0100
@@ -85,11 +85,19 @@
   ${HS_LIB_ARCH}     \
   ${VM_DISTRO}
 
+ifdef DERIVATIVE_ID
+CXXFLAGS += -DDERIVATIVE_ID="\"$(DERIVATIVE_ID)\""
+endif
+
 # This is VERY important! The version define must only be supplied to vm_version.o
 # If not, ccache will not re-use the cache at all, since the version string might contain
 # a time and date.
 vm_version.o: CXXFLAGS += ${JRE_VERSION}
 
+ifdef DISTRIBUTION_ID
+CXXFLAGS += -DDISTRIBUTION_ID="\"$(DISTRIBUTION_ID)\""
+endif
+
 # Large File Support
 ifneq ($(LP64), 1)
 ostream.o: CXXFLAGS += -D_FILE_OFFSET_BITS=64
@@ -206,7 +214,7 @@
 Src_Dirs/SHARK     := $(CORE_PATHS)
 Src_Dirs := $(Src_Dirs/$(TYPE))
 
-COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp chaitin\* c2_\* runtime_\*
+COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp c2_\* runtime_\*
 COMPILER1_SPECIFIC_FILES := c1_\*
 SHARK_SPECIFIC_FILES     := shark
 ZERO_SPECIFIC_FILES      := zero
@@ -301,6 +309,7 @@
 	$(QUIETLY) [ -f $(LIBJVM_G) ] || ln -s $@ $(LIBJVM_G)
 	$(QUIETLY) [ -f $(LIBJVM_G).1 ] || ln -s $@.1 $(LIBJVM_G).1
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+  ifneq ($(STRIP_POLICY),no_strip)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 # An empty section header has sh_addr == 0 and sh_size == 0.
@@ -312,6 +321,7 @@
 # Use $(ADD_GNU_DEBUGLINK) until a fixed $(OBJCOPY) is available.
 #	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DEBUGINFO) $@
 	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(LIBJVM_DEBUGINFO) $@
+  endif
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -322,9 +332,11 @@
   endif
 	$(QUIETLY) [ -f $(LIBJVM_G_DEBUGINFO) ] || ln -s $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifneq ($(STRIP_POLICY),no_strip)
 	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
 	$(RM) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
 	[ -f $(LIBJVM_G_DIZ) ] || { ln -s $(LIBJVM_DIZ) $(LIBJVM_G_DIZ); }
+    endif
   endif
 endif
 endif # filter -sbfast -xsbfast
--- a/make/windows/create_obj_files.sh	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/windows/create_obj_files.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -112,7 +112,7 @@
     "shark")     Src_Dirs="${CORE_PATHS}" ;;
 esac
 
-COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp chaitin* c2_* runtime_*"
+COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp c2_* runtime_*"
 COMPILER1_SPECIFIC_FILES="c1_*"
 SHARK_SPECIFIC_FILES="shark"
 ZERO_SPECIFIC_FILES="zero"
--- a/make/windows/makefiles/vm.make	Mon Apr 13 06:13:18 2020 +0100
+++ b/make/windows/makefiles/vm.make	Mon Apr 13 16:44:26 2020 +0100
@@ -79,6 +79,14 @@
 # Define that so jni.h is on correct side
 CXX_FLAGS=$(CXX_FLAGS) /D "_JNI_IMPLEMENTATION_"
 
+!ifdef DERIVATIVE_ID
+CXX_FLAGS = $(CPP_FLAGS) /D "DERIVATIVE_ID=\"$(DERIVATIVE_ID)\""
+!endif
+
+!ifdef DISTRIBUTION_ID
+CXX_FLAGS = $(CPP_FLAGS) /D "DISTRIBUTION_ID=\"$(DISTRIBUTION_ID)\""
+!endif
+
 !if "$(BUILDARCH)" == "ia64"
 STACK_SIZE="/STACK:1048576,262144"
 !else
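The Solaris vm.make hunk above and this Windows vm.make hunk both emit the DERIVATIVE_ID and DISTRIBUTION_ID defines only when the corresponding variables are set, so unconfigured builds are unchanged. A hypothetical packager invocation, assuming the variables are forwarded to these makefiles (the values are placeholders):

   make  DERIVATIVE_ID=my-derivative-build DISTRIBUTION_ID=my-distro    # Solaris (GNU make)
   nmake DERIVATIVE_ID=my-derivative-build DISTRIBUTION_ID=my-distro    # Windows (nmake)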
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,11903 @@
+//
+// Copyright (c) 2013, Red Hat Inc.
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates.
+// All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// AArch64 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// We must define the 64 bit int registers in two 32 bit halves, the
+// real lower register and a virtual upper half register. upper halves
+// are used by the register allocator but are not actually supplied as
+// operands to memory ops.
+//
+// follow the C1 compiler in making registers
+//
+//   r0-r7,r10-r26 volatile (caller save)
+//   r27-r32 system (no save, no allocate)
+//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
+//
+// as regards Java usage. we don't use any callee save registers
+// because this makes it difficult to de-optimise a frame (see comment
+// in x86 implementation of Deoptimization::unwind_callee_save_values)
+//
+
+// General Registers
+
+reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
+reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
+reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
+reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
+reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
+reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
+reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
+reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
+reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
+reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
+reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
+reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
+reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
+reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
+reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
+reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
+reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
+reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
+reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
+reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
+reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
+reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
+reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
+reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
+reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
+reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
+reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
+reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
+reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
+reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
+reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
+reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
+reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
+reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
+reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
+reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
+reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
+reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
+reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
+reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
+reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
+reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
+reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
+reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
+reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
+reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
+reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
+reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
+reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
+reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
+reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
+reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());  
+reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
+reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
+reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
+reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
+reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
+reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
+reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
+reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Double Registers
+
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers. In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even. Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// AArch64 has 32 floating-point registers. Each is 128 bits wide and can
+// store a vector of single or double precision floating-point values
+// (4 * 32 bit floats or 2 * 64 bit doubles).  We currently only
+// use the first float or double element of the vector.
+
+// for Java use, float registers v0-v15 are always save-on-call, whereas
+// the platform ABI treats v8-v15 as callee-save. float registers
+// v16-v31 are SOC as per the platform spec.
+
+  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()         );
+  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next() );
+  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()         );
+  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next() );
+  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()         );
+  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next() );
+  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()         );
+  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next() );
+  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()         );
+  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next() );
+  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()         );
+  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next() );
+  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()         );
+  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next() );
+  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()         );
+  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next() );
+  reg_def V8   ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()         );
+  reg_def V8_H ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()->next() );
+  reg_def V9   ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()         );
+  reg_def V9_H ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()->next() );
+  reg_def V10  ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()        );
+  reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next());
+  reg_def V11  ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()        );
+  reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next());
+  reg_def V12  ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()        );
+  reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next());
+  reg_def V13  ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()        );
+  reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next());
+  reg_def V14  ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()        );
+  reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next());
+  reg_def V15  ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()        );
+  reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next());
+  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()        );
+  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next());
+  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()        );
+  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next());
+  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()        );
+  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next());
+  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()        );
+  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next());
+  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()        );
+  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next());
+  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()        );
+  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next());
+  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()        );
+  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next());
+  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()        );
+  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next());
+  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()        );
+  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next());
+  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()        );
+  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next());
+  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()        );
+  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next());
+  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()        );
+  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next());
+  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()        );
+  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next());
+  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()        );
+  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next());
+  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()        );
+  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next());
+  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()        );
+  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next());
+
+// ----------------------------
+// Special Registers
+// ----------------------------
+
+// the AArch64 CPSR status flag register is not directly accessible as an
+// instruction operand. the FPSR status flag register is a system
+// register which can be written/read using MSR/MRS but again does not
+// appear as an operand (a code identifying the FPSR occurs as an
+// immediate value in the instruction).
+
+reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
+
+
+// Specify priority of register selection within phases of register
+// allocation.  Highest priority is first.  A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, like EAX and EDX on I486, and choose no-save registers
+// before save-on-call, & save-on-call before save-on-entry.  Registers
+// which participate in fixed calling sequences should come last.
+// Registers which are used as pairs must fall on an even boundary.
+
+alloc_class chunk0(
+    // volatiles
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+
+    // arg registers
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+
+    // non-volatiles
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    
+    // non-allocatable registers
+
+    R27, R27_H, // heapbase
+    R28, R28_H, // thread
+    R29, R29_H, // fp
+    R30, R30_H, // lr
+    R31, R31_H, // sp
+);
+
+alloc_class chunk1(
+
+    // no save
+    V16, V16_H,
+    V17, V17_H,
+    V18, V18_H,
+    V19, V19_H,
+    V20, V20_H,
+    V21, V21_H,
+    V22, V22_H,
+    V23, V23_H,
+    V24, V24_H,
+    V25, V25_H,
+    V26, V26_H,
+    V27, V27_H,
+    V28, V28_H,
+    V29, V29_H,
+    V30, V30_H,
+    V31, V31_H,
+
+    // arg registers
+    V0, V0_H,
+    V1, V1_H,
+    V2, V2_H,
+    V3, V3_H,
+    V4, V4_H,
+    V5, V5_H,
+    V6, V6_H,
+    V7, V7_H,
+
+    // non-volatiles
+    V8, V8_H,
+    V9, V9_H,
+    V10, V10_H,
+    V11, V11_H,
+    V12, V12_H,
+    V13, V13_H,
+    V14, V14_H,
+    V15, V15_H,
+);
+
+alloc_class chunk2(RFLAGS);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
+// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// Class for all 32 bit integer registers -- excludes SP which will
+// never be used as an integer register
+reg_class any_reg32(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26,
+    R27,
+    R28,
+    R29,
+    R30
+);
+
+// Singleton class for R0 int register
+reg_class int_r0_reg(R0);
+
+// Singleton class for R2 int register
+reg_class int_r2_reg(R2);
+
+// Singleton class for R4 int register
+reg_class int_r4_reg(R4);
+
+// Class for all long integer registers (including RSP)
+reg_class any_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    R27, R27_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H
+);
+
+// Class for all non-special integer registers
+reg_class no_special_reg32(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R10,
+    R11,
+    R12,			// rmethod
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26
+ /* R27, */			// heapbase
+ /* R28, */			// thread
+ /* R29, */			// fp
+ /* R30, */			// lr
+ /* R31 */			// sp
+);
+
+// Class for all non-special long integer registers
+reg_class no_special_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,			// rmethod
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+ /* R27, R27_H,	*/		// heapbase
+ /* R28, R28_H, */		// thread
+ /* R29, R29_H, */		// fp
+ /* R30, R30_H, */		// lr
+ /* R31, R31_H */		// sp
+);
+
+// Class for 64 bit register r0
+reg_class r0_reg(
+    R0, R0_H
+);
+
+// Class for 64 bit register r1
+reg_class r1_reg(
+    R1, R1_H
+);
+
+// Class for 64 bit register r2
+reg_class r2_reg(
+    R2, R2_H
+);
+
+// Class for 64 bit register r3
+reg_class r3_reg(
+    R3, R3_H
+);
+
+// Class for 64 bit register r4
+reg_class r4_reg(
+    R4, R4_H
+);
+
+// Class for 64 bit register r5
+reg_class r5_reg(
+    R5, R5_H
+);
+
+// Class for 64 bit register r10
+reg_class r10_reg(
+    R10, R10_H
+);
+
+// Class for 64 bit register r11
+reg_class r11_reg(
+    R11, R11_H
+);
+
+// Class for method register
+reg_class method_reg(
+    R12, R12_H
+);
+
+// Class for heapbase register
+reg_class heapbase_reg(
+    R27, R27_H
+);
+
+// Class for thread register
+reg_class thread_reg(
+    R28, R28_H
+);
+
+// Class for frame pointer register
+reg_class fp_reg(
+    R29, R29_H
+);
+
+// Class for link register
+reg_class lr_reg(
+    R30, R30_H
+);
+
+// Class for long sp register
+reg_class sp_reg(
+  R31, R31_H
+);
+
+// Class for all pointer registers
+reg_class ptr_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    R27, R27_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H
+);
+
+// Class for all non_special pointer registers
+reg_class no_special_ptr_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+ /* R27, R27_H, */		// heapbase
+ /* R28, R28_H, */		// thread
+ /* R29, R29_H, */		// fp
+ /* R30, R30_H, */		// lr
+ /* R31, R31_H */		// sp
+);
+
+// Class for all float registers
+reg_class float_reg(
+    V0,
+    V1,
+    V2,
+    V3,
+    V4,
+    V5,
+    V6,
+    V7,
+    V8,
+    V9,
+    V10,
+    V11,
+    V12,
+    V13,
+    V14,
+    V15,
+    V16,
+    V17,
+    V18,
+    V19,
+    V20,
+    V21,
+    V22,
+    V23,
+    V24,
+    V25,
+    V26,
+    V27,
+    V28,
+    V29,
+    V30,
+    V31
+);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+// Class for all double registers
+reg_class double_reg(
+    V0, V0_H, 
+    V1, V1_H, 
+    V2, V2_H, 
+    V3, V3_H, 
+    V4, V4_H, 
+    V5, V5_H, 
+    V6, V6_H, 
+    V7, V7_H, 
+    V8, V8_H, 
+    V9, V9_H, 
+    V10, V10_H, 
+    V11, V11_H, 
+    V12, V12_H, 
+    V13, V13_H, 
+    V14, V14_H, 
+    V15, V15_H, 
+    V16, V16_H, 
+    V17, V17_H, 
+    V18, V18_H, 
+    V19, V19_H, 
+    V20, V20_H, 
+    V21, V21_H, 
+    V22, V22_H, 
+    V23, V23_H, 
+    V24, V24_H, 
+    V25, V25_H, 
+    V26, V26_H, 
+    V27, V27_H, 
+    V28, V28_H, 
+    V29, V29_H, 
+    V30, V30_H, 
+    V31, V31_H
+);
+
+// Singleton class for condition codes
+reg_class int_flags(RFLAGS);
+
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+
+// we follow the ppc-aix port in using a simple cost model which ranks
+// register operations as cheap, memory ops as more expensive and
+// branches as most expensive. the first two have a low as well as a
+// normal cost. huge cost appears to be a way of saying don't do
+// something
+
+definitions %{
+  // The default cost (of a register move instruction).
+  int_def INSN_COST            (    100,     100);
+  int_def BRANCH_COST          (    200,     2 * INSN_COST);
+  int_def CALL_COST            (    200,     2 * INSN_COST);
+  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
+%}
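+// For illustration only, following the generated-code format documented
+// in the DEFINITION BLOCK comment above, these definitions expand in the
+// generated ad_<arch>.hpp to lines of the form
+//   #define INSN_COST   (100)             // value == 100
+//   #define BRANCH_COST (2 * INSN_COST)   // value == 200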
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+%}
+
+source %{
+
+#define __ _masm.
+
+// advance declarations for helper functions to convert register
+// indices to register objects
+
+// the ad file has to provide implementations of certain methods
+// expected by the generic code
+//
+// REQUIRED FUNCTIONALITY
+
+//=============================================================================
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+
+int MachCallStaticJavaNode::ret_addr_offset()
+{
+  // call should be a simple bl
+  // unless this is a method handle invoke in which case it is
+  // mov(rfp, sp), bl, mov(sp, rfp)
+  int off = 4;
+  if (_method_handle_invoke) {
+    off += 4;
+  }
+  return off;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset()
+{
+  return 16; // movz, movk, movk, bl
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+  // for generated stubs the call will be
+  //   far_call(addr)
+  // for real runtime callouts it will be
+  //   mov(rscratch1, RuntimeAddress(addr))
+  //   blrt rscratch1
+  CodeBlob *cb = CodeCache::find_blob(_entry_point);
+  if (cb) {
+    return MacroAssembler::far_branch_size();
+  } else {
+    // A 48-bit address.  See movptr().
+    // then a blrt
+    // return 16;
+    return 4 * NativeInstruction::instruction_size;
+  }
+}
+
+// Indicate if the safepoint node needs the polling page as an input
+
+// the shared code plants the oop data at the start of the generated
+// code for the safepoint node and that needs to be at the load
+// instruction itself. so we cannot plant a mov of the safepoint poll
+// address followed by a load. setting this to true means the mov is
+// scheduled as a prior instruction. that's better for scheduling
+// anyway.
+
+bool SafePointNode::needs_polling_address_input()
+{
+  return true;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  st->print("BREAKPOINT");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  __ brk(0);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
+    st->print("nop \t# %d bytes pad for loops and calls", _count);
+  }
+#endif
+
+  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
+    MacroAssembler _masm(&cbuf);
+    for (int i = 0; i < _count; i++) { 
+      __ nop();
+    }
+  }
+
+  uint MachNopNode::size(PhaseRegAlloc*) const {
+    return _count * NativeInstruction::instruction_size;
+  }
+
+//=============================================================================
+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
+
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
+bool MachConstantBaseNode::requires_late_expand() const { return false; }
+void MachConstantBaseNode::lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  // Empty encoding
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  return 0;
+}
+
+#ifndef PRODUCT
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  st->print("-- \t// MachConstantBaseNode (empty encoding)");
+}
+#endif
+
+#ifndef PRODUCT
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  if (C->need_stack_bang(framesize))
+    st->print("# stack bang size=%d\n\t", framesize);
+
+  if (framesize == 0) {
+    // Is this even possible?
+    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize)); 
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    st->print("sub  sp, sp, #%d\n\t", framesize);
+    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
+  } else {
+    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize)); 
+    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
+    st->print("sub  sp, sp, rscratch1");
+  }
+}
+#endif
+
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  // n.b. frame size includes space for return pc and rfp
+  long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
+  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
+
+  // insert a nop at the start of the prolog so we can patch in a
+  // branch if we need to invalidate the method later
+  __ nop();
+
+  if (C->need_stack_bang(framesize))
+    __ generate_stack_overflow_check(framesize);
+
+  __ build_frame(framesize);
+
+  if (NotifySimulator) {
+    __ notify(Assembler::method_entry);
+  }
+
+  if (VerifyStackAtCalls) {
+    Unimplemented();
+  }
+
+  C->set_frame_complete(cbuf.insts_size());
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
+}
+
+int MachPrologNode::reloc() const
+{
+  return 0;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  st->print("# pop frame %d\n\t",framesize);
+
+  if (framesize == 0) {
+    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
+    st->print("add  sp, sp, #%d\n\t", framesize);
+  } else {
+    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
+    st->print("add  sp, sp, rscratch1\n\t");
+    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
+  }
+
+  if (do_polling() && C->is_method_compilation()) {
+    st->print("# touch polling page\n\t");
+    st->print("mov  rscratch1, #0x%x\n\t", os::get_polling_page());
+    st->print("ldr zr, [rscratch1]");
+  }
+}
+#endif
+
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  __ remove_frame(framesize);
+
+  if (NotifySimulator) {
+    __ notify(Assembler::method_reentry);
+  }
+
+  if (do_polling() && C->is_method_compilation()) {
+    address polling_page(os::get_polling_page());
+    __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type);
+  }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  // Variable size. Determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // 1 for polling page.
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// This method seems to be obsolete. It is declared in machnode.hpp
+// and defined in all *.ad files, but it is never called. Should we
+// get rid of it?
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  return 4;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float or
+// rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+
+  if (reg == OptoReg::Bad) {
+    return rc_bad;
+  }
+
+  // we have 30 int registers * 2 halves
+  // (rscratch1 and rscratch2 are omitted)
+
+  if (reg < 60) {
+    return rc_int;
+  }
+
+  // we have 32 float register * 2 halves
+  if (reg < 60 + 64) {
+    return rc_float;
+  }
+
+  // Between float regs & stack is the flags regs.
+  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+
+  return rc_stack;
+}
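+// Derived from the counts above: OptoReg names 0..59 cover the integer
+// register halves, 60..123 the float register halves; anything higher is
+// expected to be a stack slot (the flags register sits in between and
+// trips the assert above if it is ever spilled).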
+
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+  Compile* C = ra_->C;
+
+  // Get registers to move.
+  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_hi = ra_->get_reg_second(this);
+  OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+  enum RC src_hi_rc = rc_class(src_hi);
+  enum RC src_lo_rc = rc_class(src_lo);
+  enum RC dst_hi_rc = rc_class(dst_hi);
+  enum RC dst_lo_rc = rc_class(dst_lo);
+
+  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+
+  if (src_hi != OptoReg::Bad) {
+    assert((src_lo&1)==0 && src_lo+1==src_hi &&
+           (dst_lo&1)==0 && dst_lo+1==dst_hi,
+           "expected aligned-adjacent pairs");
+  }
+
+  if (src_lo == dst_lo && src_hi == dst_hi) {
+    return 0;            // Self copy, no move.
+  }
+
+  switch (src_lo_rc) {
+  case rc_int:
+    if (dst_lo_rc == rc_int) {	// gpr --> gpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ mov(as_Register(Matcher::_regEncode[dst_lo]),
+                 as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("mov  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ movw(as_Register(Matcher::_regEncode[dst_lo]),
+                  as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("movw  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else {			// gpr --> stack spill
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ str(as_Register(Matcher::_regEncode[src_lo]),
+	         Address(sp, dst_offset));
+        } else if (st) {
+          st->print("str  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+		    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strw(as_Register(Matcher::_regEncode[src_lo]),
+	         Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strw  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+		    dst_offset);
+        }
+      }
+    }
+    return 4;
+  case rc_float:
+    if (dst_lo_rc == rc_int) {	// fpr --> gpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else {			// fpr --> stack spill
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
+	         Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strd  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+		    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
+	         Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strs  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+		    dst_offset);
+        }
+      }
+    }
+    return 4;
+  case rc_stack:
+    int src_offset = ra_->reg2offset(src_lo);
+    if (dst_lo_rc == rc_int) {	// stack --> gpr load
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
+                 Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldr  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+		    src_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
+                  Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldr  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                   src_offset);
+        }
+      }
+      return 4;
+    } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                 Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldrd  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+		    src_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                  Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldrs  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                   src_offset);
+        }
+      }
+      return 4;
+    } else {			// stack --> stack copy
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldr(rscratch1, Address(sp, src_offset));
+          __ str(rscratch1, Address(sp, dst_offset));
+        } else if (st) {
+          st->print("ldr  rscratch1, [sp, %d]\t# mem-mem spill",
+		    src_offset);
+          st->print("\n\t");
+          st->print("str  rscratch1, [sp, %d]",
+		    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrw(rscratch1, Address(sp, src_offset));
+          __ strw(rscratch1, Address(sp, dst_offset));
+        } else if (st) {
+          st->print("ldrw  rscratch1, [sp, %d]\t# mem-mem spill",
+		    src_offset);
+          st->print("\n\t");
+          st->print("strw  rscratch1, [sp, %d]",
+		    dst_offset);
+        }
+      }
+      return 8;
+    }
+  }
+
+  assert(false," bad rc_class for spill ");
+  Unimplemented();
+  return 0;
+
+}
+
+#ifndef PRODUCT
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  if (!ra_)
+    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
+  else
+    implementation(NULL, ra_, false, st);
+}
+#endif
+
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  implementation(&cbuf, ra_, false, NULL);
+}
+
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return implementation(NULL, ra_, true, NULL);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg = ra_->get_reg_first(this);
+  st->print("add %s, rsp, #%d]\t# box lock",
+            Matcher::regName[reg], offset);
+}
+#endif
+
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg    = ra_->get_encode(this);
+
+  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
+    __ add(as_Register(reg), sp, offset);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
+  return 4;
+}
+
+uint size_java_to_interp()
+{
+  // on jdk7 we only need a movoop and a branch
+  return 7 * NativeInstruction::instruction_size;
+}
+
+// Offset from start of compiled java to interpreter stub to the load
+// constant that loads the inline cache (IC) (0 on aarch64).
+const int CompiledStaticCall::comp_to_int_load_offset = 0;
+
+// emit call stub, compiled java to interpreter
+void emit_java_to_interp(CodeBuffer& cbuf, address mark)
+{
+  // Stub is fixed up when the corresponding call is converted from
+  // calling compiled code to calling interpreted code.
+  // movq rmethod, zr
+  // here:
+  // b(here) # jmp to self
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(Compile::MAX_stubs_size);
+
+  int offset = __ offset();
+  if (base == NULL)  return;  // CodeBuffer::expand failed
+  // static stub relocation stores the instruction address of the call
+  const RelocationHolder &rspec = static_stub_Relocation::spec(mark);
+  __ relocate(rspec);
+  // static stub relocation also tags the methodOop in the code-stream.
+  //
+  // n.b. for jdk7 we have to use movoop and locate the oop in the
+  // cpool; if we use an immediate then patching fails to update the
+  // pool oop and GC overwrites the patch with movk/z 0x0000 again
+  __ movoop(rmethod, (jobject) NULL);
+  // This is recognized as unresolved by relocs/nativeinst/ic code
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
+
+  assert((__ offset() - offset) <= (int)size_java_to_interp(), "stub too big");
+  // Update current stubs pointer and restore insts_end.
+  __ end_a_stub();
+}
+
+// relocation entries for call stub, compiled java to interpreter
+uint reloc_java_to_interp()
+{
+  // n.b. on jdk7 we use a movoop and a branch
+  return 2;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  st->print_cr("# MachUEPNode");
+  if (UseCompressedOops) {
+    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+    if (Universe::narrow_oop_shift() != 0) {
+      st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
+    }
+  } else {
+   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+  }
+  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
+  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
+}
+#endif
+
+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  // This is the unverified entry point.
+  MacroAssembler _masm(&cbuf);
+
+  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
+  Label skip;
+  // TODO
+  // can we avoid this skip and still use a reloc?
+  __ br(Assembler::EQ, skip);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ bind(skip);
+}
+
+uint MachUEPNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_);
+}
+
+// REQUIRED EMIT CODE
+
+//=============================================================================
+
+uint size_exception_handler()
+{
+  return MacroAssembler::far_branch_size();
+}
+
+// Emit exception handler code.
+int emit_exception_handler(CodeBuffer& cbuf)
+{
+  // mov rscratch1 #exception_blob_entry_point
+  // br rscratch1
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base =
+  __ start_a_stub(size_exception_handler());
+  if (base == NULL)  return 0;  // CodeBuffer::expand failed
+  int offset = __ offset();
+  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+}
+
+uint size_deopt_handler()
+{
+  // count one adr and one far branch instruction
+  return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
+}
+
+// Emit deopt handler code.
+int emit_deopt_handler(CodeBuffer& cbuf)
+{
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base =
+  __ start_a_stub(size_deopt_handler());
+  if (base == NULL)  return 0;  // CodeBuffer::expand failed
+  int offset = __ offset();
+
+  __ adr(lr, __ pc());
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+}
+
+// REQUIRED MATCHER CODE
+
+//=============================================================================
+
+// !!! FIXME AARCH64 -- should we use reg types Op_Vec? for vectors
+// or specify NotAMachineReg?? we don't use vectors so we probably don't
+// need them but do they do any harm?
+
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0  /*bottom*/
+};
+
+const bool Matcher::match_rule_supported(int opcode) {
+
+  // TODO 
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_StrEquals and other intrinsics
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
+int Matcher::regnum_to_fpu_offset(int regnum)
+{
+  Unimplemented();
+  return 0;
+}
+
+// Is this branch offset short enough that a short branch can be used?
+//
+// NOTE: If the platform does not provide any short branch variants, then
+//       this method should return false for offset 0.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to address of the branch.
+
+  return (-32768 <= offset && offset < 32768);
+}
+
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
+  // Probably always true, even if a temp register is required.
+  return true;
+}
+
+// true just means we have fast l2f conversion
+const bool Matcher::convL2FSupported(void) {
+  return true;
+}
+
+// Vector width in bytes.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  // TODO fixme
+  return 0;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  int max_size = max_vector_size(bt);
+  // Min size which can be loaded into vector is 4 bytes.
+  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(size,max_size);
+}
+
+// Vector ideal reg.
+const int Matcher::vector_ideal_reg(int len) {
+  // TODO fixme
+  return Op_RegD;
+}
+
+// Only lowest bits of xmm reg are used for vector shift count.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  // TODO fixme
+  return Op_RegL;
+}
+
+// AArch64 AES instructions are compatible with SunJCE expanded
+// keys, hence we do not need to pass the original key to stubs
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// x86 supports misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  // TODO fixme
+  // return !AlignVector; // can be changed by flag
+  return false;
+}
+
+// false => size gets scaled to BytesPerLong, ok.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Threshold size for cleararray.
+const int Matcher::init_array_short_size = 4 * BytesPerLong;
+
+// Use conditional move (CMOVL)
+const int Matcher::long_cmove_cost() {
+  // long cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+const int Matcher::float_cmove_cost() {
+  // float cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+// Does the CPU require late expand (see block.cpp for description of late expand)?
+const bool Matcher::require_late_expand = false;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?  True for Intel but false for most RISCs
+const bool Matcher::clone_shift_expressions = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+const bool Matcher::need_masked_shift_count = false;
+
+// This affects two different things:
+//  - how Decode nodes are matched
+//  - how ImplicitNullCheck opportunities are recognized
+// If true, the matcher will try to remove all Decodes and match them
+// (as operands) into nodes. NullChecks are not prepared to deal with 
+// Decodes by final_graph_reshaping().
+// If false, final_graph_reshaping() forces the decode behind the Cmp
+// for a NullCheck. The matcher matches the Decode node into a register.
+// Implicit_null_check optimization moves the Decode along with the 
+// memory operation back up before the NullCheck.
+bool Matcher::narrow_oop_use_complex_address() {
+  assert(UseCompressedOops, "only for compressed oops code");
+  return Universe::narrow_oop_shift() == 0;
+}
+
+// Is it better to copy float constants, or load them directly from
+// memory?  Intel can load a float constant from a direct address,
+// requiring no extra registers.  Most RISCs will have to materialize
+// an address into a register first, so they would do better to copy
+// the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no
+// fixup is needed.  Else we split the double into 2 integer pieces
+// and move it piece-by-piece.  Only happens when passing doubles into
+// C code as the Java calling convention forces doubles to be aligned.
+const bool Matcher::misaligned_doubles_ok = true;
+
+// No-op on amd64
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+  Unimplemented();
+}
+
+// Advertise here if the CPU requires explicit rounding operations to
+// implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during
+// deoptimization?
+bool Matcher::float_in_double() { return true; }
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved:
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits.
+const bool Matcher::int_in_long = true;
+
+// Return whether or not this register is ever used as an argument.
+// This function is used on startup to build the trampoline stubs in
+// generateOptoStub.  Registers not mentioned will be killed by the VM
+// call in the trampoline, and arguments in those registers will not be
+// available to the callee.
+bool Matcher::can_be_java_arg(int reg)
+{
+  return
+    reg ==  R0_num || reg == R0_H_num ||
+    reg ==  R1_num || reg == R1_H_num ||
+    reg ==  R2_num || reg == R2_H_num ||
+    reg ==  R3_num || reg == R3_H_num ||
+    reg ==  R4_num || reg == R4_H_num ||
+    reg ==  R5_num || reg == R5_H_num ||
+    reg ==  R6_num || reg == R6_H_num ||
+    reg ==  R7_num || reg == R7_H_num ||
+    reg ==  V0_num || reg == V0_H_num ||
+    reg ==  V1_num || reg == V1_H_num ||
+    reg ==  V2_num || reg == V2_H_num ||
+    reg ==  V3_num || reg == V3_H_num ||
+    reg ==  V4_num || reg == V4_H_num ||
+    reg ==  V5_num || reg == V5_H_num ||
+    reg ==  V6_num || reg == V6_H_num ||
+    reg ==  V7_num || reg == V7_H_num;
+}
+
+bool Matcher::is_spillable_arg(int reg)
+{
+  return can_be_java_arg(reg);
+}
+
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+  return false;
+}
+
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODI projection of divmodI.
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for DIVL projection of divmodL.
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODL projection of divmodL.
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return FP_REG_mask();
+}
+
+// helper for encoding java_to_runtime calls on sim
+//
+// this is needed to compute the extra arguments required when
+// planting a call to the simulator blrt instruction. the TypeFunc
+// can be queried to identify the counts for integral and floating
+// arguments and the return type
+
+static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
+{
+  int gps = 0;
+  int fps = 0;
+  const TypeTuple *domain = tf->domain();
+  int max = domain->cnt();
+  for (int i = TypeFunc::Parms; i < max; i++) {
+    const Type *t = domain->field_at(i);
+    switch(t->basic_type()) {
+    case T_FLOAT:
+    case T_DOUBLE:
+      fps++;
+    default:
+      gps++;
+    }
+  }
+  gpcnt = gps;
+  fpcnt = fps;
+  BasicType rt = tf->return_type();
+  switch (rt) {
+  case T_VOID:
+    rtype = MacroAssembler::ret_type_void;
+    break;
+  default:
+    rtype = MacroAssembler::ret_type_integral;
+    break;
+  case T_FLOAT:
+    rtype = MacroAssembler::ret_type_float;
+    break;
+  case T_DOUBLE:
+    rtype = MacroAssembler::ret_type_double;
+    break;
+  }
+}
+
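+// MOV_VOLATILE computes the effective address of a memory operand into
+// SCRATCH -- folding the base register, any index register (with its
+// scale) and the displacement via lea -- and then issues the supplied
+// acquire/release access INSN on REG against that address. It is used
+// by the volatile load/store encodings further down.
+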
+#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)	\
+  MacroAssembler _masm(&cbuf);						\
+  {									\
+    Register base = as_Register(BASE);					\
+    if (INDEX == -1) {							\
+	__ lea(SCRATCH, Address(base, DISP));				\
+    } else {								\
+	Register index_reg = as_Register(INDEX);			\
+	if (DISP == 0) {						\
+	  __ lea(SCRATCH, Address(base, index_reg, Address::lsl(SCALE))); \
+	} else {							\
+	  __ lea(SCRATCH, Address(base, DISP));				\
+	  __ lea(SCRATCH, Address(SCRATCH, index_reg, Address::lsl(SCALE))); \
+	}								\
+    }									\
+    __ INSN(REG, SCRATCH);						\
+  }
+
+typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
+typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
+
+  // Used for all non-volatile memory accesses.  The use of
+  // $mem->opcode() to discover whether this pattern uses sign-extended
+  // offsets is something of a kludge.
+  static void loadStore(MacroAssembler masm, mem_insn insn,
+			 Register reg, int opcode,
+			 Register base, int index, int size, int disp)
+  {
+    Address::extend scale;
+
+    // Hooboy, this is fugly.  We need a way to communicate to the
+    // encoder that the index needs to be sign extended, so we have to
+    // enumerate all the cases.
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
+    case INDINDEXOFFSETI2L:
+    case INDINDEXOFFSETI2LN:
+      scale = Address::sxtw(size);
+      break;
+    default:
+      scale = Address::lsl(size);
+    }
+
+    if (index == -1) {
+      (masm.*insn)(reg, Address(base, disp));
+    } else {
+      if (disp == 0) {
+	(masm.*insn)(reg, Address(base, as_Register(index), scale));
+      } else {
+	masm.lea(rscratch1, Address(base, disp));
+	(masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
+      }
+    }
+  }
+
+  static void loadStore(MacroAssembler masm, mem_float_insn insn,
+			 FloatRegister reg, int opcode,
+			 Register base, int index, int size, int disp)
+  {
+    Address::extend scale;
+
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
+      scale = Address::sxtw(size);
+      break;
+    default:
+      scale = Address::lsl(size);
+    }
+
+     if (index == -1) {
+      (masm.*insn)(reg, Address(base, disp));
+    } else {
+      if (disp == 0) {
+	(masm.*insn)(reg, Address(base, as_Register(index), scale));
+      } else {
+	masm.lea(rscratch1, Address(base, disp));
+	(masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
+      }
+    }
+  }
+
+%}
+
+
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams.  Encoding classes are parameterized macros
+// used by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction.  Operands specify their base encoding
+// interface with the interface keyword.  Four interfaces are
+// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
+// COND_INTER.  REG_INTER causes an operand to generate a function
+// which returns its register number when queried.  CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried.  MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.
+//
+// Instructions specify two basic values for encoding.  Again, a
+// function is available to check if the constant displacement is an
+// oop. They use the ins_encode keyword to specify their encoding
+// classes (which must be a sequence of enc_class names, and their
+// parameters, specified in the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.  Only the opcode sections which a particular
+// instruction needs for encoding need to be specified.
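+//
+// As a purely illustrative sketch (the operand and pipe class names
+// used here are placeholders rather than definitions taken from this
+// file), an instruct rule later in the file would wire one of the
+// enc_classes defined below in through its ins_encode clause roughly
+// as follows:
+//
+//   instruct loadB(iRegINoSp dst, memory mem)
+//   %{
+//     match(Set dst (LoadB mem));
+//     ins_encode( aarch64_enc_ldrsbw(dst, mem) );
+//     ins_pipe(pipe_class_memory);
+//   %}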
+encode %{
+  // Build emit functions for each basic byte or larger field in the
+  // intel encoding scheme (opcode, rm, sib, immediate), and call them
+  // from C++ code in the enc_class source block.  Emit functions will
+  // live in the main source block for now.  In future, we can
+  // generalize this by adding a syntax that specifies the sizes of
+  // fields in an order, so that the adlc can build the emit functions
+  // automagically
+
+  // catch all for unimplemented encodings
+  enc_class enc_unimplemented %{
+    MacroAssembler _masm(&cbuf);
+    __ unimplemented("C2 catch all");    
+  %}
+
+  // BEGIN Non-volatile memory access
+
+  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strb0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strh0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strw0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_str(iRegL src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    // we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+      MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_str0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  // END Non-volatile memory access
+
+  // this encoding writes the address of the first instruction in the
+  // call sequence for the runtime call into the anchor pc slot. this
+  // address allows the runtime to i) locate the code buffer for the
+  // caller (any address in the buffer would do) and ii) find the oop
+  // map associated with the call (has to address the instruction
+  // following the call). note that we have to store the address which
+  // follows the actual call.
+  // 
+  // the offset from the current pc can be computed by considering
+  // what gets generated between this point up to and including the
+  // call. it looks like this
+  //
+  //   movz xscratch1 0xnnnn        <-- current pc is here
+  //   movk xscratch1 0xnnnn
+  //   movk xscratch1 0xnnnn
+  //   str xscratch1, [xthread,#anchor_pc_off]
+  //   mov xscratch2, sp
+  //   str xscratch2, [xthread,#anchor_sp_off]
+  //   mov x0, x1
+  //   . . .
+  //   mov xn-1, xn
+  //   mov xn, thread            <-- always passed
+  //   mov xn+1, rfp             <-- optional iff primary == 1
+  //   movz xscratch1 0xnnnn
+  //   movk xscratch1 0xnnnn
+  //   movk xscratch1 0xnnnn
+  //   blrt xscratch1
+  //   . . .
+  //
+  // where the called routine has n args (including the thread and,
+  // possibly the stub's caller return address currently in rfp).  we
+  // can compute n by looking at the number of args passed into the
+  // stub. we assert that nargs is <= 8.
+  //
+  // so the offset we need to add to the pc (in 32-bit words) is
+  //   3 +        <-- load 48-bit constant return pc
+  //   1 +        <-- write anchor pc
+  //   1 +        <-- copy sp
+  //   1 +        <-- write anchor sp
+  //   nargs +    <-- java stub arg count
+  //   1 +        <-- extra thread arg
+  // [ 1 + ]      <-- optional ret address of stub caller
+  //   3 +        <-- load 64 bit call target address
+  //   1          <-- blrt instruction
+  //
+  // i.e. we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
+  //
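+  // as a worked illustration only (the argument count is hypothetical):
+  // for a stub with nargs == 4 and primary == 0 the sequence above is
+  //   3 + 1 + 1 + 1 + 4 + 1 + 3 + 1 = 15 instructions
+  // so the encoding below computes call_offset = (4 + 11) * 4 = 60 bytes
+  //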
+
+  enc_class aarch64_enc_save_pc() %{
+    Compile* C = ra_->C;
+    int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
+    if ($primary) { nargs++; }
+    assert(nargs <= 8, "opto runtime stub has more than 8 args!");
+    MacroAssembler _masm(&cbuf);
+    address pc = __ pc();
+    int call_offset = (nargs + 11) * 4;
+    int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
+                       in_bytes(JavaFrameAnchor::last_Java_pc_offset());
+    __ lea(rscratch1, InternalAddress(pc + call_offset));
+    __ str(rscratch1, Address(rthread, field_offset));
+  %}
+
+  // volatile loads and stores
+
+  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlrb);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlrh);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlrw);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+
+  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarb);
+    __ sxtbw(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarb);
+    __ sxtb(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarb);
+  %}
+
+  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarb);
+  %}
+
+  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarh);
+    __ sxthw(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarh);
+    __ sxth(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarh);
+  %}
+
+  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarh);
+  %}
+
+  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarw);
+  %}
+
+  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarw);
+  %}
+
+  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldar);
+  %}
+
+  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
+    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldarw);
+    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
+  %}
+
+  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
+    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+	     rscratch1, ldar);
+    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
+  %}
+
+  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    // we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+ 	MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlr);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
+    {
+      MacroAssembler _masm(&cbuf);
+      FloatRegister src_reg = as_FloatRegister($src$$reg);
+      __ fmovs(rscratch2, src_reg);
+    }
+    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlrw);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
+    {
+      MacroAssembler _masm(&cbuf);
+      FloatRegister src_reg = as_FloatRegister($src$$reg);
+      __ fmovd(rscratch2, src_reg);
+    }
+    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+		 rscratch1, stlr);
+    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
+      __ dmb(__ ISH);
+  %}
+
+  // synchronized read/update encodings
+
+  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {      
+        __ lea(rscratch1, Address(base, disp));
+        __ ldaxr(dst_reg, rscratch1);
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        __ ldaxr(dst_reg, base);
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
+        __ ldaxr(dst_reg, rscratch1);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
+	__ ldaxr(dst_reg, rscratch1);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register src_reg = as_Register($src$$reg);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {      
+        __ lea(rscratch2, Address(base, disp));
+        __ stlxr(rscratch1, src_reg, rscratch2);
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        __ stlxr(rscratch1, src_reg, base);
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        __ stlxr(rscratch1, src_reg, rscratch2);
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+	__ stlxr(rscratch1, src_reg, rscratch2);
+      }
+    }
+    __ cmpw(rscratch1, zr);
+  %}
+
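+  // compare and swap encodings
+  //
+  // these spin on an ldaxr/stlxr loop until either the store-exclusive
+  // succeeds or the loaded value no longer matches oldval; on exit the
+  // flags are EQ exactly when the swap succeeded (see aarch64_enc_cset_eq
+  // below) and the trailing membar supplies full-fence semantics
+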
+  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {      
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldaxr(rscratch1, addr_reg);
+    __ cmp(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxr(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+    __ membar(__ AnyAny);
+  %}
+
+  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {      
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldaxrw(rscratch1, addr_reg);
+    __ cmpw(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxrw(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+    __ membar(__ AnyAny);
+  %}
+
+  // auxiliary used for CompareAndSwapX to set result register
+  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
+    MacroAssembler _masm(&cbuf);
+    Register res_reg = as_Register($res$$reg);
+    __ cset(res_reg, Assembler::EQ);
+  %}
+
+  // prefetch encodings
+
+  enc_class aarch64_enc_prefetchr(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PLDL1KEEP);
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_prefetchw(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PSTL1KEEP);
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_prefetchnta(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PSTL1STRM);
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
+      }
+    }
+  %}
+
+  // mov encodings
+
+  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
+    MacroAssembler _masm(&cbuf);
+    u_int32_t con = (u_int32_t)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    if (con == 0) {
+      __ movw(dst_reg, zr);
+    } else {
+      __ movw(dst_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    u_int64_t con = (u_int64_t)$src$$constant;
+    if (con == 0) {
+      __ mov(dst_reg, zr);
+    } else {
+      __ mov(dst_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL || con == (address)1) {
+      ShouldNotReachHere();
+    } else {
+      // !!! FIXME AARCH64
+      // when and why do we get a non oop value here?
+      if ($src->constant_is_oop()) {
+        __ movoop(dst_reg, (jobject)con);
+      } else {
+        __ movptr(dst_reg, (uintptr_t)con);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, zr);
+  %}
+
+  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, (u_int64_t)1);
+  %}
+
+  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
+    MacroAssembler _masm(&cbuf);
+    address page = (address)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    unsigned long off;
+    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
+    assert(off == 0, "assumed offset == 0");
+  %}
+
+  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
+    MacroAssembler _masm(&cbuf);
+     __ load_byte_map_base($dst$$Register);
+  %}
+
+  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      ShouldNotReachHere();
+    } else {
+      assert($src->constant_is_oop(), "unexpected constant type");
+      __ set_narrow_oop(dst_reg, (jobject)con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, zr);
+  %}
+
+  // arithmetic encodings
+
+  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src_reg = as_Register($src1$$reg);
+    int32_t con = (int32_t)$src2$$constant;
+    // add has primary == 0, subtract has primary == 1
+    if ($primary) { con = -con; }
+    if (con < 0) {
+      __ subw(dst_reg, src_reg, -con);
+    } else {
+      __ addw(dst_reg, src_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src_reg = as_Register($src1$$reg);
+    int32_t con = (int32_t)$src2$$constant;
+    // add has primary == 0, subtract has primary == 1
+    if ($primary) { con = -con; }
+    if (con < 0) {
+      __ sub(dst_reg, src_reg, -con);
+    } else {
+      __ add(dst_reg, src_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
+  %}
+
+  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
+  %}
+
+  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
+  %}
+
+  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
+  %}
+
+  // compare instruction encodings
+
+  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmpw(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src1$$reg);
+    int32_t val = $src2$$constant;
+    if (val >= 0) {
+      __ subsw(zr, reg, val);
+    } else {
+      __ addsw(zr, reg, -val);
+    }
+  %}
+
+  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    u_int32_t val = (u_int32_t)$src2$$constant;
+    __ movw(rscratch1, val);
+    __ cmpw(reg1, rscratch1);
+  %}
+
+  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmp(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src1$$reg);
+    int64_t val = $src2$$constant;
+    if (val >= 0) {
+      __ subs(zr, reg, val);
+    } else if (val != -val) {
+      __ adds(zr, reg, -val);
+    } else {
+    // aargh, Long.MIN_VALUE is a special case
+      __ orr(rscratch1, zr, (u_int64_t)val);
+      __ subs(zr, reg, rscratch1);
+    }
+  %}
+
+  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    u_int64_t val = (u_int64_t)$src2$$constant;
+    __ mov(rscratch1, val);
+    __ cmp(reg1, rscratch1);
+  %}
+
+  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmp(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmpw(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_testp(iRegP src) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src$$reg);
+    __ cmp(reg, zr);
+  %}
+
+  enc_class aarch64_enc_testn(iRegN src) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src$$reg);
+    __ cmpw(reg, zr);
+  %}
+
+  enc_class aarch64_enc_b(label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ b(*L);
+  %}
+
+  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
+  %}
+
+  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
+  %}
+
+  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
+  %{
+     Register sub_reg = as_Register($sub$$reg);
+     Register super_reg = as_Register($super$$reg);
+     Register temp_reg = as_Register($temp$$reg);
+     Register result_reg = as_Register($result$$reg);
+
+     Label miss;
+     MacroAssembler _masm(&cbuf);
+     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
+     if ($primary) {
+       __ mov(result_reg, zr);
+     }
+     __ bind(miss);
+  %}
+
+  enc_class aarch64_enc_java_static_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    address mark = __ pc();
+    address addr = (address)$meth$$method;
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+    } else if (_optimized_virtual) {
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
+    } else {
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
+    }
+
+    if (_method) {
+      // Emit stub for static call
+      emit_java_to_interp(cbuf, mark);
+    }
+  %}
+
+  enc_class aarch64_enc_java_handle_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    // RFP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP.
+    __ mov(rfp, sp);
+
+    address mark = __ pc();
+    address addr = (address)$meth$$method;
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+    } else if (_optimized_virtual) {
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
+    } else {
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
+    }
+
+    if (_method) {
+      // Emit stub for static call
+      emit_java_to_interp(cbuf, mark);
+    }
+
+    // now restore sp
+    __ mov(sp, rfp);
+  %}
+
+  enc_class aarch64_enc_java_dynamic_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+    address entry = (address)$meth$$method;
+    __ ic_call(entry);
+  %}
+
+  enc_class aarch64_enc_call_epilog() %{
+    MacroAssembler _masm(&cbuf);
+    if (VerifyStackAtCalls) {
+      // Check that stack depth is unchanged: find majik cookie on stack
+      __ call_Unimplemented();
+    }
+  %}
+
+  enc_class aarch64_enc_java_to_runtime(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    // some calls to generated routines (arraycopy code) are scheduled
+    // by C2 as runtime calls. if so we can call them using a br (they
+    // will be in a reachable segment) otherwise we have to use a blrt
+    // which loads the absolute address into a register.
+    address entry = (address)$meth$$method;
+    CodeBlob *cb = CodeCache::find_blob(entry);
+    if (cb) {
+      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
+    } else {
+      int gpcnt;
+      int fpcnt;
+      int rtype;
+      getCallInfo(tf(), gpcnt, fpcnt, rtype);
+      __ lea(rscratch1, RuntimeAddress(entry));
+      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
+    }
+  %}
+
+  enc_class aarch64_enc_rethrow() %{
+    MacroAssembler _masm(&cbuf);
+    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
+  %}
+
+  enc_class aarch64_enc_ret() %{
+    MacroAssembler _masm(&cbuf);
+    __ ret(lr);
+  %}
+
+  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Register target_reg = as_Register($jump_target$$reg);
+    __ br(target_reg);
+  %}
+
+  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Register target_reg = as_Register($jump_target$$reg);
+    // exception oop should be in r0
+    // ret addr has been popped into lr
+    // callee expects it in r3
+    __ mov(r3, lr);
+    __ br(target_reg);
+  %}
+
+  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+    Label cas_failed;
+
+    assert_different_registers(oop, box, tmp, disp_hdr);
+
+    // Load markOop from object into displaced_header.
+    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ cmp(oop, zr);
+      return;
+    }
+    
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
+    }
+
+    // Handle existing monitor
+    if ((EmitSync & 0x02) == 0) {
+      // we can use AArch64's bit test and branch here but
+      // markOopDesc does not define a bit index, just the bit value,
+      // so assert in case the bit pos changes
+#     define __monitor_value_log2 1
+      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
+      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
+#     undef __monitor_value_log2
+    }
+
+    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
+    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
+
+    // Load Compare Value application register.
+
+    // Initialize the box. (Must happen before we update the object mark!)
+    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Compare object markOop with mark and if equal exchange scratch1
+    // with object markOop.
+    // Note that this is simply a CAS: it does not generate any
+    // barriers.  These are separately generated by
+    // membar_acquire_lock().
+    {
+      Label retry_load;
+      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+        __ prfm(Address(oop), PSTL1STRM);
+      __ bind(retry_load);
+      __ ldxr(tmp, oop);
+      __ cmp(tmp, disp_hdr);
+      __ br(Assembler::NE, cas_failed);
+      // use stlxr to ensure update is immediately visible
+      __ stlxr(tmp, box, oop);
+      __ cbzw(tmp, cont);
+      __ b(retry_load);
+    }
+
+    // Formerly:
+    // __ cmpxchgptr(/*oldv=*/disp_hdr,
+    //               /*newv=*/box,
+    //               /*addr=*/oop,
+    //               /*tmp=*/tmp,
+    //               cont,
+    //               /*fail*/NULL);
+
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    // If the compare-and-exchange succeeded, then we found an unlocked
+    // object, have now locked it, and will continue at label cont.
+
+    __ bind(cas_failed);
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the
+    // markOop of object (disp_hdr) with the stack pointer.
+    __ mov(rscratch1, sp);
+    __ sub(disp_hdr, disp_hdr, rscratch1);
+    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+    // If the condition is true we are done (will succeed at cont) and hence we can store 0 as the
+    // displaced header in the box, which indicates that it is a recursive lock.
+    __ ands(tmp/*==0?*/, disp_hdr, tmp);
+    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ b(cont);
+
+      __ bind(object_has_monitor);
+      // The object's monitor m is unlocked iff m->owner == NULL,
+      // otherwise m->owner may contain a thread or a stack address.
+      //
+      // Try to CAS m->owner from NULL to current thread.
+      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
+      __ mov(disp_hdr, zr);
+
+      {
+	Label retry_load, fail;
+        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(tmp), PSTL1STRM);
+	__ bind(retry_load);
+	__ ldxr(rscratch1, tmp);
+	__ cmp(disp_hdr, rscratch1);
+	__ br(Assembler::NE, fail);
+        // use stlxr to ensure update is immediately visible
+	__ stlxr(rscratch1, rthread, tmp);
+	__ cbnzw(rscratch1, retry_load);
+	__ bind(fail);
+      }
+
+      // Label next;
+      // __ cmpxchgptr(/*oldv=*/disp_hdr,
+      //               /*newv=*/rthread,
+      //               /*addr=*/tmp,
+      //               /*tmp=*/rscratch1,
+      //               /*succeed*/next,
+      //               /*fail*/NULL);
+      // __ bind(next);
+
+      // store a non-null value into the box.
+      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+      // PPC port checks the following invariants
+      // #ifdef ASSERT
+      // bne(flag, cont);
+      // We have acquired the monitor, check some invariants.
+      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
+      // Invariant 1: _recursions should be 0.
+      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
+      //                        "monitor->_recursions should be 0", -1);
+      // Invariant 2: OwnerIsThread shouldn't be 0.
+      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
+      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
+      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
+      // #endif
+    }
+
+    __ bind(cont);
+    // flag == EQ indicates success
+    // flag == NE indicates failure
+
+  %}
+
+  // TODO
+  // reimplement this with custom cmpxchgptr code
+  // which avoids some of the unnecessary branching
+  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+    Label cas_failed;
+
+    assert_different_registers(oop, box, tmp, disp_hdr);
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ cmp(oop, zr); // Oop can't be 0 here => always false.
+      return;
+    }
+
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      __ biased_locking_exit(oop, tmp, cont);
+    }
+
+    // Find the lock address and load the displaced header from the stack.
+    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // If the displaced header is 0, we have a recursive unlock.
+    __ cmp(disp_hdr, zr);
+    __ br(Assembler::EQ, cont);
+
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
+    }
+
+    // Check if it is still a lightweight lock; this is true if we
+    // see the stack address of the basicLock in the markOop of the
+    // object.
+
+      {
+	Label retry_load;
+        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(oop), PSTL1STRM);
+	__ bind(retry_load);
+	__ ldxr(tmp, oop);
+	__ cmp(box, tmp);
+	__ br(Assembler::NE, cas_failed);
+        // use stlxr to ensure update is immediately visible
+	__ stlxr(tmp, disp_hdr, oop);
+	__ cbzw(tmp, cont);
+	__ b(retry_load);
+      }
+
+    // __ cmpxchgptr(/*compare_value=*/box,
+    //               /*exchange_value=*/disp_hdr,
+    //               /*where=*/oop,
+    //               /*result=*/tmp,
+    //               cont,
+    //               /*cas_failed*/NULL);
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    __ bind(cas_failed);
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ b(cont);
+
+      __ bind(object_has_monitor);
+      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
+      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
+      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
+      __ cmp(rscratch1, zr);
+      __ br(Assembler::NE, cont);
+
+      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
+      __ cmp(rscratch1, zr);
+      __ br(Assembler::NE, cont);
+      // need a release store here
+      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ stlr(rscratch1, tmp);
+    }
+
+    __ bind(cont);
+    // flag == EQ indicates success
+    // flag == NE indicates failure
+  %}
+
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+//  S T A C K   L A Y O U T    Allocators stack-slot number
+//                             |   (to get allocators register number
+//  G  Owned by    |        |  v    add OptoReg::stack0())
+//  r   CALLER     |        |
+//  o     |        +--------+      pad to even-align allocators stack-slot
+//  w     V        |  pad0  |        numbers; owned by CALLER
+//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
+//  h     ^        |   in   |  5
+//        |        |  args  |  4   Holes in incoming args owned by SELF
+//  |     |        |        |  3
+//  |     |        +--------+
+//  V     |        | old out|      Empty on Intel, window on Sparc
+//        |    old |preserve|      Must be even aligned.
+//        |     SP-+--------+----> Matcher::_old_SP, even aligned
+//        |        |   in   |  3   area for Intel ret address
+//     Owned by    |preserve|      Empty on Sparc.
+//       SELF      +--------+
+//        |        |  pad2  |  2   pad to align old SP
+//        |        +--------+  1
+//        |        | locks  |  0
+//        |        +--------+----> OptoReg::stack0(), even aligned
+//        |        |  pad1  | 11   pad to align new SP
+//        |        +--------+
+//        |        |        | 10
+//        |        | spills |  9   spills
+//        V        |        |  8   (pad0 slot for callee)
+//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
+//        ^        |  out   |  7
+//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
+//     Owned by    +--------+
+//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
+//        |    new |preserve|      Must be even-aligned.
+//        |     SP-+--------+----> Matcher::_new_SP, even aligned
+//        |        |        |
+//
+// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
+//         known from SELF's arguments and the Java calling convention.
+//         Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+//         area, those holes are owned by SELF.  Holes in the outgoing area
+//         are owned by the CALLEE.  Holes should not be necessary in the
+//         incoming area, as the Java calling convention is completely under
+//         the control of the AD file.  Doubles can be sorted and packed to
+//         avoid holes.  Holes in the outgoing arguments may be necessary for
+//         varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
+//         even aligned with pad0 as needed.
+//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
+//           (the latter is true on Intel but is it false on AArch64?)
+//         region 6-11 is even aligned; it may be padded out more so that
+//         the region from SP to FP meets the minimum stack alignment.
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
+//         alignment.  Region 11, pad1, may be dynamically extended so that
+//         SP meets the minimum alignment.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for C & Java)
+  stack_direction(TOWARDS_LOW);
+
+  // These three registers define part of the calling convention
+  // between compiled code and the interpreter.
+
+  // Inline Cache Register or methodOop for I2C.
+  inline_cache_reg(R12);
+
+  // Method Oop Register when calling interpreter.
+  interpreter_method_oop_reg(R12);
+
+  // Number of stack slots consumed by locking an object
+  sync_stack_slots(2);
+
+  // Compiled code's Frame Pointer
+  frame_pointer(R31);
+
+  // Interpreter stores its frame pointer in a register which is
+  // stored to the stack by I2CAdaptors.
+  // I2CAdaptors convert from interpreted java to compiled java.
+  interpreter_frame_pointer(R29);
+
+  // Stack alignment requirement
+  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
+  // Number of stack slots between incoming argument block and the start of
+  // a new frame.  The PROLOG must add this many slots to the stack.  The
+  // EPILOG must remove this many slots. aarch64 needs two slots for
+  // return address and fp.
+  // TODO think this is correct but check
+  in_preserve_stack_slots(4);
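+  // (stack slots are 32-bit VMReg slots, so the two 64-bit saves -- fp and
+  // the return address -- account for the 4 slots reserved above)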
+
+  // Number of outgoing stack slots killed above the out_preserve_stack_slots
+  // for calls to C.  Supports the var-args backing area for register parms.
+  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
+
+  // The after-PROLOG location of the return address.  Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+  // Otherwise, it is above the locks and verification slot and alignment word
+  // TODO this may well be correct but need to check why that - 2 is there
+  // ppc port uses 0 but we definitely need to allow for fixed_slots
+  // which folds in the space used for monitors
+  return_addr(STACK - 2 +
+              round_to((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
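+  // Worked example: with no fixed_slots and StackAlignmentInBytes == 16
+  // (i.e. 4 slots), this evaluates to STACK - 2 + round_to(4, 4), i.e.
+  // STACK + 2 (illustration only; see the TODO above about the - 2).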
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots.  Passed an array
+  // of ideal registers called "sig" and a "length" count.  Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE.  Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+
+  calling_convention
+  %{
+    // No difference between ingoing/outgoing just pass false
+    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+  %}
+
+  c_calling_convention
+  %{
+    // This is obviously always outgoing
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
+  %}
+
+  // Location of compiled Java return values.  Same as C for now.
+  return_value
+  %{
+    // TODO do we allow ideal_reg == Op_RegN???
+    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
+           "only return normal values");
+
+    static const int lo[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      R0_num,                            // Op_RegN
+      R0_num,                            // Op_RegI
+      R0_num,                            // Op_RegP
+      V0_num,                            // Op_RegF
+      V0_num,                            // Op_RegD
+      R0_num                             // Op_RegL
+    };
+  
+    static const int hi[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      OptoReg::Bad,                       // Op_RegN
+      OptoReg::Bad,                      // Op_RegI
+      R0_H_num,                          // Op_RegP
+      OptoReg::Bad,                      // Op_RegF
+      V0_H_num,                          // Op_RegD
+      R0_H_num                           // Op_RegL
+    };
+
+    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
+  %}
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);        // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(INSN_COST); // Required cost attribute
+ins_attrib ins_size(32);        // Required size attribute (in bits)
+ins_attrib ins_short_branch(0); // Required flag: is this instruction
+                                // a non-matching short branch variant
+                                // of some long branch?
+ins_attrib ins_alignment(4);    // Required alignment attribute (must
+                                // be a power of 2) specifies the
+                                // alignment that some part of the
+                                // instruction (not necessarily the
+                                // start) requires.  If > 1, a
+                                // compute_padding() function must be
+                                // provided for the instruction
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+
+// Integer operands 32 bit
+// 32 bit immediate
+operand immI()
+%{
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit zero
+operand immI0()
+%{
+  predicate(n->get_int() == 0);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit increment
+operand immI_1()
+%{
+  predicate(n->get_int() == 1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit decrement
+operand immI_M1()
+%{
+  predicate(n->get_int() == -1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_le_4()
+%{
+  predicate(n->get_int() <= 4);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_31()
+%{
+  predicate(n->get_int() == 31);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_8()
+%{
+  predicate(n->get_int() == 8);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_16()
+%{
+  predicate(n->get_int() == 16);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_24()
+%{
+  predicate(n->get_int() == 24);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_32()
+%{
+  predicate(n->get_int() == 32);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_48()
+%{
+  predicate(n->get_int() == 48);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_56()
+%{
+  predicate(n->get_int() == 56);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_64()
+%{
+  predicate(n->get_int() == 64);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_255()
+%{
+  predicate(n->get_int() == 255);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_65535()
+%{
+  predicate(n->get_int() == 65535);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_63()
+%{
+  predicate(n->get_int() == 63);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_255()
+%{
+  predicate(n->get_int() == 255);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_65535()
+%{
+  predicate(n->get_long() == 65535L);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_4294967295()
+%{
+  predicate(n->get_long() == 4294967295L);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
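+// The two bitmask operands below accept constants of the form (1 << k) - 1,
+// i.e. contiguous low-order bit masks such as 0xff or 0xffff: the predicate
+// requires value + 1 to be a power of two and the top bits to be clear.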
+operand immL_bitmask()
+%{
+  predicate(((n->get_long() & 0xc000000000000000l) == 0)
+	    && is_power_of_2(n->get_long() + 1));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_bitmask()
+%{
+  predicate(((n->get_int() & 0xc0000000) == 0)
+	    && is_power_of_2(n->get_int() + 1));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Scale values for scaled offset addressing modes (up to long but not quad)
+operand immIScale()
+%{
+  predicate(0 <= n->get_int() && (n->get_int() <= 3));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 26 bit signed offset -- for pc-relative branches
+operand immI26()
+%{
+  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 19 bit signed offset -- for pc-relative loads
+operand immI19()
+%{
+  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 12 bit unsigned offset -- for base plus immediate loads
+operand immIU12()
+%{
+  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLU12()
+%{
+  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Offset for scaled or unscaled immediate loads and stores
+operand immIOffset()
+%{
+  predicate(Address::offset_ok_for_immed(n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLoffset()
+%{
+  predicate(Address::offset_ok_for_immed(n->get_long()));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for add sub immediate
+operand immIAddSub()
+%{
+  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unsigned integer valid for logical immediate
+// TODO -- check this is right when e.g. the mask is 0x80000000
+operand immILog()
+%{
+  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer operands 64 bit
+// 64 bit immediate
+operand immL()
+%{
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit zero
+operand immL0()
+%{
+  predicate(n->get_long() == 0);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit unit increment
+operand immL_1()
+%{
+  predicate(n->get_long() == 1);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit unit decrement
+operand immL_M1()
+%{
+  predicate(n->get_long() == -1);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit offset of pc in thread anchor
+
+operand immL_pc_off()
+%{
+  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
+                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for add sub immediate
+operand immLAddSub()
+%{
+  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for logical immediate
+operand immLLog()
+%{
+  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+operand immL_32bits()
+%{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer operands
+// Pointer Immediate
+operand immP()
+%{
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate
+operand immP0()
+%{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate One
+// this is used in object initialization (initial object header)
+operand immP_1()
+%{
+  predicate(n->get_ptr() == 1);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Polling Page Pointer Immediate
+operand immPollPage()
+%{
+  predicate((address)n->get_ptr() == os::get_polling_page());
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Card Table Byte Map Base
+operand immByteMapBase()
+%{
+  // Get base of card map
+  predicate((jbyte*)n->get_ptr() ==
+	((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate Minus One
+// this is used when we want to write the current PC to the thread anchor
+operand immP_M1()
+%{
+  predicate(n->get_ptr() == -1);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate Minus Two
+// this is used when we want to write the current PC to the thread anchor
+operand immP_M2()
+%{
+  predicate(n->get_ptr() == -2);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float and Double operands
+// Double Immediate
+operand immD()
+%{
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// constant 'double +0.0'.
+operand immD0()
+%{
+  predicate((n->getd() == 0) &&
+            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double immediate that fits the packed (fmov) immediate encoding
+operand immDPacked()
+%{
+  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF()
+%{
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// constant 'float +0.0'.
+operand immF0()
+%{
+  predicate((n->getf() == 0) &&
+            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float immediate that fits the packed (fmov) immediate encoding
+operand immFPacked()
+%{
+  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow pointer operands
+// Narrow Pointer Immediate
+operand immN()
+%{
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0()
+%{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer 32 bit Register Operands
+// Integer 32 bit Register (excludes SP)
+operand iRegI()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 32 bit Register not Special
+operand iRegINoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register Operands
+// Integer 64 bit Register (includes SP)
+operand iRegL()
+%{
+  constraint(ALLOC_IN_RC(any_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register not Special
+operand iRegLNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg));
+  match(RegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Pointer Register
+operand iRegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  match(iRegP_R0);
+  //match(iRegP_R2);
+  //match(iRegP_R4);
+  //match(iRegP_R5);
+  match(thread_RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register not Special
+operand iRegPNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_ptr_reg));
+  match(RegP);
+  // match(iRegP);
+  // match(iRegP_R0);
+  // match(iRegP_R2);
+  // match(iRegP_R4);
+  // match(iRegP_R5);
+  // match(thread_RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R0 only
+operand iRegP_R0()
+%{
+  constraint(ALLOC_IN_RC(r0_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R1 only
+operand iRegP_R1()
+%{
+  constraint(ALLOC_IN_RC(r1_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R2 only
+operand iRegP_R2()
+%{
+  constraint(ALLOC_IN_RC(r2_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R3 only
+operand iRegP_R3()
+%{
+  constraint(ALLOC_IN_RC(r3_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R4 only
+operand iRegP_R4()
+%{
+  constraint(ALLOC_IN_RC(r4_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R5 only
+operand iRegP_R5()
+%{
+  constraint(ALLOC_IN_RC(r5_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R10 only
+operand iRegP_R10()
+%{
+  constraint(ALLOC_IN_RC(r10_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R11 only
+operand iRegL_R11()
+%{
+  constraint(ALLOC_IN_RC(r11_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register FP only
+operand iRegP_FP()
+%{
+  constraint(ALLOC_IN_RC(fp_reg));
+  match(RegP);
+  // match(iRegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R0 only
+operand iRegI_R0()
+%{
+  constraint(ALLOC_IN_RC(int_r0_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R2 only
+operand iRegI_R2()
+%{
+  constraint(ALLOC_IN_RC(int_r2_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R4 only
+operand iRegI_R4()
+%{
+  constraint(ALLOC_IN_RC(int_r4_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Narrow Pointer Register
+operand iRegN()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegN);
+  match(iRegNNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow Pointer Register not Special
+operand iRegNNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// heap base register -- used for encoding immN0
+
+operand iRegIHeapbase()
+%{
+  constraint(ALLOC_IN_RC(heapbase_reg));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float Register
+// Float register operands
+operand vRegF()
+%{
+  constraint(ALLOC_IN_RC(float_reg));
+  match(RegF);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double Register
+// Double register operands
+operand vRegD()
+%{
+  constraint(ALLOC_IN_RC(double_reg));
+  match(RegD);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of signed compare instructions
+
+// note that on AArch64 we also use this register as the output
+// for floating point compare instructions (CmpF CmpD). this ensures
+// that ordered inequality tests use GT, GE, LT or LE, none of which
+// pass through cases where the result is unordered, i.e. one or both
+// inputs to the compare are NaN. this means that the ideal code can
+// replace e.g. a GT with an LE and not end up capturing the NaN case
+// (where the comparison should always fail). EQ and NE tests are
+// always generated in ideal code so that unordered folds into the NE
+// case, matching the behaviour of AArch64 NE.
+//
+// This differs from x86 where the outputs of FP compares use a
+// special FP flags registers and where compares based on this
+// register are distinguished into ordered inequalities (cmpOpUCF) and
+// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
+// to explicitly handle the unordered case in branches. x86 also has
+// to include extra CMoveX rules to accept a cmpOpUCF input.
+
+operand rFlagsReg()
+%{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  op_cost(0);
+  format %{ "RFLAGS" %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of unsigned compare instructions
+operand rFlagsRegU()
+%{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  op_cost(0);
+  format %{ "RFLAGSU" %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register
+operand inline_cache_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
+  match(reg);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
+  match(reg);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Thread Register
+operand thread_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
+  match(reg);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand lr_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
+  match(reg);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+//----------Memory Operands----------------------------------------------------
+
+operand indirect(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(reg);
+  op_cost(0);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (ConvI2L ireg)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg, $off I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg (LShiftL (ConvI2L ireg) scale));
+  op_cost(0);
+  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg (LShiftL lreg scale));
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale)" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndex(iRegP reg, iRegL lreg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg lreg);
+  op_cost(0);
+  format %{ "$reg, $lreg" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indOffI(iRegP reg, immIOffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffL(iRegP reg, immLoffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
+operand indirectN(iRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+  op_cost(0);
+  format %{ "[$reg]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
+  op_cost(0);
+  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexN(iRegN reg, iRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+  op_cost(0);
+  format %{ "$reg, $lreg\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indOffIN(iRegN reg, immIOffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN(iRegN reg, immLoffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
+
+// AArch64 opto stubs need to write to the pc slot in the thread anchor
+operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+//----------Special Memory Operands--------------------------------------------
+// Stack Slot Operand - This operand is used for loading and storing temporary
+//                      values on the stack where a match requires a value to
+//                      flow through memory.
+operand stackSlotP(sRegP reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(100);
+  // No match rule because this operand is only generated in matching
+  // match(RegP);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotI(sRegI reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegI);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegF);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegD);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegL);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+//       the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
+
+// used for signed integral comparisons and fp comparisons
+
+operand cmpOp()
+%{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    not_equal(0x1, "ne");
+    less(0xb, "lt");
+    greater_equal(0xa, "ge");
+    less_equal(0xd, "le");
+    greater(0xc, "gt");
+  %}
+%}
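+// For example, an ideal subtree (Bool (CmpI a b) lt): the CmpI rule
+// produces the flags in rFlagsReg, the Bool matches cmpOp and supplies
+// the "lt" encoding (0xb) above, and the branch or conditional-move
+// instruction that matches both consumes the pair.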
+
+// used for unsigned integral comparisons
+
+operand cmpOpU()
+%{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    not_equal(0x1, "ne");
+    less(0x3, "lo");
+    greater_equal(0x2, "hs");
+    less_equal(0x9, "ls");
+    greater(0x8, "hi");
+  %}
+%}
+
+// Special operand allowing long args to int ops to be truncated for free
+
+operand iRegL2I(iRegL reg) %{
+
+  op_cost(0);
+
+  match(ConvL2I reg);
+
+  format %{ "l2i($reg)" %}
+
+  interface(REG_INTER)
+%}
+
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+
+// memory is used to define read/write location for load/store
+// instruction defs. we can turn a memory op into an Address
+
+opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
+               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
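+// For example, a single load rule written against "memory" can match a
+// plain [$reg] address via indirect, a [$reg, #imm] address via indOffI,
+// or a [$reg, $lreg, lsl #s] address via indIndexScaled, without a
+// separate instruct definition for each addressing mode.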
+
+// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
+// operations. it allows the src to be either an iRegI or a (ConvL2I
+// iRegL). in the latter case the l2i normally planted for a ConvL2I
+// can be elided because the 32-bit instruction will just employ the
+// lower 32 bits anyway.
+//
+// n.b. this does not elide all L2I conversions. if the truncated
+// value is consumed by more than one operation then the ConvL2I
+// cannot be bundled into the consuming nodes so an l2i gets planted
+// (actually a movw $dst $src) and the downstream instructions consume
+// the result of the l2i as an iRegI input. That's a shame since the
+// movw is actually redundant but it's not too costly.
+
+
+opclass iRegIorL2I(iRegI, iRegL2I);
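+// For example, a 32-bit add whose second input is (ConvL2I x) can bind
+// the long's register directly through iRegL2I, so a single addw that
+// reads the low 32 bits is emitted instead of an l2i (movw) followed by
+// the addw.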
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architecture's pipeline.
+// Integer ALU reg operation
+pipeline %{
+
+attributes %{
+  // ARM instructions are of fixed length
+  fixed_size_instructions;        // Fixed size instructions TODO does
+  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
+  // ARM instructions come in 32-bit word units
+  instruction_unit_size = 4;         // An instruction is 4 bytes long
+  instruction_fetch_unit_size = 64;  // The processor fetches one line
+  instruction_fetch_units = 1;       // of 64 bytes
+
+  // List of nop instructions
+  nops( MachNop );
+%}
+
+// We don't use an actual pipeline model so don't care about resources
+// or description. we do use pipeline classes to introduce fixed
+// latencies
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine
+
+resources( INS0, INS1, INS01 = INS0 | INS1,
+           ALU0, ALU1, ALU = ALU0 | ALU1,
+           MAC,
+           DIV,
+           BRANCH,
+           LDST,
+           NEON_FP);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+// Generic P2/P3 pipeline
+pipe_desc(ISS, EX1, EX2, WR);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in EX1, result generated in EX2
+// Eg.	ADD	x0, x1, x2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  INS01  : ISS; // Dual issue as instruction 0 or 1
+  ALU    : EX2;
+%}
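+// Read the class above as: the result is written in EX2, both sources
+// are read in EX1, the instruction can occupy either issue slot (INS01)
+// and uses an ALU in EX2.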
+
+// Integer ALU reg-reg operation with constant shift
+// Shifted register must be available in LATE_ISS instead of EX1
+// Eg.	ADD	x0, x1, x2, LSL #2
+pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg operation with constant shift
+// Eg.	LSL	x0, x1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// Both operands must be available in LATE_ISS instead of EX1
+// Result is available in EX1 instead of EX2
+// Eg.	LSLV	x0, x1, x2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+// Integer ALU reg-reg operation with extract
+// As for _vshift above, but result generated in EX2
+// Eg.	EXTR	x0, x1, x2, #N
+pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS1   : ISS; // Can only dual issue as Instruction 1
+  ALU    : EX1;
+%}
+
+// Integer ALU reg operation
+// Eg.	NEG	x0, x1
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg immediate operation
+// Eg.	ADD	x0, x1, #N
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU immediate operation (no source operands)
+// Eg.	MOV	x0, #N
+pipe_class ialu_imm(iRegI dst)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+//------- Compare operation -------------------------------
+
+// Compare reg-reg
+// Eg.	CMP	x0, x1
+pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  op2    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Compare reg-imm
+// Eg.	CMP	x0, #N
+pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Conditional instructions ------------------------
+
+// Conditional no operands
+// Eg.	CSINC	x0, zr, zr, <cond>
+pipe_class icond_none(iRegI dst, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 2 operand
+// EG.	CSEL	X0, X1, X2, <cond>
+pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 1 operand
+// EG.	CSEL	X0, X1, ZR, <cond>
+pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src    : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg
+// Eg.	MUL	w0, w1, w2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate
+// Eg.	MADD	w0, w1, w2, w3
+pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply reg-reg, 64 bit
+// Eg.	MUL	x0, x1, x2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate, 64 bit
+// Eg.	MADD	x0, x1, x2, x3
+pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// Eg.	SDIV	w0, w1, w2
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(8); // Maximum latency for 32 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+// Eg.	SDIV	x0, x1, x2
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(16); // Maximum latency for 64 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - prefetch
+// Eg.	PFRM	<mem>
+pipe_class iload_prefetch(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, mem
+// Eg.	LDR	x0, <mem>
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+  single_instruction;
+  dst    : WR(write);
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, reg
+// Eg.	LDR	x0, [sp, x1]
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem
+// Eg.	STR	zr, <mem>
+pipe_class istore_mem(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Store - reg, mem
+// Eg.	STR	x0, <mem>
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  src    : EX2(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Store - reg, reg
+// Eg. STR	x0, [sp, x1]
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : ISS(read);
+  src    : EX2(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+//------- Branch pipeline operations ----------------------
+
+// Branch
+pipe_class pipe_branch()
+%{
+  single_instruction;
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+// Conditional branch
+pipe_class pipe_branch_cond(rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+// Compare & Branch
+// EG.	CBZ/CBNZ
+pipe_class pipe_cmp_branch(iRegI op1)
+%{
+  single_instruction;
+  op1    : EX1(read);
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+//------- Synchronisation operations ----------------------
+
+// Any operation requiring serialization.
+// EG.	DMB/Atomic Ops/Load Acquire/Str Release
+pipe_class pipe_serial()
+%{
+  single_instruction;
+  force_serialization;
+  fixed_latency(16);
+  INS01  : ISS(2); // Cannot dual issue with any other instruction
+  LDST   : WR;
+%}
+
+// Generic big/slow expanded idiom - also serialized
+pipe_class pipe_slow()
+%{
+  instruction_count(10);
+  multiple_bundles;
+  force_serialization;
+  fixed_latency(16);
+  INS01  : ISS(2); // Cannot dual issue with any other instruction
+  LDST   : WR;
+%}
+
+// Empty pipeline class
+pipe_class pipe_class_empty()
+%{
+  single_instruction;
+  fixed_latency(0);
+%}
+
+// Default pipeline class.
+pipe_class pipe_class_default()
+%{
+  single_instruction;
+  fixed_latency(2);
+%}
+
+// Pipeline class for compares.
+pipe_class pipe_class_compare()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for memory operations.
+pipe_class pipe_class_memory()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for call.
+pipe_class pipe_class_call()
+%{
+  single_instruction;
+  fixed_latency(100);
+%}
+
+// Define the class for the Nop node.
+define %{
+  MachNop = pipe_class_empty;
+%}
+
+%}
+//----------INSTRUCTIONS-------------------------------------------------------
+//
+// match      -- States which machine-independent subtree may be replaced
+//               by this instruction.
+// ins_cost   -- The estimated cost of this instruction is used by instruction
+//               selection to identify a minimum cost tree of machine
+//               instructions that matches a tree of machine-independent
+//               instructions.
+// format     -- A string providing the disassembly for this instruction.
+//               The value of an instruction's operand may be inserted
+//               by referring to it with a '$' prefix.
+// opcode     -- Three instruction opcodes may be provided.  These are referred
+//               to within an encode class as $primary, $secondary, and $tertiary
+//               respectively.  The primary opcode is commonly used to
+//               indicate the type of machine instruction, while secondary
+//               and tertiary are often used for prefix options or addressing
+//               modes.
+// ins_encode -- A list of encode classes with parameters. The encode class
+//               name must have been defined in an 'enc_class' specification
+//               in the encode section of the architecture description.
+
+// ============================================================================
+// Memory (Load/Store) Instructions
+
+// Load Instructions
+
+// Load Byte (8 bit signed)
+instruct loadB(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadB mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsbw  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrsbw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
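+// loadB above shows the shape of the rules that follow: it matches the
+// ideal (LoadB mem) subtree, advertises a cost of 4 * INSN_COST to the
+// matcher, prints as ldrsbw in disassembly, delegates encoding to the
+// aarch64_enc_ldrsbw enc_class from the encode section and is scheduled
+// via the iload_reg_mem pipeline class.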
+
+// Load Byte (8 bit signed) into long
+instruct loadB2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrsb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned)
+instruct loadUB(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadUB mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrbw  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned) into long
+instruct loadUB2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed)
+instruct loadS(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadS mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrshw  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrshw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed) into long
+instruct loadS2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrsh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Char (16 bit unsigned)
+instruct loadUS(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadUS mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short/Char (16 bit unsigned) into long
+instruct loadUS2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed)
+instruct loadI(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadI mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed) into long
+instruct loadI2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrsw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit unsigned) into long
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
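+// (ldrw zero-extends into the 64-bit destination, so the AndL with the
+// 0xFFFFFFFF mask needs no separate instruction)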
+
+// Load Long (64 bit signed)
+instruct loadL(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (LoadL mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Range
+instruct loadRange(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadRange mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# range" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Pointer
+instruct loadP(iRegPNoSp dst, memory mem)
+%{
+  match(Set dst (LoadP mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Compressed Pointer
+instruct loadN(iRegNNoSp dst, memory mem)
+%{
+  match(Set dst (LoadN mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Klass Pointer
+instruct loadKlass(iRegPNoSp dst, memory mem)
+%{
+  match(Set dst (LoadKlass mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# class" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Narrow Klass Pointer
+instruct loadNKlass(iRegNNoSp dst, memory mem)
+%{
+  match(Set dst (LoadNKlass mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Float
+instruct loadF(vRegF dst, memory mem)
+%{
+  match(Set dst (LoadF mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrs  $dst, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_ldrs(dst, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Load Double
+instruct loadD(vRegD dst, memory mem)
+%{
+  match(Set dst (LoadD mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrd  $dst, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_ldrd(dst, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+
+// Load Int Constant
+instruct loadConI(iRegINoSp dst, immI src)
+%{
+  match(Set dst src);
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# int" %}
+
+  ins_encode( aarch64_enc_movw_imm(dst, src) );
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Long Constant
+instruct loadConL(iRegLNoSp dst, immL src)
+%{
+  match(Set dst src);
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# long" %}
+
+  ins_encode( aarch64_enc_mov_imm(dst, src) );
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant
+
+instruct loadConP(iRegPNoSp dst, immP con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+  format %{
+    "mov  $dst, $con\t# ptr\n\t"
+  %}
+
+  ins_encode(aarch64_enc_mov_p(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Null Pointer Constant
+
+instruct loadConP0(iRegPNoSp dst, immP0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_p0(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant One
+
+instruct loadConP1(iRegPNoSp dst, immP_1 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_p1(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Poll Page Constant
+
+instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
+
+  ins_encode(aarch64_enc_mov_poll_page(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Byte Map Base Constant
+
+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "adr  $dst, $con\t# Byte Map Base" %}
+
+  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Pointer Constant
+
+instruct loadConN(iRegNNoSp dst, immN con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+  format %{ "mov  $dst, $con\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_mov_n(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Null Pointer Constant
+
+instruct loadConN0(iRegNNoSp dst, immN0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_n0(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Packed Float Constant
+
+instruct loadConF_packed(vRegF dst, immFPacked con) %{
+  match(Set dst con);
+  ins_cost(INSN_COST * 4);
+  format %{ "fmovs  $dst, $con"%}
+  ins_encode %{
+    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Float Constant
+
+instruct loadConF(vRegF dst, immF con) %{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+
+  format %{
+    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
+  %}
+
+  ins_encode %{
+    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Packed Double Constant
+
+instruct loadConD_packed(vRegD dst, immDPacked con) %{
+  match(Set dst con);
+  ins_cost(INSN_COST);
+  format %{ "fmovd  $dst, $con"%}
+  ins_encode %{
+    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Double Constant
+
+instruct loadConD(vRegD dst, immD con) %{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 5);
+  format %{
+    "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
+  %}
+
+  ins_encode %{
+    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Store Instructions
+
+// Store CMS card-mark Immediate
+instruct storeimmCM0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "strb zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Byte
+instruct storeB(iRegIorL2I src, memory mem)
+%{
+  match(Set mem (StoreB mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strb  $src, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+
+instruct storeimmB0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreB mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "strb zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Char/Short
+instruct storeC(iRegIorL2I src, memory mem)
+%{
+  match(Set mem (StoreC mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strh  $src, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_strh(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmC0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreC mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "strh  zr, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_strh0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Integer
+
+instruct storeI(iRegIorL2I src, memory mem)
+%{
+  match(Set mem (StoreI mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strw  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_strw(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmI0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreI mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "strw  zr, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_strw0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL(iRegL src, memory mem)
+%{
+  match(Set mem (StoreL mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "str  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_str(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeimmL0(immL0 zero, memory mem)
+%{
+  match(Set mem (StoreL mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "str  zr, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_str0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Pointer
+instruct storeP(iRegP src, memory mem)
+%{
+  match(Set mem (StoreP mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "str  $src, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_str(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Pointer
+instruct storeimmP0(immP0 zero, memory mem)
+%{
+  match(Set mem (StoreP mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "str zr, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_str0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Save last Java PC to thread anchor
+
+// the ideal code cannot directly encode a reference to the current
+// PC, never mind the PC of the return address which follows the
+// runtime call which will be generated at a later point. so this
+// operation is encoded in the ideal code as a write of either -1 or
+// -2 via the thread register with an offset which locates the last
+// Java pc slot in the thread anchor. the encoding computes the
+// correct Java pc to write by offsetting from the current pc to allow
+// for the intervening instructions which will precede the runtime
+// call. Note that the constant is supplied as -2 when the stub caller
+// return address is passed to the C function and -1 when no return
+// address is required. we set the primary opcode to 1 or 0
+// accordingly as this affects the offset calculation.
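+//
+// purely for illustration, the effect of these rules is roughly the
+// following two-instruction sequence (a hedged sketch, not the
+// actual aarch64_enc_save_pc body; the exact offsets and anchor
+// addressing are assumptions):
+//
+//   adr  rscratch1, <pc of the instruction following the coming call>
+//   str  rscratch1, $mem      // last Java pc slot in the thread anchor
+//
+// with the adr displacement differing between the opcode 0 and
+// opcode 1 cases because extra instructions are emitted to pass the
+// return address in the latter case.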
+
+instruct storeLastJavaPC_no_retaddr(thread_anchor_pc mem, immP_M1 dummy_m1)
+%{
+  match(Set mem (StoreP mem dummy_m1));
+
+  ins_cost(INSN_COST);
+  format %{ "str  ., $mem\t# save pc to thread (no ret addr)" %}
+
+  // use opcode to indicate that we have no return address argument
+  opcode(0x0);
+
+  ins_encode(aarch64_enc_save_pc());
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct storeLastJavaPC_with_retaddr(thread_anchor_pc mem, immP_M2 dummy_m2)
+%{
+  match(Set mem (StoreP mem dummy_m2));
+
+  ins_cost(INSN_COST);
+  format %{ "str  ., $mem\t# save pc to thread (w ret addr)" %}
+
+  // use opcode to indicate that we have a return address argument
+  opcode(0x1);
+
+  ins_encode(aarch64_enc_save_pc());
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed Pointer
+instruct storeN(iRegN src, memory mem)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strw  $src, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_strw(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+%{
+  match(Set mem (StoreN mem zero));
+  predicate(Universe::narrow_oop_base() == NULL);
+
+  ins_cost(INSN_COST);
+  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
+
+  ins_encode(aarch64_enc_strw(heapbase, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Float
+instruct storeF(vRegF src, memory mem)
+%{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strs  $src, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_strs(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// TODO
+// implement storeImmF0 and storeFImmPacked
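+//
+// a possible shape for storeImmF0, reusing the existing zero-store
+// encoding, might be the following (a sketch only: it assumes an
+// immF0 operand is available and is not implemented here):
+//
+//   instruct storeImmF0(immF0 zero, memory mem)
+//   %{
+//     match(Set mem (StoreF mem zero));
+//     ins_cost(INSN_COST);
+//     format %{ "strw  zr, $mem\t# float 0.0" %}
+//     ins_encode(aarch64_enc_strw0(mem));
+//     ins_pipe(istore_mem);
+//   %}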
+
+// Store Double
+instruct storeD(vRegD src, memory mem)
+%{
+  match(Set mem (StoreD mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strd  $src, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_strd(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// TODO
+// implement storeImmD0 and storeDImmPacked
+
+// prefetch instructions
+// Must be safe to execute with invalid address (cannot fault).
+
+instruct prefetchr( memory mem ) %{
+  match(PrefetchRead mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}
+
+  ins_encode( aarch64_enc_prefetchr(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+instruct prefetchw( memory mem ) %{
+  match(PrefetchAllocation mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
+
+  ins_encode( aarch64_enc_prefetchw(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+instruct prefetchnta( memory mem ) %{
+  match(PrefetchWrite mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}
+
+  ins_encode( aarch64_enc_prefetchnta(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+// ============================================================================
+// BSWAP Instructions
+
+instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesI src));
+
+  ins_cost(INSN_COST);
+  format %{ "revw  $dst, $src" %}
+
+  ins_encode %{
+    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
+  match(Set dst (ReverseBytesL src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev  $dst, $src" %}
+
+  ins_encode %{
+    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesUS src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev16w  $dst, $src" %}
+
+  ins_encode %{
+    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesS src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev16w  $dst, $src\n\t"
+            "sbfmw $dst, $dst, #0, #15" %}
+
+  ins_encode %{
+    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
+    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// Zero Count Instructions
+
+instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (CountLeadingZerosI src));
+
+  ins_cost(INSN_COST);
+  format %{ "clzw  $dst, $src" %}
+  ins_encode %{
+    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe( ialu_reg );
+%}
+
+instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
+  match(Set dst (CountLeadingZerosL src));
+
+  ins_cost(INSN_COST);
+  format %{ "clz   $dst, $src" %}
+  ins_encode %{
+    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe( ialu_reg );
+%}
+
+instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (CountTrailingZerosI src));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "rbitw  $dst, $src\n\t"
+            "clzw   $dst, $dst" %}
+  ins_encode %{
+    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
+    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg );
+%}
+
+instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
+  match(Set dst (CountTrailingZerosL src));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "rbit   $dst, $src\n\t"
+            "clz    $dst, $dst" %}
+  ins_encode %{
+    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
+    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
+  %}
+
+  ins_pipe( ialu_reg );
+%}
+
+// ============================================================================
+// MemBar Instruction
+
+instruct membar_acquire()
+%{
+  match(MemBarAcquire);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "MEMBAR-acquire\t# ???" %}
+
+  ins_encode %{
+    __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad|Assembler::LoadStore));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release()
+%{
+  match(MemBarRelease);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "MEMBAR-release" %}
+  ins_encode %{
+  __ membar(Assembler::AnyAny);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "MEMBAR-volatile?" %}
+
+  ins_encode %{
+    __ membar(Assembler::AnyAny);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct unnecessary_membar_volatile() %{
+  match(MemBarVolatile);
+  predicate(Matcher::post_store_load_barrier(n));
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(VOLATILE_REF_COST);
+
+  ins_encode %{
+    __ membar(Assembler::StoreStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+
+  format %{ "MEMBAR-acquire-lock\t# ???" %}
+
+  ins_encode %{
+    __ block_comment("membar-acquire-lock");
+    __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad|Assembler::LoadStore));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+
+  format %{ "MEMBAR-release-lock\t# ???" %}
+
+  ins_encode %{
+    __ block_comment("MEMBAR-release-lock");
+    __ membar(Assembler::AnyAny);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Cast/Convert Instructions
+
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
+  match(Set dst (CastX2P src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# long -> ptr" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+  match(Set dst (CastP2X src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# ptr -> long" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(iRegINoSp dst, iRegP src) %{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# ptr -> int" %}
+  ins_encode %{
+    __ movw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(iRegINoSp dst, iRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  ins_cost(INSN_COST);
+  format %{ "mov dst, $src\t# compressed ptr -> int" %}
+  ins_encode %{
+    __ movw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  ins_cost(INSN_COST * 3);
+  format %{ "encode_heap_oop $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ encode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  ins_cost(INSN_COST * 3);
+  format %{ "encode_heap_oop_not_null $dst, $src" %}
+  ins_encode %{
+    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
+            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  ins_cost(INSN_COST * 3);
+  format %{ "decode_heap_oop $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
+            n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  ins_cost(INSN_COST * 3);
+  format %{ "decode_heap_oop_not_null $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop_not_null(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct checkCastPP(iRegPNoSp dst)
+%{
+  match(Set dst (CheckCastPP dst));
+
+  size(0);
+  format %{ "# checkcastPP of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castPP(iRegPNoSp dst)
+%{
+  match(Set dst (CastPP dst));
+
+  size(0);
+  format %{ "# castPP of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castII(iRegI dst)
+%{
+  match(Set dst (CastII dst));
+
+  size(0);
+  format %{ "# castII of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+// ============================================================================
+// Atomic operation instructions
+//
+// Intel and SPARC both implement Ideal Node LoadPLocked and
+// Store{PIL}Conditional instructions using a normal load for the
+// LoadPLocked and a CAS for the Store{PIL}Conditional.
+//
+// The ideal code appears only to use LoadPLocked/StorePConditional
+// as a pair to lock object allocations from Eden space when not
+// using TLABs.
+//
+// There does not appear to be a Load{IL}Locked Ideal Node and the
+// Ideal code appears to use Store{IL}Conditional as an alias for CAS
+// and to use StoreIConditional only for 32-bit and StoreLConditional
+// only for 64-bit.
+//
+// We implement LoadPLocked and StorePConditional instructions using,
+// respectively, the AArch64 hw load-exclusive and store-conditional
+// instructions, whereas we must implement each of
+// Store{IL}Conditional using a CAS which employs a pair of
+// instructions comprising a load-exclusive followed by a
+// store-conditional.
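+//
+// for reference, the cmpxchg encodings used below boil down to a
+// load-exclusive/store-conditional retry loop along these lines (a
+// sketch of the usual AArch64 idiom, not the literal enc_class body;
+// register choices are assumptions):
+//
+//   retry:
+//     ldaxr  rscratch1, [addr]          // load-exclusive, acquire
+//     cmp    rscratch1, oldval
+//     b.ne   done                       // value differs -> fail (NE)
+//     stlxr  rscratch2, newval, [addr]  // store-conditional, release
+//     cbnz   rscratch2, retry           // lost exclusivity -> retry
+//   done:                               // flags: EQ on success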
+
+
+// Locked-load (linked load) of the current heap-top
+// used when updating the eden heap top
+// implemented using ldaxr on AArch64
+
+instruct loadPLocked(iRegPNoSp dst, memory mem)
+%{
+  match(Set dst (LoadPLocked mem));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
+
+  ins_encode(aarch64_enc_ldaxr(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flag (EQ) on success.
+// implemented using stlxr on AArch64.
+
+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) 
+%{
+  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+ // TODO
+ // do we need to do a store-conditional release or can we just use a
+ // plain store-conditional?
+
+  format %{
+    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
+
+  ins_pipe(pipe_serial);
+%}
+
+// this has to be implemented as a CAS
+instruct storeLConditional(memory mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) 
+%{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{
+    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// this has to be implemented as a CAS
+instruct storeIConditional(memory mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) 
+%{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{
+    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
+// can't match them
+
+instruct compareAndSwapI(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapL(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapP(iRegINoSp res, memory mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapN(iRegINoSp res, memory mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+
+instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
+  match(Set prev (GetAndSetI mem newv));
+  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
+  match(Set prev (GetAndSetL mem newv));
+  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
+  match(Set prev (GetAndSetN mem newv));
+  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
+  match(Set prev (GetAndSetP mem newv));
+  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+
+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
+  match(Set newval (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addL $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addL [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
+  match(Set newval (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addL $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addL [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
+  match(Set newval (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addI $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addI [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
+  match(Set newval (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addI $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addI [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Conditional Move Instructions
+
+// n.b. we have identical rules for both a signed compare op (cmpOp)
+// and an unsigned compare op (cmpOpU). it would be nice if we could
+// define an op class which merged both inputs and use it to type the
+// argument to a single rule. unfortunately this fails because the
+// opclass does not live up to the COND_INTER interface of its
+// component operands. When the generic code tries to negate the
+// operand it ends up running the generic MachOper::negate method
+// which throws a ShouldNotHappen. So, we have to provide two flavours
+// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
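+//
+// for illustration, the kind of definition we would like to use here
+// (a sketch only -- this is exactly what does not work today) is an
+// operand class merging the two compare ops, e.g.
+//
+//   opclass cmpOpAny(cmpOp, cmpOpU);
+//
+// with a single CMove rule per type written against cmpOpAny in
+// place of the duplicated signed/unsigned pairs which follow.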
+
+instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+// n.b. this is selected in preference to the rule above because it
+// avoids loading constant 0 into a source register
+
+// TODO
+// we ought only to be able to cull one of these variants as the ideal
+// transforms ought always to order the zero consistently (to left/right?)
+
+instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, zr $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, zr $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, immI0 zero) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src1 $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, immI0 zero) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src1 $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+// special case for creating a boolean 0 or 1
+
+// n.b. this is selected in preference to the rule above because it
+// avoids loading constants 0 and 1 into a source register
+
+instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    // equivalently
+    // cset(as_Register($dst$$reg),
+    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
+    __ csincw(as_Register($dst$$reg),
+	     zr,
+	     zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_none);
+%}
+
+instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    // equivalently
+    // cset(as_Register($dst$$reg),
+    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
+    __ csincw(as_Register($dst$$reg),
+	     zr,
+	     zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_none);
+%}
+
+instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, immL0 zero) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src1 $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, immL0 zero) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src1 $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, zr $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, zr $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, immP0 zero) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src1 $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, immP0 zero) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src1 $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, zr $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, zr $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, immN0 zero) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src1 $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, immN0 zero) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src1 $cmp\t# unsigned, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, zr $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, zr $cmp\t# unsigned, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
+%{
+  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcsels(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
+%{
+  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcsels(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
+%{
+  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcseld(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
+%{
+  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcseld(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Arithmetic Instructions
+//
+
+// Integer Addition
+
+// TODO
+// these currently employ operations which do not set CR and hence are
+// not flagged as killing CR but we would like to isolate the cases
+// where we want to set flags from those where we don't. need to work
+// out how to do that.
+
+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (AddI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
+  match(Set dst (AddI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
+  match(Set dst (AddI (ConvL2I src1) src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Addition
+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
+  match(Set dst (AddP src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
+  match(Set dst (AddP src1 (ConvI2L src2)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg), ext::sxtw);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
+  match(Set dst (AddP src1 (LShiftL src2 scale)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
+
+  ins_encode %{
+    __ lea(as_Register($dst$$reg),
+	   Address(as_Register($src1$$reg), as_Register($src2$$reg),
+		   Address::lsl($scale$$constant)));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
+  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
+
+  ins_encode %{
+    __ lea(as_Register($dst$$reg),
+	   Address(as_Register($src1$$reg), as_Register($src2$$reg),
+		   Address::sxtw($scale$$constant)));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
+  match(Set dst (LShiftL (ConvI2L src) scale));
+
+  ins_cost(INSN_COST);
+  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
+
+  ins_encode %{
+    __ sbfiz(as_Register($dst$$reg),
+          as_Register($src$$reg),
+          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Pointer Immediate Addition
+// n.b. this needs to be more expensive than using an indirect memory
+// operand
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
+  match(Set dst (AddP src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Immediate Addition
+// No constant pool entries required.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Subtraction
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Subtraction
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Immediate Subtraction
+// No constant pool entries required.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub$dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Negation (special case for sub)
+
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
+  match(Set dst (SubI zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "negw $dst, $src\t# int" %}
+
+  ins_encode %{
+    __ negw(as_Register($dst$$reg),
+             as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long Negation
+
+instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
+  match(Set dst (SubL zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "neg $dst, $src\t# long" %}
+
+  ins_encode %{
+    __ neg(as_Register($dst$$reg),
+	   as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Multiply
+
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulI src1 src2));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "mulw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mulw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "smull  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ smull(as_Register($dst$$reg),
+	     as_Register($src1$$reg),
+	     as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+// Long Multiply
+
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "mul  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mul(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
+%{
+  match(Set dst (MulHiL src1 src2));
+
+  ins_cost(INSN_COST * 7);
+  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
+
+  ins_encode %{
+    __ smulh(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+	     as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+// Combined Integer Multiply & Add/Sub
+
+instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (AddI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "madd  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ maddw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (SubI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "msub  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msubw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+// Combined Long Multiply & Add/Sub
+
+instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (AddL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "madd  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ madd(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
+instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (SubL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "msub  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msub(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
+// Integer Divide
+
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (DivI src1 src2));
+
+  ins_cost(INSN_COST * 19);
+  format %{ "sdivw  $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_divw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+instruct signExtract(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
+  match(Set dst (URShiftI (RShiftI src div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsrw $dst, $src, $div1" %}
+  ins_encode %{
+    __ lsrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
+  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src, LSR $div1" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+	      as_Register($src$$reg),
+	      as_Register($src$$reg),
+	      Assembler::LSR, 31);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Long Divide
+
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (DivL src1 src2));
+
+  ins_cost(INSN_COST * 35);
+  format %{ "sdiv   $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_div(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+instruct signExtractL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
+  match(Set dst (URShiftL (RShiftL src div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, $src, $div1" %}
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg), as_Register($src$$reg), 63);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
+  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src, $div1" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+	      as_Register($src$$reg),
+	      as_Register($src$$reg),
+	      Assembler::LSR, 63);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Remainder
+
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (ModI src1 src2));
+
+  ins_cost(INSN_COST * 22);
+  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
+            "msubw($dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_modw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+// Long Remainder
+
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (ModL src1 src2));
+
+  ins_cost(INSN_COST * 38);
+  format %{ "sdiv   rscratch1, $src1, $src2\n"
+            "msub($dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_mod(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+// Integer Shifts
+
+// Shift Left Register
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lslvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lslvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ lslw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lsrvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lsrvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (URShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ lsrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "asrvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ asrvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (RShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ asrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Combined Int Mask and Right Shift (using UBFM)
+// TODO
+
+// Long Shifts
+
+// Shift Left Register
+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lslv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lslv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsl(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lsrv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lsrv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// A special-case pattern for card table stores.
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
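+
+// The rule above exists because the card-marking write barrier computes the
+// card index as (CastP2X addr) >>> CardTableModRefBS::card_shift (typically
+// 9, i.e. 512-byte cards); matching CastP2X here lets the shift read the oop
+// register directly instead of going through a separate pointer-to-long move.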
+
+// Shift Right Arithmetic Register
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "asrv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ asrv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ asr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+instruct regL_not_reg(iRegLNoSp dst,
+                         iRegL src1, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorL src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "eon  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+instruct regI_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorI src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "eonw  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct AndI_reg_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AndL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "bic  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct OrI_reg_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct OrL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "orn  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct XorI_reg_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorI m1 (XorI src2 src1)));
+  ins_cost(INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct XorL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorL m1 (XorL src2 src1)));
+  ins_cost(INSN_COST);
+  format %{ "eon  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_URShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_RShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_LShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_URShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_RShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_LShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_URShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_RShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_LShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_URShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_RShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_LShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_URShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_RShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_LShift_reg(iRegINoSp dst,
+                         iRegIorL2I src1, iRegIorL2I src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what sbfm can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 63
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 63 - lshift;
+    int r = (rshift - lshift) & 63;
+    __ sbfm(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what sbfmw can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 31
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 31 - lshift;
+    int r = (rshift - lshift) & 31;
+    __ sbfmw(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
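+
+// Worked example for the rule above: i2b reaches the matcher as
+// (RShiftI (LShiftI src 24) 24), so lshift = rshift = 24, giving
+// r = (24 - 24) & 31 = 0 and s = 31 - 24 = 7; the emitted
+// "sbfmw dst, src, #0, #7" is the sxtb alias, i.e. a one-instruction
+// sign extension of the low byte.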
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what ubfm can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 63
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 63 - lshift;
+    int r = (rshift - lshift) & 63;
+    __ ubfm(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what ubfmw can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 31
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 31 - lshift;
+    int r = (rshift - lshift) & 31;
+    __ ubfmw(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Bitfield extract with shift & mask
+
+instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (AndI (URShiftI src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfxw $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfxw(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
+%{
+  match(Set dst (AndL (URShiftL src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// We can use ubfx when extending an And with a mask when we know mask
+// is positive.  We know that because immI_bitmask guarantees it.
+instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Rotations
+
+instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 63);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 31);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 63);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 31);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
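+
+// Worked example for the extr rules: a long rotate written as
+// (x << 13) | (x >>> 51) satisfies the predicate because (13 + 51) & 63 == 0,
+// and since both inputs are the same register the emitted
+// "extr dst, x, x, #51" is the ror alias, i.e. a rotate-left by 13 in a
+// single instruction.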
+
+
+// rol expander
+
+instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "rol    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
+            rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// rol expander
+
+instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "rol    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
+            rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
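+
+// The expanders above synthesise rotate-left from a variable rotate-right by
+// negating the count: rol(x, s) == ror(x, (-s) & (w - 1)), and rorv/rorvw
+// only use the low 6/5 bits of the count register, so the single
+// "subw rscratch1, zr, shift" is sufficient.  e.g. for 32 bits and s = 3,
+// -3 & 31 == 29, and a rotate-right by 29 is a rotate-left by 3.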
+
+instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
+
+  expand %{
+    rolI_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
+
+  expand %{
+    rolI_rReg(dst, src, shift, cr);
+  %}
+%}
+
+// ror expander
+
+instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "ror    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
+            as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// ror expander
+
+instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "ror    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
+            as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
+%{
+  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
+%{
+  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
+
+  expand %{
+    rorI_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
+
+  expand %{
+    rorI_rReg(dst, src, shift, cr);
+  %}
+%}
+
+// Add/subtract (extended)
+
+instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%};
+
+instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%};
+
+
+instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxth $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
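+
+// The shift pair in the rule above is the canonical sign extension:
+// (src2 << 16) >> 16 keeps the low half-word, sign-extended, which is exactly
+// what the sxth extended-register form of add provides, so the extension is
+// folded into the add.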
+
+instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, uxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxth $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, uxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+
+instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
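+
+// Likewise, (src2 & 0xff) in the rule above is a zero extension of the low
+// byte, so it maps onto the uxtb extended-register form; the uxth/uxtw rules
+// that follow handle the 0xffff and 0xffffffff masks the same way.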
+
+instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxtw" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxtw" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
+
+// ============================================================================
+// Floating Point Arithmetic Instructions
+
+instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (AddF src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fadds   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fadds(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (AddD src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "faddd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ faddd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (SubF src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fsubs   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fsubs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (SubD src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fsubd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fsubd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (MulF src1 src2));
+
+  ins_cost(INSN_COST * 6);
+  format %{ "fmuls   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fmuls(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (MulD src1 src2));
+
+  ins_cost(INSN_COST * 6);
+  format %{ "fmuld   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fmuld(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// We cannot use these fused mul with add/sub ops because they don't
+// produce the same result as the equivalent separated ops
+// (essentially they don't round the intermediate result). That's a
+// shame. Leaving them here in case we can identify cases where it is
+// legitimate to use them.
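+//
+// A concrete single-precision case (all values exactly representable):
+// with a = b = 4097.0f and c = -16785408.0f the exact product a*b is
+// 16785409, which rounds to 16785408.0f, so the separated mul and add give
+// (a*b) + c == 0.0f, whereas fmadds keeps the unrounded product and
+// returns 1.0f.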
+
+
+// instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (AddF (MulF src1 src2) src3));
+
+//   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmadds(as_FloatRegister($dst$$reg),
+//              as_FloatRegister($src1$$reg),
+//              as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (AddD (MulD src1 src2) src3));
+
+//   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmaddd(as_FloatRegister($dst$$reg),
+//              as_FloatRegister($src1$$reg),
+//              as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (AddF (MulF (NegF src1) src2) src3));
+//   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
+
+//   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmsubs(as_FloatRegister($dst$$reg),
+//               as_FloatRegister($src1$$reg),
+//               as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (AddD (MulD (NegD src1) src2) src3));
+//   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
+
+//   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmsubd(as_FloatRegister($dst$$reg),
+//               as_FloatRegister($src1$$reg),
+//               as_FloatRegister($src2$$reg),
+//               as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (SubF (MulF (NegF src1) src2) src3));
+//   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
+
+//   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmadds(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (SubD (MulD (NegD src1) src2) src3));
+//   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
+
+//   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmaddd(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
+//   match(Set dst (SubF (MulF src1 src2) src3));
+
+//   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmsubs(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
+//   match(Set dst (SubD (MulD src1 src2) src3));
+
+//   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//   // n.b. insn name should be fnmsubd
+//     __ fnmsub(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+
+instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (DivF src1  src2));
+
+  ins_cost(INSN_COST * 18);
+  format %{ "fdivs   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fdivs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (DivD src1  src2));
+
+  ins_cost(INSN_COST * 32);
+  format %{ "fdivd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fdivd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct negF_reg_reg(vRegF dst, vRegF src) %{
+  match(Set dst (NegF src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg   $dst, $src" %}
+
+  ins_encode %{
+    __ fnegs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct negD_reg_reg(vRegD dst, vRegD src) %{
+  match(Set dst (NegD src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fnegd   $dst, $src" %}
+
+  ins_encode %{
+    __ fnegd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct absF_reg(vRegF dst, vRegF src) %{
+  match(Set dst (AbsF src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fabss   $dst, $src" %}
+  ins_encode %{
+    __ fabss(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct absD_reg(vRegD dst, vRegD src) %{
+  match(Set dst (AbsD src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fabsd   $dst, $src" %}
+  ins_encode %{
+    __ fabsd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct sqrtD_reg(vRegD dst, vRegD src) %{
+  match(Set dst (SqrtD src));
+
+  ins_cost(INSN_COST * 50);
+  format %{ "fsqrtd  $dst, $src" %}
+  ins_encode %{
+    __ fsqrtd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct sqrtF_reg(vRegF dst, vRegF src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  ins_cost(INSN_COST * 50);
+  format %{ "fsqrts  $dst, $src" %}
+  ins_encode %{
+    __ fsqrts(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Logical Instructions
+
+// Integer Logical Instructions
+
+// And Instructions
+
+
+instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andsw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+	    as_Register($src1$$reg),
+	    (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Or Instructions
+
+instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orrw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orrw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Xor Instructions
+
+instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eorw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eorw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Logical Instructions
+// TODO
+
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "and  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+	    as_Register($src1$$reg),
+	    as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "and  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Or Instructions
+
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Xor Instructions
+
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (XorL src1 src2));
+
+  format %{ "eor  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
+  match(Set dst (XorL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "eor  $dst, $src1, $src2\t# int" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
+%{
+  match(Set dst (ConvI2L src));
+
+  ins_cost(INSN_COST);
+  format %{ "sxtw  $dst, $src\t# i2l" %}
+  ins_encode %{
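+    // sbfm $dst, $src, #0, #31 is the canonical encoding of sxtw (sign-extend word)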
+    __ sbfm($dst$$Register, $src$$Register, 0, 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// this pattern occurs in bigmath arithmetic
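+// (e.g. Integer.toUnsignedLong(i), i.e. ((long) i) & 0xFFFFFFFFL)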
+instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L src) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
+  ins_encode %{
+    __ ubfm($dst$$Register, $src$$Register, 0, 31);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
+  match(Set dst (ConvL2I src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw  $dst, $src \t// l2i" %}
+
+  ins_encode %{
+    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
+%{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+
+  format %{
+    "cmpw $src, zr\n\t"
+    "cset $dst, ne"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src$$reg), zr);
+    __ cset(as_Register($dst$$reg), Assembler::NE);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
+%{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+
+  format %{
+    "cmp  $src, zr\n\t"
+    "cset $dst, ne"
+  %}
+
+  ins_encode %{
+    __ cmp(as_Register($src$$reg), zr);
+    __ cset(as_Register($dst$$reg), Assembler::NE);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convD2F_reg(vRegF dst, vRegD src) %{
+  match(Set dst (ConvD2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtd  $dst, $src \t// d2f" %}
+
+  ins_encode %{
+    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2D_reg(vRegD dst, vRegF src) %{
+  match(Set dst (ConvF2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvts  $dst, $src \t// f2d" %}
+
+  ins_encode %{
+    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
+  match(Set dst (ConvF2I src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzsw  $dst, $src \t// f2i" %}
+
+  ins_encode %{
+    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
+  match(Set dst (ConvF2L src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzs  $dst, $src \t// f2l" %}
+
+  ins_encode %{
+    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfws  $dst, $src \t// i2f" %}
+
+  ins_encode %{
+    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
+  match(Set dst (ConvL2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfs  $dst, $src \t// l2f" %}
+
+  ins_encode %{
+    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
+  match(Set dst (ConvD2I src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzdw  $dst, $src \t// d2i" %}
+
+  ins_encode %{
+    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
+  match(Set dst (ConvD2L src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzd  $dst, $src \t// d2l" %}
+
+  ins_encode %{
+    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfwd  $dst, $src \t// i2d" %}
+
+  ins_encode %{
+    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
+  match(Set dst (ConvL2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfd  $dst, $src \t// l2d" %}
+
+  ins_encode %{
+    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// stack <-> reg and reg <-> reg shuffles with no conversion
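+//
+// These MoveF2I/MoveI2F/MoveD2L/MoveL2D rules reinterpret the raw bits
+// (as for Float.floatToRawIntBits / Double.doubleToRawLongBits and
+// their inverses), either through a stack slot or directly with fmov.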
+
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
+
+  ins_encode %{
+    __ ldrw($dst$$Register, Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
+
+  ins_encode %{
+    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
+
+  ins_encode %{
+    __ ldr($dst$$Register, Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
+
+  ins_encode %{
+    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
+
+  ins_encode %{
+    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
+
+  ins_encode %{
+    __ strw($src$$Register, Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %}
+
+  ins_encode %{
+    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
+
+  ins_encode %{
+    __ str($src$$Register, Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+// ============================================================================
+// clearing of an array
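+//
+// n.b. the $cnt input of ClearArray, passed on to zero_words below, is
+// a count of 8-byte heap words rather than bytes.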
+
+instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
+%{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base);
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ClearArray $cnt, $base" %}
+
+  ins_encode %{
+    __ zero_words($base$$Register, $cnt$$Register);
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// ============================================================================
+// Compare Instructions
+
+instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
+%{
+  match(Set cr (CmpI op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw $op1, 0" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+// Unsigned compare instructions; really the same as the signed compares,
+// except they should only be used to feed an If or a CMovI which takes a
+// cmpOpU.
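+//
+// For example, with op1 == -1 (0xFFFFFFFF) and op2 == 1 the same cmpw
+// sets the flags, but a signed cmpOp reads them as "less than" (lt)
+// while a cmpOpU reads them as "higher" (hi).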
+
+instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
+%{
+  match(Set cr (CmpU op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw $op1, #0\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
+%{
+  match(Set cr (CmpL op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "tst  $op1" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (CmpUL op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
+%{
+  match(Set cr (CmpUL op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "tst  $op1" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
+%{
+  match(Set cr (CmpUL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
+%{
+  match(Set cr (CmpUL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
+%{
+  match(Set cr (CmpP op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2\t // ptr" %}
+
+  ins_encode(aarch64_enc_cmpp(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
+%{
+  match(Set cr (CmpN op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2\t // compressed ptr" %}
+
+  ins_encode(aarch64_enc_cmpn(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
+%{
+  match(Set cr (CmpP op1 zero));
+
+  effect(DEF cr, USE op1, USE zero);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, 0\t // ptr" %}
+
+  ins_encode(aarch64_enc_testp(op1));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
+%{
+  match(Set cr (CmpN op1 zero));
+
+  effect(DEF cr, USE op1, USE zero);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, 0\t // compressed ptr" %}
+
+  ins_encode(aarch64_enc_testn(op1));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+// FP comparisons
+//
+// n.b. CmpF/CmpD set a normal flags reg which then gets compared
+// using normal cmpOp. See declaration of rFlagsReg for details.
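+//
+// n.b. an unordered fcmp (either operand NaN) sets NZCV to 0011, so the
+// CmpF3/CmpD3 rules further down produce -1 for "less than or
+// unordered", 0 for equal and +1 for greater, matching the Java
+// fcmpl/dcmpl convention.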
+
+instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
+%{
+  match(Set cr (CmpF src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmps $src1, $src2" %}
+
+  ins_encode %{
+    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
+%{
+  match(Set cr (CmpF src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmps $src1, 0.0" %}
+
+  ins_encode %{
+    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
+%{
+  match(Set cr (CmpD src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmpd $src1, $src2" %}
+
+  ins_encode %{
+    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
+%{
+  match(Set cr (CmpD src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmpd $src1, 0.0" %}
+
+  ins_encode %{
+    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
+%{
+  match(Set dst (CmpF3 src1 src2));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmps $src1, $src2\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    FloatRegister s2 = as_FloatRegister($src2$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmps(s1, s2);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
+%{
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmpd $src1, $src2\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    FloatRegister s2 = as_FloatRegister($src2$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmpd(s1, s2);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpF3 src1 zero));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmps $src1, 0.0\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmps(s1, 0.0D);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpD3 src1 zero));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmpd $src1, 0.0\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmpd(s1, 0.0D);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+
+%}
+
+// Manifest a CmpL result in an integer register.
+// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
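+// i.e. exactly the -1/0/+1 value the lcmp bytecode pushes.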
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
+%{
+  match(Set dst (CmpL3 src1 src2));
+  effect(KILL flags);
+
+  ins_cost(INSN_COST * 6);
+  format %{ "cmp $src1, $src2\n\t"
+            "csetw $dst, ne\n\t"
+            "cnegw $dst, $dst, lt"
+  %}
+  // format %{ "CmpL3 $dst, $src1, $src2" %}
+  ins_encode %{
+    __ cmp($src1$$Register, $src2$$Register);
+    __ csetw($dst$$Register, Assembler::NE);
+    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
+%{
+  match(Set dst (CmpLTMask p q));
+  effect(KILL cr);
+
+  ins_cost(3 * INSN_COST);
+
+  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
+            "csetw $dst, lt\n\t"
+            "subw $dst, zr, $dst"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
+    __ csetw(as_Register($dst$$reg), Assembler::LT);
+    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpLTMask src zero));
+  effect(KILL cr);
+
+  ins_cost(INSN_COST);
+
+  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
+
+  ins_encode %{
+    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// ============================================================================
+// Max and Min
+
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  match(Set dst (MinI src1 src2));
+
+  effect(DEF dst, USE src1, USE src2, KILL cr);
+  size(8);
+
+  ins_cost(INSN_COST * 3);
+  format %{
+    "cmpw $src1, $src2\t# signed int\n\t"
+    "cselw $dst, $src1, $src2, lt"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src1$$reg),
+            as_Register($src2$$reg));
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             Assembler::LT);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  match(Set dst (MaxI src1 src2));
+
+  effect(DEF dst, USE src1, USE src2, KILL cr);
+  size(8);
+
+  ins_cost(INSN_COST * 3);
+  format %{
+    "cmpw $src1, $src2\t# signed int\n\t"
+    "cselw $dst, $src1, $src2, gt"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src1$$reg),
+            as_Register($src2$$reg));
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             Assembler::GT);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// ============================================================================
+// Branch Instructions
+
+// Direct Branch.
+instruct branch(label lbl)
+%{
+  match(Goto);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b  $lbl" %}
+
+  ins_encode(aarch64_enc_b(lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// Conditional Near Branch
+instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
+%{
+  // Same match rule as `branchConFar'.
+  match(If cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  // ins_short_branch(1);
+  format %{ "b$cmp  $lbl" %}
+
+  ins_encode(aarch64_enc_br_con(cmp, lbl));
+
+  ins_pipe(pipe_branch_cond);
+%}
+
+// Conditional Near Branch Unsigned
+instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
+%{
+  // Same match rule as `branchConFar'.
+  match(If cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  // ins_short_branch(1);
+  format %{ "b$cmp  $lbl\t# unsigned" %}
+
+  ins_encode(aarch64_enc_br_conU(cmp, lbl));
+
+  ins_pipe(pipe_branch_cond);
+%}
+
+// Make use of CBZ and CBNZ.  These instructions, as well as being
+// shorter than (cmp; branch), have the additional benefit of not
+// killing the flags.
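+//
+// For example, "cbzw $op1, L" below stands in for "cmpw $op1, #0; b.eq L"
+// while leaving NZCV untouched.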
+
+instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cbw$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbzw($op1$$Register, *L);
+    else
+      __ cbnzw($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbz($op1$$Register, *L);
+    else
+      __ cbnz($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpP op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbz($op1$$Register, *L);
+    else
+      __ cbnz($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpP (DecodeN oop) zero));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $oop, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbzw($oop$$Register, *L);
+    else
+      __ cbnzw($oop$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
+  match(If cmp (CmpU op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
+            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cbw$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ || cond == Assembler::LS)
+      __ cbzw($op1$$Register, *L);
+    else
+      __ cbnzw($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
+  match(If cmp (CmpUL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
+            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ || cond == Assembler::LS)
+      __ cbz($op1$$Register, *L);
+    else
+      __ cbnz($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+// Test bit and Branch
+
+// Patterns for short (< 32KiB) variants
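+//
+// The _sign rules below turn "x < 0" / "x >= 0" into a test of the sign
+// bit (bit 63 for long, bit 31 for int) with tbnz/tbz; the _bit rules
+// handle "(x & 2^k) != 0 / == 0" the same way, using the single set bit
+// of the power-of-two mask.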
+instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 63, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 31, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
+  match(If cmp (CmpL (AndL op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
+  match(If cmp (CmpI (AndI op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// And far variants
+instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
+  match(If cmp (CmpL (AndL op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
+  match(If cmp (CmpI (AndI op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+// Test bits
+
+instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpL (AndL op1 op2) op3));
+  predicate(Assembler::operand_valid_for_logical_immediate
+            (/*is_32*/false, n->in(1)->in(2)->get_long()));
+
+  ins_cost(INSN_COST);
+  format %{ "tst $op1, $op2 # long" %}
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpI (AndI op1 op2) op3));
+  predicate(Assembler::operand_valid_for_logical_immediate
+            (/*is_32*/true, n->in(1)->in(2)->get_int()));
+
+  ins_cost(INSN_COST);
+  format %{ "tst $op1, $op2 # int" %}
+  ins_encode %{
+    __ tstw($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpL (AndL op1 op2) op3));
+
+  ins_cost(INSN_COST);
+  format %{ "tst $op1, $op2 # long" %}
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpI (AndI op1 op2) op3));
+
+  ins_cost(INSN_COST);
+  format %{ "tstw $op1, $op2 # int" %}
+  ins_encode %{
+    __ tstw($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+
+// Conditional Far Branch
+// Conditional Far Branch Unsigned
+// TODO: fixme
+
+// counted loop end branch near
+instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
+%{
+  match(CountedLoopEnd cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // short variant.
+  // ins_short_branch(1);
+  format %{ "b$cmp $lbl \t// counted loop end" %}
+
+  ins_encode(aarch64_enc_br_con(cmp, lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// counted loop end branch near Unsigned
+instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
+%{
+  match(CountedLoopEnd cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // short variant.
+  // ins_short_branch(1);
+  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
+
+  ins_encode(aarch64_enc_br_conU(cmp, lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// counted loop end branch far
+// counted loop end branch far unsigned
+// TODO: fixme
+
+// ============================================================================
+// inlined locking and unlocking
+
+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  // TODO
+  // identify correct cost
+  ins_cost(5 * INSN_COST);
+  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
+
+  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
+
+  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
+
+  ins_pipe(pipe_serial);
+%}
+
+
+// ============================================================================
+// Safepoint Instructions
+
+// TODO
+// provide a near and far version of this code
+
+instruct safePoint(rFlagsReg cr, iRegP poll)
+%{
+  match(SafePoint poll);
+  effect(KILL cr);
+
+  format %{
+    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
+  %}
+  ins_encode %{
+    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+
+// ============================================================================
+// Procedure Call/Return Instructions
+
+// Call Java Static Instruction
+
+instruct CallStaticJavaDirect(method meth)
+%{
+  match(CallStaticJava);
+
+  effect(USE meth);
+
+  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
+
+  ins_cost(CALL_COST);
+
+  format %{ "call,static $meth \t// ==> " %}
+
+  ins_encode( aarch64_enc_java_static_call(meth),
+              aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Java Static Instruction (method handle version)
+
+instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
+%{
+  match(CallStaticJava);
+
+  effect(USE meth);
+
+  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
+
+  ins_cost(CALL_COST);
+
+  format %{ "call,static $meth \t// (methodhandle) ==> " %}
+
+  ins_encode( aarch64_enc_java_handle_call(meth),
+              aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect(method meth)
+%{
+  match(CallDynamicJava);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL,dynamic $meth \t// ==> " %}
+
+  ins_encode( aarch64_enc_java_dynamic_call(meth),
+               aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallRuntimeDirect(method meth)
+%{
+  match(CallRuntime);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafDirect(method meth)
+%{
+  match(CallLeaf);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime leaf $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafNoFPDirect(method meth)
+%{
+  match(CallLeafNoFP);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime leaf nofp $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
+%{
+  match(TailCall jump_target method_oop);
+
+  ins_cost(CALL_COST);
+
+  format %{ "br $jump_target\t# $method_oop holds method oop" %}
+
+  ins_encode(aarch64_enc_tail_call(jump_target));
+
+  ins_pipe(pipe_class_call);
+%}
+
+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
+%{
+  match(TailJump jump_target ex_oop);
+
+  ins_cost(CALL_COST);
+
+  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
+
+  ins_encode(aarch64_enc_tail_jmp(jump_target));
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+// TODO check
+// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
+instruct CreateException(iRegP_R0 ex_oop)
+%{
+  match(Set ex_oop (CreateEx));
+
+  format %{ " -- \t// exception oop; no code emitted" %}
+
+  size(0);
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException() %{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+
+  format %{ "b rethrow_stub" %}
+
+  ins_encode( aarch64_enc_rethrow() );
+
+  ins_pipe(pipe_class_call);
+%}
+
+
+// Return Instruction
+// epilog node loads ret address into lr as part of frame pop
+instruct Ret()
+%{
+  match(Return);
+
+  format %{ "ret\t// return register" %}
+
+  ins_encode( aarch64_enc_ret() );
+
+  ins_pipe(pipe_branch);
+%}
+
+// Die now.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+
+  ins_cost(CALL_COST);
+  format %{ "ShouldNotReachHere" %}
+
+  ins_encode %{
+    // TODO
+    // implement proper trap call here
+    __ brk(999);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Partial Subtype Check
+// 
+// superklass array for an instance of the superklass.  Set a hidden
+// internal cache on a hit (cache is checked with exposed code in
+// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
+// encoding ALSO sets flags.
+
+instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
+%{
+  match(Set result (PartialSubtypeCheck sub super));
+  effect(KILL cr, KILL temp);
+
+  ins_cost(1100);  // slightly larger than the next version
+  format %{ "partialSubtypeCheck $result, $sub, $super" %}
+
+  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
+
+  opcode(0x1); // Force zero of result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
+%{
+  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
+  effect(KILL temp, KILL result);
+
+  ins_cost(1100);  // slightly larger than the next version
+  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
+
+  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
+
+  opcode(0x0); // Don't zero result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
+                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
+%{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
+
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      -1, $result$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, zr,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      icnt2, $result$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
+                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
+%{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+
+  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
+  ins_encode %{
+    __ string_equals($str1$$Register, $str2$$Register,
+                     $cnt$$Register, $result$$Register,
+                     $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
+                      iRegP_R10 tmp, rFlagsReg cr)
+%{
+  match(Set result (AryEq ary1 ary2));
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
+
+  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  ins_encode %{
+    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
+                          $result$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
+%{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL base, TEMP tmp);
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ClearArray $cnt, $base" %}
+
+  ins_encode %{
+    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// ============================================================================
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(thread_RegP dst)
+%{
+  match(Set dst (ThreadLocal));
+
+  ins_cost(0);
+
+  format %{ " -- \t// $dst=Thread::current(), empty" %}
+
+  size(0);
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser.  An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == RAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(iRegINoSp dst, iRegI src)
+// %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
+// %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch ( incI_iReg movI );
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint ( 0.dst == 1.dst );
+//   // construct a replacement instruction that sets
+//   // the destination to ( move's source register + one )
+//   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
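+// For illustration only (flag availability depends on the build type):
+//   -XX:-OptoPeephole        disables every peephole rule
+//   -XX:OptoPeepholeAt=0     enables only the rule numbered 0
+//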
+
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole
+// %{
+//   peepmatch (incI_iReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (decI_iReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addI_iReg_imm movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (incL_iReg movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (decL_iReg movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addL_iReg_imm movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addP_iReg_imm movP);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, iRegI src)
+// %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(iRegINoSp dst, memory mem)
+// %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// Local Variables:
+// mode: c++
+// End:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64Test.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <stdlib.h>
+
+#include "precompiled.hpp"
+#include "code/codeBlob.hpp"
+#include "asm/assembler.hpp"
+
+// hook routine called during JVM bootstrap to test AArch64 assembler
+
+extern "C" void entry(CodeBuffer*);
+
+void aarch64TestHook()
+{
+  BufferBlob* b = BufferBlob::create("aarch64Test", 500000);
+  CodeBuffer code(b);
+  MacroAssembler _masm(&code);
+  entry(&code);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64_ad.m4	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,367 @@
+dnl Copyright (c) 2014, Red Hat Inc. All rights reserved.
+dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+dnl
+dnl This code is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License version 2 only, as
+dnl published by the Free Software Foundation.
+dnl
+dnl This code is distributed in the hope that it will be useful, but WITHOUT
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+dnl FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl version 2 for more details (a copy is included in the LICENSE file that
+dnl accompanied this code).
+dnl
+dnl You should have received a copy of the GNU General Public License version
+dnl 2 along with this work; if not, write to the Free Software Foundation,
+dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+dnl
+dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+dnl or visit www.oracle.com if you need additional information or have any
+dnl questions.
+dnl
+dnl 
+dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
+dnl and shift patterns used in aarch64.ad.
+dnl
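+dnl A sketch of the regeneration step (assumed invocation; the build may
+dnl integrate this differently):
+dnl   m4 aarch64_ad.m4 > generated.ad
+dnl then splice the output between the BEGIN/END markers in aarch64.ad.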
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+dnl
+define(`ORL2I', `ifelse($1,I,orL2I)')
+dnl
+define(`BASE_SHIFT_INSN',
+`
+instruct $2$1_reg_$4_reg(iReg$1NoSp dst,
+                         iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst ($2$1 src1 ($4$1 src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "$3  $dst, $src1, $src2, $5 $src3" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::$5,
+              $src3$$constant & ifelse($1,I,0x1f,0x3f));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}')dnl
+define(`BASE_INVERTED_INSN',
+`
+instruct $2$1_reg_not_reg(iReg$1NoSp dst,
+                         iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_M1 m1,
+                         rFlagsReg cr) %{
+dnl This ifelse is because hotspot reassociates (xor (xor ..)..)
+dnl into this canonical form.
+  ifelse($2,Xor,
+    match(Set dst (Xor$1 m1 (Xor$1 src2 src1)));,
+    match(Set dst ($2$1 src1 (Xor$1 src2 m1)));)
+  ins_cost(INSN_COST);
+  format %{ "$3  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}')dnl
+define(`INVERTED_SHIFT_INSN',
+`
+instruct $2$1_reg_$4_not_reg(iReg$1NoSp dst,
+                         iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2,
+                         immI src3, imm$1_M1 src4, rFlagsReg cr) %{
+dnl This ifelse is because hotspot reassociates (xor (xor ..)..)
+dnl into this canonical form.
+  ifelse($2,Xor,
+    match(Set dst ($2$1 src4 (Xor$1($4$1 src2 src3) src1)));,
+    match(Set dst ($2$1 src1 (Xor$1($4$1 src2 src3) src4)));)
+  ins_cost(1.9 * INSN_COST);
+  format %{ "$3  $dst, $src1, $src2, $5 $src3" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::$5,
+              $src3$$constant & ifelse($1,I,0x1f,0x3f));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}')dnl
+define(`NOT_INSN',
+`instruct reg$1_not_reg(iReg$1NoSp dst,
+                         iReg$1`'ORL2I($1) src1, imm$1_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (Xor$1 src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "$2  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ $2(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}')dnl
+dnl
+define(`BOTH_SHIFT_INSNS',
+`BASE_SHIFT_INSN(I, $1, ifelse($2,andr,andw,$2w), $3, $4)
+BASE_SHIFT_INSN(L, $1, $2, $3, $4)')dnl
+dnl
+define(`BOTH_INVERTED_INSNS',
+`BASE_INVERTED_INSN(I, $1, $2w, $3, $4)
+BASE_INVERTED_INSN(L, $1, $2, $3, $4)')dnl
+dnl
+define(`BOTH_INVERTED_SHIFT_INSNS',
+`INVERTED_SHIFT_INSN(I, $1, $2w, $3, $4, ~0, int)
+INVERTED_SHIFT_INSN(L, $1, $2, $3, $4, ~0l, long)')dnl
+dnl
+define(`ALL_SHIFT_KINDS',
+`BOTH_SHIFT_INSNS($1, $2, URShift, LSR)
+BOTH_SHIFT_INSNS($1, $2, RShift, ASR)
+BOTH_SHIFT_INSNS($1, $2, LShift, LSL)')dnl
+dnl
+define(`ALL_INVERTED_SHIFT_KINDS',
+`BOTH_INVERTED_SHIFT_INSNS($1, $2, URShift, LSR)
+BOTH_INVERTED_SHIFT_INSNS($1, $2, RShift, ASR)
+BOTH_INVERTED_SHIFT_INSNS($1, $2, LShift, LSL)')dnl
+dnl
+NOT_INSN(L, eon)
+NOT_INSN(I, eonw)
+BOTH_INVERTED_INSNS(And, bic)
+BOTH_INVERTED_INSNS(Or, orn)
+BOTH_INVERTED_INSNS(Xor, eon)
+ALL_INVERTED_SHIFT_KINDS(And, bic)
+ALL_INVERTED_SHIFT_KINDS(Xor, eon)
+ALL_INVERTED_SHIFT_KINDS(Or, orn)
+ALL_SHIFT_KINDS(And, andr)
+ALL_SHIFT_KINDS(Xor, eor)
+ALL_SHIFT_KINDS(Or, orr)
+ALL_SHIFT_KINDS(Add, add)
+ALL_SHIFT_KINDS(Sub, sub)
+dnl
+dnl EXTEND mode, rshift_op, src, lshift_count, rshift_count
+define(`EXTEND', `($2$1 (LShift$1 $3 $4) $5)')
+define(`BFM_INSN',`
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct $4$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst EXTEND($1, $3, src, lshift_count, rshift_count));
+  // Make sure we are not going to exceed what $4 can do.
+  predicate((unsigned int)n->in(2)->get_int() <= $2
+            && (unsigned int)n->in(1)->in(2)->get_int() <= $2);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "$4  $dst, $src, $rshift_count - $lshift_count, #$2 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = $2 - lshift;
+    int r = (rshift - lshift) & $2;
+    __ $4(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}')
+BFM_INSN(L, 63, RShift, sbfm)
+BFM_INSN(I, 31, RShift, sbfmw)
+BFM_INSN(L, 63, URShift, ubfm)
+BFM_INSN(I, 31, URShift, ubfmw)
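+dnl For example, i2b reaches the matcher as (RShiftI (LShiftI src 24) 24),
+dnl so sbfmwI above ends up with r = (24 - 24) & 31 = 0 and s = 31 - 24 = 7,
+dnl i.e. a plain sign-extend-byte.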
+dnl
+// Bitfield extract with shift & mask
+define(`BFX_INSN',
+`instruct $3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI rshift, imm$1_bitmask mask)
+%{
+  match(Set dst (And$1 ($2$1 src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "$3 $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ $3(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}')
+BFX_INSN(I,URShift,ubfxw)
+BFX_INSN(L,URShift,ubfx)
+
+// We can use ubfx when extending an And with a mask, provided we know the
+// mask is positive.  We know that because immI_bitmask guarantees it.
+instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
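+// For example (illustrative values only): rshift == 8 and mask == 0xff give
+// width = exact_log2(0x100) = 8, i.e. ubfx $dst, $src, #8, #8.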
+
+// Rotations
+
+define(`EXTRACT_INSN',
+`instruct extr$3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 (LShift$1 src1 lshift) (URShift$1 src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & $2));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & $2);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+')dnl
+EXTRACT_INSN(L, 63, Or, extr)
+EXTRACT_INSN(I, 31, Or, extrw)
+EXTRACT_INSN(L, 63, Add, extr)
+EXTRACT_INSN(I, 31, Add, extrw)
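+dnl The predicate above only accepts shift pairs summing to 0 modulo the word
+dnl size, e.g. lshift == 48 with rshift == 16 for the 64-bit forms, which is
+dnl exactly the bit window a single extr/extrw instruction can extract.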
+define(`ROL_EXPAND', `
+// $2 expander
+
+instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "$2    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ $3(as_Register($dst$$reg), as_Register($src$$reg),
+            rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}')dnl
+define(`ROR_EXPAND', `
+// $2 expander
+
+instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "$2    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ $3(as_Register($dst$$reg), as_Register($src$$reg),
+            as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}')dnl
+define(ROL_INSN, `
+instruct $3$1_rReg_Var_C$2(iRegLNoSp dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr)
+%{
+  match(Set dst (Or$1 (LShift$1 src shift) (URShift$1 src (SubI c$2 shift))));
+
+  expand %{
+    $3L_rReg(dst, src, shift, cr);
+  %}
+%}')dnl
+define(ROR_INSN, `
+instruct $3$1_rReg_Var_C$2(iRegLNoSp dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr)
+%{
+  match(Set dst (Or$1 (URShift$1 src shift) (LShift$1 src (SubI c$2 shift))));
+
+  expand %{
+    $3L_rReg(dst, src, shift, cr);
+  %}
+%}')dnl
+ROL_EXPAND(L, rol, rorv)
+ROL_EXPAND(I, rol, rorvw)
+ROL_INSN(L, _64, rol)
+ROL_INSN(L, 0, rol)
+ROL_INSN(I, _32, rol)
+ROL_INSN(I, 0, rol)
+ROR_EXPAND(L, ror, rorv)
+ROR_EXPAND(I, ror, rorvw)
+ROR_INSN(L, _64, ror)
+ROR_INSN(L, 0, ror)
+ROR_INSN(I, _32, ror)
+ROR_INSN(I, 0, ror)
+
+// Add/subtract (extended)
+dnl ADD_SUB_EXTENDED(mode, size, add node, shift node, insn, shift type, wordsize)
+define(`ADD_SUB_CONV', `
+instruct $3Ext$1(iReg$2NoSp dst, iReg$2`'ORL2I($2) src1, iReg$1`'ORL2I($1) src2, rFlagsReg cr)
+%{
+  match(Set dst ($3$2 src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "$4  $dst, $src1, $5 $src2" %}
+
+   ins_encode %{
+     __ $4(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$5);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')dnl
+ADD_SUB_CONV(I,L,Add,add,sxtw);
+ADD_SUB_CONV(I,L,Sub,sub,sxtw);
+dnl
+define(`ADD_SUB_EXTENDED', `
+instruct $3Ext$1_$6(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI_`'eval($7-$2) lshift, immI_`'eval($7-$2) rshift, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 src1 EXTEND($1, $4, src2, lshift, rshift)));
+  ins_cost(INSN_COST);
+  format %{ "$5  $dst, $src1, $6 $src2" %}
+
+   ins_encode %{
+     __ $5(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$6);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')
+ADD_SUB_EXTENDED(I,16,Add,RShift,add,sxth,32)
+ADD_SUB_EXTENDED(I,8,Add,RShift,add,sxtb,32)
+ADD_SUB_EXTENDED(I,8,Add,URShift,add,uxtb,32)
+ADD_SUB_EXTENDED(L,16,Add,RShift,add,sxth,64)
+ADD_SUB_EXTENDED(L,32,Add,RShift,add,sxtw,64)
+ADD_SUB_EXTENDED(L,8,Add,RShift,add,sxtb,64)
+ADD_SUB_EXTENDED(L,8,Add,URShift,add,uxtb,64)
+dnl
+dnl ADD_SUB_ZERO_EXTEND(mode, size, add node, insn, shift type)
+define(`ADD_SUB_ZERO_EXTEND', `
+instruct $3Ext$1_$5_and(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_$2 mask, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 src1 (And$1 src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "$4  $dst, $src1, $src2, $5" %}
+
+   ins_encode %{
+     __ $4(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$5);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')
+dnl
+ADD_SUB_ZERO_EXTEND(I,255,Add,addw,uxtb)
+ADD_SUB_ZERO_EXTEND(I,65535,Add,addw,uxth)
+ADD_SUB_ZERO_EXTEND(L,255,Add,add,uxtb)
+ADD_SUB_ZERO_EXTEND(L,65535,Add,add,uxth)
+ADD_SUB_ZERO_EXTEND(L,4294967295,Add,add,uxtw)
+dnl
+ADD_SUB_ZERO_EXTEND(I,255,Sub,subw,uxtb)
+ADD_SUB_ZERO_EXTEND(I,65535,Sub,subw,uxth)
+ADD_SUB_ZERO_EXTEND(L,255,Sub,sub,uxtb)
+ADD_SUB_ZERO_EXTEND(L,65535,Sub,sub,uxth)
+ADD_SUB_ZERO_EXTEND(L,4294967295,Sub,sub,uxtw)
+
+// END This section of the file is automatically generated. Do not edit --------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64_call.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifdef BUILTIN_SIM
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "../../../../../../simulator/cpustate.hpp"
+#include "../../../../../../simulator/simulator.hpp"
+
+/*
+ * a routine to initialise and enter ARM simulator execution when
+ * calling into ARM code from x86 code.
+ *
+ * we maintain a simulator per-thread and provide it with 8 Mb of
+ * stack space
+ */
+#define SIM_STACK_SIZE (1024 * 1024) // in units of u_int64_t
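+// (1024 * 1024 slots of u_int64_t = 8 Mb, matching the comment above)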
+
+extern "C" u_int64_t get_alt_stack()
+{
+  return AArch64Simulator::altStack();
+}
+
+extern "C" void setup_arm_sim(void *sp, u_int64_t calltype)
+{
+  // n.b. this function runs on the simulator stack so as to avoid
+  // simulator frames appearing in between VM x86 and ARM frames. note
+  // that argument sp points to the old (VM) stack from which the
+  // call into the sim was made. The stack switch and entry into this
+  // routine is handled by x86 prolog code planted in the head of the
+  // ARM code buffer which the sim is about to start executing (see
+  // aarch64_linkage.S).
+  //
+  // The first ARM instruction in the buffer is identified by fnptr
+  // stored at the top of the old stack. x86 register contents precede
+  // fnptr. preceding that are the fp and return address of the VM
+  // caller into ARM code. any extra, non-register arguments passed to
+  // the linkage routine precede the fp (this is as per any normal x86
+  // call with extra args).
+  //
+  // note that the sim creates Java frames on the Java stack just
+  // above sp (i.e. directly above fnptr). it sets the sim FP register
+  // to the pushed fp for the caller effectively eliding the register
+  // data saved by the linkage routine.
+  //
+  // x86 register call arguments are loaded from the stack into ARM
+  // call registers. if extra arguments occur preceding the x86
+  // caller's fp then they are copied either into extra ARM registers
+  // (ARM has 8 rather than 6 gp call registers) or up the stack
+  // beyond the saved x86 registers so that they immediately precede
+  // the ARM frame where the ARM calling convention expects them to
+  // be.
+  //
+  // n.b. the number of register/stack values passed to the ARM code
+  // is determined by calltype
+  //
+  // +--------+
+  // | fnptr  |  <--- argument sp points here
+  // +--------+  |
+  // | rax    |  | return slot if we need to return a value
+  // +--------+  |
+  // | rdi    |  increasing
+  // +--------+  address
+  // | rsi    |  |
+  // +--------+  V
+  // | rdx    |
+  // +--------+
+  // | rcx    |
+  // +--------+
+  // | r8     |
+  // +--------+
+  // | r9     |
+  // +--------+
+  // | xmm0   |
+  // +--------+
+  // | xmm1   |
+  // +--------+
+  // | xmm2   |
+  // +--------+
+  // | xmm3   |
+  // +--------+
+  // | xmm4   |
+  // +--------+
+  // | xmm5   |
+  // +--------+
+  // | xmm6   |
+  // +--------+
+  // | xmm7   |
+  // +--------+
+  // | fp     |
+  // +--------+
+  // | caller |
+  // | ret ip |
+  // +--------+
+  // | arg0   | <-- any extra call args start here
+  // +--------+     offset = 18 * wordSize
+  // | . . .  |     (i.e. 1 * calladdr + 1 * rax  + 6 * gp call regs
+  //                      + 8 * fp call regs + 2 * frame words)
+  //
+  // we use a unique sim/stack per thread
+  const int cursor2_offset = 18;
+  const int fp_offset = 16;
+  u_int64_t *cursor = (u_int64_t *)sp;
+  u_int64_t *cursor2 = ((u_int64_t *)sp) + cursor2_offset;
+  u_int64_t *fp = ((u_int64_t *)sp) + fp_offset;
+  int gp_arg_count = calltype & 0xf;
+  int fp_arg_count = (calltype >> 4) & 0xf;
+  int return_type = (calltype >> 8) & 0x3;
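+  // e.g. (illustrative value only) calltype == 0x132 decodes to 2 gp args,
+  // 3 fp args and return-type code 1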
+  AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+  // save previous cpu state in case this is a recursive entry
+  CPUState saveState = sim->getCPUState();
+  // set up initial sim pc, sp and fp registers
+  sim->init(*cursor++, (u_int64_t)sp, (u_int64_t)fp);
+  u_int64_t *return_slot = cursor++;
+
+  // if we need to pass the sim extra args on the stack then bump
+  // the stack pointer now
+  u_int64_t *cursor3 = (u_int64_t *)sim->getCPUState().xreg(SP, 1);
+  if (gp_arg_count > 8) {
+    cursor3 -= gp_arg_count - 8;
+  }
+  if (fp_arg_count > 8) {
+    cursor3 -= fp_arg_count - 8;
+  }
+  sim->getCPUState().xreg(SP, 1) = (u_int64_t)(cursor3++);
+
+  for (int i = 0; i < gp_arg_count; i++) {
+    if (i < 6) {
+      // copy saved register to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor++;
+    } else if (i < 8) {
+      // copy extra int arg to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor2++;
+    } else {
+      // copy extra fp arg to sim stack
+      *cursor3++ = *cursor2++;
+    }
+  }
+  for (int i = 0; i < fp_arg_count; i++) {
+    if (i < 8) {
+      // copy saved register to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor++;
+    } else {
+      // copy extra arg to sim stack
+      *cursor3++ = *cursor2++;
+    }
+  }
+  AArch64Simulator::status_t return_status = sim->run();
+  if (return_status != AArch64Simulator::STATUS_RETURN){
+    sim->simPrint0();
+    fatal("invalid status returned from simulator.run()\n");
+  }
+  switch (return_type) {
+  case MacroAssembler::ret_type_void:
+  default:
+    break;
+  case MacroAssembler::ret_type_integral:
+  // this overwrites the saved r0
+    *return_slot = sim->getCPUState().xreg(R0, 0);
+    break;
+  case MacroAssembler::ret_type_float:
+    *(float *)return_slot = sim->getCPUState().sreg(V0);
+    break;
+  case MacroAssembler::ret_type_double:
+    *(double *)return_slot = sim->getCPUState().dreg(V0);
+    break;
+  }
+  // restore incoming cpu state
+  sim->getCPUState() = saveState;
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64_linkage.S	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,163 @@
+# 
+# Copyright (c) 2012, Red Hat. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Routines used to enable x86 VM C++ code to invoke JIT-compiled ARM code
+# -- either Java methods or generated stub -- and to allow JIT-compiled
+# ARM code to invoke x86 VM C++ code
+#
+# the code for aarch64_stub_prolog below can be copied into the start
+# of the ARM code buffer and patched with a link to the
+# C++ routine which starts execution on the simulator. the ARM
+# code can be generated immediately following the copied code.
+
+#ifdef BUILTIN_SIM
+
+	.data
+        .globl setup_arm_sim, 
+	.type  setup_arm_sim,@function
+        .globl get_alt_stack, 
+	.type  get_alt_stack,@function
+        .globl aarch64_stub_prolog
+        .p2align  4
+aarch64_stub_prolog:
+	// entry point
+4:	lea 1f(%rip), %r11
+	mov (%r11), %r10
+	mov (%r10), %r10
+	jmp *%r10
+	.p2align 4
+1:
+	.set entry_offset, . - 1b
+	.quad aarch64_prolog_ptr
+	// 64 bit int used to identify called fn arg/return types
+	.set calltype_offset, . - 1b
+	.quad 0
+	// arm JIT code follows the stub
+	.set arm_code_offset, . - 1b
+	.size aarch64_stub_prolog, .-aarch64_stub_prolog
+aarch64_stub_prolog_end:
+
+	.text
+aarch64_prolog_ptr:
+	.quad aarch64_prolog
+
+        .globl aarch64_prolog
+aarch64_prolog:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register 6
+	// save all registers used to pass args
+	sub $8, %rsp
+	movd %xmm7, (%rsp)
+	sub $8, %rsp
+	movd %xmm6, (%rsp)
+	sub $8, %rsp
+	movd %xmm5, (%rsp)
+	sub $8, %rsp
+	movd %xmm4, (%rsp)
+	sub $8, %rsp
+	movd %xmm3, (%rsp)
+	sub $8, %rsp
+	movd %xmm2, (%rsp)
+	sub $8, %rsp
+	movd %xmm1, (%rsp)
+	sub $8, %rsp
+	movd %xmm0, (%rsp)
+	push %r9
+	push %r8
+	push %rcx
+	push %rdx
+	push %rsi
+	push %rdi
+	// save rax -- this stack slot will be rewritten with a
+	// return value if needed
+	push %rax
+	// temporarily save r11 while we find the other stack
+	push %r11
+	// retrieve alt stack
+	call get_alt_stack@PLT
+	pop %r11
+	// push start of arm code
+	lea (arm_code_offset)(%r11), %rsi
+	push %rsi
+	// load call type code in arg reg 1
+	mov (calltype_offset)(%r11), %rsi
+	// load current stack pointer in arg reg 0
+	mov %rsp, %rdi
+	// switch to alt stack
+	mov %rax, %rsp
+	// save previous stack pointer on new stack
+	push %rdi
+	// 16-align the new stack pointer
+	push %rdi
+	// call sim setup routine
+	call setup_arm_sim@PLT
+	// switch back to old stack
+	pop %rsp
+	// pop start of arm code
+	pop %rdi
+	// pop rax -- either restores old value or installs return value
+	pop %rax
+	// pop arg registers
+	pop %rdi
+	pop %rsi
+	pop %rdx
+	pop %rcx
+	pop %r8
+	pop %r9
+	movd (%rsp), %xmm0
+	add $8, %rsp
+	movd (%rsp), %xmm1
+	add $8, %rsp
+	movd (%rsp), %xmm2
+	add $8, %rsp
+	movd (%rsp), %xmm3
+	add $8, %rsp
+	movd (%rsp), %xmm4
+	add $8, %rsp
+	movd (%rsp), %xmm5
+	add $8, %rsp
+	movd (%rsp), %xmm6
+	add $8, %rsp
+	movd (%rsp), %xmm7
+	add $8, %rsp
+	leave
+	.cfi_def_cfa 7, 8
+	ret
+	.cfi_endproc
+
+
+        .p2align  4
+get_pc:
+	// get return pc in rdi and then push it back
+	pop %rdi
+	push %rdi
+	ret
+
+	.p2align 4
+	.long
+	.globl aarch64_stub_prolog_size
+	.type  aarch64_stub_prolog_size,@function
+aarch64_stub_prolog_size:
+	leaq  aarch64_stub_prolog_end - aarch64_stub_prolog, %rax
+	ret
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/ad_encode.m4	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,73 @@
+define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4,
+               $5, $6, $7, $8);dnl
+
+  %}')dnl
+define(access, `
+    $3Register $1_reg = as_$3Register($$1$$reg);
+    $4choose(MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl
+define(load,`
+  enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl
+access(dst,$2,$3)')dnl
+load(iRegI,ldrsbw)
+load(iRegI,ldrsb)
+load(iRegI,ldrb)
+load(iRegL,ldrb)
+load(iRegI,ldrshw)
+load(iRegI,ldrsh)
+load(iRegI,ldrh)
+load(iRegL,ldrh)
+load(iRegI,ldrw)
+load(iRegL,ldrw)
+load(iRegL,ldrsw)
+load(iRegL,ldr)
+load(vRegF,ldrs,Float)
+load(vRegD,ldrd,Float)
+define(STORE,`
+  enc_class aarch64_enc_$2($1 src, memory mem) %{dnl
+access(src,$2,$3,$4)')dnl
+define(STORE0,`
+  enc_class aarch64_enc_$2`'0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    choose(_masm,zr,$2,$mem->opcode(),
+        as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl
+STORE(iRegI,strb)
+STORE0(iRegI,strb)
+STORE(iRegI,strh)
+STORE0(iRegI,strh)
+STORE(iRegI,strw)
+STORE0(iRegI,strw)
+STORE(iRegL,str,,
+`// we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+      MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    ')
+STORE0(iRegL,str)
+STORE(vRegF,strs,Float)
+STORE(vRegD,strd,Float)
+
+  enc_class aarch64_enc_strw_immn(immN src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    address con = (address)$src$$constant;
+    // need to do this the hard way until we can manage relocs
+    // for 32 bit constants
+    __ movoop(rscratch2, (jobject)con);
+    if (con) __ encode_heap_oop_not_null(rscratch2);
+    choose(_masm,rscratch2,strw,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)
+
+  enc_class aarch64_enc_strw_immnk(immN src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    address con = (address)$src$$constant;
+    // need to do this the hard way until we can manage relocs
+    // for 32 bit constants
+    __ movoop(rscratch2, (jobject)con);
+    __ encode_klass_not_null(rscratch2);
+    choose(_masm,rscratch2,strw,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,5770 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights
+ * reserved.  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE
+ * HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+
+#ifndef PRODUCT
+const unsigned long Assembler::asm_bp = 0x00007fffee09ac88;
+#endif
+
+#include "compiler/disassembler.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+// for the moment we reuse the logical/floating point immediate encode
+// and decode functions provided by the simulator. when we move to
+// real hardware we will need to pull that code into here
+
+#include "immediate_aarch64.hpp"
+
+// #include "gc_interface/collectedHeap.inline.hpp"
+// #include "interpreter/interpreter.hpp"
+// #include "memory/cardTableModRefBS.hpp"
+// #include "prims/methodHandles.hpp"
+// #include "runtime/biasedLocking.hpp"
+// #include "runtime/interfaceSupport.hpp"
+// #include "runtime/objectMonitor.hpp"
+// #include "runtime/os.hpp"
+// #include "runtime/sharedRuntime.hpp"
+// #include "runtime/stubRoutines.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif
+
+#include "opto/compile.hpp"
+
+extern "C" void entry(CodeBuffer *cb);
+
+#define __ _masm.
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+static float unpack(unsigned value);
+
+void entry(CodeBuffer *cb) {
+
+  // {
+  //   for (int i = 0; i < 256; i+=16)
+  //     {
+  //    printf("\"%20.20g\", ", unpack(i));
+  //    printf("\"%20.20g\", ", unpack(i+1));
+  //     }
+  //   printf("\n");
+  // }
+
+  Assembler _masm(cb);
+  address entry = __ pc();
+
+  // Smoke test for assembler
+
+#ifdef ASSERT
+// BEGIN  Generated code -- do not edit
+// Generated by aarch64-asmtest.py
+    Label back, forth;
+    __ bind(back);
+
+// ArithOp
+    __ add(r19, r22, r7, Assembler::LSL, 28);          //       add     x19, x22, x7, LSL #28
+    __ sub(r16, r11, r10, Assembler::LSR, 13);         //       sub     x16, x11, x10, LSR #13
+    __ adds(r27, r13, r28, Assembler::ASR, 2);         //       adds    x27, x13, x28, ASR #2
+    __ subs(r20, r28, r26, Assembler::ASR, 41);        //       subs    x20, x28, x26, ASR #41
+    __ addw(r8, r19, r19, Assembler::ASR, 19);         //       add     w8, w19, w19, ASR #19
+    __ subw(r4, r9, r10, Assembler::LSL, 14);          //       sub     w4, w9, w10, LSL #14
+    __ addsw(r8, r11, r30, Assembler::LSL, 13);        //       adds    w8, w11, w30, LSL #13
+    __ subsw(r0, r25, r19, Assembler::LSL, 9);         //       subs    w0, w25, w19, LSL #9
+    __ andr(r20, r0, r21, Assembler::LSL, 19);         //       and     x20, x0, x21, LSL #19
+    __ orr(r21, r14, r20, Assembler::LSL, 17);         //       orr     x21, x14, x20, LSL #17
+    __ eor(r25, r28, r1, Assembler::LSL, 51);          //       eor     x25, x28, x1, LSL #51
+    __ ands(r10, r27, r11, Assembler::ASR, 15);        //       ands    x10, x27, x11, ASR #15
+    __ andw(r25, r5, r12, Assembler::ASR, 23);         //       and     w25, w5, w12, ASR #23
+    __ orrw(r18, r14, r10, Assembler::LSR, 4);         //       orr     w18, w14, w10, LSR #4
+    __ eorw(r4, r21, r5, Assembler::ASR, 22);          //       eor     w4, w21, w5, ASR #22
+    __ andsw(r21, r0, r5, Assembler::ASR, 29);         //       ands    w21, w0, w5, ASR #29
+    __ bic(r26, r30, r6, Assembler::ASR, 37);          //       bic     x26, x30, x6, ASR #37
+    __ orn(r3, r1, r13, Assembler::LSR, 29);           //       orn     x3, x1, x13, LSR #29
+    __ eon(r0, r28, r9, Assembler::LSL, 47);           //       eon     x0, x28, x9, LSL #47
+    __ bics(r29, r5, r28, Assembler::LSL, 46);         //       bics    x29, x5, x28, LSL #46
+    __ bicw(r9, r18, r7, Assembler::LSR, 20);          //       bic     w9, w18, w7, LSR #20
+    __ ornw(r26, r13, r25, Assembler::ASR, 24);        //       orn     w26, w13, w25, ASR #24
+    __ eonw(r25, r4, r19, Assembler::LSL, 6);          //       eon     w25, w4, w19, LSL #6
+    __ bicsw(r5, r26, r4, Assembler::LSR, 24);         //       bics    w5, w26, w4, LSR #24
+
+// AddSubImmOp
+    __ addw(r7, r19, 340u);                            //       add     w7, w19, #340
+    __ addsw(r8, r0, 401u);                            //       adds    w8, w0, #401
+    __ subw(r29, r20, 163u);                           //       sub     w29, w20, #163
+    __ subsw(r8, r23, 759u);                           //       subs    w8, w23, #759
+    __ add(r1, r12, 523u);                             //       add     x1, x12, #523
+    __ adds(r2, r11, 426u);                            //       adds    x2, x11, #426
+    __ sub(r14, r29, 716u);                            //       sub     x14, x29, #716
+    __ subs(r11, r5, 582u);                            //       subs    x11, x5, #582
+
+// LogicalImmOp
+    __ andw(r23, r22, 32768ul);                        //       and     w23, w22, #0x8000
+    __ orrw(r4, r10, 4042322160ul);                    //       orr     w4, w10, #0xf0f0f0f0
+    __ eorw(r0, r24, 4042322160ul);                    //       eor     w0, w24, #0xf0f0f0f0
+    __ andsw(r19, r29, 2139127680ul);                  //       ands    w19, w29, #0x7f807f80
+    __ andr(r5, r10, 4503599627354112ul);              //       and     x5, x10, #0xfffffffffc000
+    __ orr(r12, r30, 18445618178097414144ul);          //       orr     x12, x30, #0xfffc0000fffc0000
+    __ eor(r30, r5, 262128ul);                         //       eor     x30, x5, #0x3fff0
+    __ ands(r26, r23, 4194300ul);                      //       ands    x26, x23, #0x3ffffc
+
+// AbsOp
+    __ b(__ pc());                                     //       b       .
+    __ b(back);                                        //       b       back
+    __ b(forth);                                       //       b       forth
+    __ bl(__ pc());                                    //       bl      .
+    __ bl(back);                                       //       bl      back
+    __ bl(forth);                                      //       bl      forth
+
+// RegAndAbsOp
+    __ cbzw(r12, __ pc());                             //       cbz     w12, .
+    __ cbzw(r12, back);                                //       cbz     w12, back
+    __ cbzw(r12, forth);                               //       cbz     w12, forth
+    __ cbnzw(r20, __ pc());                            //       cbnz    w20, .
+    __ cbnzw(r20, back);                               //       cbnz    w20, back
+    __ cbnzw(r20, forth);                              //       cbnz    w20, forth
+    __ cbz(r12, __ pc());                              //       cbz     x12, .
+    __ cbz(r12, back);                                 //       cbz     x12, back
+    __ cbz(r12, forth);                                //       cbz     x12, forth
+    __ cbnz(r24, __ pc());                             //       cbnz    x24, .
+    __ cbnz(r24, back);                                //       cbnz    x24, back
+    __ cbnz(r24, forth);                               //       cbnz    x24, forth
+    __ adr(r6, __ pc());                               //       adr     x6, .
+    __ adr(r6, back);                                  //       adr     x6, back
+    __ adr(r6, forth);                                 //       adr     x6, forth
+    __ _adrp(r21, __ pc());                             //      adrp    x21, .
+
+// RegImmAbsOp
+    __ tbz(r1, 1, __ pc());                            //       tbz     x1, #1, .
+    __ tbz(r1, 1, back);                               //       tbz     x1, #1, back
+    __ tbz(r1, 1, forth);                              //       tbz     x1, #1, forth
+    __ tbnz(r8, 9, __ pc());                           //       tbnz    x8, #9, .
+    __ tbnz(r8, 9, back);                              //       tbnz    x8, #9, back
+    __ tbnz(r8, 9, forth);                             //       tbnz    x8, #9, forth
+
+// MoveWideImmOp
+    __ movnw(r12, 23175, 0);                           //       movn    w12, #23175, lsl 0
+    __ movzw(r11, 20476, 16);                          //       movz    w11, #20476, lsl 16
+    __ movkw(r21, 3716, 0);                            //       movk    w21, #3716, lsl 0
+    __ movn(r29, 28661, 48);                           //       movn    x29, #28661, lsl 48
+    __ movz(r3, 6927, 0);                              //       movz    x3, #6927, lsl 0
+    __ movk(r22, 9828, 16);                            //       movk    x22, #9828, lsl 16
+
+// BitfieldOp
+    __ sbfm(r12, r8, 6, 22);                           //       sbfm    x12, x8, #6, #22
+    __ bfmw(r19, r25, 25, 19);                         //       bfm     w19, w25, #25, #19
+    __ ubfmw(r9, r12, 29, 15);                         //       ubfm    w9, w12, #29, #15
+    __ sbfm(r28, r25, 16, 16);                         //       sbfm    x28, x25, #16, #16
+    __ bfm(r12, r5, 4, 25);                            //       bfm     x12, x5, #4, #25
+    __ ubfm(r0, r10, 6, 8);                            //       ubfm    x0, x10, #6, #8
+
+// ExtractOp
+    __ extrw(r4, r13, r26, 24);                        //       extr    w4, w13, w26, #24
+    __ extr(r23, r30, r24, 31);                        //       extr    x23, x30, x24, #31
+
+// CondBranchOp
+    __ br(Assembler::EQ, __ pc());                     //       b.EQ    .
+    __ br(Assembler::EQ, back);                        //       b.EQ    back
+    __ br(Assembler::EQ, forth);                       //       b.EQ    forth
+    __ br(Assembler::NE, __ pc());                     //       b.NE    .
+    __ br(Assembler::NE, back);                        //       b.NE    back
+    __ br(Assembler::NE, forth);                       //       b.NE    forth
+    __ br(Assembler::HS, __ pc());                     //       b.HS    .
+    __ br(Assembler::HS, back);                        //       b.HS    back
+    __ br(Assembler::HS, forth);                       //       b.HS    forth
+    __ br(Assembler::CS, __ pc());                     //       b.CS    .
+    __ br(Assembler::CS, back);                        //       b.CS    back
+    __ br(Assembler::CS, forth);                       //       b.CS    forth
+    __ br(Assembler::LO, __ pc());                     //       b.LO    .
+    __ br(Assembler::LO, back);                        //       b.LO    back
+    __ br(Assembler::LO, forth);                       //       b.LO    forth
+    __ br(Assembler::CC, __ pc());                     //       b.CC    .
+    __ br(Assembler::CC, back);                        //       b.CC    back
+    __ br(Assembler::CC, forth);                       //       b.CC    forth
+    __ br(Assembler::MI, __ pc());                     //       b.MI    .
+    __ br(Assembler::MI, back);                        //       b.MI    back
+    __ br(Assembler::MI, forth);                       //       b.MI    forth
+    __ br(Assembler::PL, __ pc());                     //       b.PL    .
+    __ br(Assembler::PL, back);                        //       b.PL    back
+    __ br(Assembler::PL, forth);                       //       b.PL    forth
+    __ br(Assembler::VS, __ pc());                     //       b.VS    .
+    __ br(Assembler::VS, back);                        //       b.VS    back
+    __ br(Assembler::VS, forth);                       //       b.VS    forth
+    __ br(Assembler::VC, __ pc());                     //       b.VC    .
+    __ br(Assembler::VC, back);                        //       b.VC    back
+    __ br(Assembler::VC, forth);                       //       b.VC    forth
+    __ br(Assembler::HI, __ pc());                     //       b.HI    .
+    __ br(Assembler::HI, back);                        //       b.HI    back
+    __ br(Assembler::HI, forth);                       //       b.HI    forth
+    __ br(Assembler::LS, __ pc());                     //       b.LS    .
+    __ br(Assembler::LS, back);                        //       b.LS    back
+    __ br(Assembler::LS, forth);                       //       b.LS    forth
+    __ br(Assembler::GE, __ pc());                     //       b.GE    .
+    __ br(Assembler::GE, back);                        //       b.GE    back
+    __ br(Assembler::GE, forth);                       //       b.GE    forth
+    __ br(Assembler::LT, __ pc());                     //       b.LT    .
+    __ br(Assembler::LT, back);                        //       b.LT    back
+    __ br(Assembler::LT, forth);                       //       b.LT    forth
+    __ br(Assembler::GT, __ pc());                     //       b.GT    .
+    __ br(Assembler::GT, back);                        //       b.GT    back
+    __ br(Assembler::GT, forth);                       //       b.GT    forth
+    __ br(Assembler::LE, __ pc());                     //       b.LE    .
+    __ br(Assembler::LE, back);                        //       b.LE    back
+    __ br(Assembler::LE, forth);                       //       b.LE    forth
+    __ br(Assembler::AL, __ pc());                     //       b.AL    .
+    __ br(Assembler::AL, back);                        //       b.AL    back
+    __ br(Assembler::AL, forth);                       //       b.AL    forth
+    __ br(Assembler::NV, __ pc());                     //       b.NV    .
+    __ br(Assembler::NV, back);                        //       b.NV    back
+    __ br(Assembler::NV, forth);                       //       b.NV    forth
+
+// ImmOp
+    __ svc(12729);                                     //       svc     #12729
+    __ hvc(6788);                                      //       hvc     #6788
+    __ smc(1535);                                      //       smc     #1535
+    __ brk(16766);                                     //       brk     #16766
+    __ hlt(9753);                                      //       hlt     #9753
+
+// Op
+    __ nop();                                          //       nop
+    __ eret();                                         //       eret
+    __ drps();                                         //       drps
+    __ isb();                                          //       isb
+
+// SystemOp
+    __ dsb(Assembler::SY);                             //       dsb     SY
+    __ dmb(Assembler::ISHST);                          //       dmb     ISHST
+
+// OneRegOp
+    __ br(r2);                                         //       br      x2
+    __ blr(r5);                                        //       blr     x5
+
+// LoadStoreExclusiveOp
+    __ stxr(r20, r21, r2);                             //       stxr    w20, x21, [x2]
+    __ stlxr(r5, r29, r7);                             //       stlxr   w5, x29, [x7]
+    __ ldxr(r5, r16);                                  //       ldxr    x5, [x16]
+    __ ldaxr(r27, r29);                                //       ldaxr   x27, [x29]
+    __ stlr(r0, r29);                                  //       stlr    x0, [x29]
+    __ ldar(r21, r28);                                 //       ldar    x21, [x28]
+
+// LoadStoreExclusiveOp
+    __ stxrw(r21, r24, r7);                            //       stxr    w21, w24, [x7]
+    __ stlxrw(r21, r26, r28);                          //       stlxr   w21, w26, [x28]
+    __ ldxrw(r21, r6);                                 //       ldxr    w21, [x6]
+    __ ldaxrw(r15, r30);                               //       ldaxr   w15, [x30]
+    __ stlrw(r19, r3);                                 //       stlr    w19, [x3]
+    __ ldarw(r22, r2);                                 //       ldar    w22, [x2]
+
+// LoadStoreExclusiveOp
+    __ stxrh(r18, r15, r0);                            //       stxrh   w18, w15, [x0]
+    __ stlxrh(r11, r5, r28);                           //       stlxrh  w11, w5, [x28]
+    __ ldxrh(r29, r6);                                 //       ldxrh   w29, [x6]
+    __ ldaxrh(r18, r7);                                //       ldaxrh  w18, [x7]
+    __ stlrh(r25, r28);                                //       stlrh   w25, [x28]
+    __ ldarh(r2, r19);                                 //       ldarh   w2, [x19]
+
+// LoadStoreExclusiveOp
+    __ stxrb(r10, r30, r1);                            //       stxrb   w10, w30, [x1]
+    __ stlxrb(r20, r21, r22);                          //       stlxrb  w20, w21, [x22]
+    __ ldxrb(r25, r2);                                 //       ldxrb   w25, [x2]
+    __ ldaxrb(r24, r5);                                //       ldaxrb  w24, [x5]
+    __ stlrb(r16, r3);                                 //       stlrb   w16, [x3]
+    __ ldarb(r22, r29);                                //       ldarb   w22, [x29]
+
+// LoadStoreExclusiveOp
+    __ ldxp(r8, r2, r19);                              //       ldxp    x8, x2, [x19]
+    __ ldaxp(r7, r19, r14);                            //       ldaxp   x7, x19, [x14]
+    __ stxp(r8, r27, r28, r5);                         //       stxp    w8, x27, x28, [x5]
+    __ stlxp(r5, r8, r14, r6);                         //       stlxp   w5, x8, x14, [x6]
+
+// LoadStoreExclusiveOp
+    __ ldxpw(r25, r4, r22);                            //       ldxp    w25, w4, [x22]
+    __ ldaxpw(r13, r14, r15);                          //       ldaxp   w13, w14, [x15]
+    __ stxpw(r20, r26, r8, r10);                       //       stxp    w20, w26, w8, [x10]
+    __ stlxpw(r23, r18, r18, r18);                     //       stlxp   w23, w18, w18, [x18]
+
+// base_plus_unscaled_offset
+// LoadStoreOp
+    __ str(r30, Address(r11, 99));                     //       str     x30, [x11, 99]
+    __ strw(r23, Address(r25, -77));                   //       str     w23, [x25, -77]
+    __ strb(r2, Address(r14, 3));                      //       strb    w2, [x14, 3]
+    __ strh(r9, Address(r10, 5));                      //       strh    w9, [x10, 5]
+    __ ldr(r20, Address(r15, 57));                     //       ldr     x20, [x15, 57]
+    __ ldrw(r12, Address(r16, -78));                   //       ldr     w12, [x16, -78]
+    __ ldrb(r22, Address(r26, -3));                    //       ldrb    w22, [x26, -3]
+    __ ldrh(r30, Address(r19, -47));                   //       ldrh    w30, [x19, -47]
+    __ ldrsb(r9, Address(r10, -12));                   //       ldrsb   x9, [x10, -12]
+    __ ldrsh(r28, Address(r17, 14));                   //       ldrsh   x28, [x17, 14]
+    __ ldrshw(r3, Address(r5, 10));                    //       ldrsh   w3, [x5, 10]
+    __ ldrsw(r17, Address(r17, -91));                  //       ldrsw   x17, [x17, -91]
+    __ ldrd(v2, Address(r20, -17));                    //       ldr     d2, [x20, -17]
+    __ ldrs(v22, Address(r7, -10));                    //       ldr     s22, [x7, -10]
+    __ strd(v30, Address(r18, -223));                  //       str     d30, [x18, -223]
+    __ strs(v13, Address(r22, 21));                    //       str     s13, [x22, 21]
+
+// pre
+// LoadStoreOp
+    __ str(r9, Address(__ pre(r18, -112)));            //       str     x9, [x18, -112]!
+    __ strw(r29, Address(__ pre(r23, 11)));            //       str     w29, [x23, 11]!
+    __ strb(r18, Address(__ pre(r12, -1)));            //       strb    w18, [x12, -1]!
+    __ strh(r16, Address(__ pre(r20, -23)));           //       strh    w16, [x20, -23]!
+    __ ldr(r3, Address(__ pre(r29, 9)));               //       ldr     x3, [x29, 9]!
+    __ ldrw(r25, Address(__ pre(r3, 19)));             //       ldr     w25, [x3, 19]!
+    __ ldrb(r1, Address(__ pre(r29, -1)));             //       ldrb    w1, [x29, -1]!
+    __ ldrh(r8, Address(__ pre(r29, -57)));            //       ldrh    w8, [x29, -57]!
+    __ ldrsb(r5, Address(__ pre(r14, -13)));           //       ldrsb   x5, [x14, -13]!
+    __ ldrsh(r10, Address(__ pre(r27, 1)));            //       ldrsh   x10, [x27, 1]!
+    __ ldrshw(r11, Address(__ pre(r10, 25)));          //       ldrsh   w11, [x10, 25]!
+    __ ldrsw(r4, Address(__ pre(r22, -92)));           //       ldrsw   x4, [x22, -92]!
+    __ ldrd(v11, Address(__ pre(r23, 8)));             //       ldr     d11, [x23, 8]!
+    __ ldrs(v25, Address(__ pre(r19, 54)));            //       ldr     s25, [x19, 54]!
+    __ strd(v1, Address(__ pre(r7, -174)));            //       str     d1, [x7, -174]!
+    __ strs(v8, Address(__ pre(r25, 54)));             //       str     s8, [x25, 54]!
+
+// post
+// LoadStoreOp
+    __ str(r5, Address(__ post(r11, 37)));             //       str     x5, [x11], 37
+    __ strw(r24, Address(__ post(r15, 19)));           //       str     w24, [x15], 19
+    __ strb(r15, Address(__ post(r26, -1)));           //       strb    w15, [x26], -1
+    __ strh(r18, Address(__ post(r18, -6)));           //       strh    w18, [x18], -6
+    __ ldr(r7, Address(__ post(r2, -230)));            //       ldr     x7, [x2], -230
+    __ ldrw(r27, Address(__ post(r11, -27)));          //       ldr     w27, [x11], -27
+    __ ldrb(r18, Address(__ post(r3, -25)));           //       ldrb    w18, [x3], -25
+    __ ldrh(r10, Address(__ post(r24, -32)));          //       ldrh    w10, [x24], -32
+    __ ldrsb(r22, Address(__ post(r10, 4)));           //       ldrsb   x22, [x10], 4
+    __ ldrsh(r17, Address(__ post(r12, 25)));          //       ldrsh   x17, [x12], 25
+    __ ldrshw(r8, Address(__ post(r7, -62)));          //       ldrsh   w8, [x7], -62
+    __ ldrsw(r23, Address(__ post(r22, -51)));         //       ldrsw   x23, [x22], -51
+    __ ldrd(v24, Address(__ post(r25, 48)));           //       ldr     d24, [x25], 48
+    __ ldrs(v21, Address(__ post(r12, -10)));          //       ldr     s21, [x12], -10
+    __ strd(v18, Address(__ post(r13, -222)));         //       str     d18, [x13], -222
+    __ strs(v16, Address(__ post(r1, -41)));           //       str     s16, [x1], -41
+
+// base_plus_reg
+// LoadStoreOp
+    __ str(r2, Address(r22, r15, Address::sxtw(0)));   //       str     x2, [x22, w15, sxtw #0]
+    __ strw(r2, Address(r16, r29, Address::lsl(0)));   //       str     w2, [x16, x29, lsl #0]
+    __ strb(r20, Address(r18, r14, Address::uxtw(0))); //       strb    w20, [x18, w14, uxtw #0]
+    __ strh(r6, Address(r19, r20, Address::sxtx(1)));  //       strh    w6, [x19, x20, sxtx #1]
+    __ ldr(r14, Address(r29, r14, Address::sxtw(0)));  //       ldr     x14, [x29, w14, sxtw #0]
+    __ ldrw(r16, Address(r20, r12, Address::sxtw(2))); //       ldr     w16, [x20, w12, sxtw #2]
+    __ ldrb(r9, Address(r12, r0, Address::sxtw(0)));   //       ldrb    w9, [x12, w0, sxtw #0]
+    __ ldrh(r12, Address(r17, r3, Address::lsl(1)));   //       ldrh    w12, [x17, x3, lsl #1]
+    __ ldrsb(r2, Address(r17, r3, Address::sxtx(0)));  //       ldrsb   x2, [x17, x3, sxtx #0]
+    __ ldrsh(r7, Address(r1, r17, Address::uxtw(1)));  //       ldrsh   x7, [x1, w17, uxtw #1]
+    __ ldrshw(r25, Address(r15, r18, Address::sxtw(1))); //     ldrsh   w25, [x15, w18, sxtw #1]
+    __ ldrsw(r23, Address(r21, r12, Address::lsl(0))); //       ldrsw   x23, [x21, x12, lsl #0]
+    __ ldrd(v5, Address(r13, r8, Address::lsl(3)));    //       ldr     d5, [x13, x8, lsl #3]
+    __ ldrs(v3, Address(r10, r22, Address::lsl(2)));   //       ldr     s3, [x10, x22, lsl #2]
+    __ strd(v14, Address(r2, r27, Address::sxtw(0)));  //       str     d14, [x2, w27, sxtw #0]
+    __ strs(v20, Address(r6, r25, Address::lsl(0)));   //       str     s20, [x6, x25, lsl #0]
+
+// base_plus_scaled_offset
+// LoadStoreOp
+    __ str(r30, Address(r7, 16256));                   //       str     x30, [x7, 16256]
+    __ strw(r15, Address(r8, 7588));                   //       str     w15, [x8, 7588]
+    __ strb(r11, Address(r0, 1866));                   //       strb    w11, [x0, 1866]
+    __ strh(r3, Address(r17, 3734));                   //       strh    w3, [x17, 3734]
+    __ ldr(r2, Address(r7, 14224));                    //       ldr     x2, [x7, 14224]
+    __ ldrw(r5, Address(r9, 7396));                    //       ldr     w5, [x9, 7396]
+    __ ldrb(r28, Address(r9, 1721));                   //       ldrb    w28, [x9, 1721]
+    __ ldrh(r2, Address(r20, 3656));                   //       ldrh    w2, [x20, 3656]
+    __ ldrsb(r22, Address(r14, 1887));                 //       ldrsb   x22, [x14, 1887]
+    __ ldrsh(r8, Address(r0, 4080));                   //       ldrsh   x8, [x0, 4080]
+    __ ldrshw(r0, Address(r30, 3916));                 //       ldrsh   w0, [x30, 3916]
+    __ ldrsw(r24, Address(r19, 6828));                 //       ldrsw   x24, [x19, 6828]
+    __ ldrd(v24, Address(r12, 13032));                 //       ldr     d24, [x12, 13032]
+    __ ldrs(v8, Address(r8, 7452));                    //       ldr     s8, [x8, 7452]
+    __ strd(v10, Address(r15, 15992));                 //       str     d10, [x15, 15992]
+    __ strs(v26, Address(r19, 6688));                  //       str     s26, [x19, 6688]
+
+// pcrel
+// LoadStoreOp
+    __ ldr(r10, forth);                                //       ldr     x10, forth
+    __ ldrw(r3, __ pc());                              //       ldr     w3, .
+
+// LoadStoreOp
+    __ prfm(Address(r23, 9));                          //       prfm    PLDL1KEEP, [x23, 9]
+
+// LoadStoreOp
+    __ prfm(back);                                     //       prfm    PLDL1KEEP, back
+
+// LoadStoreOp
+    __ prfm(Address(r3, r8, Address::uxtw(0)));        //       prfm    PLDL1KEEP, [x3, w8, uxtw #0]
+
+// LoadStoreOp
+    __ prfm(Address(r11, 15080));                      //       prfm    PLDL1KEEP, [x11, 15080]
+
+// AddSubCarryOp
+    __ adcw(r13, r9, r28);                             //       adc     w13, w9, w28
+    __ adcsw(r27, r19, r28);                           //       adcs    w27, w19, w28
+    __ sbcw(r19, r18, r6);                             //       sbc     w19, w18, w6
+    __ sbcsw(r14, r20, r3);                            //       sbcs    w14, w20, w3
+    __ adc(r16, r14, r8);                              //       adc     x16, x14, x8
+    __ adcs(r0, r29, r8);                              //       adcs    x0, x29, x8
+    __ sbc(r8, r24, r20);                              //       sbc     x8, x24, x20
+    __ sbcs(r12, r28, r0);                             //       sbcs    x12, x28, x0
+
+// AddSubExtendedOp
+    __ addw(r23, r6, r16, ext::uxtb, 4);               //       add     w23, w6, w16, uxtb #4
+    __ addsw(r25, r25, r23, ext::sxth, 2);             //       adds    w25, w25, w23, sxth #2
+    __ sub(r26, r22, r4, ext::uxtx, 1);                //       sub     x26, x22, x4, uxtx #1
+    __ subsw(r17, r29, r19, ext::sxtx, 3);             //       subs    w17, w29, w19, sxtx #3
+    __ add(r11, r30, r21, ext::uxtb, 3);               //       add     x11, x30, x21, uxtb #3
+    __ adds(r16, r19, r0, ext::sxtb, 2);               //       adds    x16, x19, x0, sxtb #2
+    __ sub(r11, r9, r25, ext::sxtx, 1);                //       sub     x11, x9, x25, sxtx #1
+    __ subs(r17, r20, r12, ext::sxtb, 4);              //       subs    x17, x20, x12, sxtb #4
+
+// ConditionalCompareOp
+    __ ccmnw(r13, r11, 3u, Assembler::LE);             //       ccmn    w13, w11, #3, LE
+    __ ccmpw(r13, r12, 2u, Assembler::HI);             //       ccmp    w13, w12, #2, HI
+    __ ccmn(r3, r2, 12u, Assembler::NE);               //       ccmn    x3, x2, #12, NE
+    __ ccmp(r7, r21, 3u, Assembler::VS);               //       ccmp    x7, x21, #3, VS
+
+// ConditionalCompareImmedOp
+    __ ccmnw(r2, 14, 4, Assembler::CC);                //       ccmn    w2, #14, #4, CC
+    __ ccmpw(r17, 17, 6, Assembler::PL);               //       ccmp    w17, #17, #6, PL
+    __ ccmn(r10, 12, 0, Assembler::CS);                //       ccmn    x10, #12, #0, CS
+    __ ccmp(r21, 18, 14, Assembler::GE);               //       ccmp    x21, #18, #14, GE
+
+// ConditionalSelectOp
+    __ cselw(r21, r13, r12, Assembler::GT);            //       csel    w21, w13, w12, GT
+    __ csincw(r10, r27, r15, Assembler::LS);           //       csinc   w10, w27, w15, LS
+    __ csinvw(r0, r13, r9, Assembler::HI);             //       csinv   w0, w13, w9, HI
+    __ csnegw(r18, r4, r26, Assembler::VS);            //       csneg   w18, w4, w26, VS
+    __ csel(r12, r29, r7, Assembler::LS);              //       csel    x12, x29, x7, LS
+    __ csinc(r6, r7, r20, Assembler::VC);              //       csinc   x6, x7, x20, VC
+    __ csinv(r22, r21, r3, Assembler::LE);             //       csinv   x22, x21, x3, LE
+    __ csneg(r19, r12, r27, Assembler::LS);            //       csneg   x19, x12, x27, LS
+
+// TwoRegOp
+    __ rbitw(r0, r16);                                 //       rbit    w0, w16
+    __ rev16w(r17, r23);                               //       rev16   w17, w23
+    __ revw(r17, r14);                                 //       rev     w17, w14
+    __ clzw(r24, r30);                                 //       clz     w24, w30
+    __ clsw(r24, r22);                                 //       cls     w24, w22
+    __ rbit(r3, r17);                                  //       rbit    x3, x17
+    __ rev16(r12, r13);                                //       rev16   x12, x13
+    __ rev32(r9, r22);                                 //       rev32   x9, x22
+    __ rev(r0, r0);                                    //       rev     x0, x0
+    __ clz(r5, r16);                                   //       clz     x5, x16
+    __ cls(r25, r22);                                  //       cls     x25, x22
+
+// ThreeRegOp
+    __ udivw(r29, r4, r0);                             //       udiv    w29, w4, w0
+    __ sdivw(r0, r29, r29);                            //       sdiv    w0, w29, w29
+    __ lslvw(r5, r17, r21);                            //       lslv    w5, w17, w21
+    __ lsrvw(r9, r9, r18);                             //       lsrv    w9, w9, w18
+    __ asrvw(r1, r27, r8);                             //       asrv    w1, w27, w8
+    __ rorvw(r18, r20, r13);                           //       rorv    w18, w20, w13
+    __ udiv(r8, r25, r12);                             //       udiv    x8, x25, x12
+    __ sdiv(r7, r5, r28);                              //       sdiv    x7, x5, x28
+    __ lslv(r5, r17, r27);                             //       lslv    x5, x17, x27
+    __ lsrv(r23, r26, r20);                            //       lsrv    x23, x26, x20
+    __ asrv(r28, r8, r28);                             //       asrv    x28, x8, x28
+    __ rorv(r3, r29, r4);                              //       rorv    x3, x29, x4
+
+// FourRegMulOp
+    __ maddw(r17, r14, r26, r21);                      //       madd    w17, w14, w26, w21
+    __ msubw(r1, r30, r11, r11);                       //       msub    w1, w30, w11, w11
+    __ madd(r1, r17, r6, r28);                         //       madd    x1, x17, x6, x28
+    __ msub(r30, r6, r30, r8);                         //       msub    x30, x6, x30, x8
+    __ smaddl(r21, r6, r14, r8);                       //       smaddl  x21, w6, w14, x8
+    __ smsubl(r10, r10, r24, r19);                     //       smsubl  x10, w10, w24, x19
+    __ umaddl(r20, r18, r14, r24);                     //       umaddl  x20, w18, w14, x24
+    __ umsubl(r18, r2, r5, r5);                        //       umsubl  x18, w2, w5, x5
+
+// ThreeRegFloatOp
+    __ fmuls(v8, v18, v13);                            //       fmul    s8, s18, s13
+    __ fdivs(v2, v14, v28);                            //       fdiv    s2, s14, s28
+    __ fadds(v15, v12, v28);                           //       fadd    s15, s12, s28
+    __ fsubs(v0, v12, v1);                             //       fsub    s0, s12, s1
+    __ fmuls(v15, v29, v4);                            //       fmul    s15, s29, s4
+    __ fmuld(v12, v1, v23);                            //       fmul    d12, d1, d23
+    __ fdivd(v27, v8, v18);                            //       fdiv    d27, d8, d18
+    __ faddd(v23, v20, v11);                           //       fadd    d23, d20, d11
+    __ fsubd(v8, v12, v18);                            //       fsub    d8, d12, d18
+    __ fmuld(v26, v24, v23);                           //       fmul    d26, d24, d23
+
+// FourRegFloatOp
+    __ fmadds(v21, v23, v13, v25);                     //       fmadd   s21, s23, s13, s25
+    __ fmsubs(v22, v10, v1, v14);                      //       fmsub   s22, s10, s1, s14
+    __ fnmadds(v14, v20, v2, v30);                     //       fnmadd  s14, s20, s2, s30
+    __ fnmadds(v7, v29, v22, v22);                     //       fnmadd  s7, s29, s22, s22
+    __ fmaddd(v13, v5, v15, v5);                       //       fmadd   d13, d5, d15, d5
+    __ fmsubd(v14, v12, v5, v10);                      //       fmsub   d14, d12, d5, d10
+    __ fnmaddd(v10, v19, v0, v1);                      //       fnmadd  d10, d19, d0, d1
+    __ fnmaddd(v20, v2, v2, v0);                       //       fnmadd  d20, d2, d2, d0
+
+// TwoRegFloatOp
+    __ fmovs(v25, v9);                                 //       fmov    s25, s9
+    __ fabss(v20, v4);                                 //       fabs    s20, s4
+    __ fnegs(v3, v27);                                 //       fneg    s3, s27
+    __ fsqrts(v1, v2);                                 //       fsqrt   s1, s2
+    __ fcvts(v30, v0);                                 //       fcvt    d30, s0
+    __ fmovd(v12, v4);                                 //       fmov    d12, d4
+    __ fabsd(v1, v27);                                 //       fabs    d1, d27
+    __ fnegd(v8, v22);                                 //       fneg    d8, d22
+    __ fsqrtd(v11, v11);                               //       fsqrt   d11, d11
+    __ fcvtd(v22, v28);                                //       fcvt    s22, d28
+
+// FloatConvertOp
+    __ fcvtzsw(r28, v22);                              //       fcvtzs  w28, s22
+    __ fcvtzs(r20, v27);                               //       fcvtzs  x20, s27
+    __ fcvtzdw(r14, v0);                               //       fcvtzs  w14, d0
+    __ fcvtzd(r26, v11);                               //       fcvtzs  x26, d11
+    __ scvtfws(v28, r22);                              //       scvtf   s28, w22
+    __ scvtfs(v16, r10);                               //       scvtf   s16, x10
+    __ scvtfwd(v8, r21);                               //       scvtf   d8, w21
+    __ scvtfd(v21, r28);                               //       scvtf   d21, x28
+    __ fmovs(r24, v24);                                //       fmov    w24, s24
+    __ fmovd(r8, v19);                                 //       fmov    x8, d19
+    __ fmovs(v8, r12);                                 //       fmov    s8, w12
+    __ fmovd(v6, r7);                                  //       fmov    d6, x7
+
+// TwoRegFloatOp
+    __ fcmps(v30, v16);                                //       fcmp    s30, s16
+    __ fcmpd(v25, v11);                                //       fcmp    d25, d11
+    __ fcmps(v11, 0.0);                                //       fcmp    s11, #0.0
+    __ fcmpd(v11, 0.0);                                //       fcmp    d11, #0.0
+
+// LoadStorePairOp
+    __ stpw(r29, r12, Address(r17, 128));              //       stp     w29, w12, [x17, #128]
+    __ ldpw(r22, r18, Address(r14, -96));              //       ldp     w22, w18, [x14, #-96]
+    __ ldpsw(r11, r16, Address(r1, 64));               //       ldpsw   x11, x16, [x1, #64]
+    __ stp(r0, r11, Address(r26, 112));                //       stp     x0, x11, [x26, #112]
+    __ ldp(r7, r1, Address(r26, 16));                  //       ldp     x7, x1, [x26, #16]
+
+// LoadStorePairOp
+    __ stpw(r10, r7, Address(__ pre(r24, 0)));         //       stp     w10, w7, [x24, #0]!
+    __ ldpw(r7, r28, Address(__ pre(r24, -256)));      //       ldp     w7, w28, [x24, #-256]!
+    __ ldpsw(r25, r28, Address(__ pre(r21, -240)));    //       ldpsw   x25, x28, [x21, #-240]!
+    __ stp(r20, r18, Address(__ pre(r14, -16)));       //       stp     x20, x18, [x14, #-16]!
+    __ ldp(r8, r10, Address(__ pre(r13, 80)));         //       ldp     x8, x10, [x13, #80]!
+
+// LoadStorePairOp
+    __ stpw(r26, r24, Address(__ post(r2, -128)));     //       stp     w26, w24, [x2], #-128
+    __ ldpw(r2, r25, Address(__ post(r21, -192)));     //       ldp     w2, w25, [x21], #-192
+    __ ldpsw(r17, r2, Address(__ post(r21, -144)));    //       ldpsw   x17, x2, [x21], #-144
+    __ stp(r12, r10, Address(__ post(r11, 96)));       //       stp     x12, x10, [x11], #96
+    __ ldp(r24, r6, Address(__ post(r17, -32)));       //       ldp     x24, x6, [x17], #-32
+
+// LoadStorePairOp
+    __ stnpw(r3, r30, Address(r14, -224));             //       stnp    w3, w30, [x14, #-224]
+    __ ldnpw(r15, r20, Address(r26, -144));            //       ldnp    w15, w20, [x26, #-144]
+    __ stnp(r22, r25, Address(r12, -128));             //       stnp    x22, x25, [x12, #-128]
+    __ ldnp(r27, r22, Address(r17, -176));             //       ldnp    x27, x22, [x17, #-176]
+
+// FloatImmediateOp
+    __ fmovd(v0, 2.0);                                 //       fmov d0, #2.0
+    __ fmovd(v0, 2.125);                               //       fmov d0, #2.125
+    __ fmovd(v0, 4.0);                                 //       fmov d0, #4.0
+    __ fmovd(v0, 4.25);                                //       fmov d0, #4.25
+    __ fmovd(v0, 8.0);                                 //       fmov d0, #8.0
+    __ fmovd(v0, 8.5);                                 //       fmov d0, #8.5
+    __ fmovd(v0, 16.0);                                //       fmov d0, #16.0
+    __ fmovd(v0, 17.0);                                //       fmov d0, #17.0
+    __ fmovd(v0, 0.125);                               //       fmov d0, #0.125
+    __ fmovd(v0, 0.1328125);                           //       fmov d0, #0.1328125
+    __ fmovd(v0, 0.25);                                //       fmov d0, #0.25
+    __ fmovd(v0, 0.265625);                            //       fmov d0, #0.265625
+    __ fmovd(v0, 0.5);                                 //       fmov d0, #0.5
+    __ fmovd(v0, 0.53125);                             //       fmov d0, #0.53125
+    __ fmovd(v0, 1.0);                                 //       fmov d0, #1.0
+    __ fmovd(v0, 1.0625);                              //       fmov d0, #1.0625
+    __ fmovd(v0, -2.0);                                //       fmov d0, #-2.0
+    __ fmovd(v0, -2.125);                              //       fmov d0, #-2.125
+    __ fmovd(v0, -4.0);                                //       fmov d0, #-4.0
+    __ fmovd(v0, -4.25);                               //       fmov d0, #-4.25
+    __ fmovd(v0, -8.0);                                //       fmov d0, #-8.0
+    __ fmovd(v0, -8.5);                                //       fmov d0, #-8.5
+    __ fmovd(v0, -16.0);                               //       fmov d0, #-16.0
+    __ fmovd(v0, -17.0);                               //       fmov d0, #-17.0
+    __ fmovd(v0, -0.125);                              //       fmov d0, #-0.125
+    __ fmovd(v0, -0.1328125);                          //       fmov d0, #-0.1328125
+    __ fmovd(v0, -0.25);                               //       fmov d0, #-0.25
+    __ fmovd(v0, -0.265625);                           //       fmov d0, #-0.265625
+    __ fmovd(v0, -0.5);                                //       fmov d0, #-0.5
+    __ fmovd(v0, -0.53125);                            //       fmov d0, #-0.53125
+    __ fmovd(v0, -1.0);                                //       fmov d0, #-1.0
+    __ fmovd(v0, -1.0625);                             //       fmov d0, #-1.0625
+
+    __ bind(forth);
+
+/*
+aarch64ops.o:     file format elf64-littleaarch64
+
+
+Disassembly of section .text:
+
+0000000000000000 <back>:
+   0:   8b0772d3        add     x19, x22, x7, lsl #28
+   4:   cb4a3570        sub     x16, x11, x10, lsr #13
+   8:   ab9c09bb        adds    x27, x13, x28, asr #2
+   c:   eb9aa794        subs    x20, x28, x26, asr #41
+  10:   0b934e68        add     w8, w19, w19, asr #19
+  14:   4b0a3924        sub     w4, w9, w10, lsl #14
+  18:   2b1e3568        adds    w8, w11, w30, lsl #13
+  1c:   6b132720        subs    w0, w25, w19, lsl #9
+  20:   8a154c14        and     x20, x0, x21, lsl #19
+  24:   aa1445d5        orr     x21, x14, x20, lsl #17
+  28:   ca01cf99        eor     x25, x28, x1, lsl #51
+  2c:   ea8b3f6a        ands    x10, x27, x11, asr #15
+  30:   0a8c5cb9        and     w25, w5, w12, asr #23
+  34:   2a4a11d2        orr     w18, w14, w10, lsr #4
+  38:   4a855aa4        eor     w4, w21, w5, asr #22
+  3c:   6a857415        ands    w21, w0, w5, asr #29
+  40:   8aa697da        bic     x26, x30, x6, asr #37
+  44:   aa6d7423        orn     x3, x1, x13, lsr #29
+  48:   ca29bf80        eon     x0, x28, x9, lsl #47
+  4c:   ea3cb8bd        bics    x29, x5, x28, lsl #46
+  50:   0a675249        bic     w9, w18, w7, lsr #20
+  54:   2ab961ba        orn     w26, w13, w25, asr #24
+  58:   4a331899        eon     w25, w4, w19, lsl #6
+  5c:   6a646345        bics    w5, w26, w4, lsr #24
+  60:   11055267        add     w7, w19, #0x154
+  64:   31064408        adds    w8, w0, #0x191
+  68:   51028e9d        sub     w29, w20, #0xa3
+  6c:   710bdee8        subs    w8, w23, #0x2f7
+  70:   91082d81        add     x1, x12, #0x20b
+  74:   b106a962        adds    x2, x11, #0x1aa
+  78:   d10b33ae        sub     x14, x29, #0x2cc
+  7c:   f10918ab        subs    x11, x5, #0x246
+  80:   121102d7        and     w23, w22, #0x8000
+  84:   3204cd44        orr     w4, w10, #0xf0f0f0f0
+  88:   5204cf00        eor     w0, w24, #0xf0f0f0f0
+  8c:   72099fb3        ands    w19, w29, #0x7f807f80
+  90:   92729545        and     x5, x10, #0xfffffffffc000
+  94:   b20e37cc        orr     x12, x30, #0xfffc0000fffc0000
+  98:   d27c34be        eor     x30, x5, #0x3fff0
+  9c:   f27e4efa        ands    x26, x23, #0x3ffffc
+  a0:   14000000        b       a0 <back+0xa0>
+  a4:   17ffffd7        b       0 <back>
+  a8:   1400017f        b       6a4 <forth>
+  ac:   94000000        bl      ac <back+0xac>
+  b0:   97ffffd4        bl      0 <back>
+  b4:   9400017c        bl      6a4 <forth>
+  b8:   3400000c        cbz     w12, b8 <back+0xb8>
+  bc:   34fffa2c        cbz     w12, 0 <back>
+  c0:   34002f2c        cbz     w12, 6a4 <forth>
+  c4:   35000014        cbnz    w20, c4 <back+0xc4>
+  c8:   35fff9d4        cbnz    w20, 0 <back>
+  cc:   35002ed4        cbnz    w20, 6a4 <forth>
+  d0:   b400000c        cbz     x12, d0 <back+0xd0>
+  d4:   b4fff96c        cbz     x12, 0 <back>
+  d8:   b4002e6c        cbz     x12, 6a4 <forth>
+  dc:   b5000018        cbnz    x24, dc <back+0xdc>
+  e0:   b5fff918        cbnz    x24, 0 <back>
+  e4:   b5002e18        cbnz    x24, 6a4 <forth>
+  e8:   10000006        adr     x6, e8 <back+0xe8>
+  ec:   10fff8a6        adr     x6, 0 <back>
+  f0:   10002da6        adr     x6, 6a4 <forth>
+  f4:   90000015        adrp    x21, 0 <back>
+  f8:   36080001        tbz     w1, #1, f8 <back+0xf8>
+  fc:   360ff821        tbz     w1, #1, 0 <back>
+ 100:   36082d21        tbz     w1, #1, 6a4 <forth>
+ 104:   37480008        tbnz    w8, #9, 104 <back+0x104>
+ 108:   374ff7c8        tbnz    w8, #9, 0 <back>
+ 10c:   37482cc8        tbnz    w8, #9, 6a4 <forth>
+ 110:   128b50ec        movn    w12, #0x5a87
+ 114:   52a9ff8b        movz    w11, #0x4ffc, lsl #16
+ 118:   7281d095        movk    w21, #0xe84
+ 11c:   92edfebd        movn    x29, #0x6ff5, lsl #48
+ 120:   d28361e3        movz    x3, #0x1b0f
+ 124:   f2a4cc96        movk    x22, #0x2664, lsl #16
+ 128:   9346590c        sbfx    x12, x8, #6, #17
+ 12c:   33194f33        bfi     w19, w25, #7, #20
+ 130:   531d3d89        ubfiz   w9, w12, #3, #16
+ 134:   9350433c        sbfx    x28, x25, #16, #1
+ 138:   b34464ac        bfxil   x12, x5, #4, #22
+ 13c:   d3462140        ubfx    x0, x10, #6, #3
+ 140:   139a61a4        extr    w4, w13, w26, #24
+ 144:   93d87fd7        extr    x23, x30, x24, #31
+ 148:   54000000        b.eq    148 <back+0x148>
+ 14c:   54fff5a0        b.eq    0 <back>
+ 150:   54002aa0        b.eq    6a4 <forth>
+ 154:   54000001        b.ne    154 <back+0x154>
+ 158:   54fff541        b.ne    0 <back>
+ 15c:   54002a41        b.ne    6a4 <forth>
+ 160:   54000002        b.cs    160 <back+0x160>
+ 164:   54fff4e2        b.cs    0 <back>
+ 168:   540029e2        b.cs    6a4 <forth>
+ 16c:   54000002        b.cs    16c <back+0x16c>
+ 170:   54fff482        b.cs    0 <back>
+ 174:   54002982        b.cs    6a4 <forth>
+ 178:   54000003        b.cc    178 <back+0x178>
+ 17c:   54fff423        b.cc    0 <back>
+ 180:   54002923        b.cc    6a4 <forth>
+ 184:   54000003        b.cc    184 <back+0x184>
+ 188:   54fff3c3        b.cc    0 <back>
+ 18c:   540028c3        b.cc    6a4 <forth>
+ 190:   54000004        b.mi    190 <back+0x190>
+ 194:   54fff364        b.mi    0 <back>
+ 198:   54002864        b.mi    6a4 <forth>
+ 19c:   54000005        b.pl    19c <back+0x19c>
+ 1a0:   54fff305        b.pl    0 <back>
+ 1a4:   54002805        b.pl    6a4 <forth>
+ 1a8:   54000006        b.vs    1a8 <back+0x1a8>
+ 1ac:   54fff2a6        b.vs    0 <back>
+ 1b0:   540027a6        b.vs    6a4 <forth>
+ 1b4:   54000007        b.vc    1b4 <back+0x1b4>
+ 1b8:   54fff247        b.vc    0 <back>
+ 1bc:   54002747        b.vc    6a4 <forth>
+ 1c0:   54000008        b.hi    1c0 <back+0x1c0>
+ 1c4:   54fff1e8        b.hi    0 <back>
+ 1c8:   540026e8        b.hi    6a4 <forth>
+ 1cc:   54000009        b.ls    1cc <back+0x1cc>
+ 1d0:   54fff189        b.ls    0 <back>
+ 1d4:   54002689        b.ls    6a4 <forth>
+ 1d8:   5400000a        b.ge    1d8 <back+0x1d8>
+ 1dc:   54fff12a        b.ge    0 <back>
+ 1e0:   5400262a        b.ge    6a4 <forth>
+ 1e4:   5400000b        b.lt    1e4 <back+0x1e4>
+ 1e8:   54fff0cb        b.lt    0 <back>
+ 1ec:   540025cb        b.lt    6a4 <forth>
+ 1f0:   5400000c        b.gt    1f0 <back+0x1f0>
+ 1f4:   54fff06c        b.gt    0 <back>
+ 1f8:   5400256c        b.gt    6a4 <forth>
+ 1fc:   5400000d        b.le    1fc <back+0x1fc>
+ 200:   54fff00d        b.le    0 <back>
+ 204:   5400250d        b.le    6a4 <forth>
+ 208:   5400000e        b.al    208 <back+0x208>
+ 20c:   54ffefae        b.al    0 <back>
+ 210:   540024ae        b.al    6a4 <forth>
+ 214:   5400000f        b.nv    214 <back+0x214>
+ 218:   54ffef4f        b.nv    0 <back>
+ 21c:   5400244f        b.nv    6a4 <forth>
+ 220:   d4063721        svc     #0x31b9
+ 224:   d4035082        hvc     #0x1a84
+ 228:   d400bfe3        smc     #0x5ff
+ 22c:   d4282fc0        brk     #0x417e
+ 230:   d444c320        hlt     #0x2619
+ 234:   d503201f        nop
+ 238:   d69f03e0        eret
+ 23c:   d6bf03e0        drps
+ 240:   d5033fdf        isb
+ 244:   d5033f9f        dsb     sy
+ 248:   d5033abf        dmb     ishst
+ 24c:   d61f0040        br      x2
+ 250:   d63f00a0        blr     x5
+ 254:   c8147c55        stxr    w20, x21, [x2]
+ 258:   c805fcfd        stlxr   w5, x29, [x7]
+ 25c:   c85f7e05        ldxr    x5, [x16]
+ 260:   c85fffbb        ldaxr   x27, [x29]
+ 264:   c89fffa0        stlr    x0, [x29]
+ 268:   c8dfff95        ldar    x21, [x28]
+ 26c:   88157cf8        stxr    w21, w24, [x7]
+ 270:   8815ff9a        stlxr   w21, w26, [x28]
+ 274:   885f7cd5        ldxr    w21, [x6]
+ 278:   885fffcf        ldaxr   w15, [x30]
+ 27c:   889ffc73        stlr    w19, [x3]
+ 280:   88dffc56        ldar    w22, [x2]
+ 284:   48127c0f        stxrh   w18, w15, [x0]
+ 288:   480bff85        stlxrh  w11, w5, [x28]
+ 28c:   485f7cdd        ldxrh   w29, [x6]
+ 290:   485ffcf2        ldaxrh  w18, [x7]
+ 294:   489fff99        stlrh   w25, [x28]
+ 298:   48dffe62        ldarh   w2, [x19]
+ 29c:   080a7c3e        stxrb   w10, w30, [x1]
+ 2a0:   0814fed5        stlxrb  w20, w21, [x22]
+ 2a4:   085f7c59        ldxrb   w25, [x2]
+ 2a8:   085ffcb8        ldaxrb  w24, [x5]
+ 2ac:   089ffc70        stlrb   w16, [x3]
+ 2b0:   08dfffb6        ldarb   w22, [x29]
+ 2b4:   c87f0a68        ldxp    x8, x2, [x19]
+ 2b8:   c87fcdc7        ldaxp   x7, x19, [x14]
+ 2bc:   c82870bb        stxp    w8, x27, x28, [x5]
+ 2c0:   c825b8c8        stlxp   w5, x8, x14, [x6]
+ 2c4:   887f12d9        ldxp    w25, w4, [x22]
+ 2c8:   887fb9ed        ldaxp   w13, w14, [x15]
+ 2cc:   8834215a        stxp    w20, w26, w8, [x10]
+ 2d0:   8837ca52        stlxp   w23, w18, w18, [x18]
+ 2d4:   f806317e        str     x30, [x11,#99]
+ 2d8:   b81b3337        str     w23, [x25,#-77]
+ 2dc:   39000dc2        strb    w2, [x14,#3]
+ 2e0:   78005149        strh    w9, [x10,#5]
+ 2e4:   f84391f4        ldr     x20, [x15,#57]
+ 2e8:   b85b220c        ldr     w12, [x16,#-78]
+ 2ec:   385fd356        ldrb    w22, [x26,#-3]
+ 2f0:   785d127e        ldrh    w30, [x19,#-47]
+ 2f4:   389f4149        ldrsb   x9, [x10,#-12]
+ 2f8:   79801e3c        ldrsh   x28, [x17,#14]
+ 2fc:   79c014a3        ldrsh   w3, [x5,#10]
+ 300:   b89a5231        ldrsw   x17, [x17,#-91]
+ 304:   fc5ef282        ldr     d2, [x20,#-17]
+ 308:   bc5f60f6        ldr     s22, [x7,#-10]
+ 30c:   fc12125e        str     d30, [x18,#-223]
+ 310:   bc0152cd        str     s13, [x22,#21]
+ 314:   f8190e49        str     x9, [x18,#-112]!
+ 318:   b800befd        str     w29, [x23,#11]!
+ 31c:   381ffd92        strb    w18, [x12,#-1]!
+ 320:   781e9e90        strh    w16, [x20,#-23]!
+ 324:   f8409fa3        ldr     x3, [x29,#9]!
+ 328:   b8413c79        ldr     w25, [x3,#19]!
+ 32c:   385fffa1        ldrb    w1, [x29,#-1]!
+ 330:   785c7fa8        ldrh    w8, [x29,#-57]!
+ 334:   389f3dc5        ldrsb   x5, [x14,#-13]!
+ 338:   78801f6a        ldrsh   x10, [x27,#1]!
+ 33c:   78c19d4b        ldrsh   w11, [x10,#25]!
+ 340:   b89a4ec4        ldrsw   x4, [x22,#-92]!
+ 344:   fc408eeb        ldr     d11, [x23,#8]!
+ 348:   bc436e79        ldr     s25, [x19,#54]!
+ 34c:   fc152ce1        str     d1, [x7,#-174]!
+ 350:   bc036f28        str     s8, [x25,#54]!
+ 354:   f8025565        str     x5, [x11],#37
+ 358:   b80135f8        str     w24, [x15],#19
+ 35c:   381ff74f        strb    w15, [x26],#-1
+ 360:   781fa652        strh    w18, [x18],#-6
+ 364:   f851a447        ldr     x7, [x2],#-230
+ 368:   b85e557b        ldr     w27, [x11],#-27
+ 36c:   385e7472        ldrb    w18, [x3],#-25
+ 370:   785e070a        ldrh    w10, [x24],#-32
+ 374:   38804556        ldrsb   x22, [x10],#4
+ 378:   78819591        ldrsh   x17, [x12],#25
+ 37c:   78dc24e8        ldrsh   w8, [x7],#-62
+ 380:   b89cd6d7        ldrsw   x23, [x22],#-51
+ 384:   fc430738        ldr     d24, [x25],#48
+ 388:   bc5f6595        ldr     s21, [x12],#-10
+ 38c:   fc1225b2        str     d18, [x13],#-222
+ 390:   bc1d7430        str     s16, [x1],#-41
+ 394:   f82fcac2        str     x2, [x22,w15,sxtw]
+ 398:   b83d6a02        str     w2, [x16,x29]
+ 39c:   382e5a54        strb    w20, [x18,w14,uxtw #0]
+ 3a0:   7834fa66        strh    w6, [x19,x20,sxtx #1]
+ 3a4:   f86ecbae        ldr     x14, [x29,w14,sxtw]
+ 3a8:   b86cda90        ldr     w16, [x20,w12,sxtw #2]
+ 3ac:   3860d989        ldrb    w9, [x12,w0,sxtw #0]
+ 3b0:   78637a2c        ldrh    w12, [x17,x3,lsl #1]
+ 3b4:   38a3fa22        ldrsb   x2, [x17,x3,sxtx #0]
+ 3b8:   78b15827        ldrsh   x7, [x1,w17,uxtw #1]
+ 3bc:   78f2d9f9        ldrsh   w25, [x15,w18,sxtw #1]
+ 3c0:   b8ac6ab7        ldrsw   x23, [x21,x12]
+ 3c4:   fc6879a5        ldr     d5, [x13,x8,lsl #3]
+ 3c8:   bc767943        ldr     s3, [x10,x22,lsl #2]
+ 3cc:   fc3bc84e        str     d14, [x2,w27,sxtw]
+ 3d0:   bc3968d4        str     s20, [x6,x25]
+ 3d4:   f91fc0fe        str     x30, [x7,#16256]
+ 3d8:   b91da50f        str     w15, [x8,#7588]
+ 3dc:   391d280b        strb    w11, [x0,#1866]
+ 3e0:   791d2e23        strh    w3, [x17,#3734]
+ 3e4:   f95bc8e2        ldr     x2, [x7,#14224]
+ 3e8:   b95ce525        ldr     w5, [x9,#7396]
+ 3ec:   395ae53c        ldrb    w28, [x9,#1721]
+ 3f0:   795c9282        ldrh    w2, [x20,#3656]
+ 3f4:   399d7dd6        ldrsb   x22, [x14,#1887]
+ 3f8:   799fe008        ldrsh   x8, [x0,#4080]
+ 3fc:   79de9bc0        ldrsh   w0, [x30,#3916]
+ 400:   b99aae78        ldrsw   x24, [x19,#6828]
+ 404:   fd597598        ldr     d24, [x12,#13032]
+ 408:   bd5d1d08        ldr     s8, [x8,#7452]
+ 40c:   fd1f3dea        str     d10, [x15,#15992]
+ 410:   bd1a227a        str     s26, [x19,#6688]
+ 414:   5800148a        ldr     x10, 6a4 <forth>
+ 418:   18000003        ldr     w3, 418 <back+0x418>
+ 41c:   f88092e0        prfm    pldl1keep, [x23,#9]
+ 420:   d8ffdf00        prfm    pldl1keep, 0 <back>
+ 424:   f8a84860        prfm    pldl1keep, [x3,w8,uxtw]
+ 428:   f99d7560        prfm    pldl1keep, [x11,#15080]
+ 42c:   1a1c012d        adc     w13, w9, w28
+ 430:   3a1c027b        adcs    w27, w19, w28
+ 434:   5a060253        sbc     w19, w18, w6
+ 438:   7a03028e        sbcs    w14, w20, w3
+ 43c:   9a0801d0        adc     x16, x14, x8
+ 440:   ba0803a0        adcs    x0, x29, x8
+ 444:   da140308        sbc     x8, x24, x20
+ 448:   fa00038c        sbcs    x12, x28, x0
+ 44c:   0b3010d7        add     w23, w6, w16, uxtb #4
+ 450:   2b37ab39        adds    w25, w25, w23, sxth #2
+ 454:   cb2466da        sub     x26, x22, x4, uxtx #1
+ 458:   6b33efb1        subs    w17, w29, w19, sxtx #3
+ 45c:   8b350fcb        add     x11, x30, w21, uxtb #3
+ 460:   ab208a70        adds    x16, x19, w0, sxtb #2
+ 464:   cb39e52b        sub     x11, x9, x25, sxtx #1
+ 468:   eb2c9291        subs    x17, x20, w12, sxtb #4
+ 46c:   3a4bd1a3        ccmn    w13, w11, #0x3, le
+ 470:   7a4c81a2        ccmp    w13, w12, #0x2, hi
+ 474:   ba42106c        ccmn    x3, x2, #0xc, ne
+ 478:   fa5560e3        ccmp    x7, x21, #0x3, vs
+ 47c:   3a4e3844        ccmn    w2, #0xe, #0x4, cc
+ 480:   7a515a26        ccmp    w17, #0x11, #0x6, pl
+ 484:   ba4c2940        ccmn    x10, #0xc, #0x0, cs
+ 488:   fa52aaae        ccmp    x21, #0x12, #0xe, ge
+ 48c:   1a8cc1b5        csel    w21, w13, w12, gt
+ 490:   1a8f976a        csinc   w10, w27, w15, ls
+ 494:   5a8981a0        csinv   w0, w13, w9, hi
+ 498:   5a9a6492        csneg   w18, w4, w26, vs
+ 49c:   9a8793ac        csel    x12, x29, x7, ls
+ 4a0:   9a9474e6        csinc   x6, x7, x20, vc
+ 4a4:   da83d2b6        csinv   x22, x21, x3, le
+ 4a8:   da9b9593        csneg   x19, x12, x27, ls
+ 4ac:   5ac00200        rbit    w0, w16
+ 4b0:   5ac006f1        rev16   w17, w23
+ 4b4:   5ac009d1        rev     w17, w14
+ 4b8:   5ac013d8        clz     w24, w30
+ 4bc:   5ac016d8        cls     w24, w22
+ 4c0:   dac00223        rbit    x3, x17
+ 4c4:   dac005ac        rev16   x12, x13
+ 4c8:   dac00ac9        rev32   x9, x22
+ 4cc:   dac00c00        rev     x0, x0
+ 4d0:   dac01205        clz     x5, x16
+ 4d4:   dac016d9        cls     x25, x22
+ 4d8:   1ac0089d        udiv    w29, w4, w0
+ 4dc:   1add0fa0        sdiv    w0, w29, w29
+ 4e0:   1ad52225        lsl     w5, w17, w21
+ 4e4:   1ad22529        lsr     w9, w9, w18
+ 4e8:   1ac82b61        asr     w1, w27, w8
+ 4ec:   1acd2e92        ror     w18, w20, w13
+ 4f0:   9acc0b28        udiv    x8, x25, x12
+ 4f4:   9adc0ca7        sdiv    x7, x5, x28
+ 4f8:   9adb2225        lsl     x5, x17, x27
+ 4fc:   9ad42757        lsr     x23, x26, x20
+ 500:   9adc291c        asr     x28, x8, x28
+ 504:   9ac42fa3        ror     x3, x29, x4
+ 508:   1b1a55d1        madd    w17, w14, w26, w21
+ 50c:   1b0bafc1        msub    w1, w30, w11, w11
+ 510:   9b067221        madd    x1, x17, x6, x28
+ 514:   9b1ea0de        msub    x30, x6, x30, x8
+ 518:   9b2e20d5        smaddl  x21, w6, w14, x8
+ 51c:   9b38cd4a        smsubl  x10, w10, w24, x19
+ 520:   9bae6254        umaddl  x20, w18, w14, x24
+ 524:   9ba59452        umsubl  x18, w2, w5, x5
+ 528:   1e2d0a48        fmul    s8, s18, s13
+ 52c:   1e3c19c2        fdiv    s2, s14, s28
+ 530:   1e3c298f        fadd    s15, s12, s28
+ 534:   1e213980        fsub    s0, s12, s1
+ 538:   1e240baf        fmul    s15, s29, s4
+ 53c:   1e77082c        fmul    d12, d1, d23
+ 540:   1e72191b        fdiv    d27, d8, d18
+ 544:   1e6b2a97        fadd    d23, d20, d11
+ 548:   1e723988        fsub    d8, d12, d18
+ 54c:   1e770b1a        fmul    d26, d24, d23
+ 550:   1f0d66f5        fmadd   s21, s23, s13, s25
+ 554:   1f01b956        fmsub   s22, s10, s1, s14
+ 558:   1f227a8e        fnmadd  s14, s20, s2, s30
+ 55c:   1f365ba7        fnmadd  s7, s29, s22, s22
+ 560:   1f4f14ad        fmadd   d13, d5, d15, d5
+ 564:   1f45a98e        fmsub   d14, d12, d5, d10
+ 568:   1f60066a        fnmadd  d10, d19, d0, d1
+ 56c:   1f620054        fnmadd  d20, d2, d2, d0
+ 570:   1e204139        fmov    s25, s9
+ 574:   1e20c094        fabs    s20, s4
+ 578:   1e214363        fneg    s3, s27
+ 57c:   1e21c041        fsqrt   s1, s2
+ 580:   1e22c01e        fcvt    d30, s0
+ 584:   1e60408c        fmov    d12, d4
+ 588:   1e60c361        fabs    d1, d27
+ 58c:   1e6142c8        fneg    d8, d22
+ 590:   1e61c16b        fsqrt   d11, d11
+ 594:   1e624396        fcvt    s22, d28
+ 598:   1e3802dc        fcvtzs  w28, s22
+ 59c:   9e380374        fcvtzs  x20, s27
+ 5a0:   1e78000e        fcvtzs  w14, d0
+ 5a4:   9e78017a        fcvtzs  x26, d11
+ 5a8:   1e2202dc        scvtf   s28, w22
+ 5ac:   9e220150        scvtf   s16, x10
+ 5b0:   1e6202a8        scvtf   d8, w21
+ 5b4:   9e620395        scvtf   d21, x28
+ 5b8:   1e260318        fmov    w24, s24
+ 5bc:   9e660268        fmov    x8, d19
+ 5c0:   1e270188        fmov    s8, w12
+ 5c4:   9e6700e6        fmov    d6, x7
+ 5c8:   1e3023c0        fcmp    s30, s16
+ 5cc:   1e6b2320        fcmp    d25, d11
+ 5d0:   1e202168        fcmp    s11, #0.0
+ 5d4:   1e602168        fcmp    d11, #0.0
+ 5d8:   2910323d        stp     w29, w12, [x17,#128]
+ 5dc:   297449d6        ldp     w22, w18, [x14,#-96]
+ 5e0:   6948402b        ldpsw   x11, x16, [x1,#64]
+ 5e4:   a9072f40        stp     x0, x11, [x26,#112]
+ 5e8:   a9410747        ldp     x7, x1, [x26,#16]
+ 5ec:   29801f0a        stp     w10, w7, [x24,#0]!
+ 5f0:   29e07307        ldp     w7, w28, [x24,#-256]!
+ 5f4:   69e272b9        ldpsw   x25, x28, [x21,#-240]!
+ 5f8:   a9bf49d4        stp     x20, x18, [x14,#-16]!
+ 5fc:   a9c529a8        ldp     x8, x10, [x13,#80]!
+ 600:   28b0605a        stp     w26, w24, [x2],#-128
+ 604:   28e866a2        ldp     w2, w25, [x21],#-192
+ 608:   68ee0ab1        ldpsw   x17, x2, [x21],#-144
+ 60c:   a886296c        stp     x12, x10, [x11],#96
+ 610:   a8fe1a38        ldp     x24, x6, [x17],#-32
+ 614:   282479c3        stnp    w3, w30, [x14,#-224]
+ 618:   286e534f        ldnp    w15, w20, [x26,#-144]
+ 61c:   a8386596        stnp    x22, x25, [x12,#-128]
+ 620:   a8755a3b        ldnp    x27, x22, [x17,#-176]
+ 624:   1e601000        fmov    d0, #2.000000000000000000e+00
+ 628:   1e603000        fmov    d0, #2.125000000000000000e+00
+ 62c:   1e621000        fmov    d0, #4.000000000000000000e+00
+ 630:   1e623000        fmov    d0, #4.250000000000000000e+00
+ 634:   1e641000        fmov    d0, #8.000000000000000000e+00
+ 638:   1e643000        fmov    d0, #8.500000000000000000e+00
+ 63c:   1e661000        fmov    d0, #1.600000000000000000e+01
+ 640:   1e663000        fmov    d0, #1.700000000000000000e+01
+ 644:   1e681000        fmov    d0, #1.250000000000000000e-01
+ 648:   1e683000        fmov    d0, #1.328125000000000000e-01
+ 64c:   1e6a1000        fmov    d0, #2.500000000000000000e-01
+ 650:   1e6a3000        fmov    d0, #2.656250000000000000e-01
+ 654:   1e6c1000        fmov    d0, #5.000000000000000000e-01
+ 658:   1e6c3000        fmov    d0, #5.312500000000000000e-01
+ 65c:   1e6e1000        fmov    d0, #1.000000000000000000e+00
+ 660:   1e6e3000        fmov    d0, #1.062500000000000000e+00
+ 664:   1e701000        fmov    d0, #-2.000000000000000000e+00
+ 668:   1e703000        fmov    d0, #-2.125000000000000000e+00
+ 66c:   1e721000        fmov    d0, #-4.000000000000000000e+00
+ 670:   1e723000        fmov    d0, #-4.250000000000000000e+00
+ 674:   1e741000        fmov    d0, #-8.000000000000000000e+00
+ 678:   1e743000        fmov    d0, #-8.500000000000000000e+00
+ 67c:   1e761000        fmov    d0, #-1.600000000000000000e+01
+ 680:   1e763000        fmov    d0, #-1.700000000000000000e+01
+ 684:   1e781000        fmov    d0, #-1.250000000000000000e-01
+ 688:   1e783000        fmov    d0, #-1.328125000000000000e-01
+ 68c:   1e7a1000        fmov    d0, #-2.500000000000000000e-01
+ 690:   1e7a3000        fmov    d0, #-2.656250000000000000e-01
+ 694:   1e7c1000        fmov    d0, #-5.000000000000000000e-01
+ 698:   1e7c3000        fmov    d0, #-5.312500000000000000e-01
+ 69c:   1e7e1000        fmov    d0, #-1.000000000000000000e+00
+ 6a0:   1e7e3000        fmov    d0, #-1.062500000000000000e+00
+ */
+
+  static const unsigned int insns[] =
+  {
+    0x8b0772d3,     0xcb4a3570,     0xab9c09bb,     0xeb9aa794,
+    0x0b934e68,     0x4b0a3924,     0x2b1e3568,     0x6b132720,
+    0x8a154c14,     0xaa1445d5,     0xca01cf99,     0xea8b3f6a,
+    0x0a8c5cb9,     0x2a4a11d2,     0x4a855aa4,     0x6a857415,
+    0x8aa697da,     0xaa6d7423,     0xca29bf80,     0xea3cb8bd,
+    0x0a675249,     0x2ab961ba,     0x4a331899,     0x6a646345,
+    0x11055267,     0x31064408,     0x51028e9d,     0x710bdee8,
+    0x91082d81,     0xb106a962,     0xd10b33ae,     0xf10918ab,
+    0x121102d7,     0x3204cd44,     0x5204cf00,     0x72099fb3,
+    0x92729545,     0xb20e37cc,     0xd27c34be,     0xf27e4efa,
+    0x14000000,     0x17ffffd7,     0x1400017f,     0x94000000,
+    0x97ffffd4,     0x9400017c,     0x3400000c,     0x34fffa2c,
+    0x34002f2c,     0x35000014,     0x35fff9d4,     0x35002ed4,
+    0xb400000c,     0xb4fff96c,     0xb4002e6c,     0xb5000018,
+    0xb5fff918,     0xb5002e18,     0x10000006,     0x10fff8a6,
+    0x10002da6,     0x90000015,     0x36080001,     0x360ff821,
+    0x36082d21,     0x37480008,     0x374ff7c8,     0x37482cc8,
+    0x128b50ec,     0x52a9ff8b,     0x7281d095,     0x92edfebd,
+    0xd28361e3,     0xf2a4cc96,     0x9346590c,     0x33194f33,
+    0x531d3d89,     0x9350433c,     0xb34464ac,     0xd3462140,
+    0x139a61a4,     0x93d87fd7,     0x54000000,     0x54fff5a0,
+    0x54002aa0,     0x54000001,     0x54fff541,     0x54002a41,
+    0x54000002,     0x54fff4e2,     0x540029e2,     0x54000002,
+    0x54fff482,     0x54002982,     0x54000003,     0x54fff423,
+    0x54002923,     0x54000003,     0x54fff3c3,     0x540028c3,
+    0x54000004,     0x54fff364,     0x54002864,     0x54000005,
+    0x54fff305,     0x54002805,     0x54000006,     0x54fff2a6,
+    0x540027a6,     0x54000007,     0x54fff247,     0x54002747,
+    0x54000008,     0x54fff1e8,     0x540026e8,     0x54000009,
+    0x54fff189,     0x54002689,     0x5400000a,     0x54fff12a,
+    0x5400262a,     0x5400000b,     0x54fff0cb,     0x540025cb,
+    0x5400000c,     0x54fff06c,     0x5400256c,     0x5400000d,
+    0x54fff00d,     0x5400250d,     0x5400000e,     0x54ffefae,
+    0x540024ae,     0x5400000f,     0x54ffef4f,     0x5400244f,
+    0xd4063721,     0xd4035082,     0xd400bfe3,     0xd4282fc0,
+    0xd444c320,     0xd503201f,     0xd69f03e0,     0xd6bf03e0,
+    0xd5033fdf,     0xd5033f9f,     0xd5033abf,     0xd61f0040,
+    0xd63f00a0,     0xc8147c55,     0xc805fcfd,     0xc85f7e05,
+    0xc85fffbb,     0xc89fffa0,     0xc8dfff95,     0x88157cf8,
+    0x8815ff9a,     0x885f7cd5,     0x885fffcf,     0x889ffc73,
+    0x88dffc56,     0x48127c0f,     0x480bff85,     0x485f7cdd,
+    0x485ffcf2,     0x489fff99,     0x48dffe62,     0x080a7c3e,
+    0x0814fed5,     0x085f7c59,     0x085ffcb8,     0x089ffc70,
+    0x08dfffb6,     0xc87f0a68,     0xc87fcdc7,     0xc82870bb,
+    0xc825b8c8,     0x887f12d9,     0x887fb9ed,     0x8834215a,
+    0x8837ca52,     0xf806317e,     0xb81b3337,     0x39000dc2,
+    0x78005149,     0xf84391f4,     0xb85b220c,     0x385fd356,
+    0x785d127e,     0x389f4149,     0x79801e3c,     0x79c014a3,
+    0xb89a5231,     0xfc5ef282,     0xbc5f60f6,     0xfc12125e,
+    0xbc0152cd,     0xf8190e49,     0xb800befd,     0x381ffd92,
+    0x781e9e90,     0xf8409fa3,     0xb8413c79,     0x385fffa1,
+    0x785c7fa8,     0x389f3dc5,     0x78801f6a,     0x78c19d4b,
+    0xb89a4ec4,     0xfc408eeb,     0xbc436e79,     0xfc152ce1,
+    0xbc036f28,     0xf8025565,     0xb80135f8,     0x381ff74f,
+    0x781fa652,     0xf851a447,     0xb85e557b,     0x385e7472,
+    0x785e070a,     0x38804556,     0x78819591,     0x78dc24e8,
+    0xb89cd6d7,     0xfc430738,     0xbc5f6595,     0xfc1225b2,
+    0xbc1d7430,     0xf82fcac2,     0xb83d6a02,     0x382e5a54,
+    0x7834fa66,     0xf86ecbae,     0xb86cda90,     0x3860d989,
+    0x78637a2c,     0x38a3fa22,     0x78b15827,     0x78f2d9f9,
+    0xb8ac6ab7,     0xfc6879a5,     0xbc767943,     0xfc3bc84e,
+    0xbc3968d4,     0xf91fc0fe,     0xb91da50f,     0x391d280b,
+    0x791d2e23,     0xf95bc8e2,     0xb95ce525,     0x395ae53c,
+    0x795c9282,     0x399d7dd6,     0x799fe008,     0x79de9bc0,
+    0xb99aae78,     0xfd597598,     0xbd5d1d08,     0xfd1f3dea,
+    0xbd1a227a,     0x5800148a,     0x18000003,     0xf88092e0,
+    0xd8ffdf00,     0xf8a84860,     0xf99d7560,     0x1a1c012d,
+    0x3a1c027b,     0x5a060253,     0x7a03028e,     0x9a0801d0,
+    0xba0803a0,     0xda140308,     0xfa00038c,     0x0b3010d7,
+    0x2b37ab39,     0xcb2466da,     0x6b33efb1,     0x8b350fcb,
+    0xab208a70,     0xcb39e52b,     0xeb2c9291,     0x3a4bd1a3,
+    0x7a4c81a2,     0xba42106c,     0xfa5560e3,     0x3a4e3844,
+    0x7a515a26,     0xba4c2940,     0xfa52aaae,     0x1a8cc1b5,
+    0x1a8f976a,     0x5a8981a0,     0x5a9a6492,     0x9a8793ac,
+    0x9a9474e6,     0xda83d2b6,     0xda9b9593,     0x5ac00200,
+    0x5ac006f1,     0x5ac009d1,     0x5ac013d8,     0x5ac016d8,
+    0xdac00223,     0xdac005ac,     0xdac00ac9,     0xdac00c00,
+    0xdac01205,     0xdac016d9,     0x1ac0089d,     0x1add0fa0,
+    0x1ad52225,     0x1ad22529,     0x1ac82b61,     0x1acd2e92,
+    0x9acc0b28,     0x9adc0ca7,     0x9adb2225,     0x9ad42757,
+    0x9adc291c,     0x9ac42fa3,     0x1b1a55d1,     0x1b0bafc1,
+    0x9b067221,     0x9b1ea0de,     0x9b2e20d5,     0x9b38cd4a,
+    0x9bae6254,     0x9ba59452,     0x1e2d0a48,     0x1e3c19c2,
+    0x1e3c298f,     0x1e213980,     0x1e240baf,     0x1e77082c,
+    0x1e72191b,     0x1e6b2a97,     0x1e723988,     0x1e770b1a,
+    0x1f0d66f5,     0x1f01b956,     0x1f227a8e,     0x1f365ba7,
+    0x1f4f14ad,     0x1f45a98e,     0x1f60066a,     0x1f620054,
+    0x1e204139,     0x1e20c094,     0x1e214363,     0x1e21c041,
+    0x1e22c01e,     0x1e60408c,     0x1e60c361,     0x1e6142c8,
+    0x1e61c16b,     0x1e624396,     0x1e3802dc,     0x9e380374,
+    0x1e78000e,     0x9e78017a,     0x1e2202dc,     0x9e220150,
+    0x1e6202a8,     0x9e620395,     0x1e260318,     0x9e660268,
+    0x1e270188,     0x9e6700e6,     0x1e3023c0,     0x1e6b2320,
+    0x1e202168,     0x1e602168,     0x2910323d,     0x297449d6,
+    0x6948402b,     0xa9072f40,     0xa9410747,     0x29801f0a,
+    0x29e07307,     0x69e272b9,     0xa9bf49d4,     0xa9c529a8,
+    0x28b0605a,     0x28e866a2,     0x68ee0ab1,     0xa886296c,
+    0xa8fe1a38,     0x282479c3,     0x286e534f,     0xa8386596,
+    0xa8755a3b,     0x1e601000,     0x1e603000,     0x1e621000,
+    0x1e623000,     0x1e641000,     0x1e643000,     0x1e661000,
+    0x1e663000,     0x1e681000,     0x1e683000,     0x1e6a1000,
+    0x1e6a3000,     0x1e6c1000,     0x1e6c3000,     0x1e6e1000,
+    0x1e6e3000,     0x1e701000,     0x1e703000,     0x1e721000,
+    0x1e723000,     0x1e741000,     0x1e743000,     0x1e761000,
+    0x1e763000,     0x1e781000,     0x1e783000,     0x1e7a1000,
+    0x1e7a3000,     0x1e7c1000,     0x1e7c3000,     0x1e7e1000,
+    0x1e7e3000,
+  };
+// END  Generated code -- do not edit
+
+  {
+    bool ok = true;
+    unsigned int *insns1 = (unsigned int *)entry;
+    for (unsigned int i = 0; i < sizeof insns / sizeof insns[0]; i++) {
+      if (insns[i] != insns1[i]) {
+        ok = false;
+        printf("Ours:\n");
+        Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]);
+        printf("Theirs:\n");
+        Disassembler::decode((address)&insns[i], (address)&insns[i+1]);
+        printf("\n");
+      }
+    }
+    assert(ok, "Assembler smoke test failed");
+  }
+
+#ifndef PRODUCT
+
+  address PC = __ pc();
+  __ ld1(v0, __ T16B, Address(r16)); // No offset
+  __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index
+    __ ld1(v0, __ T16B, Address(r16, r17)); // Post-indexed by register
+
+
+#endif // PRODUCT
+#endif // ASSERT
+}
+
+#undef __
+
+// Implementation of Assembler
+
+void Assembler::emit_data64(jlong data,
+                            relocInfo::relocType rtype,
+                            int format) {
+  if (rtype == relocInfo::none) {
+    emit_long64(data);
+  } else {
+    emit_data64(data, Relocation::spec_simple(rtype), format);
+  }
+}
+
+void Assembler::emit_data64(jlong data,
+                            RelocationHolder const& rspec,
+                            int format) {
+
+  assert(inst_mark() != NULL, "must be inside InstructionMark");
+  // Do not use AbstractAssembler::relocate, which is not intended for
+  // embedded words.  Instead, relocate to the enclosing instruction.
+  code_section()->relocate(inst_mark(), rspec, format);
+  emit_long64(data);
+}
+
+extern "C" {
+  void das(uint64_t start, int len) {
+    ResourceMark rm;
+    len <<= 2;
+    if (len < 0)
+      Disassembler::decode((address)start + len, (address)start);
+    else
+      Disassembler::decode((address)start, (address)start + len);
+  }
+
+  JNIEXPORT void das1(unsigned long insn) {
+    das(insn, 1);
+  }
+}
+
+#define gas_assert(ARG1) assert(ARG1, #ARG1)
+
+#define __ as->
+
+void Address::lea(MacroAssembler *as, Register r) const {
+  Relocation* reloc = _rspec.reloc();
+  relocInfo::relocType rtype = (relocInfo::relocType) reloc->type();
+
+  switch(_mode) {
+  case base_plus_offset: {
+    if (_offset == 0 && _base == r) // it's a nop
+      break;
+    if (_offset > 0)
+      __ add(r, _base, _offset);
+    else
+      __ sub(r, _base, -_offset);
+    break;
+  }
+  case base_plus_offset_reg: {
+    __ add(r, _base, _index, _ext.op(), MAX(_ext.shift(), 0));
+    break;
+  }
+  case literal: {
+    if (rtype == relocInfo::none)
+      __ mov(r, target());
+    else
+      __ movptr(r, (uint64_t)target());
+    break;
+  }
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+void Assembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
+  ShouldNotReachHere();
+}
+
+#undef __
+
+#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)
+
+  void Assembler::adr(Register Rd, address adr) {
+    long offset = adr - pc();
+    int offset_lo = offset & 3;
+    offset >>= 2;
+    starti;
+    f(0, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
+    rf(Rd, 0);
+  }
+
+  void Assembler::_adrp(Register Rd, address adr) {
+    uint64_t pc_page = (uint64_t)pc() >> 12;
+    uint64_t adr_page = (uint64_t)adr >> 12;
+    long offset = adr_page - pc_page;
+    int offset_lo = offset & 3;
+    offset >>= 2;
+    starti;
+    f(1, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
+    rf(Rd, 0);
+  }
+
+#undef starti
+
+Address::Address(address target, relocInfo::relocType rtype) : _mode(literal) {
+  _is_lval = false;
+  _target = target;
+  switch (rtype) {
+  case relocInfo::oop_type:
+    // Oops are a special case. Normally they would be their own section,
+    // but in cases like icBuffer they are literals in the code stream that
+    // we don't have a section for. We use relocInfo::none so that we get a
+    // literal address, which is always patchable.
+    break;
+  case relocInfo::external_word_type:
+    _rspec = external_word_Relocation::spec(target);
+    break;
+  case relocInfo::internal_word_type:
+    _rspec = internal_word_Relocation::spec(target);
+    break;
+  case relocInfo::opt_virtual_call_type:
+    _rspec = opt_virtual_call_Relocation::spec();
+    break;
+  case relocInfo::static_call_type:
+    _rspec = static_call_Relocation::spec();
+    break;
+  case relocInfo::runtime_call_type:
+    _rspec = runtime_call_Relocation::spec();
+    break;
+  case relocInfo::poll_type:
+  case relocInfo::poll_return_type:
+    _rspec = Relocation::spec_simple(rtype);
+    break;
+  case relocInfo::none:
+    _rspec = RelocationHolder::none;
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void Assembler::b(const Address &dest) {
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  b(dest.target());
+}
+
+void Assembler::bl(const Address &dest) {
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  bl(dest.target());
+}
+
+void Assembler::adr(Register r, const Address &dest) {
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  adr(r, dest.target());
+}
+
+void Assembler::br(Condition cc, Label &L) {
+  if (L.is_bound()) {
+    br(cc, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    br(cc, pc());
+  }
+}
+
+void Assembler::wrap_label(Label &L,
+                                 Assembler::uncond_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(pc());
+  }
+}
+
+void Assembler::wrap_label(Register r, Label &L,
+                                 compare_and_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(r, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(r, pc());
+  }
+}
+
+void Assembler::wrap_label(Register r, int bitpos, Label &L,
+                                 test_and_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(r, bitpos, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(r, bitpos, pc());
+  }
+}
+
+void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(target(L), op);
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(pc(), op);
+  }
+}
+
+  // An "all-purpose" add/subtract immediate, per ARM documentation:
+  // A "programmer-friendly" assembler may accept a negative immediate
+  // between -(2^24 -1) and -1 inclusive, causing it to convert a
+  // requested ADD operation to a SUB, or vice versa, and then encode
+  // the absolute value of the immediate as for uimm24.
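+  //
+  // Illustrative only (register choice is arbitrary): a request such as
+  //   __ add(r0, r1, -16);
+  // arrives here with a negative immediate, is switched to the negated
+  // opcode, and is emitted as "sub x0, x1, #16".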
+void Assembler::add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
+                                  int negated_op) {
+  bool sets_flags = op & 1;   // low bit of op is the S bit: this op sets flags
+  union {
+    unsigned u;
+    int imm;
+  };
+  u = uimm;
+  bool shift = false;
+  bool neg = imm < 0;
+  if (neg) {
+    imm = -imm;
+    op = negated_op;
+  }
+  assert(Rd != sp || imm % 16 == 0, "misaligned stack");
+  if (imm >= (1 << 11)
+      && ((imm >> 12) << 12 == imm)) {
+    imm >>= 12;
+    shift = true;
+  }
+  f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10);
+
+  // add/subtract immediate ops with the S bit set treat r31 as zr;
+  // with S unset they use sp.
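+  // (For illustration: with S set, Rd == r31 encodes zr, so "subs r31, Rn, #imm"
+  // is effectively "cmp Rn, #imm"; with S clear, r31 as Rd or Rn means sp.)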
+  if (sets_flags)
+    zrf(Rd, 0);
+  else
+    srf(Rd, 0);
+
+  srf(Rn, 5);
+}
+
+bool Assembler::operand_valid_for_add_sub_immediate(long imm) {
+  bool shift = false;
+  unsigned long uimm = uabs(imm);
+  if (uimm < (1 << 12))
+    return true;
+  if (uimm < (1 << 24)
+      && ((uimm >> 12) << 12 == uimm)) {
+    return true;
+  }
+  return false;
+}
+
+bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
+  return encode_logical_immediate(is32, imm) != 0xffffffff;
+}
+
+static uint64_t doubleTo64Bits(jdouble d) {
+  union {
+    jdouble double_value;
+    uint64_t double_bits;
+  };
+
+  double_value = d;
+  return double_bits;
+}
+
+bool Assembler::operand_valid_for_float_immediate(double imm) {
+  // If imm is all zero bits we can use ZR as the source of a
+  // floating-point value.
+  if (doubleTo64Bits(imm) == 0)
+    return true;
+
+  // Otherwise try to encode imm then convert the encoded value back
+  // and make sure it's the exact same bit pattern.
+  unsigned result = encoding_for_fp_immediate(imm);
+  return doubleTo64Bits(imm) == fp_immediate_for_encoding(result, true);
+}
+
+void Assembler::relocate(address at, const RelocationHolder& rspec)
+{
+  code_section()->relocate(at, rspec);
+}
+
+void Assembler::relocate(const RelocationHolder& rspec)
+{
+  AbstractAssembler::relocate(rspec);
+}
+
+int AbstractAssembler::code_fill_byte() {
+  return 0;
+}
+
+// n.b. this is implemented in subclass MacroAssembler
+void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); }
+
+
+// these are the functions provided by the simulator which are used to
+// encode and decode logical immediates and floating point immediates
+//
+//   u_int64_t logical_immediate_for_encoding(u_int32_t encoding);
+//
+//   u_int32_t encoding_for_logical_immediate(u_int64_t immediate);
+//
+//   u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp);
+//
+//   u_int32_t encoding_for_fp_immediate(float immediate);
+//
+// we currently import these from the simulator library but the
+// definitions will need to be moved here when we switch to real
+// hardware.
+
+// and now the routines called by the assembler which encapsulate the
+// above encode and decode functions
+
+uint32_t
+asm_util::encode_logical_immediate(bool is32, uint64_t imm)
+{
+  if (is32) {
+    /* Allow all zeros or all ones in top 32-bits, so that
+       constant expressions like ~1 are permitted. */
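+    /* For example (illustration only): (uint64_t)~1 == 0xfffffffffffffffe;
+       its upper half is all ones, so it is accepted here, and the lower
+       half 0xfffffffe is what gets replicated below.  */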
+    if (imm >> 32 != 0 && imm >> 32 != 0xffffffff)
+      return 0xffffffff;
+    /* Replicate the 32 lower bits to the 32 upper bits.  */
+    imm &= 0xffffffff;
+    imm |= imm << 32;
+  }
+
+  return encoding_for_logical_immediate(imm);
+}
+
+unsigned Assembler::pack(double value) {
+  float val = (float)value;
+  unsigned result = encoding_for_fp_immediate(val);
+  guarantee(unpack(result) == value,
+            "Invalid floating-point immediate operand");
+  return result;
+}
+
+// Packed operands for Floating-point Move (immediate)
+
+static float unpack(unsigned value) {
+  union {
+    unsigned ival;
+    float val;
+  };
+  ival = fp_immediate_for_encoding(value, 0);
+  return val;
+}
+
+// Implementation of MacroAssembler
+
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
+  int instructions = 1;
+  assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant");
+  long offset = (target - branch) >> 2;
+  unsigned insn = *(unsigned*)branch;
+  if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
+    // Load register (literal)
+    Instruction_aarch64::spatch(branch, 23, 5, offset);
+  } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
+    // Unconditional branch (immediate)
+    Instruction_aarch64::spatch(branch, 25, 0, offset);
+  } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
+    // Conditional branch (immediate)
+    Instruction_aarch64::spatch(branch, 23, 5, offset);
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
+    // Compare & branch (immediate)
+    Instruction_aarch64::spatch(branch, 23, 5, offset);
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
+    // Test & branch (immediate)
+    Instruction_aarch64::spatch(branch, 18, 5, offset);
+  } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
+    // PC-rel. addressing
+    offset = target-branch;
+    int shift = Instruction_aarch64::extract(insn, 31, 31);
+    if (shift) {
+      u_int64_t dest = (u_int64_t)target;
+      uint64_t pc_page = (uint64_t)branch >> 12;
+      uint64_t adr_page = (uint64_t)target >> 12;
+      unsigned offset_lo = dest & 0xfff;
+      offset = adr_page - pc_page;
+
+      // We handle 4 types of PC relative addressing
+      //   1 - adrp    Rx, target_page
+      //       ldr/str Ry, [Rx, #offset_in_page]
+      //   2 - adrp    Rx, target_page
+      //       add     Ry, Rx, #offset_in_page
+      //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //       movk    Rx, #imm16<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //
+      // In the first 3 cases we must check that Rx is the same in the adrp and the
+      // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
+      // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
+      // to be followed by a random unrelated ldr/str, add or movk instruction.
+      //
+      // In jdk7 the card table byte map base is aligned on a 1K
+      // boundary, which may fail to be 4K aligned. In that case the
+      // card table load will fall into category 2.
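+      //
+      // A minimal sketch of the type 1 shape handled here (registers and
+      // offsets are invented for illustration):
+      //   adrp x10, <target_page>             // immlo/immhi patched at bits 30..29, 23..5
+      //   ldr  x11, [x10, #<offset_in_page>]  // imm12 patched at bits 21..10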
+
+      unsigned insn2 = ((unsigned*)branch)[1];
+      if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
+                Instruction_aarch64::extract(insn, 4, 0) ==
+                        Instruction_aarch64::extract(insn2, 9, 5)) {
+        // Load/store register (unsigned immediate)
+        unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
+        Instruction_aarch64::patch(branch + sizeof (unsigned),
+                                    21, 10, offset_lo >> size);
+        guarantee(((dest >> size) << size) == dest, "misaligned target");
+        instructions = 2;
+      } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
+                Instruction_aarch64::extract(insn, 4, 0) ==
+                        Instruction_aarch64::extract(insn2, 4, 0)) {
+        // add (immediate)
+        Instruction_aarch64::patch(branch + sizeof (unsigned),
+                                   21, 10, offset_lo);
+        instructions = 2;
+      } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
+                   Instruction_aarch64::extract(insn, 4, 0) ==
+                     Instruction_aarch64::extract(insn2, 4, 0)) {
+        // movk #imm16<<32
+        Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
+        long dest = ((long)target & 0xffffffffL) | ((long)branch & 0xffff00000000L);
+        long pc_page = (long)branch >> 12;
+        long adr_page = (long)dest >> 12;
+        offset = adr_page - pc_page;
+        instructions = 2;
+      }
+    }
+    int offset_lo = offset & 3;
+    offset >>= 2;
+    Instruction_aarch64::spatch(branch, 23, 5, offset);
+    Instruction_aarch64::patch(branch, 30, 29, offset_lo);
+  } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
+    u_int64_t dest = (u_int64_t)target;
+    // Move wide constant
+    assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
+    assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
+    Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
+    Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
+    Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
+    assert(target_addr_for_insn(branch) == target, "should be");
+    instructions = 3;
+  } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
+             Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
+    // nothing to do
+    assert(target == 0, "did not expect to relocate target for polling page load");
+  } else {
+    ShouldNotReachHere();
+  }
+  return instructions * NativeInstruction::instruction_size;
+}
+
+int MacroAssembler::patch_oop(address insn_addr, address o) {
+  int instructions;
+  unsigned insn = *(unsigned*)insn_addr;
+  assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+
+  // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
+  // narrow OOPs by setting the upper 16 bits in the first
+  // instruction.
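+  //
+  // Concretely (matching the patch code below), a narrow oop n is
+  // materialized as
+  //   movz dst, #(n >> 16), lsl #16
+  //   movk dst, #(n & 0xffff)
+  // while a wide oop uses movz/movk/movk carrying bits 0-15, 16-31
+  // and 32-47 of the pointer.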
+  if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
+    // Move narrow OOP
+    assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+    narrowOop n = oopDesc::encode_heap_oop((oop)o);
+    Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
+    Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
+    instructions = 2;
+  } else {
+    // Move wide OOP
+    assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
+    uintptr_t dest = (uintptr_t)o;
+    Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
+    Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
+    Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
+    instructions = 3;
+  }
+  return instructions * NativeInstruction::instruction_size;
+}
+
+address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
+  long offset = 0;
+  if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
+    // Load register (literal)
+    offset = Instruction_aarch64::sextract(insn, 23, 5);
+    return address(((uint64_t)insn_addr + (offset << 2)));
+  } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
+    // Unconditional branch (immediate)
+    offset = Instruction_aarch64::sextract(insn, 25, 0);
+  } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
+    // Conditional branch (immediate)
+    offset = Instruction_aarch64::sextract(insn, 23, 5);
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
+    // Compare & branch (immediate)
+    offset = Instruction_aarch64::sextract(insn, 23, 5);
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
+    // Test & branch (immediate)
+    offset = Instruction_aarch64::sextract(insn, 18, 5);
+  } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
+    // PC-rel. addressing
+    offset = Instruction_aarch64::extract(insn, 30, 29);
+    offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
+    int shift = Instruction_aarch64::extract(insn, 31, 31) ? 12 : 0;
+    if (shift) {
+      offset <<= shift;
+      uint64_t target_page = ((uint64_t)insn_addr) + offset;
+      target_page &= ((uint64_t)-1) << shift;
+      // Return the target address for the following sequences
+      //   1 - adrp    Rx, target_page
+      //       ldr/str Ry, [Rx, #offset_in_page]
+      //   2 - adrp    Rx, target_page
+      //       add     Ry, Rx, #offset_in_page
+      //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //       movk    Rx, #imm16<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //
+      // In the first two cases we check that the register is the same and
+      // return target_page + the offset within the page.  In the third
+      // case the movk supplies bits 32-47 of the target, which are merged
+      // into target_page.  Otherwise we assume it is a page aligned
+      // relocation and return the target page only.
+      //
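+      // For instance, in case 1 the returned address is
+      // target_page + (byte_offset << size), i.e. the adrp page plus the
+      // scaled unsigned immediate of the ldr/str.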
+      unsigned insn2 = ((unsigned*)insn_addr)[1];
+      if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
+                Instruction_aarch64::extract(insn, 4, 0) ==
+                        Instruction_aarch64::extract(insn2, 9, 5)) {
+        // Load/store register (unsigned immediate)
+        unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
+        unsigned int size = Instruction_aarch64::extract(insn2, 31, 30);
+        return address(target_page + (byte_offset << size));
+      } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
+                Instruction_aarch64::extract(insn, 4, 0) ==
+                        Instruction_aarch64::extract(insn2, 4, 0)) {
+        // add (immediate)
+        unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
+        return address(target_page + byte_offset);
+      } else {
+        if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110  &&
+               Instruction_aarch64::extract(insn, 4, 0) ==
+                 Instruction_aarch64::extract(insn2, 4, 0)) {
+          target_page = (target_page & 0xffffffff) |
+                         ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
+        }
+        return (address)target_page;
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
+    u_int32_t *insns = (u_int32_t *)insn_addr;
+    // Move wide constant: movz, movk, movk.  See movptr().
+    assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
+    assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
+    return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
+                   + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
+                   + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
+  } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
+             Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
+    return 0;
+  } else {
+    ShouldNotReachHere();
+  }
+  return address(((uint64_t)insn_addr + (offset << 2)));
+}
+
+void MacroAssembler::serialize_memory(Register thread, Register tmp) {
+  dsb(Assembler::SY);
+}
+
+
+void MacroAssembler::reset_last_Java_frame(bool clear_fp,
+                                           bool clear_pc) {
+  // we must set sp to zero to clear frame
+  str(zr, Address(rthread, JavaThread::last_Java_sp_offset()));
+  // must clear fp, so that compiled frames are not confused; it is
+  // possible that we need it only for debugging
+  if (clear_fp) {
+    str(zr, Address(rthread, JavaThread::last_Java_fp_offset()));
+  }
+
+  if (clear_pc) {
+    str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
+  }
+}
+
+// Calls to C land
+//
+// When entering C land, the rfp & sp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
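+//
+// The anchor written here consists of the last_Java_sp, last_Java_fp and
+// last_Java_pc slots of the JavaFrameAnchor embedded in JavaThread; the
+// routines below store into exactly those offsets.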
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         Register last_java_pc,
+                                         Register scratch) {
+
+  if (last_java_pc->is_valid()) {
+      str(last_java_pc, Address(rthread,
+                                JavaThread::frame_anchor_offset()
+                                + JavaFrameAnchor::last_Java_pc_offset()));
+    }
+
+  // determine last_java_sp register
+  if (last_java_sp == sp) {
+    mov(scratch, sp);
+    last_java_sp = scratch;
+  } else if (!last_java_sp->is_valid()) {
+    last_java_sp = esp;
+  }
+
+  str(last_java_sp, Address(rthread, JavaThread::last_Java_sp_offset()));
+
+  // last_java_fp is optional
+  if (last_java_fp->is_valid()) {
+    str(last_java_fp, Address(rthread, JavaThread::last_Java_fp_offset()));
+  }
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         address  last_java_pc,
+                                         Register scratch) {
+  if (last_java_pc != NULL) {
+    adr(scratch, last_java_pc);
+  } else {
+    // FIXME: This is almost never correct.  We should delete all
+    // cases of set_last_Java_frame with last_java_pc=NULL and use the
+    // correct return address instead.
+    adr(scratch, pc());
+  }
+
+  str(scratch, Address(rthread,
+                       JavaThread::frame_anchor_offset()
+                       + JavaFrameAnchor::last_Java_pc_offset()));
+
+  set_last_Java_frame(last_java_sp, last_java_fp, noreg, scratch);
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         Label &L,
+                                         Register scratch) {
+  if (L.is_bound()) {
+    set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
+  } else {
+    InstructionMark im(this);
+    L.add_patch_at(code(), locator());
+    set_last_Java_frame(last_java_sp, last_java_fp, (address)NULL, scratch);
+  }
+}
+
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    blr(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    bl(entry);
+  }
+}
+
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    br(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    b(entry);
+  }
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert_different_registers(lock_reg, obj_reg, swap_reg);
+
+  if (PrintBiasedLockingStatistics && counters == NULL)
+    counters = BiasedLocking::counters();
+
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg);
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
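+  // (The mark word layout assumed here -- see markOopDesc -- is, roughly,
+  //   [JavaThread* owner | epoch | age | biased_lock | lock]
+  // with the low three bits matching biased_lock_pattern (0b101) when the
+  // object is biased.)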
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    ldr(swap_reg, mark_addr);
+  }
+  andr(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place);
+  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+  br(Assembler::NE, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_prototype_header(tmp_reg, obj_reg);
+  orr(tmp_reg, tmp_reg, rthread);
+  eor(tmp_reg, swap_reg, tmp_reg);
+  andr(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (counters != NULL) {
+    Label around;
+    cbnz(tmp_reg, around);
+    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2);
+    b(done);
+    bind(around);
+  } else {
+    cbz(tmp_reg, done);
+  }
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  andr(rscratch1, tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cbnz(rscratch1, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  andr(rscratch1, tmp_reg, markOopDesc::epoch_mask_in_place);
+  cbnz(rscratch1, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  {
+    Label here;
+    mov(rscratch1, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+    andr(swap_reg, swap_reg, rscratch1);
+    orr(tmp_reg, swap_reg, rthread);
+    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case);
+    // If the biasing toward our thread failed, this means that
+    // another thread succeeded in biasing it toward itself and we
+    // need to revoke that bias. The revocation will occur in the
+    // interpreter runtime in the slow case.
+    bind(here);
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
+                  tmp_reg, rscratch1, rscratch2);
+    }
+  }
+  b(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label here;
+    load_prototype_header(tmp_reg, obj_reg);
+    orr(tmp_reg, rthread, tmp_reg);
+    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case);
+    // If the biasing toward our thread failed, then another thread
+    // succeeded in biasing it toward itself and we need to revoke that
+    // bias. The revocation will occur in the runtime in the slow case.
+    bind(here);
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
+                  tmp_reg, rscratch1, rscratch2);
+    }
+  }
+  b(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label here, nope;
+    load_prototype_header(tmp_reg, obj_reg);
+    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, &nope);
+    bind(here);
+
+    // Fall through to the normal CAS-based lock, because no matter what
+    // the result of the above CAS, some thread must have succeeded in
+    // removing the bias bit from the object's header.
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
+                  rscratch1, rscratch2);
+    }
+    bind(nope);
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+  andr(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmp(temp_reg, markOopDesc::biased_lock_pattern);
+  br(Assembler::EQ, done);
+}
+
+
+// added to make this compile
+
+REGISTER_DEFINITION(Register, noreg);
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  if (c_rarg0 != arg ) {
+    masm->mov(c_rarg0, arg);
+  }
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  if (c_rarg1 != arg ) {
+    masm->mov(c_rarg1, arg);
+  }
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  if (c_rarg2 != arg ) {
+    masm->mov(c_rarg2, arg);
+  }
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  if (c_rarg3 != arg ) {
+    masm->mov(c_rarg3, arg);
+  }
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register java_thread,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  int      number_of_arguments,
+                                  bool     check_exceptions) {
+   // determine java_thread register
+  if (!java_thread->is_valid()) {
+    java_thread = rthread;
+  }
+
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = esp;
+  }
+
+  // debugging support
+  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
+  assert(java_thread == rthread, "unexpected register");
+#ifdef ASSERT
+  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
+  // if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");
+#endif // ASSERT
+
+  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
+  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+  // push java thread (becomes first argument of C function)
+
+  mov(c_rarg0, java_thread);
+
+  // set last Java frame before call
+  assert(last_java_sp != rfp, "can't use rfp");
+
+  Label l;
+  set_last_Java_frame(last_java_sp, rfp, l, rscratch1);
+
+  // do the call, remove parameters
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
+
+  // reset last Java frame
+  // Only interpreter should have to clear fp
+  reset_last_Java_frame(true, true);
+
+   // C++ interp handles this in the interpreter
+  check_and_handle_popframe(java_thread);
+  check_and_handle_earlyret(java_thread);
+
+  if (check_exceptions) {
+    // check for pending exceptions (java_thread is set upon return)
+    ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
+    Label ok;
+    cbz(rscratch1, ok);
+    lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
+    br(rscratch1);
+    bind(ok);
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    // !!! FIXME AARCH64 -- retained this, it is in sparc but not in x86 !!!
+    get_vm_result(oop_result, java_thread);
+  }
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
+}
+
+// Maybe emit a call via a trampoline.  If the code cache is small
+// trampolines won't be emitted.
+
+void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+  assert(entry.rspec().type() == relocInfo::runtime_call_type
+         || entry.rspec().type() == relocInfo::opt_virtual_call_type
+         || entry.rspec().type() == relocInfo::static_call_type
+         || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+  unsigned int start_offset = offset();
+#ifdef COMPILER2
+  // We need a trampoline if branches are far.
+  if (far_branches()) {
+    // We don't want to emit a trampoline if C2 is generating dummy
+    // code during its branch shortening phase.
+    CompileTask* task = ciEnv::current()->task();
+    bool in_scratch_emit_size =
+      ((task != NULL) && is_c2_compile(task->comp_level())
+       && Compile::current()->in_scratch_emit_size());
+    if (! in_scratch_emit_size) {
+       emit_trampoline_stub(start_offset, entry.target());
+    }
+  }
+#endif
+
+  if (cbuf) cbuf->set_insts_mark();
+  relocate(entry.rspec());
+#ifdef COMPILER2
+  if (!far_branches()) {
+    bl(entry.target());
+  } else {
+    bl(pc());
+  }
+#else
+    bl(entry.target());
+#endif
+}
+
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+//   branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+//   load the call target from the constant pool
+//   branch (LR still points to the call site above)
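+//
+// In the code emitted below the trampoline body is simply
+//   ldr  rscratch1, <literal placed just after the br>
+//   br   rscratch1
+//   <64-bit destination address>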
+
+void MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+                                             address dest) {
+#ifdef COMPILER2
+  address stub = start_a_stub(Compile::MAX_stubs_size/2);
+  if (stub == NULL) {
+    start_a_stub(Compile::MAX_stubs_size/2);
+    Compile::current()->env()->record_out_of_memory_failure();
+    return;
+  }
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  align(wordSize);
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+                                            + insts_call_instruction_offset));
+  const int stub_start_offset = offset();
+
+  // Now, create the trampoline stub's code:
+  // - load the call
+  // - call
+  Label target;
+  ldr(rscratch1, target);
+  br(rscratch1);
+  bind(target);
+  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+         "should be");
+  emit_long64((int64_t)dest);
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  end_a_stub();
+#else
+  ShouldNotReachHere();
+#endif
+}
+
+void MacroAssembler::c2bool(Register x) {
+  // implements x == 0 ? 0 : 1
+  // note: must only look at least-significant byte of x
+  //       since C-style booleans are stored in one byte
+  //       only! (was bug)
+  tst(x, 0xff);
+  cset(x, Assembler::NE);
+}
+
+void MacroAssembler::ic_call(address entry) {
+  RelocationHolder rh = virtual_call_Relocation::spec(pc());
+  // address const_ptr = long_constant((jlong)Universe::non_oop_word());
+  // unsigned long offset;
+  // ldr_constant(rscratch2, const_ptr);
+  movoop(rscratch2, (jobject)Universe::non_oop_word(), /*immediate*/true);
+  trampoline_call(Address(entry, rh));
+}
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             bool check_exceptions) {
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             int number_of_arguments,
+                             bool check_exceptions) {
+  call_VM_base(oop_result, rthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  ldr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+  str(zr, Address(java_thread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ldr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  str(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) nop();
+}
+
+// these are no-ops overridden by InterpreterMacroAssembler
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  ldr(tmp, ExternalAddress((address) delayed_value_addr));
+
+  if (offset != 0)
+    add(tmp, tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}
+
+void MacroAssembler::notify(int type) {
+  if (type == bytecode_start) {
+    // set_last_Java_frame(esp, rfp, (address)NULL);
+    Assembler::notify(type);
+    // reset_last_Java_frame(true, false);
+  } else {
+    Assembler::notify(type);
+  }
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  ldrw(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
+  add(scan_temp, scan_temp, vtable_base);
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+  lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3)));
+  if (itentry_off)
+    add(recv_klass, recv_klass, itentry_off);
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+    cmp(intf_klass, method_result);
+
+    if (peel) {
+      br(Assembler::EQ, found_method);
+    } else {
+      br(Assembler::NE, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel)  break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    cbz(method_result, L_no_such_interface);
+    add(scan_temp, scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  ldr(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
+  ldr(method_result, Address(recv_klass, scan_temp));
+}
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  const int base = instanceKlass::vtable_start_offset() * wordSize;
+  assert(vtableEntry::size() * wordSize == 8,
+         "adjust the scaling in the code below");
+  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
+
+  if (vtable_index.is_register()) {
+    lea(method_result, Address(recv_klass,
+                               vtable_index.as_register(),
+                               Address::lsl(LogBytesPerWord)));
+    ldr(method_result, Address(method_result, vtable_offset_in_bytes));
+  } else {
+    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
+    ldr(method_result, Address(recv_klass, vtable_offset_in_bytes));
+  }
+}
+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterOrConstant super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            b(label)                /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(sub_klass, super_klass);
+  br(Assembler::EQ, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is a positive 32-bit value, so ldrw
+    // (which zero-extends) does the right thing on LP64.
+    ldrw(temp_reg, super_check_offset_addr);
+    super_check_offset = RegisterOrConstant(temp_reg);
+  }
+  Address super_check_addr(sub_klass, super_check_offset);
+  ldr(rscratch1, super_check_addr);
+  cmp(super_klass, rscratch1); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
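+  //
+  // In other words: display hit -> success; the probed offset was the
+  // secondary_super_cache slot -> slow path; any other (primary) offset
+  // -> failure.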
+
+  if (super_check_offset.is_register()) {
+    br(Assembler::EQ, *L_success);
+    cmp(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      br(Assembler::EQ, *L_slow_path);
+    } else {
+      br(Assembler::NE, *L_failure);
+      final_jmp(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      br(Assembler::EQ, *L_success);
+    } else {
+      br(Assembler::NE, *L_slow_path);
+      final_jmp(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      br(Assembler::EQ, *L_success);
+    } else {
+      br(Assembler::NE, *L_failure);
+      final_jmp(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jmp
+}
+
+// These two are taken from x86, but they look generally useful
+
+// scans count pointer sized words at [addr] for occurrence of value,
+// generic
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
+                                Register scratch) {
+  Label Lloop, Lexit;
+  cbz(count, Lexit);
+  bind(Lloop);
+  ldr(scratch, post(addr, wordSize));
+  cmp(value, scratch);
+  br(EQ, Lexit);
+  sub(count, count, 1);
+  cbnz(count, Lloop);
+  bind(Lexit);
+}
+
+// scans count 4 byte words at [addr] for occurrence of value,
+// generic
+void MacroAssembler::repne_scanw(Register addr, Register value, Register count,
+                                Register scratch) {
+  Label Lloop, Lexit;
+  cbz(count, Lexit);
+  bind(Lloop);
+  // !!! FIXME AARCH64 -- if this only gets called when CompressedOops
+  // is true and repne_scan only gets called when CompressedOops is
+  // false, then the size passed in the post call should be heapOopSize
+  // both here and in repne_scan above.  If it is used more generally
+  // for 32 bit searches and repne_scan is used for 64 bit searches,
+  // then the size needs to be wordSize/2 here and wordSize above.
+  ldrw(scratch, post(addr, wordSize/2));
+  cmpw(value, scratch);
+  br(EQ, Lexit);
+  sub(count, count, 1);
+  cbnz(count, Lloop);
+  bind(Lexit);
+}
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (temp2_reg != noreg)
+    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+  BLOCK_COMMENT("check_klass_subtype_slow_path");
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan code below uses fixed registers (r0, r2, r5), which
+  // we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super)
+  assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter)
+
+  // Get super_klass value into r0 (even if it was in r5 or r2).
+  RegSet pushed_registers;
+  if (!IS_A_TEMP(r2))    pushed_registers += r2;
+  if (!IS_A_TEMP(r5))    pushed_registers += r5;
+
+  if (super_klass != r0 || UseCompressedOops) {
+    if (!IS_A_TEMP(r0))   pushed_registers += r0;
+  }
+
+  push(pushed_registers, sp);
+
+#ifndef PRODUCT
+  mov(rscratch2, (address)&SharedRuntime::_partial_subtype_ctr);
+  Address pst_counter_addr(rscratch2);
+  ldr(rscratch1, pst_counter_addr);
+  add(rscratch1, rscratch1, 1);
+  str(rscratch1, pst_counter_addr);
+#endif //PRODUCT
+
+  // We will consult the secondary-super array.
+  ldr(r5, secondary_supers_addr);
+  // Load the 32 bit array length.
+  ldrw(r2, Address(r5, arrayOopDesc::length_offset_in_bytes()));
+  // Skip to start of data.
+  add(r5, r5, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+  // This part is tricky, as values in supers array could be 32 or 64 bit wide
+  // and we store values in objArrays always encoded, thus we need to encode
+  // the value of r0 before repne.  Note that r0 is dead after the repne.
+  if (UseCompressedOops) {
+    encode_heap_oop_not_null(r0); // Changes flags.
+    cmp(sp, zr); // Clear Z flag; SP is never zero
+    repne_scanw(r5, r0, r2, rscratch1);
+  } else {
+    cmp(sp, zr); // Clear Z flag; SP is never zero
+    // Scan R2 words at [R5] for an occurrence of R0.
+    // Set NZ/Z based on last compare.
+    repne_scan(r5, r0, r2, rscratch1);
+  }
+
+  // Unspill the temp. registers:
+  pop(pushed_registers, sp);
+
+  br(Assembler::NE, *L_failure);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  str(super_klass, super_cache_addr);
+
+  if (L_success != &L_fallthrough) {
+    b(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+  if (!VerifyOops) return;
+
+  // Pass register number to verify_oop_subroutine
+  const char* b = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("verify_oop: %s: %s", reg->name(), s);
+    b = code_string(ss.as_string());
+  }
+  BLOCK_COMMENT("verify_oop {");
+
+  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
+  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
+
+  mov(r0, reg);
+  mov(rscratch1, (address)b);
+
+  // call indirectly to solve generation ordering problem
+  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  ldr(rscratch2, Address(rscratch2));
+  blr(rscratch2);
+
+  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
+  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
+
+  BLOCK_COMMENT("} verify_oop");
+}
+
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+  if (!VerifyOops) return;
+
+  const char* b = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("verify_oop_addr: %s", s);
+    b = code_string(ss.as_string());
+  }
+  BLOCK_COMMENT("verify_oop_addr {");
+
+  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
+  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
+
+  // addr may contain sp so we will have to adjust it based on the
+  // pushes that we just did.
+  if (addr.uses(sp)) {
+    lea(r0, addr);
+    ldr(r0, Address(r0, 4 * wordSize));
+  } else {
+    ldr(r0, addr);
+  }
+  mov(rscratch1, (address)b);
+
+  // call indirectly to solve generation ordering problem
+  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  ldr(rscratch2, Address(rscratch2));
+  blr(rscratch2);
+
+  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
+  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
+
+  BLOCK_COMMENT("} verify_oop_addr");
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  if (arg_slot.is_constant()) {
+    return Address(esp, arg_slot.as_constant() * stackElementSize
+                   + offset);
+  } else {
+    add(rscratch1, esp, arg_slot.as_register(),
+        ext::uxtx, exact_log2(stackElementSize));
+    return Address(rscratch1, offset);
+  }
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments,
+                                       Label *retaddr) {
+  call_VM_leaf_base1(entry_point, number_of_arguments, 0, ret_type_integral, retaddr);
+}
+
+void MacroAssembler::call_VM_leaf_base1(address entry_point,
+                                        int number_of_gp_arguments,
+                                        int number_of_fp_arguments,
+                                        ret_type type,
+                                        Label *retaddr) {
+  Label E, L;
+
+  // !!! FIXME AARCH64 we normally need to save rmethod as it is
+  // volatile.  however we don't need to when calling from the
+  // interpreter.
+  stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize)));
+
+  // We add 1 to number_of_arguments because the thread in arg0 is
+  // not counted
+  mov(rscratch1, entry_point);
+  blrt(rscratch1, number_of_gp_arguments + 1, number_of_fp_arguments, type);
+  if (retaddr)
+    bind(*retaddr);
+
+  ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize)));
+  maybe_isb();
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+  call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  pass_arg0(this, arg_0);
+  pass_arg1(this, arg_1);
+  call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
+                                  Register arg_1, Register arg_2) {
+  pass_arg0(this, arg_0);
+  pass_arg1(this, arg_1);
+  pass_arg2(this, arg_2);
+  call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+
+  assert(arg_0 != c_rarg1, "smashed arg");
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  assert(arg_0 != c_rarg2, "smashed arg");
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  assert(arg_0 != c_rarg1, "smashed arg");
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+  assert(arg_0 != c_rarg3, "smashed arg");
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+  assert(arg_0 != c_rarg2, "smashed arg");
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  assert(arg_0 != c_rarg1, "smashed arg");
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+  if (needs_explicit_null_check(offset)) {
+    // provoke OS NULL exception if reg = NULL by
+    // accessing M[reg] w/o changing any registers
+    // NOTE: this is plenty to provoke a segv
+    ldr(zr, Address(reg));
+  } else {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+  }
+}
+
+// MacroAssembler protected routines needed to implement
+// public methods
+
+void MacroAssembler::mov(Register r, Address dest) {
+  code_section()->relocate(pc(), dest.rspec());
+  u_int64_t imm64 = (u_int64_t)dest.target();
+  movptr(r, imm64);
+}
+
+// Move a constant pointer into r.  In AArch64 mode the virtual
+// address space is 48 bits in size, so we only need three
+// instructions to create a patchable instruction sequence that can
+// reach anywhere.
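+//
+// For example, movptr(r, 0x123456789abc) expands to
+//   movz r, #0x9abc
+//   movk r, #0x5678, lsl #16
+//   movk r, #0x1234, lsl #32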
+void MacroAssembler::movptr(Register r, uintptr_t imm64) {
+#ifndef PRODUCT
+  {
+    char buffer[64];
+    snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64);
+    block_comment(buffer);
+  }
+#endif
+  assert(imm64 < (1ul << 48), "48-bit overflow in address constant");
+  movz(r, imm64 & 0xffff);
+  imm64 >>= 16;
+  movk(r, imm64 & 0xffff, 16);
+  imm64 >>= 16;
+  movk(r, imm64 & 0xffff, 32);
+}
+
+void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
+{
+#ifndef PRODUCT
+  {
+    char buffer[64];
+    snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64);
+    block_comment(buffer);
+  }
+#endif
+  if (operand_valid_for_logical_immediate(false, imm64)) {
+    orr(dst, zr, imm64);
+  } else {
+    // we can use a combination of MOVZ or MOVN with
+    // MOVK to build up the constant
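+    // e.g. 0xffffffff00001234 has two 0xffff halfwords, so the
+    // neg_count == 2 case below emits
+    //   movn dst, #0xedcb          (giving 0xffffffffffff1234)
+    //   movk dst, #0x0000, lsl #16 (clearing bits 16-31)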
+    u_int64_t imm_h[4];
+    int zero_count = 0;
+    int neg_count = 0;
+    int i;
+    for (i = 0; i < 4; i++) {
+      imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
+      if (imm_h[i] == 0) {
+        zero_count++;
+      } else if (imm_h[i] == 0xffffL) {
+        neg_count++;
+      }
+    }
+    if (zero_count == 4) {
+      // one MOVZ will do
+      movz(dst, 0);
+    } else if (neg_count == 4) {
+      // one MOVN will do
+      movn(dst, 0);
+    } else if (zero_count == 3) {
+      for (i = 0; i < 4; i++) {
+        if (imm_h[i] != 0L) {
+          movz(dst, (u_int32_t)imm_h[i], (i << 4));
+          break;
+        }
+      }
+    } else if (neg_count == 3) {
+      // one MOVN will do
+      for (int i = 0; i < 4; i++) {
+        if (imm_h[i] != 0xffffL) {
+          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
+          break;
+        }
+      }
+    } else if (zero_count == 2) {
+      // one MOVZ and one MOVK will do
+      for (i = 0; i < 3; i++) {
+        if (imm_h[i] != 0L) {
+          movz(dst, (u_int32_t)imm_h[i], (i << 4));
+          i++;
+          break;
+        }
+      }
+      for (;i < 4; i++) {
+        if (imm_h[i] != 0L) {
+          movk(dst, (u_int32_t)imm_h[i], (i << 4));
+        }
+      }
+    } else if (neg_count == 2) {
+      // one MOVN and one MOVK will do
+      for (i = 0; i < 4; i++) {
+        if (imm_h[i] != 0xffffL) {
+          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
+          i++;
+          break;
+        }
+      }
+      for (;i < 4; i++) {
+        if (imm_h[i] != 0xffffL) {
+          movk(dst, (u_int32_t)imm_h[i], (i << 4));
+        }
+      }
+    } else if (zero_count == 1) {
+      // one MOVZ and two MOVKs will do
+      for (i = 0; i < 4; i++) {
+        if (imm_h[i] != 0L) {
+          movz(dst, (u_int32_t)imm_h[i], (i << 4));
+          i++;
+          break;
+        }
+      }
+      for (;i < 4; i++) {
+        if (imm_h[i] != 0x0L) {
+          movk(dst, (u_int32_t)imm_h[i], (i << 4));
+        }
+      }
+    } else if (neg_count == 1) {
+      // one MOVN and two MOVKs will do
+      for (i = 0; i < 4; i++) {
+        if (imm_h[i] != 0xffffL) {
+          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
+          i++;
+          break;
+        }
+      }
+      for (;i < 4; i++) {
+        if (imm_h[i] != 0xffffL) {
+          movk(dst, (u_int32_t)imm_h[i], (i << 4));
+        }
+      }
+    } else {
+      // use a MOVZ and 3 MOVKs (makes it easier to debug)
+      movz(dst, (u_int32_t)imm_h[0], 0);
+      for (i = 1; i < 4; i++) {
+        movk(dst, (u_int32_t)imm_h[i], (i << 4));
+      }
+    }
+  }
+}
+
+void MacroAssembler::mov_immediate32(Register dst, u_int32_t imm32)
+{
+#ifndef PRODUCT
+    {
+      char buffer[64];
+      snprintf(buffer, sizeof(buffer), "0x%"PRIX32, imm32);
+      block_comment(buffer);
+    }
+#endif
+  if (operand_valid_for_logical_immediate(true, imm32)) {
+    orrw(dst, zr, imm32);
+  } else {
+    // we can use MOVZ, MOVN or two calls to MOVK to build up the
+    // constant
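+    // e.g. 0xffff1234 becomes movnw(dst, 0xedcb, 0), and 0x12340000
+    // becomes movzw(dst, 0x1234, 16).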
+    u_int32_t imm_h[2];
+    imm_h[0] = imm32 & 0xffff;
+    imm_h[1] = ((imm32 >> 16) & 0xffff);
+    if (imm_h[0] == 0) {
+      movzw(dst, imm_h[1], 16);
+    } else if (imm_h[0] == 0xffff) {
+      movnw(dst, imm_h[1] ^ 0xffff, 16);
+    } else if (imm_h[1] == 0) {
+      movzw(dst, imm_h[0], 0);
+    } else if (imm_h[1] == 0xffff) {
+      movnw(dst, imm_h[0] ^ 0xffff, 0);
+    } else {
+      // use a MOVZ and MOVK (makes it easier to debug)
+      movzw(dst, imm_h[0], 0);
+      movkw(dst, imm_h[1], 16);
+    }
+  }
+}
+
+// Form an address from base + offset in Rd.  Rd may or may
+// not actually be used: you must use the Address that is returned.
+// It is up to you to ensure that the shift provided matches the size
+// of your data.
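+//
+// For example, with shift == 3 a byte offset of 0x123008 is split into
+// add(Rd, base, 0x120000) plus Address(Rd, 0x3008), whose scaled 12-bit
+// immediate (0x601) fits the ldr/str encoding.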
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
+  if (Address::offset_ok_for_immed(byte_offset, shift))
+    // It fits; no need for any heroics
+    return Address(base, byte_offset);
+
+  // Don't do anything clever with negative or misaligned offsets
+  unsigned mask = (1 << shift) - 1;
+  if (byte_offset < 0 || byte_offset & mask) {
+    mov(Rd, byte_offset);
+    add(Rd, base, Rd);
+    return Address(Rd);
+  }
+
+  // See if we can do this with two 12-bit offsets
+  {
+    unsigned long word_offset = byte_offset >> shift;
+    unsigned long masked_offset = word_offset & 0xfff000;
+    if (Address::offset_ok_for_immed(word_offset - masked_offset)
+        && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
+      add(Rd, base, masked_offset << shift);
+      word_offset -= masked_offset;
+      return Address(Rd, word_offset << shift);
+    }
+  }
+
+  // Do it the hard way
+  mov(Rd, byte_offset);
+  add(Rd, base, Rd);
+  return Address(Rd);
+}
+
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
+  Label retry_load;
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+    prfm(Address(counter_addr), PSTL1STRM);
+  bind(retry_load);
+  // flush and load exclusive from the memory location
+  ldxrw(tmp, counter_addr);
+  addw(tmp, tmp, 1);
+  // if we store+flush with no intervening write tmp will be zero
+  stxrw(tmp2, tmp, counter_addr);
+  cbnzw(tmp2, retry_load);
+}
+
+
+int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
+                                    bool want_remainder, Register scratch)
+{
+  // Full implementation of Java idiv and irem.  The function
+  // returns the (pc) offset of the div instruction - may be needed
+  // for implicit exceptions.
+  //
+  // constraint : ra/rb =/= scratch
+  //         normal case
+  //
+  // input : ra: dividend
+  //         rb: divisor
+  //
+  // result: either
+  //         quotient  (= ra idiv rb)
+  //         remainder (= ra irem rb)
+
+  assert(ra != scratch && rb != scratch, "reg cannot be scratch");
+
+  int idivl_offset = offset();
+  if (! want_remainder) {
+    sdivw(result, ra, rb);
+  } else {
+    sdivw(scratch, ra, rb);
+    Assembler::msubw(result, scratch, rb, ra);
+  }
+
+  return idivl_offset;
+}
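+
+// Illustrative example (not part of the generated code): for the remainder
+// case with ra == -7 and rb == 3, sdivw rounds towards zero giving
+// scratch == -2, and msubw computes result = ra - scratch * rb
+// = -7 - (-6) = -1, matching the Java result of -7 % 3.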
+
+int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
+                                    bool want_remainder, Register scratch)
+{
+  // Full implementation of Java ldiv and lrem.  The function
+  // returns the (pc) offset of the div instruction - may be needed
+  // for implicit exceptions.
+  //
+  // constraint : ra/rb =/= scratch
+  //         normal case
+  //
+  // input : ra: dividend
+  //         rb: divisor
+  //
+  // result: either
+  //         quotient  (= ra idiv rb)
+  //         remainder (= ra irem rb)
+
+  assert(ra != scratch && rb != scratch, "reg cannot be scratch");
+
+  int idivq_offset = offset();
+  if (! want_remainder) {
+    sdiv(result, ra, rb);
+  } else {
+    sdiv(scratch, ra, rb);
+    msub(result, scratch, rb, ra);
+  }
+
+  return idivq_offset;
+}
+
+// MacroAssembler routines actually found to be needed
+
+void MacroAssembler::push(Register src)
+{
+  str(src, Address(pre(esp, -1 * wordSize)));
+}
+
+void MacroAssembler::pop(Register dst)
+{
+  ldr(dst, Address(post(esp, 1 * wordSize)));
+}
+
+// Note: load_unsigned_short used to be called load_unsigned_word.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
+  int off = offset();
+  ldrh(dst, src);
+  return off;
+}
+
+int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
+  int off = offset();
+  ldrb(dst, src);
+  return off;
+}
+
+int MacroAssembler::load_signed_short(Register dst, Address src) {
+  int off = offset();
+  ldrsh(dst, src);
+  return off;
+}
+
+int MacroAssembler::load_signed_byte(Register dst, Address src) {
+  int off = offset();
+  ldrsb(dst, src);
+  return off;
+}
+
+int MacroAssembler::load_signed_short32(Register dst, Address src) {
+  int off = offset();
+  ldrshw(dst, src);
+  return off;
+}
+
+int MacroAssembler::load_signed_byte32(Register dst, Address src) {
+  int off = offset();
+  ldrsbw(dst, src);
+  return off;
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+  case  8:  ldr(dst, src); break;
+  case  4:  ldrw(dst, src); break;
+  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
+  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  switch (size_in_bytes) {
+  case  8:  str(src, dst); break;
+  case  4:  strw(src, dst); break;
+  case  2:  strh(src, dst); break;
+  case  1:  strb(src, dst); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::decrementw(Register reg, int value)
+{
+  if (value < 0)  { incrementw(reg, -value);      return; }
+  if (value == 0) {                               return; }
+  if (value < (1 << 12)) { subw(reg, reg, value); return; }
+  /* else */ {
+    guarantee(reg != rscratch2, "invalid dst for register decrement");
+    movw(rscratch2, (unsigned)value);
+    subw(reg, reg, rscratch2);
+  }
+}
+
+void MacroAssembler::decrement(Register reg, int value)
+{
+  if (value < 0)  { increment(reg, -value);      return; }
+  if (value == 0) {                              return; }
+  if (value < (1 << 12)) { sub(reg, reg, value); return; }
+  /* else */ {
+    assert(reg != rscratch2, "invalid dst for register decrement");
+    mov(rscratch2, (unsigned long)value);
+    sub(reg, reg, rscratch2);
+  }
+}
+
+void MacroAssembler::decrementw(Address dst, int value)
+{
+  assert(!dst.uses(rscratch1), "invalid dst for address decrement");
+  ldrw(rscratch1, dst);
+  decrementw(rscratch1, value);
+  strw(rscratch1, dst);
+}
+
+void MacroAssembler::decrement(Address dst, int value)
+{
+  assert(!dst.uses(rscratch1), "invalid address for decrement");
+  ldr(rscratch1, dst);
+  decrement(rscratch1, value);
+  str(rscratch1, dst);
+}
+
+void MacroAssembler::incrementw(Register reg, int value)
+{
+  if (value < 0)  { decrementw(reg, -value);      return; }
+  if (value == 0) {                               return; }
+  if (value < (1 << 12)) { addw(reg, reg, value); return; }
+  /* else */ {
+    assert(reg != rscratch2, "invalid dst for register increment");
+    movw(rscratch2, (unsigned)value);
+    addw(reg, reg, rscratch2);
+  }
+}
+
+void MacroAssembler::increment(Register reg, int value)
+{
+  if (value < 0)  { decrement(reg, -value);      return; }
+  if (value == 0) {                              return; }
+  if (value < (1 << 12)) { add(reg, reg, value); return; }
+  /* else */ {
+    assert(reg != rscratch2, "invalid dst for register increment");
+    movw(rscratch2, (unsigned)value);
+    add(reg, reg, rscratch2);
+  }
+}
+
+void MacroAssembler::incrementw(Address dst, int value)
+{
+  assert(!dst.uses(rscratch1), "invalid dst for address increment");
+  ldrw(rscratch1, dst);
+  incrementw(rscratch1, value);
+  strw(rscratch1, dst);
+}
+
+void MacroAssembler::increment(Address dst, int value)
+{
+  assert(!dst.uses(rscratch1), "invalid dst for address increment");
+  ldr(rscratch1, dst);
+  increment(rscratch1, value);
+  str(rscratch1, dst);
+}
+
+
+void MacroAssembler::pusha() {
+  push(0x7fffffff, sp);
+}
+
+void MacroAssembler::popa() {
+  pop(0x7fffffff, sp);
+}
+
+// Push lots of registers in the bit set supplied.  Don't push sp.
+// Return the number of words pushed
+int MacroAssembler::push(unsigned int bitset, Register stack) {
+  int words_pushed = 0;
+
+  // Scan bitset to accumulate register pairs
+  unsigned char regs[32];
+  int count = 0;
+  for (int reg = 0; reg <= 30; reg++) {
+    if (1 & bitset)
+      regs[count++] = reg;
+    bitset >>= 1;
+  }
+  regs[count++] = zr->encoding_nocheck();
+  count &= ~1;  // Only push an even number of regs
+
+  if (count) {
+    stp(as_Register(regs[0]), as_Register(regs[1]),
+       Address(pre(stack, -count * wordSize)));
+    words_pushed += 2;
+  }
+  for (int i = 2; i < count; i += 2) {
+    stp(as_Register(regs[i]), as_Register(regs[i+1]),
+       Address(stack, i * wordSize));
+    words_pushed += 2;
+  }
+
+  assert(words_pushed == count, "oops, pushed != count");
+
+  return count;
+}
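+
+// Illustrative example (not part of the generated code): push(0b10110, stack)
+// collects regs = { r1, r2, r4 }, pads with zr to an even count of 4 and emits
+//   stp(r1, r2, Address(pre(stack, -4 * wordSize)));
+//   stp(r4, zr, Address(stack, 2 * wordSize));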
+
+int MacroAssembler::pop(unsigned int bitset, Register stack) {
+  int words_pushed = 0;
+
+  // Scan bitset to accumulate register pairs
+  unsigned char regs[32];
+  int count = 0;
+  for (int reg = 0; reg <= 30; reg++) {
+    if (1 & bitset)
+      regs[count++] = reg;
+    bitset >>= 1;
+  }
+  regs[count++] = zr->encoding_nocheck();
+  count &= ~1;
+
+  for (int i = 2; i < count; i += 2) {
+    ldp(as_Register(regs[i]), as_Register(regs[i+1]),
+       Address(stack, i * wordSize));
+    words_pushed += 2;
+  }
+  if (count) {
+    ldp(as_Register(regs[0]), as_Register(regs[1]),
+       Address(post(stack, count * wordSize)));
+    words_pushed += 2;
+  }
+
+  assert(words_pushed == count, "oops, pushed != count");
+
+  return count;
+}
+#ifdef ASSERT
+void MacroAssembler::verify_heapbase(const char* msg) {
+#if 0
+  assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (CheckCompressedOops) {
+    Label ok;
+    push(1 << rscratch1->encoding(), sp); // cmpptr trashes rscratch1
+    cmpptr(rheapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
+    br(Assembler::EQ, ok);
+    stop(msg);
+    bind(ok);
+    pop(1 << rscratch1->encoding(), sp);
+  }
+#endif
+}
+#endif
+
+void MacroAssembler::stop(const char* msg) {
+  address ip = pc();
+  pusha();
+  mov(c_rarg0, (address)msg);
+  mov(c_rarg1, (address)ip);
+  mov(c_rarg2, sp);
+  mov(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+  // call(c_rarg3);
+  blrt(c_rarg3, 3, 0, 1);
+  hlt(0);
+}
+
+// If a constant does not fit in an immediate field, generate some
+// number of MOV instructions and then perform the operation.
+void MacroAssembler::wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm,
+                                           add_sub_imm_insn insn1,
+                                           add_sub_reg_insn insn2) {
+  assert(Rd != zr, "Rd = zr and not setting flags?");
+  if (operand_valid_for_add_sub_immediate((int)imm)) {
+    (this->*insn1)(Rd, Rn, imm);
+  } else {
+    if (uabs(imm) < (1 << 24)) {
+       (this->*insn1)(Rd, Rn, imm & -(1 << 12));
+       (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1));
+    } else {
+       assert_different_registers(Rd, Rn);
+       mov(Rd, (uint64_t)imm);
+       (this->*insn2)(Rd, Rn, Rd, LSL, 0);
+    }
+  }
+}
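+
+// Illustrative example (not part of the generated code): an add of
+// imm == 0x123456 is not encodable as a single add/sub immediate but is
+// below 1 << 24, so it is split into
+//   add(Rd, Rn, 0x123000);   // imm & -(1 << 12)
+//   add(Rd, Rd, 0x000456);   // imm & ((1 << 12) - 1)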
+
+// Separate version which sets the flags. Optimisations are more restricted
+// because we must set the flags correctly.
+void MacroAssembler::wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm,
+                                           add_sub_imm_insn insn1,
+                                           add_sub_reg_insn insn2) {
+  if (operand_valid_for_add_sub_immediate((int)imm)) {
+    (this->*insn1)(Rd, Rn, imm);
+  } else {
+    assert_different_registers(Rd, Rn);
+    assert(Rd != zr, "overflow in immediate operand");
+    mov(Rd, (uint64_t)imm);
+    (this->*insn2)(Rd, Rn, Rd, LSL, 0);
+  }
+}
+
+
+void MacroAssembler::add(Register Rd, Register Rn, RegisterOrConstant increment) {
+  if (increment.is_register()) {
+    add(Rd, Rn, increment.as_register());
+  } else {
+    add(Rd, Rn, increment.as_constant());
+  }
+}
+
+void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment) {
+  if (increment.is_register()) {
+    addw(Rd, Rn, increment.as_register());
+  } else {
+    addw(Rd, Rn, increment.as_constant());
+  }
+}
+
+void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) {
+  if (decrement.is_register()) {
+    sub(Rd, Rn, decrement.as_register());
+  } else {
+    sub(Rd, Rn, decrement.as_constant());
+  }
+}
+
+void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) {
+  if (decrement.is_register()) {
+    subw(Rd, Rn, decrement.as_register());
+  } else {
+    subw(Rd, Rn, decrement.as_constant());
+  }
+}
+
+// !!! FIXME AARCH64 -- check this is correct !!!
+void MacroAssembler::reinit_heapbase()
+{
+  if (UseCompressedOops) {
+    if (Universe::heap() != NULL) {
+      if (Universe::narrow_oop_base() == NULL) {
+        mov(rheapbase, zr);
+      } else {
+        mov(rheapbase, Universe::narrow_oop_base());
+      }
+    } else {
+      lea(rheapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
+      ldr(rheapbase, Address(rheapbase));
+    }
+  }
+}
+
+// this simulates the behaviour of the x86 cmpxchg instruction using a
+// load linked/store conditional pair. we use the acquire/release
+// versions of these instructions so that we flush pending writes as
+// per Java semantics.
+
+// n.b the x86 version assumes the old value to be compared against is
+// in rax and updates rax with the value located in memory if the
+// cmpxchg fails. we supply a register for the old value explicitly
+
+// the aarch64 load linked/store conditional instructions do not
+// accept an offset. so, unlike x86, we must provide a plain register
+// to identify the memory word to be compared/exchanged rather than a
+// register+offset Address.
+
+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
+                                Label &succeed, Label *fail) {
+  // oldv holds comparison value
+  // newv holds value to write in exchange
+  // addr identifies memory word to compare against/update
+  // tmp returns 0/1 for success/failure
+  Label retry_load, nope;
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+    prfm(Address(addr), PSTL1STRM);
+  bind(retry_load);
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  ldaxr(tmp, addr);
+  cmp(tmp, oldv);
+  br(Assembler::NE, nope);
+  // if we store+flush with no intervening write tmp will be zero
+  stlxr(tmp, newv, addr);
+  cbzw(tmp, succeed);
+  // retry so we only ever return after a load fails to compare
+  // ensures we don't return a stale value after a failed write.
+  b(retry_load);
+  // if the memory word differs we return it in oldv and signal a fail
+  bind(nope);
+  membar(AnyAny);
+  mov(oldv, tmp);
+  if (fail)
+    b(*fail);
+}
+
+void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
+                                Label &succeed, Label *fail) {
+  // oldv holds comparison value
+  // newv holds value to write in exchange
+  // addr identifies memory word to compare against/update
+  // tmp returns 0/1 for success/failure
+  Label retry_load, nope;
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+    prfm(Address(addr), PSTL1STRM);
+  bind(retry_load);
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  ldaxrw(tmp, addr);
+  cmp(tmp, oldv);
+  br(Assembler::NE, nope);
+  // if we store+flush with no intervening write tmp will be zero
+  stlxrw(tmp, newv, addr);
+  cbzw(tmp, succeed);
+  // retry so we only ever return after a load fails to compare
+  // ensures we don't return a stale value after a failed write.
+  b(retry_load);
+  // if the memory word differs we return it in oldv and signal a fail
+  bind(nope);
+  membar(AnyAny);
+  mov(oldv, tmp);
+  if (fail)
+    b(*fail);
+}
+
+static bool different(Register a, RegisterOrConstant b, Register c) {
+  if (b.is_constant())
+    return a != c;
+  else
+    return a != b.as_register() && a != c && b.as_register() != c;
+}
+
+#define ATOMIC_OP(LDXR, OP, IOP, STXR)                                       \
+void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
+  Register result = rscratch2;                                          \
+  if (prev->is_valid())                                                 \
+    result = different(prev, incr, addr) ? prev : rscratch2;            \
+                                                                        \
+  Label retry_load;                                                     \
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
+  bind(retry_load);                                                     \
+  LDXR(result, addr);                                                   \
+  OP(rscratch1, result, incr);                                          \
+  STXR(rscratch2, rscratch1, addr);                                     \
+  cbnzw(rscratch2, retry_load);                                         \
+  if (prev->is_valid() && prev != result) {                             \
+    IOP(prev, rscratch1, incr);                                         \
+  }                                                                     \
+}
+
+ATOMIC_OP(ldxr, add, sub, stxr)
+ATOMIC_OP(ldxrw, addw, subw, stxrw)
+
+#undef ATOMIC_OP
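+
+// Illustrative expansion (not part of the generated code): atomic_add(prev,
+// incr, addr) from ATOMIC_OP(ldxr, add, sub, stxr) loops over
+//   ldxr(result, addr);                 // result := old value
+//   add(rscratch1, result, incr);       // new value
+//   stxr(rscratch2, rscratch1, addr);   // try to publish it
+//   cbnzw(rscratch2, retry_load);       // retry if the store failed
+// and, when prev is valid but had to alias rscratch2, recovers the old
+// value afterwards with sub(prev, rscratch1, incr).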
+
+#define ATOMIC_XCHG(OP, LDXR, STXR)                                     \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+  Register result = rscratch2;                                          \
+  if (prev->is_valid())                                                 \
+    result = different(prev, newv, addr) ? prev : rscratch2;            \
+                                                                        \
+  Label retry_load;                                                     \
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
+  bind(retry_load);                                                     \
+  LDXR(result, addr);                                                   \
+  STXR(rscratch1, newv, addr);                                          \
+  cbnzw(rscratch1, retry_load);                                         \
+  if (prev->is_valid() && prev != result)                               \
+    mov(prev, result);                                                  \
+}
+
+ATOMIC_XCHG(xchg, ldxr, stxr)
+ATOMIC_XCHG(xchgw, ldxrw, stxrw)
+
+#undef ATOMIC_XCHG
+
+void MacroAssembler::incr_allocated_bytes(Register thread,
+                                          Register var_size_in_bytes,
+                                          int con_size_in_bytes,
+                                          Register t1) {
+  if (!thread->is_valid()) {
+    thread = rthread;
+  }
+  assert(t1->is_valid(), "need temp reg");
+
+  ldr(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset())));
+  if (var_size_in_bytes->is_valid()) {
+    add(t1, t1, var_size_in_bytes);
+  } else {
+    add(t1, t1, con_size_in_bytes);
+  }
+  str(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset())));
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
+{
+  // In order to get locks to work, we need to fake an in_VM state
+  if (ShowMessageBoxOnError ) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+#endif
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      ttyLocker ttyl;
+      tty->print_cr(" pc = 0x%016lx", pc);
+#ifndef PRODUCT
+      tty->cr();
+      findpc(pc);
+      tty->cr();
+#endif
+      tty->print_cr(" r0 = 0x%016lx", regs[0]);
+      tty->print_cr(" r1 = 0x%016lx", regs[1]);
+      tty->print_cr(" r2 = 0x%016lx", regs[2]);
+      tty->print_cr(" r3 = 0x%016lx", regs[3]);
+      tty->print_cr(" r4 = 0x%016lx", regs[4]);
+      tty->print_cr(" r5 = 0x%016lx", regs[5]);
+      tty->print_cr(" r6 = 0x%016lx", regs[6]);
+      tty->print_cr(" r7 = 0x%016lx", regs[7]);
+      tty->print_cr(" r8 = 0x%016lx", regs[8]);
+      tty->print_cr(" r9 = 0x%016lx", regs[9]);
+      tty->print_cr("r10 = 0x%016lx", regs[10]);
+      tty->print_cr("r11 = 0x%016lx", regs[11]);
+      tty->print_cr("r12 = 0x%016lx", regs[12]);
+      tty->print_cr("r13 = 0x%016lx", regs[13]);
+      tty->print_cr("r14 = 0x%016lx", regs[14]);
+      tty->print_cr("r15 = 0x%016lx", regs[15]);
+      tty->print_cr("r16 = 0x%016lx", regs[16]);
+      tty->print_cr("r17 = 0x%016lx", regs[17]);
+      tty->print_cr("r18 = 0x%016lx", regs[18]);
+      tty->print_cr("r19 = 0x%016lx", regs[19]);
+      tty->print_cr("r20 = 0x%016lx", regs[20]);
+      tty->print_cr("r21 = 0x%016lx", regs[21]);
+      tty->print_cr("r22 = 0x%016lx", regs[22]);
+      tty->print_cr("r23 = 0x%016lx", regs[23]);
+      tty->print_cr("r24 = 0x%016lx", regs[24]);
+      tty->print_cr("r25 = 0x%016lx", regs[25]);
+      tty->print_cr("r26 = 0x%016lx", regs[26]);
+      tty->print_cr("r27 = 0x%016lx", regs[27]);
+      tty->print_cr("r28 = 0x%016lx", regs[28]);
+      tty->print_cr("r30 = 0x%016lx", regs[30]);
+      tty->print_cr("r31 = 0x%016lx", regs[31]);
+      BREAKPOINT;
+    }
+    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+  } else {
+    ttyLocker ttyl;
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
+                    msg);
+    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+  }
+}
+
+#ifdef BUILTIN_SIM
+// routine to generate an x86 prolog for a stub function which
+// bootstraps into the generated ARM code which directly follows the
+// stub
+//
+// the argument encodes the number of general and fp registers
+// passed by the caller and the calling convention (currently just
+// the number of general registers and assumes C argument passing)
+
+extern "C" {
+int aarch64_stub_prolog_size();
+void aarch64_stub_prolog();
+void aarch64_prolog();
+}
+
+void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type,
+                                   address *prolog_ptr)
+{
+  int calltype = (((ret_type & 0x3) << 8) |
+                  ((fp_arg_count & 0xf) << 4) |
+                  (gp_arg_count & 0xf));
+
+  // the addresses for the x86 to ARM entry code we need to use
+  address start = pc();
+  // printf("start = %lx\n", start);
+  int byteCount =  aarch64_stub_prolog_size();
+  // printf("byteCount = %x\n", byteCount);
+  int instructionCount = (byteCount + 3)/ 4;
+  // printf("instructionCount = %x\n", instructionCount);
+  for (int i = 0; i < instructionCount; i++) {
+    nop();
+  }
+
+  memcpy(start, (void*)aarch64_stub_prolog, byteCount);
+
+  // write the address of the setup routine and the call format at the
+  // end of the copied code
+  u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
+  if (prolog_ptr)
+    patch_end[-2] = (u_int64_t)prolog_ptr;
+  patch_end[-1] = calltype;
+}
+#endif
+
+void MacroAssembler::push_call_clobbered_registers() {
+  push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+
+  // Push v0-v7, v16-v31.
+  for (int i = 30; i >= 0; i -= 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+        stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+             Address(pre(sp, -2 * wordSize)));
+    }
+  }
+}
+
+void MacroAssembler::pop_call_clobbered_registers() {
+
+  for (int i = 0; i < 32; i += 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+    }
+  }
+
+  pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+}
+
+void MacroAssembler::push_CPU_state() {
+    push(0x3fffffff, sp);         // integer registers except lr & sp
+
+    for (int i = 30; i >= 0; i -= 2)
+      stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(pre(sp, -2 * wordSize)));
+}
+
+void MacroAssembler::pop_CPU_state() {
+  for (int i = 0; i < 32; i += 2)
+    ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+         Address(post(sp, 2 * wordSize)));
+
+  pop(0x3fffffff, sp);         // integer registers except lr & sp
+}
+
+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table
+ *
+ * @param [in,out]crc   Register containing the crc.
+ * @param [in]val       Register containing the byte to fold into the CRC.
+ * @param [in]table     Register containing the table of crc constants.
+ *
+ * uint32_t crc;
+ * val = crc_table[(val ^ crc) & 0xFF];
+ * crc = val ^ (crc >> 8);
+ *
+ */
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+  eor(val, val, crc);
+  andr(val, val, 0xff);
+  ldrw(val, Address(table, val, Address::lsl(2)));
+  eor(crc, val, crc, Assembler::LSR, 8);
+}
+
+/**
+ * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3
+ *
+ * @param [in,out]crc   Register containing the crc.
+ * @param [in]v         Register containing the 32-bit to fold into the CRC.
+ * @param [in]table0    Register containing table 0 of crc constants.
+ * @param [in]table1    Register containing table 1 of crc constants.
+ * @param [in]table2    Register containing table 2 of crc constants.
+ * @param [in]table3    Register containing table 3 of crc constants.
+ *
+ * uint32_t crc;
+ *   v = crc ^ v
+ *   crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
+ *
+ */
+void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
+        Register table0, Register table1, Register table2, Register table3,
+        bool upper) {
+  eor(v, crc, v, upper ? LSR:LSL, upper ? 32:0);
+  uxtb(tmp, v);
+  ldrw(crc, Address(table3, tmp, Address::lsl(2)));
+  ubfx(tmp, v, 8, 8);
+  ldrw(tmp, Address(table2, tmp, Address::lsl(2)));
+  eor(crc, crc, tmp);
+  ubfx(tmp, v, 16, 8);
+  ldrw(tmp, Address(table1, tmp, Address::lsl(2)));
+  eor(crc, crc, tmp);
+  ubfx(tmp, v, 24, 8);
+  ldrw(tmp, Address(table0, tmp, Address::lsl(2)));
+  eor(crc, crc, tmp);
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register that will contain address of CRC table
+ * @param tmp   scratch register
+ */
+void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3) {
+  Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
+  unsigned long offset;
+
+    ornw(crc, zr, crc);
+
+  if (UseCRC32) {
+    Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
+
+      subs(len, len, 64);
+      br(Assembler::GE, CRC_by64_loop);
+      adds(len, len, 64-4);
+      br(Assembler::GE, CRC_by4_loop);
+      adds(len, len, 4);
+      br(Assembler::GT, CRC_by1_loop);
+      b(L_exit);
+
+    BIND(CRC_by4_loop);
+      ldrw(tmp, Address(post(buf, 4)));
+      subs(len, len, 4);
+      crc32w(crc, crc, tmp);
+      br(Assembler::GE, CRC_by4_loop);
+      adds(len, len, 4);
+      br(Assembler::LE, L_exit);
+    BIND(CRC_by1_loop);
+      ldrb(tmp, Address(post(buf, 1)));
+      subs(len, len, 1);
+      crc32b(crc, crc, tmp);
+      br(Assembler::GT, CRC_by1_loop);
+      b(L_exit);
+
+      align(CodeEntryAlignment);
+    BIND(CRC_by64_loop);
+      subs(len, len, 64);
+      ldp(tmp, tmp3, Address(post(buf, 16)));
+      crc32x(crc, crc, tmp);
+      crc32x(crc, crc, tmp3);
+      ldp(tmp, tmp3, Address(post(buf, 16)));
+      crc32x(crc, crc, tmp);
+      crc32x(crc, crc, tmp3);
+      ldp(tmp, tmp3, Address(post(buf, 16)));
+      crc32x(crc, crc, tmp);
+      crc32x(crc, crc, tmp3);
+      ldp(tmp, tmp3, Address(post(buf, 16)));
+      crc32x(crc, crc, tmp);
+      crc32x(crc, crc, tmp3);
+      br(Assembler::GE, CRC_by64_loop);
+      adds(len, len, 64-4);
+      br(Assembler::GE, CRC_by4_loop);
+      adds(len, len, 4);
+      br(Assembler::GT, CRC_by1_loop);
+    BIND(L_exit);
+      ornw(crc, zr, crc);
+      return;
+  }
+
+    adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
+    if (offset) add(table0, table0, offset);
+    add(table1, table0, 1*256*sizeof(juint));
+    add(table2, table0, 2*256*sizeof(juint));
+    add(table3, table0, 3*256*sizeof(juint));
+
+  if (UseNeon) {
+      cmp(len, 64);
+      br(Assembler::LT, L_by16);
+      eor(v16, T16B, v16, v16);
+
+    Label L_fold;
+
+      add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
+
+      ld1(v0, v1, T2D, post(buf, 32));
+      ld1r(v4, T2D, post(tmp, 8));
+      ld1r(v5, T2D, post(tmp, 8));
+      ld1r(v6, T2D, post(tmp, 8));
+      ld1r(v7, T2D, post(tmp, 8));
+      mov(v16, T4S, 0, crc);
+
+      eor(v0, T16B, v0, v16);
+      sub(len, len, 64);
+
+    BIND(L_fold);
+      pmull(v22, T8H, v0, v5, T8B);
+      pmull(v20, T8H, v0, v7, T8B);
+      pmull(v23, T8H, v0, v4, T8B);
+      pmull(v21, T8H, v0, v6, T8B);
+
+      pmull2(v18, T8H, v0, v5, T16B);
+      pmull2(v16, T8H, v0, v7, T16B);
+      pmull2(v19, T8H, v0, v4, T16B);
+      pmull2(v17, T8H, v0, v6, T16B);
+
+      uzp1(v24, v20, v22, T8H);
+      uzp2(v25, v20, v22, T8H);
+      eor(v20, T16B, v24, v25);
+
+      uzp1(v26, v16, v18, T8H);
+      uzp2(v27, v16, v18, T8H);
+      eor(v16, T16B, v26, v27);
+
+      ushll2(v22, T4S, v20, T8H, 8);
+      ushll(v20, T4S, v20, T4H, 8);
+
+      ushll2(v18, T4S, v16, T8H, 8);
+      ushll(v16, T4S, v16, T4H, 8);
+
+      eor(v22, T16B, v23, v22);
+      eor(v18, T16B, v19, v18);
+      eor(v20, T16B, v21, v20);
+      eor(v16, T16B, v17, v16);
+
+      uzp1(v17, v16, v20, T2D);
+      uzp2(v21, v16, v20, T2D);
+      eor(v17, T16B, v17, v21);
+
+      ushll2(v20, T2D, v17, T4S, 16);
+      ushll(v16, T2D, v17, T2S, 16);
+
+      eor(v20, T16B, v20, v22);
+      eor(v16, T16B, v16, v18);
+
+      uzp1(v17, v20, v16, T2D);
+      uzp2(v21, v20, v16, T2D);
+      eor(v28, T16B, v17, v21);
+
+      pmull(v22, T8H, v1, v5, T8B);
+      pmull(v20, T8H, v1, v7, T8B);
+      pmull(v23, T8H, v1, v4, T8B);
+      pmull(v21, T8H, v1, v6, T8B);
+
+      pmull2(v18, T8H, v1, v5, T16B);
+      pmull2(v16, T8H, v1, v7, T16B);
+      pmull2(v19, T8H, v1, v4, T16B);
+      pmull2(v17, T8H, v1, v6, T16B);
+
+      ld1(v0, v1, T2D, post(buf, 32));
+
+      uzp1(v24, v20, v22, T8H);
+      uzp2(v25, v20, v22, T8H);
+      eor(v20, T16B, v24, v25);
+
+      uzp1(v26, v16, v18, T8H);
+      uzp2(v27, v16, v18, T8H);
+      eor(v16, T16B, v26, v27);
+
+      ushll2(v22, T4S, v20, T8H, 8);
+      ushll(v20, T4S, v20, T4H, 8);
+
+      ushll2(v18, T4S, v16, T8H, 8);
+      ushll(v16, T4S, v16, T4H, 8);
+
+      eor(v22, T16B, v23, v22);
+      eor(v18, T16B, v19, v18);
+      eor(v20, T16B, v21, v20);
+      eor(v16, T16B, v17, v16);
+
+      uzp1(v17, v16, v20, T2D);
+      uzp2(v21, v16, v20, T2D);
+      eor(v16, T16B, v17, v21);
+
+      ushll2(v20, T2D, v16, T4S, 16);
+      ushll(v16, T2D, v16, T2S, 16);
+
+      eor(v20, T16B, v22, v20);
+      eor(v16, T16B, v16, v18);
+
+      uzp1(v17, v20, v16, T2D);
+      uzp2(v21, v20, v16, T2D);
+      eor(v20, T16B, v17, v21);
+
+      shl(v16, v28, T2D, 1);
+      shl(v17, v20, T2D, 1);
+
+      eor(v0, T16B, v0, v16);
+      eor(v1, T16B, v1, v17);
+
+      subs(len, len, 32);
+      br(Assembler::GE, L_fold);
+
+      mov(crc, 0);
+      mov(tmp, v0, T1D, 0);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
+      mov(tmp, v0, T1D, 1);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
+      mov(tmp, v1, T1D, 0);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
+      mov(tmp, v1, T1D, 1);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
+      update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
+
+      add(len, len, 32);
+  }
+
+  BIND(L_by16);
+    subs(len, len, 16);
+    br(Assembler::GE, L_by16_loop);
+    adds(len, len, 16-4);
+    br(Assembler::GE, L_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::GT, L_by1_loop);
+    b(L_exit);
+
+  BIND(L_by4_loop);
+    ldrw(tmp, Address(post(buf, 4)));
+    update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3);
+    subs(len, len, 4);
+    br(Assembler::GE, L_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::LE, L_exit);
+  BIND(L_by1_loop);
+    subs(len, len, 1);
+    ldrb(tmp, Address(post(buf, 1)));
+    update_byte_crc32(crc, tmp, table0);
+    br(Assembler::GT, L_by1_loop);
+    b(L_exit);
+
+    align(CodeEntryAlignment);
+  BIND(L_by16_loop);
+    subs(len, len, 16);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
+    update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
+    update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, false);
+    update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, true);
+    br(Assembler::GE, L_by16_loop);
+    adds(len, len, 16-4);
+    br(Assembler::GE, L_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::GT, L_by1_loop);
+  BIND(L_exit);
+    ornw(crc, zr, crc);
+}
+
+SkipIfEqual::SkipIfEqual(
+    MacroAssembler* masm, const bool* flag_addr, bool value) {
+  _masm = masm;
+  unsigned long offset;
+  _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
+  _masm->ldrb(rscratch1, Address(rscratch1, offset));
+  _masm->cbzw(rscratch1, _label);
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
+
+void MacroAssembler::addptr(const Address &dst, int32_t src) {
+  Address adr;
+  switch(dst.getMode()) {
+  case Address::base_plus_offset:
+    // This is the expected mode, although we allow all the other
+    // forms below.
+    adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
+    break;
+  default:
+    lea(rscratch2, dst);
+    adr = Address(rscratch2);
+    break;
+  }
+  ldr(rscratch1, adr);
+  add(rscratch1, rscratch1, src);
+  str(rscratch1, adr);
+}
+
+void MacroAssembler::cmpptr(Register src1, Address src2) {
+  unsigned long offset;
+  adrp(rscratch1, src2, offset);
+  ldr(rscratch1, Address(rscratch1, offset));
+  cmp(src1, rscratch1);
+}
+
+void MacroAssembler::store_check(Register obj) {
+  // Does a store check for the oop in register obj. The content of
+  // register obj is destroyed afterwards.
+  store_check_part_1(obj);
+  store_check_part_2(obj);
+}
+
+void MacroAssembler::store_check(Register obj, Address dst) {
+  store_check(obj);
+}
+
+
+// split the store check operation so that other instructions can be scheduled in between
+void MacroAssembler::store_check_part_1(Register obj) {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+  lsr(obj, obj, CardTableModRefBS::card_shift);
+}
+
+void MacroAssembler::store_check_part_2(Register obj) {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+  // The calculation for byte_map_base is as follows:
+  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
+  // So this essentially converts an address to a displacement and
+  // it will never need to be relocated.
+
+  // FIXME: It's not likely that disp will fit into an offset so we
+  // don't bother to check, but it could save an instruction.
+  intptr_t disp = (intptr_t) ct->byte_map_base;
+  load_byte_map_base(rscratch1);
+
+  if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+      membar(StoreStore);
+  }
+  strb(zr, Address(obj, rscratch1));
+}
+
+void MacroAssembler::load_klass(Register dst, Register src) {
+  if (UseCompressedOops) {
+    ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    decode_heap_oop_not_null(dst);
+  } else {
+    ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
+// !!! FIXME AARCH64 -- check this is correct !!!
+
+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
+  if (UseCompressedOops) {
+    ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+    if (Universe::narrow_oop_base() == NULL) {
+      cmp(trial_klass, tmp, LSL, Universe::narrow_oop_shift());
+      return;
+    }
+    decode_heap_oop_not_null(tmp);
+  } else {
+    ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+  }
+  cmp(trial_klass, tmp);
+}
+
+// !!! FIXME AARCH64 -- check this is correct !!!
+
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
+  load_klass(dst, src);
+  ldr(dst, Address(dst, Klass::prototype_header_offset()));
+}
+
+void MacroAssembler::store_klass(Register dst, Register src) {
+  // FIXME: Should this be a store release?  Concurrent GCs assume the
+  // klass length is valid if the klass field is not null.
+  if (UseCompressedOops) {
+    encode_heap_oop_not_null(src);
+    strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+  } else {
+    str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+  if (UseCompressedOops) {
+    // Store to klass gap in destination
+    strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+  }
+}
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register d, Register s) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(s, "broken oop in encode_heap_oop");
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      lsr(d, s, LogMinObjAlignmentInBytes);
+    } else {
+      mov(d, s);
+    }
+  } else {
+    subs(d, s, rheapbase);
+    csel(d, d, zr, Assembler::HS);
+    lsr(d, d, LogMinObjAlignmentInBytes);
+
+    /*  Old algorithm: is this any worse?
+    Label nonnull;
+    cbnz(r, nonnull);
+    sub(r, r, rheapbase);
+    bind(nonnull);
+    lsr(r, r, LogMinObjAlignmentInBytes);
+    */
+  }
+}
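+
+// Illustrative example (not part of the generated code): with a non-NULL
+// narrow oop base and 8-byte object alignment, an oop at rheapbase + 0x1000
+// encodes to 0x200 (0x1000 >> LogMinObjAlignmentInBytes), while a NULL oop
+// stays 0 because the subs/csel pair clamps values below rheapbase to zero.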
+
+void MacroAssembler::encode_heap_oop_not_null(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label ok;
+    cbnz(r, ok);
+    stop("null oop passed to encode_heap_oop_not_null");
+    bind(ok);
+  }
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop_not_null");
+  if (Universe::narrow_oop_base() != NULL) {
+    sub(r, r, rheapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    lsr(r, r, LogMinObjAlignmentInBytes);
+  }
+}
+
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label ok;
+    cbnz(src, ok);
+    stop("null oop passed to encode_heap_oop_not_null2");
+    bind(ok);
+  }
+#endif
+  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
+
+  Register data = src;
+  if (Universe::narrow_oop_base() != NULL) {
+    sub(dst, src, rheapbase);
+    data = dst;
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    lsr(dst, data, LogMinObjAlignmentInBytes);
+    data = dst;
+  }
+  if (data == src)
+    mov(dst, src);
+}
+
+void  MacroAssembler::decode_heap_oop(Register d, Register s) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0 || d != s) {
+      lsl(d, s, Universe::narrow_oop_shift());
+    }
+  } else {
+    Label done;
+    if (d != s)
+      mov(d, s);
+    cbz(s, done);
+    add(d, rheapbase, s, Assembler::LSL, LogMinObjAlignmentInBytes);
+    bind(done);
+  }
+  verify_oop(d, "broken oop in decode_heap_oop");
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (Universe::narrow_oop_base() != NULL) {
+      add(r, rheapbase, r, Assembler::LSL, LogMinObjAlignmentInBytes);
+    } else {
+      add(r, zr, r, Assembler::LSL, LogMinObjAlignmentInBytes);
+    }
+  } else {
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+  }
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (Universe::narrow_oop_base() != NULL) {
+      add(dst, rheapbase, src, Assembler::LSL, LogMinObjAlignmentInBytes);
+    } else {
+      add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes);
+    }
+  } else {
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+    if (dst != src) {
+      mov(dst, src);
+    }
+  }
+}
+
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed oops");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+
+  int oop_index = oop_recorder()->find_index(obj);
+  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  code_section()->relocate(inst_mark(), rspec);
+  movz(dst, 0xDEAD, 16);
+  movk(dst, 0xBEEF);
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src)
+{
+  if (UseCompressedOops) {
+    ldrw(dst, src);
+    decode_heap_oop(dst);
+  } else {
+    ldr(dst, src);
+  }
+}
+
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src)
+{
+  if (UseCompressedOops) {
+    ldrw(dst, src);
+    decode_heap_oop_not_null(dst);
+  } else {
+    ldr(dst, src);
+  }
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src) {
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    strw(src, dst);
+  } else
+    str(src, dst);
+}
+
+// Used for storing NULLs.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+  if (UseCompressedOops) {
+    strw(zr, dst);
+  } else
+    str(zr, dst);
+}
+
+#ifndef SERIALGC
+/*
+ * g1_write_barrier_pre -- G1GC pre-write barrier for store of new_val at
+ * store_addr.
+ *
+ * Allocates rscratch1
+ */
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+                                          Register pre_val,
+                                          Register thread,
+                                          Register tmp,
+                                          bool tosca_live,
+                                          bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == rthread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert_different_registers(obj, pre_val, tmp, rscratch1);
+  assert(pre_val != noreg &&  tmp != noreg, "expecting a register");
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  // Is marking active?
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ldrw(tmp, in_progress);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    ldrb(tmp, in_progress);
+  }
+  cbzw(tmp, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  ldr(tmp, index);                      // tmp := *index_adr
+  cbz(tmp, runtime);                    // tmp == 0?
+                                        // If yes, goto runtime
+
+  sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
+  str(tmp, index);                      // *index_adr := tmp
+  ldr(rscratch1, buffer);
+  add(tmp, tmp, rscratch1);             // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  str(pre_val, Address(tmp, 0));
+  b(done);
+
+  bind(runtime);
+  // save the live input values
+  push(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then rfp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    pass_arg1(this, thread);
+    pass_arg0(this, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
+
+  bind(done);
+}
+
+/*
+ * g1_write_barrier_post -- G1GC post-write barrier for store of new_val at
+ * store_addr
+ *
+ * Allocates rscratch1
+ */
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+                                           Register new_val,
+                                           Register thread,
+                                           Register tmp,
+                                           Register tmp2) {
+#ifdef _LP64
+  assert(thread == rthread, "must be");
+#endif // _LP64
+  assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
+                             rscratch1);
+  assert(store_addr != noreg && new_val != noreg && tmp != noreg
+         && tmp2 != noreg, "expecting a register");
+
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  eor(tmp, store_addr, new_val);
+  lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
+  cbz(tmp, done);
+
+  // crosses regions, storing NULL?
+
+  cbz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+  const Register card_addr = tmp;
+
+  lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
+
+  // get the address of the card
+  load_byte_map_base(tmp2);
+  add(card_addr, card_addr, tmp2);
+  ldrb(tmp2, Address(card_addr));
+  cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  br(Assembler::EQ, done);
+
+  assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
+
+  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+
+  ldrb(tmp2, Address(card_addr));
+  cbzw(tmp2, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  strb(zr, Address(card_addr));
+
+  ldr(rscratch1, queue_index);
+  cbz(rscratch1, runtime);
+  sub(rscratch1, rscratch1, wordSize);
+  str(rscratch1, queue_index);
+
+  ldr(tmp2, buffer);
+  str(card_addr, Address(tmp2, rscratch1));
+  b(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr->bit(true) | new_val->bit(true), sp);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(store_addr->bit(true) | new_val->bit(true), sp);
+
+  bind(done);
+}
+
+#endif // SERIALGC
+
+// Move an oop into a register.  immediate is true if we want
+// immediate instructions, i.e. we are not going to patch this
+// instruction while the code is being executed by another thread.  In
+// that case we can use move immediates rather than the constant pool.
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
+  int oop_index;
+  // !!! FIXME AARCH64 -- because of how jdk7 does reloc verification
+  // we need to use movoop when planting Universe::non_oop_word (-1L)
+  // at a call site under ic_call (the verification routine wants an
+  // oop reloc entry). so, that's why we have two special cases here.
+
+  if (obj == NULL || obj == (jobject)Universe::non_oop_word()) {
+    oop_index = oop_recorder()->allocate_index(obj);
+  } else {
+    oop_index = oop_recorder()->find_index(obj);
+    assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+  }
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  if (! immediate) {
+    address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
+    ldr_constant(dst, Address(dummy, rspec));
+  } else
+    mov(dst, Address((address)obj, rspec));
+
+}
+
+Address MacroAssembler::constant_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
+  int oop_index = oop_recorder()->find_index(obj);
+  return Address((address)obj, oop_Relocation::spec(oop_index));
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Register t2,
+                                   Label& slow_case) {
+  assert_different_registers(obj, t2);
+  assert_different_registers(obj, var_size_in_bytes);
+  Register end = t2;
+
+  // verify_tlab();
+
+  ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
+  if (var_size_in_bytes == noreg) {
+    lea(end, Address(obj, con_size_in_bytes));
+  } else {
+    lea(end, Address(obj, var_size_in_bytes));
+  }
+  ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
+  cmp(end, rscratch1);
+  br(Assembler::HI, slow_case);
+
+  // update the tlab top pointer
+  str(end, Address(rthread, JavaThread::tlab_top_offset()));
+
+  // recover var_size_in_bytes if necessary
+  if (var_size_in_bytes == end) {
+    sub(var_size_in_bytes, var_size_in_bytes, obj);
+  }
+  // verify_tlab();
+}
+
+// Preserves r19, and r3.
+Register MacroAssembler::tlab_refill(Label& retry,
+                                     Label& try_eden,
+                                     Label& slow_case) {
+  Register top = r0;
+  Register t1  = r2;
+  Register t2  = r4;
+  assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3);
+  Label do_refill, discard_tlab;
+
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    b(slow_case);
+  }
+
+  ldr(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
+  ldr(t1,  Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
+
+  // calculate amount of free space
+  sub(t1, t1, top);
+  lsr(t1, t1, LogHeapWordSize);
+
+  // Retain tlab and allocate object in shared space if
+  // the amount free in the tlab is too large to discard.
+
+  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
+  cmp(t1, rscratch1);
+  br(Assembler::LE, discard_tlab);
+
+  // Retain
+  // ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
+  mov(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
+  add(rscratch1, rscratch1, t2);
+  str(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
+
+  if (TLABStats) {
+    // increment number of slow_allocations
+    addmw(Address(rthread, in_bytes(JavaThread::tlab_slow_allocations_offset())),
+         1, rscratch1);
+  }
+  b(try_eden);
+
+  bind(discard_tlab);
+  if (TLABStats) {
+    // increment number of refills
+    addmw(Address(rthread, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1,
+         rscratch1);
+    // accumulate wastage -- t1 is amount free in tlab
+    addmw(Address(rthread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1,
+         rscratch1);
+  }
+
+  // if tlab is currently allocated (top or end != null) then
+  // fill [top, end + alignment_reserve) with array object
+  cbz(top, do_refill);
+
+  // set up the mark word
+  mov(rscratch1, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+  str(rscratch1, Address(top, oopDesc::mark_offset_in_bytes()));
+  // set the length to the remaining space
+  sub(t1, t1, typeArrayOopDesc::header_size(T_INT));
+  add(t1, t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
+  lsl(t1, t1, log2_intptr(HeapWordSize/sizeof(jint)));
+  strw(t1, Address(top, arrayOopDesc::length_offset_in_bytes()));
+  // set klass to intArrayKlass
+  {
+    unsigned long offset;
+    // dubious reloc: why not an oop reloc?
+    adrp(rscratch1, ExternalAddress((address)Universe::intArrayKlassObj_addr()),
+         offset);
+    ldr(t1, Address(rscratch1, offset));
+  }
+  // store klass last.  Concurrent GCs assume the klass length is valid if
+  // the klass field is not null.
+  store_klass(top, t1);
+
+  mov(t1, top);
+  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
+  sub(t1, t1, rscratch1);
+  incr_allocated_bytes(rthread, t1, 0, rscratch1);
+
+  // refill the tlab with an eden allocation
+  bind(do_refill);
+  ldr(t1, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
+  lsl(t1, t1, LogHeapWordSize);
+  // allocate new tlab, address returned in top
+  eden_allocate(top, t1, 0, t2, slow_case);
+
+  // Check that t1 was preserved in eden_allocate.
+#ifdef ASSERT
+  if (UseTLAB) {
+    Label ok;
+    Register tsize = r4;
+    assert_different_registers(tsize, rthread, t1);
+    str(tsize, Address(pre(sp, -16)));
+    ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
+    lsl(tsize, tsize, LogHeapWordSize);
+    cmp(t1, tsize);
+    br(Assembler::EQ, ok);
+    STOP("assert(t1 != tlab size)");
+    should_not_reach_here();
+
+    bind(ok);
+    ldr(tsize, Address(post(sp, 16)));
+  }
+#endif
+  str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
+  str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
+  add(top, top, t1);
+  sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+  str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
+  verify_tlab();
+  b(retry);
+
+  return rthread; // for use by caller
+}
+
+// Defines obj, preserves var_size_in_bytes
+void MacroAssembler::eden_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Label& slow_case) {
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    b(slow_case);
+  } else {
+    Register end = t1;
+    Register heap_end = rscratch2;
+    Label retry;
+    bind(retry);
+    {
+      unsigned long offset;
+      adrp(rscratch1, ExternalAddress((address) Universe::heap()->end_addr()), offset);
+      ldr(heap_end, Address(rscratch1, offset));
+    }
+
+    ExternalAddress heap_top((address) Universe::heap()->top_addr());
+
+    // Get the current top of the heap
+    {
+      unsigned long offset;
+      adrp(rscratch1, heap_top, offset);
+      // Use add() here after ADRP, rather than lea().
+      // lea() does not generate anything if its offset is zero.
+      // However, relocs expect to find either an ADD or a load/store
+      // insn after an ADRP.  add() always generates an ADD insn, even
+      // for add(Rn, Rn, 0).
+      add(rscratch1, rscratch1, offset);
+      ldaxr(obj, rscratch1);
+    }
+
+    // Adjust it by the size of our new object
+    if (var_size_in_bytes == noreg) {
+      lea(end, Address(obj, con_size_in_bytes));
+    } else {
+      lea(end, Address(obj, var_size_in_bytes));
+    }
+
+    // if end < obj then we wrapped around high memory
+    cmp(end, obj);
+    br(Assembler::LO, slow_case);
+
+    cmp(end, heap_end);
+    br(Assembler::HI, slow_case);
+
+    // If heap_top hasn't been changed by some other thread, update it.
+    stlxr(rscratch2, end, rscratch1);
+    cbnzw(rscratch2, retry);
+  }
+}
+
+void MacroAssembler::verify_tlab() {
+#ifdef ASSERT
+  if (UseTLAB && VerifyOops) {
+    Label next, ok;
+
+    stp(rscratch2, rscratch1, Address(pre(sp, -16)));
+
+    ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
+    ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
+    cmp(rscratch2, rscratch1);
+    br(Assembler::HS, next);
+    STOP("assert(top >= start)");
+    should_not_reach_here();
+
+    bind(next);
+    ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
+    ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
+    cmp(rscratch2, rscratch1);
+    br(Assembler::HS, ok);
+    STOP("assert(top <= end)");
+    should_not_reach_here();
+
+    bind(ok);
+    ldp(rscratch2, rscratch1, Address(post(sp, 16)));
+  }
+#endif
+}
+
+// Writes to successive stack pages until the given offset is reached, to
+// check for stack overflow + shadow pages.  This clobbers tmp.
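+//
+// Roughly equivalent C for the code below (a sketch; 'page' stands for
+// os::vm_page_size()):
+//
+//   char *p = sp;
+//   do {
+//     p -= page;
+//     size -= page;
+//     *(long *)p = size;   // the stored value is only a debugging crumb
+//   } while (size > 0);
+//   for (int i = 0; i < StackShadowPages - 1; i++) {
+//     p -= page;
+//     *(long *)p = size;
+//   }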
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
+  assert_different_registers(tmp, size, rscratch1);
+  mov(tmp, sp);
+  // Bang stack for total size given plus shadow page size.
+  // Bang one page at a time because large size can bang beyond yellow and
+  // red zones.
+  Label loop;
+  mov(rscratch1, os::vm_page_size());
+  bind(loop);
+  lea(tmp, Address(tmp, -os::vm_page_size()));
+  subsw(size, size, rscratch1);
+  str(size, Address(tmp));
+  br(Assembler::GT, loop);
+
+  // Bang down shadow pages too.
+  // The -1 because we already subtracted 1 page.
+  for (int i = 0; i < StackShadowPages-1; i++) {
+    // this could be any sized move, but since it can serve as a debugging
+    // crumb the bigger the better.
+    lea(tmp, Address(tmp, -os::vm_page_size()));
+    str(size, Address(tmp));
+  }
+}
+
+
+address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
+  unsigned long off;
+  adrp(r, Address(page, rtype), off);
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), rtype);
+  ldrw(zr, Address(r, off));
+  return inst_mark();
+}
+
+address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), rtype);
+  ldrw(zr, Address(r, 0));
+  return inst_mark();
+}
+
+void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
+  relocInfo::relocType rtype = dest.rspec().reloc()->type();
+  unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
+  unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
+  unsigned long dest_page = (unsigned long)dest.target() >> 12;
+  long offset_low = dest_page - low_page;
+  long offset_high = dest_page - high_page;
+
+  assert(is_valid_AArch64_address(dest.target()), "bad address");
+  assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
+
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  // 8143067: Ensure that the adrp can reach the dest from anywhere within
+  // the code cache so that if it is relocated we know it will still reach
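+  //
+  // In the out-of-range case below the address is materialised in two steps
+  // (a sketch, assuming addresses fit in 48 bits):
+  //
+  //   adrp reg1, <page whose low 32 bits match dest>   // bits 31:12 of dest
+  //   movk reg1, #(dest >> 32), lsl #32                // bits 47:32 of dest
+  //
+  // and the low 12 bits (dest & 0xfff) are returned in byte_offset so the
+  // caller can fold them into the following add or load/store.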
+  if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
+    _adrp(reg1, dest.target());
+  } else {
+    unsigned long target = (unsigned long)dest.target();
+    unsigned long adrp_target
+      = (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL);
+
+    _adrp(reg1, (address)adrp_target);
+    movk(reg1, target >> 32, 32);
+  }
+  byte_offset = (unsigned long)dest.target() & 0xfff;
+}
+
+void MacroAssembler::load_byte_map_base(Register reg) {
+  jbyte *byte_map_base =
+    ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base;
+
+  if (is_valid_AArch64_address((address)byte_map_base)) {
+    // Strictly speaking the byte_map_base isn't an address at all,
+    // and it might even be negative.
+    unsigned long offset;
+    adrp(reg, ExternalAddress((address)byte_map_base), offset);
+    if (offset != 0)
+      add(reg, reg, offset);
+  } else {
+    mov(reg, (uint64_t)byte_map_base);
+  }
+}
+
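+// Build a stack frame of 'framesize' bytes, saving rfp and lr at the top of
+// the new frame.  The (1 << 9) threshold appears to keep the stp/ldp offset
+// within the signed, 8-byte-scaled imm7 field (roughly +/- 512 bytes), and
+// the (1 << 12) threshold keeps the sub/add immediate within its unsigned
+// 12-bit field; otherwise the amount is materialised in rscratch1.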
+void MacroAssembler::build_frame(int framesize) {
+  if (framesize == 0) {
+    // Is this even possible?
+    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    sub(sp, sp, framesize);
+    stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+  } else {
+    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+    if (framesize < ((1 << 12) + 2 * wordSize))
+      sub(sp, sp, framesize - 2 * wordSize);
+    else {
+      mov(rscratch1, framesize - 2 * wordSize);
+      sub(sp, sp, rscratch1);
+    }
+  }
+}
+
+void MacroAssembler::remove_frame(int framesize) {
+  if (framesize == 0) {
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+    add(sp, sp, framesize);
+  } else {
+    if (framesize < ((1 << 12) + 2 * wordSize))
+      add(sp, sp, framesize - 2 * wordSize);
+    else {
+      mov(rscratch1, framesize - 2 * wordSize);
+      add(sp, sp, rscratch1);
+    }
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+  }
+}
+
+// Search for str1 in str2 and return index or -1
+void MacroAssembler::string_indexof(Register str2, Register str1,
+                                    Register cnt2, Register cnt1,
+                                    Register tmp1, Register tmp2,
+                                    Register tmp3, Register tmp4,
+                                    int icnt1, Register result) {
+  Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;
+
+  Register ch1 = rscratch1;
+  Register ch2 = rscratch2;
+  Register cnt1tmp = tmp1;
+  Register cnt2tmp = tmp2;
+  Register cnt1_neg = cnt1;
+  Register cnt2_neg = cnt2;
+  Register result_tmp = tmp4;
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+
+// We have two strings, a source string in str2, cnt2 and a pattern string
+// in str1, cnt1. Find the first occurrence of the pattern in the source or
+// return -1.
+
+// For a larger pattern and source we use a simplified Boyer-Moore algorithm.
+// With a small pattern and source we use a linear scan.
+
+  if (icnt1 == -1) {
+    cmp(cnt1, 256);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
+    ccmp(cnt1, 8, 0b0000, LO);  // Can't handle skip >= 256 because we use
+    br(LO, LINEARSEARCH);       // a byte array.
+    cmp(cnt1, cnt2, LSR, 2);    // Source must be 4 * pattern for BM
+    br(HS, LINEARSEARCH);
+  }
+
+// The Boyer-Moore algorithm is based on the description here:-
+//
+// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
+//
+// This describes an algorithm with 2 shift rules: the 'Bad Character' rule
+// and the 'Good Suffix' rule.
+//
+// These rules are essentially heuristics for how far we can shift the
+// pattern along the search string.
+//
+// The implementation here uses the 'Bad Character' rule only because of the
+// complexity of initialisation for the 'Good Suffix' rule.
+//
+// This is also known as the Boyer-Moore-Horspool algorithm:-
+//
+// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
+//
+// #define ASIZE 128
+//
+//    int bm(unsigned char *x, int m, unsigned char *y, int n) {
+//       int i, j;
+//       unsigned c;
+//       unsigned char bc[ASIZE];
+//
+//       /* Preprocessing */
+//       for (i = 0; i < ASIZE; ++i)
+//          bc[i] = 0;
+//       for (i = 0; i < m - 1; ) {
+//          c = x[i];
+//          ++i;
+//          if (c < ASIZE) bc[c] = i;
+//       }
+//
+//       /* Searching */
+//       j = 0;
+//       while (j <= n - m) {
+//          c = y[j+m-1];
+//          if (x[m-1] == c) {
+//            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
+//            if (i < 0) return j;
+//          }
+//          if (c < ASIZE)
+//            j = j - bc[c] + m;
+//          else
+//            j += 1; // Advance by 1 only if char >= ASIZE
+//       }
+//       return -1;
+//    }
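+//
+// In the code below, x/m correspond to str1/cnt1 (the pattern), y/n to
+// str2/cnt2 (the source), and bc[] to the 128-byte table allocated on the
+// stack.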
+
+  if (icnt1 == -1) {
+    BIND(BM);
+
+    Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP;
+    Label BMADV, BMMATCH, BMCHECKEND;
+
+    Register cnt1end = tmp2;
+    Register str2end = cnt2;
+    Register skipch = tmp2;
+
+    // Restrict ASIZE to 128 to reduce stack space/initialisation.
+    // The presence of chars >= ASIZE in the pattern does not affect
+    // performance, but we must be careful not to record them in the stack
+    // array, which only has ASIZE entries.
+    // The presence of chars >= ASIZE in the source string may adversely affect
+    // performance since we can only advance by one when we encounter one.
+
+      stp(zr, zr, pre(sp, -128));
+      for (int i = 1; i < 8; i++)
+          stp(zr, zr, Address(sp, i*16));
+
+      mov(cnt1tmp, 0);
+      sub(cnt1end, cnt1, 1);
+    BIND(BCLOOP);
+      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
+      cmp(ch1, 128);
+      add(cnt1tmp, cnt1tmp, 1);
+      br(HS, BCSKIP);
+      strb(cnt1tmp, Address(sp, ch1));
+    BIND(BCSKIP);
+      cmp(cnt1tmp, cnt1end);
+      br(LT, BCLOOP);
+
+      mov(result_tmp, str2);
+
+      sub(cnt2, cnt2, cnt1);
+      add(str2end, str2, cnt2, LSL, 1);
+    BIND(BMLOOPSTR2);
+      sub(cnt1tmp, cnt1, 1);
+      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
+      ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1)));
+      cmp(ch1, skipch);
+      br(NE, BMSKIP);
+      subs(cnt1tmp, cnt1tmp, 1);
+      br(LT, BMMATCH);
+    BIND(BMLOOPSTR1);
+      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
+      ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1)));
+      cmp(ch1, ch2);
+      br(NE, BMSKIP);
+      subs(cnt1tmp, cnt1tmp, 1);
+      br(GE, BMLOOPSTR1);
+    BIND(BMMATCH);
+      sub(result_tmp, str2, result_tmp);
+      lsr(result, result_tmp, 1);
+      add(sp, sp, 128);
+      b(DONE);
+    BIND(BMADV);
+      add(str2, str2, 2);
+      b(BMCHECKEND);
+    BIND(BMSKIP);
+      cmp(skipch, 128);
+      br(HS, BMADV);
+      ldrb(ch2, Address(sp, skipch));
+      add(str2, str2, cnt1, LSL, 1);
+      sub(str2, str2, ch2, LSL, 1);
+    BIND(BMCHECKEND);
+      cmp(str2, str2end);
+      br(LE, BMLOOPSTR2);
+      add(sp, sp, 128);
+      b(NOMATCH);
+  }
+
+  BIND(LINEARSEARCH);
+  {
+    Label DO1, DO2, DO3;
+
+    Register str2tmp = tmp2;
+    Register first = tmp3;
+
+    if (icnt1 == -1)
+    {
+        Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD;
+
+        cmp(cnt1, 4);
+        br(LT, DOSHORT);
+
+        sub(cnt2, cnt2, cnt1);
+        sub(cnt1, cnt1, 4);
+        mov(result_tmp, cnt2);
+
+        lea(str1, Address(str1, cnt1, Address::uxtw(1)));
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt1_neg, zr, cnt1, LSL, 1);
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+        ldr(first, Address(str1, cnt1_neg));
+
+      BIND(FIRST_LOOP);
+        ldr(ch2, Address(str2, cnt2_neg));
+        cmp(first, ch2);
+        br(EQ, STR1_LOOP);
+      BIND(STR2_NEXT);
+        adds(cnt2_neg, cnt2_neg, 2);
+        br(LE, FIRST_LOOP);
+        b(NOMATCH);
+
+      BIND(STR1_LOOP);
+        adds(cnt1tmp, cnt1_neg, 8);
+        add(cnt2tmp, cnt2_neg, 8);
+        br(GE, LAST_WORD);
+
+      BIND(STR1_NEXT);
+        ldr(ch1, Address(str1, cnt1tmp));
+        ldr(ch2, Address(str2, cnt2tmp));
+        cmp(ch1, ch2);
+        br(NE, STR2_NEXT);
+        adds(cnt1tmp, cnt1tmp, 8);
+        add(cnt2tmp, cnt2tmp, 8);
+        br(LT, STR1_NEXT);
+
+      BIND(LAST_WORD);
+        ldr(ch1, Address(str1));
+        sub(str2tmp, str2, cnt1_neg);         // adjust to corresponding
+        ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2
+        cmp(ch1, ch2);
+        br(NE, STR2_NEXT);
+        b(MATCH);
+
+      BIND(DOSHORT);
+        cmp(cnt1, 2);
+        br(LT, DO1);
+        br(GT, DO3);
+    }
+
+    if (icnt1 == 4) {
+      Label CH1_LOOP;
+
+        ldr(ch1, str1);
+        sub(cnt2, cnt2, 4);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+
+      BIND(CH1_LOOP);
+        ldr(ch2, Address(str2, cnt2_neg));
+        cmp(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, 2);
+        br(LE, CH1_LOOP);
+        b(NOMATCH);
+    }
+
+    if (icnt1 == -1 || icnt1 == 2) {
+      Label CH1_LOOP;
+
+      BIND(DO2);
+        ldrw(ch1, str1);
+        sub(cnt2, cnt2, 2);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+
+      BIND(CH1_LOOP);
+        ldrw(ch2, Address(str2, cnt2_neg));
+        cmp(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, 2);
+        br(LE, CH1_LOOP);
+        b(NOMATCH);
+    }
+
+    if (icnt1 == -1 || icnt1 == 3) {
+      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
+
+      BIND(DO3);
+        ldrw(first, str1);
+        ldrh(ch1, Address(str1, 4));
+
+        sub(cnt2, cnt2, 3);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+
+      BIND(FIRST_LOOP);
+        ldrw(ch2, Address(str2, cnt2_neg));
+        cmpw(first, ch2);
+        br(EQ, STR1_LOOP);
+      BIND(STR2_NEXT);
+        adds(cnt2_neg, cnt2_neg, 2);
+        br(LE, FIRST_LOOP);
+        b(NOMATCH);
+
+      BIND(STR1_LOOP);
+        add(cnt2tmp, cnt2_neg, 4);
+        ldrh(ch2, Address(str2, cnt2tmp));
+        cmp(ch1, ch2);
+        br(NE, STR2_NEXT);
+        b(MATCH);
+    }
+
+    if (icnt1 == -1 || icnt1 == 1) {
+      Label CH1_LOOP, HAS_ZERO;
+      Label DO1_SHORT, DO1_LOOP;
+
+      BIND(DO1);
+        ldrh(ch1, str1);
+        cmp(cnt2, 4);
+        br(LT, DO1_SHORT);
+
+        orr(ch1, ch1, ch1, LSL, 16);
+        orr(ch1, ch1, ch1, LSL, 32);
+
+        sub(cnt2, cnt2, 4);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+
+        mov(tmp3, 0x0001000100010001);
+      BIND(CH1_LOOP);
+        ldr(ch2, Address(str2, cnt2_neg));
+        eor(ch2, ch1, ch2);
+        sub(tmp1, ch2, tmp3);
+        orr(tmp2, ch2, 0x7fff7fff7fff7fff);
+        bics(tmp1, tmp1, tmp2);
+        br(NE, HAS_ZERO);
+        adds(cnt2_neg, cnt2_neg, 8);
+        br(LT, CH1_LOOP);
+
+        cmp(cnt2_neg, 8);
+        mov(cnt2_neg, 0);
+        br(LT, CH1_LOOP);
+        b(NOMATCH);
+
+      BIND(HAS_ZERO);
+        rev(tmp1, tmp1);
+        clz(tmp1, tmp1);
+        add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
+        b(MATCH);
+
+      BIND(DO1_SHORT);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+        sub(cnt2_neg, zr, cnt2, LSL, 1);
+      BIND(DO1_LOOP);
+        ldrh(ch2, Address(str2, cnt2_neg));
+        cmpw(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, 2);
+        br(LT, DO1_LOOP);
+    }
+  }
+  BIND(NOMATCH);
+    mov(result, -1);
+    b(DONE);
+  BIND(MATCH);
+    add(result, result_tmp, cnt2_neg, ASR, 1);
+  BIND(DONE);
+}
+
+// Compare strings.
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    Register tmp1) {
+  Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING,
+    NEXT_WORD, DIFFERENCE;
+
+  BLOCK_COMMENT("string_compare {");
+
+  // Compute the minimum of the string lengths and save the difference.
+  subsw(tmp1, cnt1, cnt2);
+  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
+
+  // A very short string
+  cmpw(cnt2, 4);
+  br(Assembler::LT, SHORT_STRING);
+
+  // Check if the strings start at the same location.
+  cmp(str1, str2);
+  br(Assembler::EQ, LENGTH_DIFF);
+
+  // Compare longwords
+  {
+    subw(cnt2, cnt2, 4); // The last longword is a special case
+
+    // Move both string pointers to the last longword of their
+    // strings, negate the remaining count, and convert it to bytes.
+    lea(str1, Address(str1, cnt2, Address::uxtw(1)));
+    lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+    sub(cnt2, zr, cnt2, LSL, 1);
+
+    // Loop, loading longwords and comparing them into rscratch2.
+    bind(NEXT_WORD);
+    ldr(result, Address(str1, cnt2));
+    ldr(cnt1, Address(str2, cnt2));
+    adds(cnt2, cnt2, wordSize);
+    eor(rscratch2, result, cnt1);
+    cbnz(rscratch2, DIFFERENCE);
+    br(Assembler::LT, NEXT_WORD);
+
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+
+    ldr(result, Address(str1));
+    ldr(cnt1, Address(str2));
+    eor(rscratch2, result, cnt1);
+    cbz(rscratch2, LENGTH_DIFF);
+
+    // Find the first different characters in the longwords and
+    // compute their difference.
+    bind(DIFFERENCE);
+    rev(rscratch2, rscratch2);
+    clz(rscratch2, rscratch2);
+    andr(rscratch2, rscratch2, -16);
+    lsrv(result, result, rscratch2);
+    uxthw(result, result);
+    lsrv(cnt1, cnt1, rscratch2);
+    uxthw(cnt1, cnt1);
+    subw(result, result, cnt1);
+    b(DONE);
+  }
+
+  bind(SHORT_STRING);
+  // Is the minimum length zero?
+  cbz(cnt2, LENGTH_DIFF);
+
+  bind(SHORT_LOOP);
+  load_unsigned_short(result, Address(post(str1, 2)));
+  load_unsigned_short(cnt1, Address(post(str2, 2)));
+  subw(result, result, cnt1);
+  cbnz(result, DONE);
+  sub(cnt2, cnt2, 1);
+  cbnz(cnt2, SHORT_LOOP);
+
+  // Strings are equal up to min length.  Return the length difference.
+  bind(LENGTH_DIFF);
+  mov(result, tmp1);
+
+  // That's it
+  bind(DONE);
+
+  BLOCK_COMMENT("} string_compare");
+}
+
+
+// base: Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:  Count in HeapWords.
+void MacroAssembler::zero_words(Register base, Register cnt)
+{
+  if (UseBlockZeroing) {
+    block_zero(base, cnt);
+  } else {
+    fill_words(base, cnt, zr);
+  }
+}
+
+// r10 = base:   Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:          Immediate count in HeapWords.
+// r11 = tmp:    For use as cnt if we need to call out
+#define ShortArraySize (18 * BytesPerLong)
+void MacroAssembler::zero_words(Register base, u_int64_t cnt)
+{
+  Register tmp = r11;
+  int i = cnt & 1;  // store any odd word to start
+  if (i) str(zr, Address(base));
+
+  if (cnt <= ShortArraySize / BytesPerLong) {
+    for (; i < (int)cnt; i += 2)
+      stp(zr, zr, Address(base, i * wordSize));
+  } else if (UseBlockZeroing && cnt >= (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord)) {
+    mov(tmp, cnt);
+    block_zero(base, tmp, true);
+  } else {
+    const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
+    int remainder = cnt % (2 * unroll);
+    for (; i < remainder; i += 2)
+      stp(zr, zr, Address(base, i * wordSize));
+
+    Label loop;
+    Register cnt_reg = rscratch1;
+    Register loop_base = rscratch2;
+    cnt = cnt - remainder;
+    mov(cnt_reg, cnt);
+    // adjust base and prebias by -2 * wordSize so we can pre-increment
+    add(loop_base, base, (remainder - 2) * wordSize);
+    bind(loop);
+    sub(cnt_reg, cnt_reg, 2 * unroll);
+    for (i = 1; i < unroll; i++)
+      stp(zr, zr, Address(loop_base, 2 * i * wordSize));
+    stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
+    cbnz(cnt_reg, loop);
+  }
+}
+
+// base:   Address of a buffer to be filled, 8 bytes aligned.
+// cnt:    Count in 8-byte units.
+// value:  Value to fill the buffer with.
+// base will point to the end of the buffer after filling.
+void MacroAssembler::fill_words(Register base, Register cnt, Register value)
+{
+//  Algorithm:
+//
+//    scratch1 = cnt & 7;
+//    cnt -= scratch1;
+//    p += scratch1;
+//    switch (scratch1) {
+//      do {
+//        cnt -= 8;
+//          p[-8] = v;
+//        case 7:
+//          p[-7] = v;
+//        case 6:
+//          p[-6] = v;
+//          // ...
+//        case 1:
+//          p[-1] = v;
+//        case 0:
+//          p += 8;
+//      } while (cnt);
+//    }
+
+  assert_different_registers(base, cnt, value, rscratch1, rscratch2);
+
+  Label fini, skip, entry, loop;
+  const int unroll = 8; // Number of stp instructions we'll unroll
+
+  cbz(cnt, fini);
+  tbz(base, 3, skip);
+  str(value, Address(post(base, 8)));
+  sub(cnt, cnt, 1);
+  bind(skip);
+
+  andr(rscratch1, cnt, (unroll-1) * 2);
+  sub(cnt, cnt, rscratch1);
+  add(base, base, rscratch1, Assembler::LSL, 3);
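+  // Computed jump: each stp in the unrolled loop below is 4 bytes and
+  // stores 2 words, so to handle the rscratch1 leftover words we branch
+  // (rscratch1 * 2) bytes back from 'entry'.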
+  adr(rscratch2, entry);
+  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 1);
+  br(rscratch2);
+
+  bind(loop);
+  add(base, base, unroll * 16);
+  for (int i = -unroll; i < 0; i++)
+    stp(value, value, Address(base, i * 16));
+  bind(entry);
+  subs(cnt, cnt, unroll * 2);
+  br(Assembler::GE, loop);
+
+  tbz(cnt, 0, fini);
+  str(value, Address(post(base, 8)));
+  bind(fini);
+}
+
+// Use DC ZVA to do fast zeroing.
+// base:   Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:    Count in HeapWords.
+// is_large: True when 'cnt' is known to be >= BlockZeroingLowLimit.
+void MacroAssembler::block_zero(Register base, Register cnt, bool is_large)
+{
+  Label small;
+  Label store_pair, loop_store_pair, done;
+  Label base_aligned;
+
+  assert_different_registers(base, cnt, rscratch1);
+  guarantee(base == r10 && cnt == r11, "fix register usage");
+
+  Register tmp = rscratch1;
+  Register tmp2 = rscratch2;
+  int zva_length = VM_Version::zva_length();
+
+  // Ensure ZVA length can be divided by 16. This is required by
+  // the subsequent operations.
+  assert (zva_length % 16 == 0, "Unexpected ZVA Length");
+
+  if (!is_large) cbz(cnt, done);
+  tbz(base, 3, base_aligned);
+  str(zr, Address(post(base, 8)));
+  sub(cnt, cnt, 1);
+  bind(base_aligned);
+
+  // Ensure count >= zva_length * 2 so that it still deserves a zva after
+  // alignment.
+  if (!is_large || !(BlockZeroingLowLimit >= zva_length * 2)) {
+    int low_limit = MAX2(zva_length * 2, (int)BlockZeroingLowLimit);
+    cmp(cnt, low_limit >> 3);
+    br(Assembler::LT, small);
+  }
+
+  far_call(StubRoutines::aarch64::get_zero_longs());
+
+  bind(small);
+
+  const int unroll = 8; // Number of stp instructions we'll unroll
+  Label small_loop, small_table_end;
+
+  andr(tmp, cnt, (unroll-1) * 2);
+  sub(cnt, cnt, tmp);
+  add(base, base, tmp, Assembler::LSL, 3);
+  adr(tmp2, small_table_end);
+  sub(tmp2, tmp2, tmp, Assembler::LSL, 1);
+  br(tmp2);
+
+  bind(small_loop);
+  add(base, base, unroll * 16);
+  for (int i = -unroll; i < 0; i++)
+    stp(zr, zr, Address(base, i * 16));
+  bind(small_table_end);
+  subs(cnt, cnt, unroll * 2);
+  br(Assembler::GE, small_loop);
+
+  tbz(cnt, 0, done);
+  str(zr, Address(post(base, 8)));
+
+  bind(done);
+}
+
+void MacroAssembler::string_equals(Register str1, Register str2,
+                                   Register cnt, Register result,
+                                   Register tmp1) {
+  Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING,
+    NEXT_WORD;
+
+  const Register tmp2 = rscratch1;
+  assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2);
+
+  BLOCK_COMMENT("string_equals {");
+
+  // Start by assuming that the strings are not equal.
+  mov(result, zr);
+
+  // A very short string
+  cmpw(cnt, 4);
+  br(Assembler::LT, SHORT_STRING);
+
+  // Check if the strings start at the same location.
+  cmp(str1, str2);
+  br(Assembler::EQ, SAME_CHARS);
+
+  // Compare longwords
+  {
+    subw(cnt, cnt, 4); // The last longword is a special case
+
+    // Move both string pointers to the last longword of their
+    // strings, negate the remaining count, and convert it to bytes.
+    lea(str1, Address(str1, cnt, Address::uxtw(1)));
+    lea(str2, Address(str2, cnt, Address::uxtw(1)));
+    sub(cnt, zr, cnt, LSL, 1);
+
+    // Loop, loading longwords and comparing them into rscratch2.
+    bind(NEXT_WORD);
+    ldr(tmp1, Address(str1, cnt));
+    ldr(tmp2, Address(str2, cnt));
+    adds(cnt, cnt, wordSize);
+    eor(rscratch2, tmp1, tmp2);
+    cbnz(rscratch2, DONE);
+    br(Assembler::LT, NEXT_WORD);
+
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+
+    ldr(tmp1, Address(str1));
+    ldr(tmp2, Address(str2));
+    eor(rscratch2, tmp1, tmp2);
+    cbz(rscratch2, SAME_CHARS);
+    b(DONE);
+  }
+
+  bind(SHORT_STRING);
+  // Is the length zero?
+  cbz(cnt, SAME_CHARS);
+
+  bind(SHORT_LOOP);
+  load_unsigned_short(tmp1, Address(post(str1, 2)));
+  load_unsigned_short(tmp2, Address(post(str2, 2)));
+  subw(tmp1, tmp1, tmp2);
+  cbnz(tmp1, DONE);
+  sub(cnt, cnt, 1);
+  cbnz(cnt, SHORT_LOOP);
+
+  // Strings are equal.
+  bind(SAME_CHARS);
+  mov(result, true);
+
+  // That's it
+  bind(DONE);
+
+  BLOCK_COMMENT("} string_equals");
+}
+
+
+// Compare char[] arrays aligned to 4 bytes
+void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+                                        Register result, Register tmp1)
+{
+  Register cnt1 = rscratch1;
+  Register cnt2 = rscratch2;
+  Register tmp2 = rscratch2;
+
+  Label SAME, DIFFER, NEXT, TAIL03, TAIL01;
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+  BLOCK_COMMENT("char_arrays_equals  {");
+
+    // different until proven equal
+    mov(result, false);
+
+    // same array?
+    cmp(ary1, ary2);
+    br(Assembler::EQ, SAME);
+
+    // ne if either null
+    cbz(ary1, DIFFER);
+    cbz(ary2, DIFFER);
+
+    // lengths ne?
+    ldrw(cnt1, Address(ary1, length_offset));
+    ldrw(cnt2, Address(ary2, length_offset));
+    cmp(cnt1, cnt2);
+    br(Assembler::NE, DIFFER);
+
+    lea(ary1, Address(ary1, base_offset));
+    lea(ary2, Address(ary2, base_offset));
+
+    subs(cnt1, cnt1, 4);
+    br(LT, TAIL03);
+
+  BIND(NEXT);
+    ldr(tmp1, Address(post(ary1, 8)));
+    ldr(tmp2, Address(post(ary2, 8)));
+    subs(cnt1, cnt1, 4);
+    eor(tmp1, tmp1, tmp2);
+    cbnz(tmp1, DIFFER);
+    br(GE, NEXT);
+
+  BIND(TAIL03);  // 0-3 chars left, cnt1 = #chars left - 4
+    tst(cnt1, 0b10);
+    br(EQ, TAIL01);
+    ldrw(tmp1, Address(post(ary1, 4)));
+    ldrw(tmp2, Address(post(ary2, 4)));
+    cmp(tmp1, tmp2);
+    br(NE, DIFFER);
+  BIND(TAIL01);  // 0-1 chars left
+    tst(cnt1, 0b01);
+    br(EQ, SAME);
+    ldrh(tmp1, ary1);
+    ldrh(tmp2, ary2);
+    cmp(tmp1, tmp2);
+    br(NE, DIFFER);
+
+  BIND(SAME);
+    mov(result, true);
+  BIND(DIFFER); // result already set
+
+  BLOCK_COMMENT("} char_arrays_equals");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,3659 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+
+class BiasedLockingCounters;
+
+// !!! FIXME AARCH64 -- do we need to include asm/register.hpp !!!
+
+// definitions of various symbolic names for machine registers
+
+// First the calls between C and Java, which use 8 general registers
+// and 8 floating registers
+
+// we also have to copy between x86 and ARM registers but that's a
+// secondary complication -- not all code employing C call convention
+// executes as x86 code though -- we generate some of it
+
+class Argument VALUE_OBJ_CLASS_SPEC {
+ public:
+  enum {
+    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
+    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )
+
+    n_int_register_parameters_j   = 8, // r1, ... r7, r0 (j_rarg0, j_rarg1, ...)
+    n_float_register_parameters_j = 8  // v0, v1, ... v7 (j_farg0, j_farg1, ...)
+  };
+};
+
+REGISTER_DECLARATION(Register, c_rarg0, r0);
+REGISTER_DECLARATION(Register, c_rarg1, r1);
+REGISTER_DECLARATION(Register, c_rarg2, r2);
+REGISTER_DECLARATION(Register, c_rarg3, r3);
+REGISTER_DECLARATION(Register, c_rarg4, r4);
+REGISTER_DECLARATION(Register, c_rarg5, r5);
+REGISTER_DECLARATION(Register, c_rarg6, r6);
+REGISTER_DECLARATION(Register, c_rarg7, r7);
+
+REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
+REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
+REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
+REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
+REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
+REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
+REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
+REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
+
+// Symbolically name the register arguments used by the Java calling convention.
+// We have control over the convention for java so we can do what we please.
+// What pleases us is to offset the java calling convention so that when
+// we call a suitable jni method the arguments are lined up and we don't
+// have to do much shuffling. A suitable jni method is non-static and takes
+// a small number of arguments.
+//
+//  |--------------------------------------------------------------------|
+//  | c_rarg0  c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7  |
+//  |--------------------------------------------------------------------|
+//  | r0       r1       r2      r3      r4      r5      r6      r7       |
+//  |--------------------------------------------------------------------|
+//  | j_rarg7  j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6  |
+//  |--------------------------------------------------------------------|
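+//
+// For example (a sketch): for a call to a non-static native
+// int m(int a, int b), the Java-side arguments sit in
+//   receiver -> j_rarg0 (== c_rarg1), a -> j_rarg1 (== c_rarg2), b -> j_rarg2 (== c_rarg3)
+// which is where the JNI target expects (JNIEnv*, jobject receiver, a, b)
+// apart from the JNIEnv* itself, so only c_rarg0 needs to be filled in.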
+
+
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
+REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
+REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
+REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
+REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);
+
+// Java floating args are passed as per C
+
+REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
+REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
+REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
+REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
+REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
+REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
+REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
+REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
+
+// registers used to hold VM data either temporarily within a method
+// or across method calls
+
+// volatile (caller-save) registers
+
+// r8 is used for indirect result location return
+// we use it and r9 as scratch registers
+REGISTER_DECLARATION(Register, rscratch1, r8);
+REGISTER_DECLARATION(Register, rscratch2, r9);
+
+// current method -- must be in a call-clobbered register
+REGISTER_DECLARATION(Register, rmethod,   r12);
+
+// non-volatile (callee-save) registers are r16-29
+// of which the following are dedicated global state
+
+// link register
+REGISTER_DECLARATION(Register, lr,        r30);
+// frame pointer
+REGISTER_DECLARATION(Register, rfp,       r29);
+// current thread
+REGISTER_DECLARATION(Register, rthread,   r28);
+// base of heap
+REGISTER_DECLARATION(Register, rheapbase, r27);
+// constant pool cache
+REGISTER_DECLARATION(Register, rcpool,    r26);
+// monitors allocated on stack
+REGISTER_DECLARATION(Register, rmonitors, r25);
+// locals on stack
+REGISTER_DECLARATION(Register, rlocals,   r24);
+// bytecode pointer
+REGISTER_DECLARATION(Register, rbcp,      r22);
+// Dispatch table base
+REGISTER_DECLARATION(Register, rdispatch,      r21);
+// Java stack pointer
+REGISTER_DECLARATION(Register, esp,      r20);
+
+// TODO : x86 uses rbp to save SP in method handle code
+// we may need to do the same with fp
+// JSR 292 fixed register usages:
+//REGISTER_DECLARATION(Register, r_mh_SP_save, r29);
+
+#define assert_cond(ARG1) assert(ARG1, #ARG1)
+
+namespace asm_util {
+  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
+};
+
+using namespace asm_util;
+
+
+class Assembler;
+
+class Instruction_aarch64 {
+  unsigned insn;
+#ifdef ASSERT
+  unsigned bits;
+#endif
+  Assembler *assem;
+
+public:
+
+  Instruction_aarch64(class Assembler *as) {
+#ifdef ASSERT
+    bits = 0;
+#endif
+    insn = 0;
+    assem = as;
+  }
+
+  inline ~Instruction_aarch64();
+
+  unsigned &get_insn() { return insn; }
+#ifdef ASSERT
+  unsigned &get_bits() { return bits; }
+#endif
+
+  static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
+    union {
+      unsigned u;
+      int n;
+    };
+
+    u = val << (31 - hi);
+    n = n >> (31 - hi + lo);
+    return n;
+  }
+
+  static inline uint32_t extract(uint32_t val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    assert_cond(msb >= lsb);
+    uint32_t mask = (1U << nbits) - 1;
+    uint32_t result = val >> lsb;
+    result &= mask;
+    return result;
+  }
+
+  static inline int32_t sextract(uint32_t val, int msb, int lsb) {
+    uint32_t uval = extract(val, msb, lsb);
+    return extend(uval, msb - lsb);
+  }
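+  // For example, extract(0xD0, 7, 4) == 0xD, while sextract(0xD0, 7, 4)
+  // sign-extends the same 4-bit field and yields -3.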
+
+  static void patch(address a, int msb, int lsb, unsigned long val) {
+    int nbits = msb - lsb + 1;
+    guarantee(val < (1U << nbits), "Field too big for insn");
+    assert_cond(msb >= lsb);
+    unsigned mask = (1U << nbits) - 1;
+    val <<= lsb;
+    mask <<= lsb;
+    unsigned target = *(unsigned *)a;
+    target &= ~mask;
+    target |= val;
+    *(unsigned *)a = target;
+  }
+
+  static void spatch(address a, int msb, int lsb, long val) {
+    int nbits = msb - lsb + 1;
+    long chk = val >> (nbits - 1);
+    guarantee (chk == -1 || chk == 0, "Field too big for insn");
+    unsigned uval = val;
+    unsigned mask = (1U << nbits) - 1;
+    uval &= mask;
+    uval <<= lsb;
+    mask <<= lsb;
+    unsigned target = *(unsigned *)a;
+    target &= ~mask;
+    target |= uval;
+    *(unsigned *)a = target;
+  }
+
+  void f(unsigned val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    guarantee(val < (1U << nbits), "Field too big for insn");
+    assert_cond(msb >= lsb);
+    unsigned mask = (1U << nbits) - 1;
+    val <<= lsb;
+    mask <<= lsb;
+    insn |= val;
+    assert_cond((bits & mask) == 0);
+#ifdef ASSERT
+    bits |= mask;
+#endif
+  }
+
+  void f(unsigned val, int bit) {
+    f(val, bit, bit);
+  }
+
+  void sf(long val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    long chk = val >> (nbits - 1);
+    guarantee (chk == -1 || chk == 0, "Field too big for insn");
+    unsigned uval = val;
+    unsigned mask = (1U << nbits) - 1;
+    uval &= mask;
+    f(uval, lsb + nbits - 1, lsb);
+  }
+
+  void rf(Register r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  // reg|ZR
+  void zrf(Register r, int lsb) {
+    f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
+  }
+
+  // reg|SP
+  void srf(Register r, int lsb) {
+    f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  void rf(FloatRegister r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  unsigned get(int msb = 31, int lsb = 0) {
+    int nbits = msb - lsb + 1;
+    unsigned mask = ((1U << nbits) - 1) << lsb;
+    assert_cond((bits & mask) == mask);
+    return (insn & mask) >> lsb;
+  }
+
+  void fixed(unsigned value, unsigned mask) {
+    assert_cond ((mask & bits) == 0);
+#ifdef ASSERT
+    bits |= mask;
+#endif
+    insn |= value;
+  }
+};
+
+#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)
+
+class PrePost {
+  int _offset;
+  Register _r;
+public:
+  PrePost(Register reg, int o) : _r(reg), _offset(o) { }
+  int offset() { return _offset; }
+  Register reg() { return _r; }
+};
+
+class Pre : public PrePost {
+public:
+  Pre(Register reg, int o) : PrePost(reg, o) { }
+};
+class Post : public PrePost {
+public:
+  Post(Register reg, int o) : PrePost(reg, o) { }
+};
+
+namespace ext
+{
+  enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
+};
+
+// abs methods which cannot overflow and so are well-defined across
+// the entire domain of integer types.
+static inline unsigned int uabs(unsigned int n) {
+  union {
+    unsigned int result;
+    int value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(unsigned long n) {
+  union {
+    unsigned long result;
+    long value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(long n) { return uabs((unsigned long)n); }
+static inline unsigned long uabs(int n) { return uabs((unsigned int)n); }
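+// For example, uabs(INT_MIN) is well defined and yields 2147483648
+// (0x80000000), whereas abs(INT_MIN) overflows.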
+
+// Addressing modes
+class Address VALUE_OBJ_CLASS_SPEC {
+ public:
+
+  enum mode { no_mode, base_plus_offset, pre, post, pcrel,
+              base_plus_offset_reg, literal };
+
+  // Shift and extend for base reg + reg offset addressing
+  class extend {
+    int _option, _shift;
+    ext::operation _op;
+  public:
+    extend() { }
+    extend(int s, int o, ext::operation op) : _shift(s), _option(o), _op(op) { }
+    int option() const{ return _option; }
+    int shift() const { return _shift; }
+    ext::operation op() const { return _op; }
+  };
+  class uxtw : public extend {
+  public:
+    uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
+  };
+  class lsl : public extend {
+  public:
+    lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
+  };
+  class sxtw : public extend {
+  public:
+    sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
+  };
+  class sxtx : public extend {
+  public:
+    sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
+  };
+
+ private:
+  Register _base;
+  Register _index;
+  long _offset;
+  enum mode _mode;
+  extend _ext;
+
+  RelocationHolder _rspec;
+
+  // Typically when we use AddressLiterals we want to use their rval.
+  // However in some situations we want the lval (effective address) of
+  // the item.  We provide a special factory for making those lvals.
+  bool _is_lval;
+
+  // If the target is far we'll need to load the ea of this to a
+  // register to reach it. Otherwise if near we can do PC-relative
+  // addressing.
+  address          _target;
+
+ public:
+  Address()
+    : _mode(no_mode) { }
+  Address(Register r)
+    : _mode(base_plus_offset), _base(r), _offset(0), _index(noreg), _target(0) { }
+  Address(Register r, int o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+  Address(Register r, long o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+  Address(Register r, unsigned long o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+#ifdef ASSERT
+  Address(Register r, ByteSize disp)
+    : _mode(base_plus_offset), _base(r), _offset(in_bytes(disp)),
+      _index(noreg), _target(0) { }
+#endif
+  Address(Register r, Register r1, extend ext = lsl())
+    : _mode(base_plus_offset_reg), _base(r), _index(r1),
+    _ext(ext), _offset(0), _target(0) { }
+  Address(Pre p)
+    : _mode(pre), _base(p.reg()), _offset(p.offset()) { }
+  Address(Post p)
+    : _mode(post), _base(p.reg()), _offset(p.offset()), _target(0) { }
+  Address(address target, RelocationHolder const& rspec)
+    : _mode(literal),
+      _rspec(rspec),
+      _is_lval(false),
+      _target(target)  { }
+  Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
+  Address(Register base, RegisterOrConstant index, extend ext = lsl())
+    : _base (base),
+      _ext(ext), _offset(0), _target(0) {
+    if (index.is_register()) {
+      _mode = base_plus_offset_reg;
+      _index = index.as_register();
+    } else {
+      guarantee(ext.option() == ext::uxtx, "should be");
+      assert(index.is_constant(), "should be");
+      _mode = base_plus_offset;
+      _offset = index.as_constant() << ext.shift();
+    }
+  }
+
+  Register base() const {
+    guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
+               || _mode == post),
+              "wrong mode");
+    return _base;
+  }
+  long offset() const {
+    return _offset;
+  }
+  Register index() const {
+    return _index;
+  }
+  mode getMode() const {
+    return _mode;
+  }
+  bool uses(Register reg) const { return _base == reg || _index == reg; }
+  address target() const { return _target; }
+  const RelocationHolder& rspec() const { return _rspec; }
+
+  void encode(Instruction_aarch64 *i) const {
+    i->f(0b111, 29, 27);
+    i->srf(_base, 5);
+
+    switch(_mode) {
+    case base_plus_offset:
+      {
+        unsigned size = i->get(31, 30);
+        unsigned mask = (1 << size) - 1;
+        if (_offset < 0 || _offset & mask)
+          {
+            i->f(0b00, 25, 24);
+            i->f(0, 21), i->f(0b00, 11, 10);
+            i->sf(_offset, 20, 12);
+          } else {
+            i->f(0b01, 25, 24);
+            i->f(_offset >> size, 21, 10);
+          }
+      }
+      break;
+
+    case base_plus_offset_reg:
+      {
+        i->f(0b00, 25, 24);
+        i->f(1, 21);
+        i->rf(_index, 16);
+        i->f(_ext.option(), 15, 13);
+        unsigned size = i->get(31, 30);
+        if (size == 0) // It's a byte
+          i->f(_ext.shift() >= 0, 12);
+        else {
+          if (_ext.shift() > 0)
+            assert(_ext.shift() == (int)size, "bad shift");
+          i->f(_ext.shift() > 0, 12);
+        }
+        i->f(0b10, 11, 10);
+      }
+      break;
+
+    case pre:
+      i->f(0b00, 25, 24);
+      i->f(0, 21), i->f(0b11, 11, 10);
+      i->sf(_offset, 20, 12);
+      break;
+
+    case post:
+      i->f(0b00, 25, 24);
+      i->f(0, 21), i->f(0b01, 11, 10);
+      i->sf(_offset, 20, 12);
+      break;
+
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  void encode_pair(Instruction_aarch64 *i) const {
+    switch(_mode) {
+    case base_plus_offset:
+      i->f(0b010, 25, 23);
+      break;
+    case pre:
+      i->f(0b011, 25, 23);
+      break;
+    case post:
+      i->f(0b001, 25, 23);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+
+    unsigned size; // Operand shift in 32-bit words
+
+    if (i->get(26, 26)) { // float
+      switch(i->get(31, 30)) {
+      case 0b10:
+        size = 2; break;
+      case 0b01:
+        size = 1; break;
+      case 0b00:
+        size = 0; break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      size = i->get(31, 31);
+    }
+
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+  }
+
+  void encode_nontemporal_pair(Instruction_aarch64 *i) const {
+    // Only base + offset is allowed
+    i->f(0b000, 25, 23);
+    unsigned size = i->get(31, 31);
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+    guarantee(_mode == Address::base_plus_offset,
+              "Bad addressing mode for non-temporal op");
+  }
+
+  void lea(MacroAssembler *, Register) const;
+
+  static bool offset_ok_for_immed(long offset, int shift = 0) {
+    unsigned mask = (1 << shift) - 1;
+    if (offset < 0 || offset & mask) {
+      return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset
+    } else {
+      return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset
+    }
+  }
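+  // For example, with shift == 3 (an 8-byte access):
+  //   offset_ok_for_immed(32760, 3) is true  (4095 << 3, scaled unsigned form)
+  //   offset_ok_for_immed(-16, 3)   is true  (fits the small unscaled form)
+  //   offset_ok_for_immed(-300, 3)  is false (out of range of both forms)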
+};
+
+// Convenience classes
+class RuntimeAddress: public Address {
+
+  public:
+
+  RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
+
+};
+
+class OopAddress: public Address {
+
+  public:
+
+  OopAddress(address target) : Address(target, relocInfo::oop_type){}
+
+};
+
+class ExternalAddress: public Address {
+ private:
+  static relocInfo::relocType reloc_for_target(address target) {
+    // Sometimes ExternalAddress is used for values which aren't
+    // exactly addresses, like the card table base.
+    // external_word_type can't be used for values in the first page
+    // so just skip the reloc in that case.
+    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
+  }
+
+ public:
+
+  ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
+
+};
+
+class InternalAddress: public Address {
+
+  public:
+
+  InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
+};
+
+const int FPUStateSizeInWords = 32 * 2;
+typedef enum {
+  PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
+  PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
+  PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
+} prfop;
+
+class Assembler : public AbstractAssembler {
+
+#ifndef PRODUCT
+  static const unsigned long asm_bp;
+
+  void emit_long(jint x) {
+    if ((unsigned long)pc() == asm_bp)
+      asm volatile ("nop");
+    AbstractAssembler::emit_long(x);
+  }
+#else
+  void emit_long(jint x) {
+    AbstractAssembler::emit_long(x);
+  }
+#endif
+
+public:
+  enum { instruction_size = 4 };
+
+  Address adjust(Register base, int offset, bool preIncrement) {
+    if (preIncrement)
+      return Address(Pre(base, offset));
+    else
+      return Address(Post(base, offset));
+  }
+
+  Address pre(Register base, int offset) {
+    return adjust(base, offset, true);
+  }
+
+  Address post (Register base, int offset) {
+    return adjust(base, offset, false);
+  }
+
+  Instruction_aarch64* current;
+
+  void set_current(Instruction_aarch64* i) { current = i; }
+
+  void f(unsigned val, int msb, int lsb) {
+    current->f(val, msb, lsb);
+  }
+  void f(unsigned val, int msb) {
+    current->f(val, msb, msb);
+  }
+  void sf(long val, int msb, int lsb) {
+    current->sf(val, msb, lsb);
+  }
+  void rf(Register reg, int lsb) {
+    current->rf(reg, lsb);
+  }
+  void srf(Register reg, int lsb) {
+    current->srf(reg, lsb);
+  }
+  void zrf(Register reg, int lsb) {
+    current->zrf(reg, lsb);
+  }
+  void rf(FloatRegister reg, int lsb) {
+    current->rf(reg, lsb);
+  }
+  void fixed(unsigned value, unsigned mask) {
+    current->fixed(value, mask);
+  }
+
+  void emit() {
+    emit_long(current->get_insn());
+    assert_cond(current->get_bits() == 0xffffffff);
+    current = NULL;
+  }
+
+  // !!! FIXME AARCH64 -- added to avoid dependency on the codebuffer
+  // implementation before we have a definition of it
+  void relocate(address at,  const RelocationHolder& rspec);
+  void relocate(const RelocationHolder& rspec);
+
+  typedef void (Assembler::* uncond_branch_insn)(address dest);
+  typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
+  typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
+  typedef void (Assembler::* prefetch_insn)(address target, prfop);
+
+  void wrap_label(Label &L, uncond_branch_insn insn);
+  void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
+  void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
+  void wrap_label(Label &L, prfop, prefetch_insn insn);
+
+  // PC-rel. addressing
+
+  void adr(Register Rd, address dest);
+  void _adrp(Register Rd, address dest);
+
+  void adr(Register Rd, const Address &dest);
+  void _adrp(Register Rd, const Address &dest);
+
+  void adr(Register Rd, Label &L) {
+    wrap_label(Rd, L, &Assembler::adr);
+  }
+  void _adrp(Register Rd, Label &L) {
+    wrap_label(Rd, L, &Assembler::_adrp);
+  }
+
+  void adrp(Register Rd, const Address &dest, unsigned long &offset);
+
+#undef INSN
+
+  void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
+                         int negated_op);
+
+  // Add/subtract (immediate)
+#define INSN(NAME, decode, negated)                                     \
+  void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
+    starti;                                                             \
+    f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
+    zrf(Rd, 0), srf(Rn, 5);                                             \
+  }                                                                     \
+                                                                        \
+  void NAME(Register Rd, Register Rn, unsigned imm) {                   \
+    starti;                                                             \
+    add_sub_immediate(Rd, Rn, imm, decode, negated);                    \
+  }
+
+  INSN(addsw, 0b001, 0b011);
+  INSN(subsw, 0b011, 0b001);
+  INSN(adds,  0b101, 0b111);
+  INSN(subs,  0b111, 0b101);
+
+#undef INSN
+
+#define INSN(NAME, decode, negated)                     \
+  void NAME(Register Rd, Register Rn, unsigned imm) {   \
+    starti;                                             \
+    add_sub_immediate(Rd, Rn, imm, decode, negated);    \
+  }
+
+  INSN(addw, 0b000, 0b010);
+  INSN(subw, 0b010, 0b000);
+  INSN(add,  0b100, 0b110);
+  INSN(sub,  0b110, 0b100);
+
+#undef INSN
+
+ // Logical (immediate)
+#define INSN(NAME, decode, is32)                                \
+  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
+    starti;                                                     \
+    uint32_t val = encode_logical_immediate(is32, imm);         \
+    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
+    srf(Rd, 0), zrf(Rn, 5);                                     \
+  }
+
+  INSN(andw, 0b000, true);
+  INSN(orrw, 0b001, true);
+  INSN(eorw, 0b010, true);
+  INSN(andr,  0b100, false);
+  INSN(orr,  0b101, false);
+  INSN(eor,  0b110, false);
+
+#undef INSN
+
+#define INSN(NAME, decode, is32)                                \
+  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
+    starti;                                                     \
+    uint32_t val = encode_logical_immediate(is32, imm);         \
+    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
+    zrf(Rd, 0), zrf(Rn, 5);                                     \
+  }
+
+  INSN(ands, 0b111, false);
+  INSN(andsw, 0b011, true);
+
+#undef INSN
+
+  // Move wide (immediate)
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, unsigned imm, unsigned shift = 0) {            \
+    assert_cond((shift/16)*16 == shift);                                \
+    starti;                                                             \
+    f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),        \
+      f(imm, 20, 5);                                                    \
+    rf(Rd, 0);                                                          \
+  }
+
+  INSN(movnw, 0b000);
+  INSN(movzw, 0b010);
+  INSN(movkw, 0b011);
+  INSN(movn, 0b100);
+  INSN(movz, 0b110);
+  INSN(movk, 0b111);
+
+#undef INSN
+
+  // Bitfield
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
+    starti;                                                             \
+    f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
+    rf(Rn, 5), rf(Rd, 0);                                               \
+  }
+
+  INSN(sbfmw, 0b0001001100);
+  INSN(bfmw,  0b0011001100);
+  INSN(ubfmw, 0b0101001100);
+  INSN(sbfm,  0b1001001101);
+  INSN(bfm,   0b1011001101);
+  INSN(ubfm,  0b1101001101);
+
+#undef INSN
+
+  // Extract
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
+    starti;                                                             \
+    f(opcode, 31, 21), f(imms, 15, 10);                                 \
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                   \
+  }
+
+  INSN(extrw, 0b00010011100);
+  INSN(extr,  0b10010011110);
+
+#undef INSN
+
+  // The maximum range of a branch is fixed for the AArch64
+  // architecture.  In debug mode we shrink it in order to test
+  // trampolines, but not so small that branches in the interpreter
+  // are out of range.
+  static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
+
+  static bool reachable_from_branch_at(address branch, address target) {
+    return uabs(target - branch) < branch_range;
+  }
+
+  // Unconditional branch (immediate)
+
+#define INSN(NAME, opcode)                                              \
+  void NAME(address dest) {                                             \
+    starti;                                                             \
+    long offset = (dest - pc()) >> 2;                                   \
+    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
+    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
+  }                                                                     \
+  void NAME(Label &L) {                                                 \
+    wrap_label(L, &Assembler::NAME);                                    \
+  }                                                                     \
+  void NAME(const Address &dest);
+
+  INSN(b, 0);
+  INSN(bl, 1);
+
+#undef INSN
+
+  // Compare & branch (immediate)
+#define INSN(NAME, opcode)                              \
+  void NAME(Register Rt, address dest) {                \
+    long offset = (dest - pc()) >> 2;                   \
+    starti;                                             \
+    f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
+  }                                                     \
+  void NAME(Register Rt, Label &L) {                    \
+    wrap_label(Rt, L, &Assembler::NAME);                \
+  }
+
+  INSN(cbzw,  0b00110100);
+  INSN(cbnzw, 0b00110101);
+  INSN(cbz,   0b10110100);
+  INSN(cbnz,  0b10110101);
+
+#undef INSN
+
+  // Test & branch (immediate)
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rt, int bitpos, address dest) {                    \
+    long offset = (dest - pc()) >> 2;                                   \
+    int b5 = bitpos >> 5;                                               \
+    bitpos &= 0x1f;                                                     \
+    starti;                                                             \
+    f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
+    rf(Rt, 0);                                                          \
+  }                                                                     \
+  void NAME(Register Rt, int bitpos, Label &L) {                        \
+    wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
+  }
+
+  INSN(tbz,  0b0110110);
+  INSN(tbnz, 0b0110111);
+
+#undef INSN
+
+  // Conditional branch (immediate)
+  enum Condition
+    {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};
+
+  void br(Condition  cond, address dest) {
+    long offset = (dest - pc()) >> 2;
+    starti;
+    f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
+  }
+
+#define INSN(NAME, cond)                        \
+  void NAME(address dest) {                     \
+    br(cond, dest);                             \
+  }
+
+  INSN(beq, EQ);
+  INSN(bne, NE);
+  INSN(bhs, HS);
+  INSN(bcs, CS);
+  INSN(blo, LO);
+  INSN(bcc, CC);
+  INSN(bmi, MI);
+  INSN(bpl, PL);
+  INSN(bvs, VS);
+  INSN(bvc, VC);
+  INSN(bhi, HI);
+  INSN(bls, LS);
+  INSN(bge, GE);
+  INSN(blt, LT);
+  INSN(bgt, GT);
+  INSN(ble, LE);
+  INSN(bal, AL);
+  INSN(bnv, NV);
+
+  void br(Condition cc, Label &L);
+
+#undef INSN
+
+  // Exception generation
+  void generate_exception(int opc, int op2, int LL, unsigned imm) {
+    starti;
+    f(0b11010100, 31, 24);
+    f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
+  }
+
+#define INSN(NAME, opc, op2, LL)                \
+  void NAME(unsigned imm) {                     \
+    generate_exception(opc, op2, LL, imm);      \
+  }
+
+  INSN(svc, 0b000, 0, 0b01);
+  INSN(hvc, 0b000, 0, 0b10);
+  INSN(smc, 0b000, 0, 0b11);
+  INSN(brk, 0b001, 0, 0b00);
+  INSN(hlt, 0b010, 0, 0b00);
+  INSN(dpcs1, 0b101, 0, 0b01);
+  INSN(dpcs2, 0b101, 0, 0b10);
+  INSN(dpcs3, 0b101, 0, 0b11);
+
+#undef INSN
+
+  // System
+  void system(int op0, int op1, int CRn, int CRm, int op2,
+              Register rt = (Register)0b11111)
+  {
+    starti;
+    f(0b11010101000, 31, 21);
+    f(op0, 20, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    rf(rt, 0);
+  }
+
+  void hint(int imm) {
+    system(0b00, 0b011, 0b0010, imm, 0b000);
+  }
+
+  void nop() {
+    hint(0);
+  }
+  // We only provide mrs and msr for the special-purpose system
+  // registers where op0 (instr[20:19]) == 11 and currently only use
+  // them for FPSR.  N.B. msr has L (instr[21]) == 0, mrs has L == 1.
+
+  void msr(int op1, int CRn, int CRm, int op2, Register rt) {
+    starti;
+    f(0b1101010100011, 31, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    // writing zr is ok
+    zrf(rt, 0);
+  }
+
+  void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
+    starti;
+    f(0b1101010100111, 31, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    // reading to zr is a mistake
+    rf(rt, 0);
+  }
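+
+  // e.g., reading and then clearing the cumulative FP exception flags
+  // might look like this (FPSR is op0=3, op1=3, CRn=4, CRm=4, op2=1 in
+  // the architecture; shown purely as an illustration):
+  //
+  //   mrs(0b011, 0b0100, 0b0100, 0b001, r0);  // mrs x0, fpsr
+  //   msr(0b011, 0b0100, 0b0100, 0b001, zr);  // msr fpsr, xzr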
+
+  enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
+                ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
+
+  void dsb(barrier imm) {
+    system(0b00, 0b011, 0b00011, imm, 0b100);
+  }
+
+  void dmb(barrier imm) {
+    system(0b00, 0b011, 0b00011, imm, 0b101);
+  }
+
+  void isb() {
+    system(0b00, 0b011, 0b00011, SY, 0b110);
+  }
+
+  void sys(int op1, int CRn, int CRm, int op2,
+           Register rt = (Register)0b11111) {
+    system(0b01, op1, CRn, CRm, op2, rt);
+  }
+
+  // Only implement operations accessible from EL0 or higher, i.e.,
+  //            op1    CRn    CRm    op2
+  // IC IVAU     3      7      5      1
+  // DC CVAC     3      7      10     1
+  // DC CVAU     3      7      11     1
+  // DC CIVAC    3      7      14     1
+  // DC ZVA      3      7      4      1
+  // So only deal with the CRm field.
+  enum icache_maintenance {IVAU = 0b0101};
+  enum dcache_maintenance {CVAC = 0b1010, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100};
+
+  void dc(dcache_maintenance cm, Register Rt) {
+    sys(0b011, 0b0111, cm, 0b001, Rt);
+  }
+
+  void ic(icache_maintenance cm, Register Rt) {
+    sys(0b011, 0b0111, cm, 0b001, Rt);
+  }
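+
+  // A typical cache-maintenance sequence after patching code at the
+  // address in r0 might be (illustrative only):
+  //
+  //   dc(CVAU, r0);   // clean the D-cache line to the point of unification
+  //   dsb(ISH);
+  //   ic(IVAU, r0);   // invalidate the corresponding I-cache line
+  //   dsb(ISH);
+  //   isb();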
+
+  // A more convenient access to dmb for our purposes
+  enum Membar_mask_bits {
+    StoreStore = ST,
+    LoadStore  = LD,
+    LoadLoad   = LD,
+    // We can use ISH for a full barrier because the ARM ARM says
+    // "This architecture assumes that all Processing Elements that
+    // use the same operating system or hypervisor are in the same
+    // Inner Shareable shareability domain."
+    StoreLoad  = ISH,
+    AnyAny     = ISH
+  };
+
+  void membar(Membar_mask_bits order_constraint) {
+    dmb(Assembler::barrier(order_constraint));
+  }
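+
+  // e.g., membar(StoreLoad) and membar(AnyAny) both emit "dmb ish",
+  // while membar(StoreStore) emits "dmb st"; masks may be combined
+  // with the operator| defined after this class.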
+
+  // Unconditional branch (register)
+  void branch_reg(Register R, int opc) {
+    starti;
+    f(0b1101011, 31, 25);
+    f(opc, 24, 21);
+    f(0b11111000000, 20, 10);
+    rf(R, 5);
+    f(0b00000, 4, 0);
+  }
+
+#define INSN(NAME, opc)                         \
+  void NAME(Register R) {                       \
+    branch_reg(R, opc);                         \
+  }
+
+  INSN(br, 0b0000);
+  INSN(blr, 0b0001);
+  INSN(ret, 0b0010);
+
+  void ret(void *p); // This forces a compile-time error for ret(0)
+
+#undef INSN
+
+#define INSN(NAME, opc)                         \
+  void NAME() {                                 \
+    branch_reg((Register)0b11111, opc);         \
+  }
+
+  INSN(eret, 0b0100);
+  INSN(drps, 0b0101);
+
+#undef INSN
+
+  // Load/store exclusive
+  enum operand_size { byte, halfword, word, xword };
+
+  void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
+    Register Rn, enum operand_size sz, int op, int o0) {
+    starti;
+    f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
+    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+  }
+
+#define INSN4(NAME, sz, op, o0) /* Four registers */                    \
+  void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
+    guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
+    load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
+  }
+
+#define INSN3(NAME, sz, op, o0) /* Three registers */                   \
+  void NAME(Register Rs, Register Rt, Register Rn) {                    \
+    guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
+    load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);    \
+  }
+
+#define INSN2(NAME, sz, op, o0) /* Two registers */                     \
+  void NAME(Register Rt, Register Rn) {                                 \
+    load_store_exclusive((Register)0b11111, Rt, (Register)0b11111,      \
+                         Rn, sz, op, o0);                               \
+  }
+
+#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
+  void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
+    guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
+    load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0);  \
+  }
+
+  // bytes
+  INSN3(stxrb, byte, 0b000, 0);
+  INSN3(stlxrb, byte, 0b000, 1);
+  INSN2(ldxrb, byte, 0b010, 0);
+  INSN2(ldaxrb, byte, 0b010, 1);
+  INSN2(stlrb, byte, 0b100, 1);
+  INSN2(ldarb, byte, 0b110, 1);
+
+  // halfwords
+  INSN3(stxrh, halfword, 0b000, 0);
+  INSN3(stlxrh, halfword, 0b000, 1);
+  INSN2(ldxrh, halfword, 0b010, 0);
+  INSN2(ldaxrh, halfword, 0b010, 1);
+  INSN2(stlrh, halfword, 0b100, 1);
+  INSN2(ldarh, halfword, 0b110, 1);
+
+  // words
+  INSN3(stxrw, word, 0b000, 0);
+  INSN3(stlxrw, word, 0b000, 1);
+  INSN4(stxpw, word, 0b001, 0);
+  INSN4(stlxpw, word, 0b001, 1);
+  INSN2(ldxrw, word, 0b010, 0);
+  INSN2(ldaxrw, word, 0b010, 1);
+  INSN_FOO(ldxpw, word, 0b011, 0);
+  INSN_FOO(ldaxpw, word, 0b011, 1);
+  INSN2(stlrw, word, 0b100, 1);
+  INSN2(ldarw, word, 0b110, 1);
+
+  // xwords
+  INSN3(stxr, xword, 0b000, 0);
+  INSN3(stlxr, xword, 0b000, 1);
+  INSN4(stxp, xword, 0b001, 0);
+  INSN4(stlxp, xword, 0b001, 1);
+  INSN2(ldxr, xword, 0b010, 0);
+  INSN2(ldaxr, xword, 0b010, 1);
+  INSN_FOO(ldxp, xword, 0b011, 0);
+  INSN_FOO(ldaxp, xword, 0b011, 1);
+  INSN2(stlr, xword, 0b100, 1);
+  INSN2(ldar, xword, 0b110, 1);
+
+#undef INSN2
+#undef INSN3
+#undef INSN4
+#undef INSN_FOO
+
+  // Load register (literal)
+#define INSN(NAME, opc, V)                                              \
+  void NAME(Register Rt, address dest) {                                \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    rf(Rt, 0);                                                          \
+  }                                                                     \
+  void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
+    InstructionMark im(this);                                           \
+    guarantee(rtype == relocInfo::internal_word_type,                   \
+              "only internal_word_type relocs make sense here");        \
+    relocate(inst_mark(), InternalAddress(dest).rspec());               \
+    NAME(Rt, dest);                                                     \
+  }                                                                     \
+  void NAME(Register Rt, Label &L) {                                    \
+    wrap_label(Rt, L, &Assembler::NAME);                                \
+  }
+
+  INSN(ldrw, 0b00, 0);
+  INSN(ldr, 0b01, 0);
+  INSN(ldrsw, 0b10, 0);
+
+#undef INSN
+
+#define INSN(NAME, opc, V)                                              \
+  void NAME(FloatRegister Rt, address dest) {                           \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    rf((Register)Rt, 0);                                                \
+  }
+
+  INSN(ldrs, 0b00, 1);
+  INSN(ldrd, 0b01, 1);
+
+#undef INSN
+
+#define INSN(NAME, opc, V)                                              \
+  void NAME(address dest, prfop op = PLDL1KEEP) {                       \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    f(op, 4, 0);                                                        \
+  }                                                                     \
+  void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
+    wrap_label(L, op, &Assembler::NAME);                                \
+  }
+
+  INSN(prfm, 0b11, 0);
+
+#undef INSN
+
+  // Load/store
+  void ld_st1(int opc, int p1, int V, int L,
+              Register Rt1, Register Rt2, Address adr, bool no_allocate) {
+    starti;
+    f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
+    zrf(Rt2, 10), zrf(Rt1, 0);
+    if (no_allocate) {
+      adr.encode_nontemporal_pair(current);
+    } else {
+      adr.encode_pair(current);
+    }
+  }
+
+  // Load/store register pair (offset)
+#define INSN(NAME, size, p1, V, L, no_allocate)         \
+  void NAME(Register Rt1, Register Rt2, Address adr) {  \
+    ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
+   }
+
+  INSN(stpw, 0b00, 0b101, 0, 0, false);
+  INSN(ldpw, 0b00, 0b101, 0, 1, false);
+  INSN(ldpsw, 0b01, 0b101, 0, 1, false);
+  INSN(stp, 0b10, 0b101, 0, 0, false);
+  INSN(ldp, 0b10, 0b101, 0, 1, false);
+
+  // Load/store no-allocate pair (offset)
+  INSN(stnpw, 0b00, 0b101, 0, 0, true);
+  INSN(ldnpw, 0b00, 0b101, 0, 1, true);
+  INSN(stnp, 0b10, 0b101, 0, 0, true);
+  INSN(ldnp, 0b10, 0b101, 0, 1, true);
+
+#undef INSN
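+
+  // e.g., assuming the base-plus-offset Address form, saving and later
+  // restoring a pair of registers might look like (illustrative only):
+  //
+  //   stp(r19, r20, Address(sp, 16));   // [sp+16] = r19, [sp+24] = r20
+  //   ...
+  //   ldp(r19, r20, Address(sp, 16));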
+
+#define INSN(NAME, size, p1, V, L, no_allocate)                         \
+  void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
+    ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \
+   }
+
+  INSN(stps, 0b00, 0b101, 1, 0, false);
+  INSN(ldps, 0b00, 0b101, 1, 1, false);
+  INSN(stpd, 0b01, 0b101, 1, 0, false);
+  INSN(ldpd, 0b01, 0b101, 1, 1, false);
+
+#undef INSN
+
+  // Load/store register (all modes)
+  void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
+    starti;
+
+    f(V, 26); // general reg?
+    zrf(Rt, 0);
+
+    // Encoding for literal loads is done here (rather than pushed
+    // down into Address::encode) because the encoding of this
+    // instruction is too different from all of the other forms to
+    // make it worth sharing.
+    if (adr.getMode() == Address::literal) {
+      assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
+      assert(op == 0b01, "literal form can only be used with loads");
+      f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
+      long offset = (adr.target() - pc()) >> 2;
+      sf(offset, 23, 5);
+      // code_section()->relocate(pc(), adr.rspec());
+      relocate(pc(), adr.rspec());
+      return;
+    }
+
+    f(size, 31, 30);
+    f(op, 23, 22); // str
+    adr.encode(current);
+  }
+
+#define INSN(NAME, size, op)                            \
+  void NAME(Register Rt, const Address &adr) {          \
+    ld_st2(Rt, adr, size, op);                          \
+  }                                                     \
+
+  INSN(str, 0b11, 0b00);
+  INSN(strw, 0b10, 0b00);
+  INSN(strb, 0b00, 0b00);
+  INSN(strh, 0b01, 0b00);
+
+  INSN(ldr, 0b11, 0b01);
+  INSN(ldrw, 0b10, 0b01);
+  INSN(ldrb, 0b00, 0b01);
+  INSN(ldrh, 0b01, 0b01);
+
+  INSN(ldrsb, 0b00, 0b10);
+  INSN(ldrsbw, 0b00, 0b11);
+  INSN(ldrsh, 0b01, 0b10);
+  INSN(ldrshw, 0b01, 0b11);
+  INSN(ldrsw, 0b10, 0b10);
+
+#undef INSN
+
+#define INSN(NAME, size, op)                                    \
+  void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
+    ld_st2((Register)pfop, adr, size, op);                      \
+  }
+
+  INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
+                          // writeback modes, but the assembler
+                          // doesn't enforce that.
+
+#undef INSN
+
+#define INSN(NAME, size, op)                            \
+  void NAME(FloatRegister Rt, const Address &adr) {     \
+    ld_st2((Register)Rt, adr, size, op, 1);             \
+  }
+
+  INSN(strd, 0b11, 0b00);
+  INSN(strs, 0b10, 0b00);
+  INSN(ldrd, 0b11, 0b01);
+  INSN(ldrs, 0b10, 0b01);
+
+#undef INSN
+
+  enum shift_kind { LSL, LSR, ASR, ROR };
+
+  void op_shifted_reg(unsigned decode,
+                      enum shift_kind kind, unsigned shift,
+                      unsigned size, unsigned op) {
+    f(size, 31);
+    f(op, 30, 29);
+    f(decode, 28, 24);
+    f(shift, 15, 10);
+    f(kind, 23, 22);
+  }
+
+  // Logical (shifted register)
+#define INSN(NAME, size, op, N)                                 \
+  void NAME(Register Rd, Register Rn, Register Rm,              \
+            enum shift_kind kind = LSL, unsigned shift = 0) {   \
+    starti;                                                     \
+    f(N, 21);                                                   \
+    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                        \
+    op_shifted_reg(0b01010, kind, shift, size, op);             \
+  }
+
+  INSN(andr, 1, 0b00, 0);
+  INSN(orr, 1, 0b01, 0);
+  INSN(eor, 1, 0b10, 0);
+  INSN(ands, 1, 0b11, 0);
+  INSN(andw, 0, 0b00, 0);
+  INSN(orrw, 0, 0b01, 0);
+  INSN(eorw, 0, 0b10, 0);
+  INSN(andsw, 0, 0b11, 0);
+
+  INSN(bic, 1, 0b00, 1);
+  INSN(orn, 1, 0b01, 1);
+  INSN(eon, 1, 0b10, 1);
+  INSN(bics, 1, 0b11, 1);
+  INSN(bicw, 0, 0b00, 1);
+  INSN(ornw, 0, 0b01, 1);
+  INSN(eonw, 0, 0b10, 1);
+  INSN(bicsw, 0, 0b11, 1);
+
+#undef INSN
+
+  // Add/subtract (shifted register)
+#define INSN(NAME, size, op)                            \
+  void NAME(Register Rd, Register Rn, Register Rm,      \
+            enum shift_kind kind, unsigned shift = 0) { \
+    starti;                                             \
+    f(0, 21);                                           \
+    assert_cond(kind != ROR);                           \
+    zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                \
+    op_shifted_reg(0b01011, kind, shift, size, op);     \
+  }
+
+  INSN(add, 1, 0b000);
+  INSN(sub, 1, 0b10);
+  INSN(addw, 0, 0b000);
+  INSN(subw, 0, 0b10);
+
+  INSN(adds, 1, 0b001);
+  INSN(subs, 1, 0b11);
+  INSN(addsw, 0, 0b001);
+  INSN(subsw, 0, 0b11);
+
+#undef INSN
+
+  // Add/subtract (extended register)
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    starti;                                                             \
+    zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                                \
+    add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
+  }
+
+  void add_sub_extended_reg(unsigned op, unsigned decode,
+    Register Rd, Register Rn, Register Rm,
+    unsigned opt, ext::operation option, unsigned imm) {
+    guarantee(imm <= 4, "shift amount must be < 4");
+    f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
+    f(option, 15, 13), f(imm, 12, 10);
+  }
+
+  INSN(addw, 0b000);
+  INSN(subw, 0b010);
+  INSN(add, 0b100);
+  INSN(sub, 0b110);
+
+#undef INSN
+
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    starti;                                                             \
+    zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                                \
+    add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
+  }
+
+  INSN(addsw, 0b001);
+  INSN(subsw, 0b011);
+  INSN(adds, 0b101);
+  INSN(subs, 0b111);
+
+#undef INSN
+
+  // Aliases for short forms of add and sub
+#define INSN(NAME)                                      \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    if (Rd == sp || Rn == sp)                           \
+      NAME(Rd, Rn, Rm, ext::uxtx);                      \
+    else                                                \
+      NAME(Rd, Rn, Rm, LSL);                            \
+  }
+
+  INSN(addw);
+  INSN(subw);
+  INSN(add);
+  INSN(sub);
+
+  INSN(addsw);
+  INSN(subsw);
+  INSN(adds);
+  INSN(subs);
+
+#undef INSN
+
+  // Add/subtract (with carry)
+  void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
+    starti;
+    f(op, 31, 29);
+    f(0b11010000, 28, 21);
+    f(0b000000, 15, 10);
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);
+  }
+
+  #define INSN(NAME, op)                                \
+    void NAME(Register Rd, Register Rn, Register Rm) {  \
+      add_sub_carry(op, Rd, Rn, Rm);                    \
+    }
+
+  INSN(adcw, 0b000);
+  INSN(adcsw, 0b001);
+  INSN(sbcw, 0b010);
+  INSN(sbcsw, 0b011);
+  INSN(adc, 0b100);
+  INSN(adcs, 0b101);
+  INSN(sbc, 0b110);
+  INSN(sbcs, 0b111);
+
+#undef INSN
+
+  // Conditional compare (both kinds)
+  void conditional_compare(unsigned op, int o2, int o3,
+                           Register Rn, unsigned imm5, unsigned nzcv,
+                           unsigned cond) {
+    f(op, 31, 29);
+    f(0b11010010, 28, 21);
+    f(cond, 15, 12);
+    f(o2, 10);
+    f(o3, 4);
+    f(nzcv, 3, 0);
+    f(imm5, 20, 16), rf(Rn, 5);
+  }
+
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
+    starti;                                                             \
+    f(0, 11);                                                           \
+    conditional_compare(op, 0, 0, Rn, (uintptr_t)Rm, imm, cond);        \
+  }                                                                     \
+                                                                        \
+  void NAME(Register Rn, int imm5, int imm, Condition cond) {   \
+    starti;                                                             \
+    f(1, 11);                                                           \
+    conditional_compare(op, 0, 0, Rn, imm5, imm, cond);                 \
+  }
+
+  INSN(ccmnw, 0b001);
+  INSN(ccmpw, 0b011);
+  INSN(ccmn, 0b101);
+  INSN(ccmp, 0b111);
+
+#undef INSN
+
+  // Conditional select
+  void conditional_select(unsigned op, unsigned op2,
+                          Register Rd, Register Rn, Register Rm,
+                          unsigned cond) {
+    starti;
+    f(op, 31, 29);
+    f(0b11010100, 28, 21);
+    f(cond, 15, 12);
+    f(op2, 11, 10);
+    zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
+  }
+
+#define INSN(NAME, op, op2)                                             \
+  void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \
+    conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
+  }
+
+  INSN(cselw, 0b000, 0b00);
+  INSN(csincw, 0b000, 0b01);
+  INSN(csinvw, 0b010, 0b00);
+  INSN(csnegw, 0b010, 0b01);
+  INSN(csel, 0b100, 0b00);
+  INSN(csinc, 0b100, 0b01);
+  INSN(csinv, 0b110, 0b00);
+  INSN(csneg, 0b110, 0b01);
+
+#undef INSN
+
+  // Data processing
+  void data_processing(unsigned op29, unsigned opcode,
+                       Register Rd, Register Rn) {
+    f(op29, 31, 29), f(0b11010110, 28, 21);
+    f(opcode, 15, 10);
+    rf(Rn, 5), rf(Rd, 0);
+  }
+
+  // (1 source)
+#define INSN(NAME, op29, opcode2, opcode)       \
+  void NAME(Register Rd, Register Rn) {         \
+    starti;                                     \
+    f(opcode2, 20, 16);                         \
+    data_processing(op29, opcode, Rd, Rn);      \
+  }
+
+  INSN(rbitw,  0b010, 0b00000, 0b00000);
+  INSN(rev16w, 0b010, 0b00000, 0b00001);
+  INSN(revw,   0b010, 0b00000, 0b00010);
+  INSN(clzw,   0b010, 0b00000, 0b00100);
+  INSN(clsw,   0b010, 0b00000, 0b00101);
+
+  INSN(rbit,   0b110, 0b00000, 0b00000);
+  INSN(rev16,  0b110, 0b00000, 0b00001);
+  INSN(rev32,  0b110, 0b00000, 0b00010);
+  INSN(rev,    0b110, 0b00000, 0b00011);
+  INSN(clz,    0b110, 0b00000, 0b00100);
+  INSN(cls,    0b110, 0b00000, 0b00101);
+
+#undef INSN
+
+  // (2 sources)
+#define INSN(NAME, op29, opcode)                        \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    starti;                                             \
+    rf(Rm, 16);                                         \
+    data_processing(op29, opcode, Rd, Rn);              \
+  }
+
+  INSN(udivw, 0b000, 0b000010);
+  INSN(sdivw, 0b000, 0b000011);
+  INSN(lslvw, 0b000, 0b001000);
+  INSN(lsrvw, 0b000, 0b001001);
+  INSN(asrvw, 0b000, 0b001010);
+  INSN(rorvw, 0b000, 0b001011);
+
+  INSN(udiv, 0b100, 0b000010);
+  INSN(sdiv, 0b100, 0b000011);
+  INSN(lslv, 0b100, 0b001000);
+  INSN(lsrv, 0b100, 0b001001);
+  INSN(asrv, 0b100, 0b001010);
+  INSN(rorv, 0b100, 0b001011);
+
+#undef INSN
+
+  // (3 sources)
+  void data_processing(unsigned op54, unsigned op31, unsigned o0,
+                       Register Rd, Register Rn, Register Rm,
+                       Register Ra) {
+    starti;
+    f(op54, 31, 29), f(0b11011, 28, 24);
+    f(op31, 23, 21), f(o0, 15);
+    zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
+  }
+
+#define INSN(NAME, op54, op31, o0)                                      \
+  void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
+    data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
+  }
+
+  INSN(maddw, 0b000, 0b000, 0);
+  INSN(msubw, 0b000, 0b000, 1);
+  INSN(madd, 0b100, 0b000, 0);
+  INSN(msub, 0b100, 0b000, 1);
+  INSN(smaddl, 0b100, 0b001, 0);
+  INSN(smsubl, 0b100, 0b001, 1);
+  INSN(umaddl, 0b100, 0b101, 0);
+  INSN(umsubl, 0b100, 0b101, 1);
+
+#undef INSN
+
+#define INSN(NAME, op54, op31, o0)                      \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31);  \
+  }
+
+  INSN(smulh, 0b100, 0b010, 0);
+  INSN(umulh, 0b100, 0b110, 0);
+
+#undef INSN
+
+  // Floating-point data-processing (1 source)
+  void data_processing(unsigned op31, unsigned type, unsigned opcode,
+                       FloatRegister Vd, FloatRegister Vn) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, opcode)                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn) {       \
+    data_processing(op31, type, opcode, Vd, Vn);        \
+  }
+
+private:
+  INSN(i_fmovs, 0b000, 0b00, 0b000000);
+public:
+  INSN(fabss, 0b000, 0b00, 0b000001);
+  INSN(fnegs, 0b000, 0b00, 0b000010);
+  INSN(fsqrts, 0b000, 0b00, 0b000011);
+  INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision
+
+private:
+  INSN(i_fmovd, 0b000, 0b01, 0b000000);
+public:
+  INSN(fabsd, 0b000, 0b01, 0b000001);
+  INSN(fnegd, 0b000, 0b01, 0b000010);
+  INSN(fsqrtd, 0b000, 0b01, 0b000011);
+  INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision
+
+  void fmovd(FloatRegister Vd, FloatRegister Vn) {
+    assert(Vd != Vn, "should be");
+    i_fmovd(Vd, Vn);
+  }
+
+  void fmovs(FloatRegister Vd, FloatRegister Vn) {
+    assert(Vd != Vn, "should be");
+    i_fmovs(Vd, Vn);
+  }
+
+#undef INSN
+
+  // Floating-point data-processing (2 source)
+  void data_processing(unsigned op31, unsigned type, unsigned opcode,
+                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10);
+    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, opcode)                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
+    data_processing(op31, type, opcode, Vd, Vn, Vm);    \
+  }
+
+  INSN(fmuls, 0b000, 0b00, 0b0000);
+  INSN(fdivs, 0b000, 0b00, 0b0001);
+  INSN(fadds, 0b000, 0b00, 0b0010);
+  INSN(fsubs, 0b000, 0b00, 0b0011);
+  INSN(fnmuls, 0b000, 0b00, 0b1000);
+
+  INSN(fmuld, 0b000, 0b01, 0b0000);
+  INSN(fdivd, 0b000, 0b01, 0b0001);
+  INSN(faddd, 0b000, 0b01, 0b0010);
+  INSN(fsubd, 0b000, 0b01, 0b0011);
+  INSN(fnmuld, 0b000, 0b01, 0b1000);
+
+#undef INSN
+
+   // Floating-point data-processing (3 source)
+  void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
+                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
+                       FloatRegister Va) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11111, 28, 24);
+    f(type, 23, 22), f(o1, 21), f(o0, 15);
+    rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, o1, o0)                                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
+            FloatRegister Va) {                                         \
+    data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
+  }
+
+  INSN(fmadds, 0b000, 0b00, 0, 0);
+  INSN(fmsubs, 0b000, 0b00, 0, 1);
+  INSN(fnmadds, 0b000, 0b00, 1, 0);
+  INSN(fnmsubs, 0b000, 0b00, 1, 1);
+
+  INSN(fmaddd, 0b000, 0b01, 0, 0);
+  INSN(fmsubd, 0b000, 0b01, 0, 1);
+  INSN(fnmaddd, 0b000, 0b01, 1, 0);
+  INSN(fnmsub, 0b000, 0b01, 1, 1);
+
+#undef INSN
+
+   // Floating-point conditional select
+  void fp_conditional_select(unsigned op31, unsigned type,
+                             unsigned op1, unsigned op2,
+                             Condition cond, FloatRegister Vd,
+                             FloatRegister Vn, FloatRegister Vm) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22);
+    f(op1, 21, 21);
+    f(op2, 11, 10);
+    f(cond, 15, 12);
+    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, op1, op2)                                \
+  void NAME(FloatRegister Vd, FloatRegister Vn,                         \
+            FloatRegister Vm, Condition cond) {                         \
+    fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
+  }
+
+  INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
+  INSN(fcseld, 0b000, 0b01, 0b1, 0b11);
+
+#undef INSN
+
+   // Floating-point<->integer conversions
+  void float_int_convert(unsigned op31, unsigned type,
+                         unsigned rmode, unsigned opcode,
+                         Register Rd, Register Rn) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
+    f(opcode, 18, 16), f(0b000000, 15, 10);
+    zrf(Rn, 5), zrf(Rd, 0);
+  }
+
+#define INSN(NAME, op31, type, rmode, opcode)                           \
+  void NAME(Register Rd, FloatRegister Vn) {                            \
+    float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn);     \
+  }
+
+  INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000);
+  INSN(fcvtzs,  0b100, 0b00, 0b11, 0b000);
+  INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000);
+  INSN(fcvtzd,  0b100, 0b01, 0b11, 0b000);
+
+  INSN(fmovs, 0b000, 0b00, 0b00, 0b110);
+  INSN(fmovd, 0b100, 0b01, 0b00, 0b110);
+
+  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);
+
+#undef INSN
+
+#define INSN(NAME, op31, type, rmode, opcode)                           \
+  void NAME(FloatRegister Vd, Register Rn) {                            \
+    float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn);     \
+  }
+
+  INSN(fmovs, 0b000, 0b00, 0b00, 0b111);
+  INSN(fmovd, 0b100, 0b01, 0b00, 0b111);
+
+  INSN(scvtfws, 0b000, 0b00, 0b00, 0b010);
+  INSN(scvtfs,  0b100, 0b00, 0b00, 0b010);
+  INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010);
+  INSN(scvtfd,  0b100, 0b01, 0b00, 0b010);
+
+  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);
+
+#undef INSN
+
+  // Floating-point compare
+  void float_compare(unsigned op31, unsigned type,
+                     unsigned op, unsigned op2,
+                     FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21);
+    f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
+    rf(Vn, 5), rf(Vm, 16);
+  }
+
+
+#define INSN(NAME, op31, type, op, op2)                 \
+  void NAME(FloatRegister Vn, FloatRegister Vm) {       \
+    float_compare(op31, type, op, op2, Vn, Vm);         \
+  }
+
+#define INSN1(NAME, op31, type, op, op2)        \
+  void NAME(FloatRegister Vn, double d) {       \
+    assert_cond(d == 0.0);                      \
+    float_compare(op31, type, op, op2, Vn);     \
+  }
+
+  INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
+  INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
+  // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
+  // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);
+
+  INSN(fcmpd, 0b000,   0b01, 0b00, 0b00000);
+  INSN1(fcmpd, 0b000,  0b01, 0b00, 0b01000);
+  // INSN(fcmped, 0b000,  0b01, 0b00, 0b10000);
+  // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);
+
+#undef INSN
+#undef INSN1
+
+  // Floating-point Move (immediate)
+private:
+  unsigned pack(double value);
+
+  void fmov_imm(FloatRegister Vn, double value, unsigned size) {
+    starti;
+    f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
+    f(pack(value), 20, 13), f(0b10000000, 12, 5);
+    rf(Vn, 0);
+  }
+
+public:
+
+  void fmovs(FloatRegister Vn, double value) {
+    if (value)
+      fmov_imm(Vn, value, 0b00);
+    else
+      fmovs(Vn, zr);
+  }
+  void fmovd(FloatRegister Vn, double value) {
+    if (value)
+      fmov_imm(Vn, value, 0b01);
+    else
+      fmovd(Vn, zr);
+  }
+
+/* SIMD extensions
+ *
+ * We just use FloatRegister in the following. They are exactly the same
+ * as SIMD registers.
+ */
+ public:
+
+  enum SIMD_Arrangement {
+       T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
+  };
+
+  enum SIMD_RegVariant {
+       S32, D64, Q128
+  };
+
+ private:
+
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             int imm, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             Register Xm, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
+   switch (a.getMode()) {
+   case Address::base_plus_offset:
+     guarantee(a.offset() == 0, "no offset allowed here");
+     ld_st(Vt, T, a.base(), op1, op2);
+     break;
+   case Address::post:
+     ld_st(Vt, T, a.base(), a.offset(), op1, op2);
+     break;
+   case Address::base_plus_offset_reg:
+     ld_st(Vt, T, a.base(), a.index(), op1, op2);
+     break;
+   default:
+     ShouldNotReachHere();
+   }
+ }
+
+ public:
+
+#define INSN1(NAME, op1, op2)                                   \
+  void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
+   ld_st(Vt, T, a, op1, op2);                                           \
+ }
+
+#define INSN2(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
+    assert(Vt->successor() == Vt2, "Registers must be ordered");        \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN3(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            SIMD_Arrangement T, const Address &a) {                     \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
+           "Registers must be ordered");                                \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN4(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
+           Vt3->successor() == Vt4, "Registers must be ordered");       \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+  INSN1(ld1,  0b001100010, 0b0111);
+  INSN2(ld1,  0b001100010, 0b1010);
+  INSN3(ld1,  0b001100010, 0b0110);
+  INSN4(ld1,  0b001100010, 0b0010);
+
+  INSN2(ld2,  0b001100010, 0b1000);
+  INSN3(ld3,  0b001100010, 0b0100);
+  INSN4(ld4,  0b001100010, 0b0000);
+
+  INSN1(st1,  0b001100000, 0b0111);
+  INSN2(st1,  0b001100000, 0b1010);
+  INSN3(st1,  0b001100000, 0b0110);
+  INSN4(st1,  0b001100000, 0b0010);
+
+  INSN2(st2,  0b001100000, 0b1000);
+  INSN3(st3,  0b001100000, 0b0100);
+  INSN4(st4,  0b001100000, 0b0000);
+
+  INSN1(ld1r, 0b001101010, 0b1100);
+  INSN2(ld2r, 0b001101011, 0b1100);
+  INSN3(ld3r, 0b001101010, 0b1110);
+  INSN4(ld4r, 0b001101011, 0b1110);
+
+#undef INSN1
+#undef INSN2
+#undef INSN3
+#undef INSN4
+
+#define INSN(NAME, opc)                                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                                             \
+    assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
+    f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
+    rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
+  }
+
+  INSN(eor, 0b101110001);
+  INSN(orr, 0b001110101);
+  INSN(andr, 0b001110001);
+  INSN(bic, 0b001110011);
+  INSN(bif, 0b101110111);
+  INSN(bit, 0b101110101);
+  INSN(bsl, 0b101110011);
+  INSN(orn, 0b001110111);
+
+#undef INSN
+
+#define INSN(NAME, opc)                           \
+  void NAME(FloatRegister Vd, FloatRegister Vn) { \
+    starti;                                       \
+    f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
+  }
+
+  INSN(aese, 0b0100111000101000010010);
+  INSN(aesd, 0b0100111000101000010110);
+  INSN(aesmc, 0b0100111000101000011010);
+  INSN(aesimc, 0b0100111000101000011110);
+
+#undef INSN
+
+  void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
+    starti;
+    /* The encodings for the immh:immb fields (bits 22:16) are
+     *   0001 xxx       8B/16B, shift = xxx
+     *   001x xxx       4H/8H,  shift = xxxx
+     *   01xx xxx       2S/4S,  shift = xxxxx
+     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)
+     */
+    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");
+    f(0, 31), f(T & 1, 30), f(0b0011110, 29, 23), f((1 << ((T>>1)+3))|shift, 22, 16);
+    f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0);
+  }
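+
+  // e.g., shl(v0, v1, T4S, 3) shifts each 32-bit lane of v1 left by
+  // three bits; the assert above requires the shift to be smaller than
+  // the lane width.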
+
+  void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+    starti;
+    /* The encodings for the immh:immb fields (bits 22:16) are
+     *   0001 xxx       8H, 8B/16b shift = xxx
+     *   001x xxx       4S, 4H/8H  shift = xxxx
+     *   01xx xxx       2D, 2S/4S  shift = xxxxx
+     *   1xxx xxx       RESERVED
+     */
+    assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
+    assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
+    f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
+    f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+  void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
+    ushll(Vd, Ta, Vn, Tb, shift);
+  }
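+
+  // e.g., ushll(v0, T8H, v1, T8B, 0) zero-extends the low eight bytes
+  // of v1 into eight halfwords in v0; ushll2 handles the high half,
+  // differing only in the choice of Tb (T16B rather than T8B), which
+  // is why it simply delegates to ushll.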
+
+  void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,  SIMD_Arrangement T, int op = 0){
+    starti;
+    f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21);
+    rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+  void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,  SIMD_Arrangement T){
+    uzp1(Vd, Vn, Vm, T, 1);
+  }
+
+  // Move from general purpose register
+  //   mov  Vd.T[index], Rn
+  void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
+    starti;
+    f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
+    f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0);
+  }
+
+  // Move to general purpose register
+  //   mov  Rd, Vn.T[index]
+  void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
+    starti;
+    f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
+    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
+    f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
+  }
+
+  // We do not handle the 1Q arrangement.
+  void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    starti;
+    assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
+    f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+  void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    pmull(Vd, Ta, Vn, Vm, Tb);
+  }
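+
+  // e.g., pmull(v0, T8H, v1, v2, T8B) performs a carry-less
+  // (polynomial) multiply of the low eight bytes of v1 and v2, the
+  // primitive used by CRC and GHASH style kernels; pmull2 operates on
+  // the high eight bytes (Tb == T16B).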
+
+  void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
+  {
+    starti;
+    assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
+    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24);
+    f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+  // CRC32 instructions
+#define INSN(NAME, sf, sz)                                                \
+  void NAME(Register Rd, Register Rn, Register Rm) {                      \
+    starti;                                                               \
+    f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                     \
+  }
+
+  INSN(crc32b, 0, 0b00);
+  INSN(crc32h, 0, 0b01);
+  INSN(crc32w, 0, 0b10);
+  INSN(crc32x, 1, 0b11);
+
+#undef INSN
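+
+  // e.g., crc32b(r0, r0, r1) folds the low byte of r1 into the running
+  // CRC-32 accumulator in r0, while crc32x(r0, r0, r1) folds all eight
+  // bytes of r1 at once.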
+
+
+/* Simulator extensions to the ISA
+
+   haltsim
+
+   takes no arguments, causes the sim to enter a debug break and then
+   return from the simulator run() call with STATUS_HALT. The linking
+   code will call fatal() when it sees STATUS_HALT.
+
+   blrt Xn, Wm
+   blrt Xn, #gpargs, #fpargs, #type
+   Xn holds the 64 bit x86 branch_address
+   call format is encoded either as immediate data in the call
+   or in register Wm. In the latter case
+     Wm[13..6] = #gpargs,
+     Wm[5..2] = #fpargs,
+     Wm[1,0] = #type
+
+   calls the x86 code address 'branch_address' supplied in Xn passing
+   arguments taken from the general and floating point registers according
+   to the supplied counts 'gpargs' and 'fpargs'. may return a result in r0
+   or v0 according to the return type '#type', where
+
+   address branch_address;
+   uimm4 gpargs;
+   uimm4 fpargs;
+   enum ReturnType type;
+
+   enum ReturnType
+     {
+       void_ret = 0,
+       int_ret = 1,
+       long_ret = 1,
+       obj_ret = 1, // i.e. same as long
+       float_ret = 2,
+       double_ret = 3
+     }
+
+   notify
+
+   notifies the simulator of a transfer of control. instr[14:0]
+   identifies the type of change of control.
+
+   0 ==> initial entry to a method.
+
+   1 ==> return into a method from a submethod call.
+
+   2 ==> exit out of Java method code.
+
+   3 ==> start execution for a new bytecode.
+
+   in cases 1 and 2 the simulator is expected to use a JVM callback to
+   identify the name of the specific method being executed. in case 3
+   the simulator is expected to use a JVM callback to identify the
+   bytecode index.
+
+   Instruction encodings
+   ---------------------
+
+   These are encoded in the space with instr[28:25] = 00 which is
+   unallocated. Encodings are
+
+                     10987654321098765432109876543210
+   PSEUDO_HALT   = 0b11100000000000000000000000000000
+   PSEUDO_BLRT   = 0b11000000000000000_______________
+   PSEUDO_BLRTR  = 0b1100000000000000100000__________
+   PSEUDO_NOTIFY = 0b10100000000000000_______________
+
+   instr[31,29] = op1 : 111 ==> HALT, 110 ==> BLRT/BLRTR, 101 ==> NOTIFY
+
+   for BLRT
+     instr[14,11] = #gpargs, instr[10,7] = #fpargs
+     instr[6,5] = #type, instr[4,0] = Rn
+   for BLRTR
+     instr[9,5] = Rm, instr[4,0] = Rn
+   for NOTIFY
+     instr[14:0] = type : 0 ==> entry, 1 ==> reentry, 2 ==> exit, 3 ==> bcstart
+*/
+
+  enum NotifyType { method_entry, method_reentry, method_exit, bytecode_start };
+
+  virtual void notify(int type) {
+    if (UseBuiltinSim) {
+      starti;
+      //  109
+      f(0b101, 31, 29);
+      //  87654321098765
+      f(0b00000000000000, 28, 15);
+      f(type, 14, 0);
+    }
+  }
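+
+  // e.g., notify(method_entry) marks entry to a Java method for the
+  // built-in simulator; with UseBuiltinSim off it emits nothing.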
+
+  void blrt(Register Rn, int gpargs, int fpargs, int type) {
+    if (UseBuiltinSim) {
+      starti;
+      f(0b110, 31 ,29);
+      f(0b00, 28, 25);
+      //  4321098765
+      f(0b0000000000, 24, 15);
+      f(gpargs, 14, 11);
+      f(fpargs, 10, 7);
+      f(type, 6, 5);
+      rf(Rn, 0);
+    } else {
+      blr(Rn);
+    }
+  }
+
+  void blrt(Register Rn, Register Rm) {
+    if (UseBuiltinSim) {
+      starti;
+      f(0b110, 31 ,29);
+      f(0b00, 28, 25);
+      //  4321098765
+      f(0b0000000001, 24, 15);
+      //  43210
+      f(0b00000, 14, 10);
+      rf(Rm, 5);
+      rf(Rn, 0);
+    } else {
+      blr(Rn);
+    }
+  }
+
+  void haltsim() {
+    starti;
+    f(0b111, 31 ,29);
+    f(0b00, 28, 27);
+    //  654321098765432109876543210
+    f(0b000000000000000000000000000, 26, 0);
+  }
+
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+  }
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset) {
+    ShouldNotCallThis();
+    return RegisterOrConstant();
+  }
+
+  // Stack overflow checking
+  virtual void bang_stack_with_offset(int offset);
+
+  static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
+  static bool operand_valid_for_add_sub_immediate(long imm);
+  static bool operand_valid_for_float_immediate(double imm);
+
+  inline void emit_long64(jlong x);
+
+  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
+  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
+};
+
+inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
+                                             Assembler::Membar_mask_bits b) {
+  return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
+}
+
+Instruction_aarch64::~Instruction_aarch64() {
+  assem->emit();
+}
+
+#undef starti
+
+// Invert a condition
+inline const Assembler::Condition operator~(const Assembler::Condition cond) {
+  return Assembler::Condition(int(cond) ^ 1);
+}
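+
+// e.g., ~Assembler::EQ is Assembler::NE and ~Assembler::LT is
+// Assembler::GE: AArch64 condition codes pair up by flipping bit 0.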
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+  friend class LIR_Assembler;
+
+  using Assembler::mov;
+
+ protected:
+
+  // Support for VM calls
+  //
+  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+#ifdef CC_INTERP
+  // c++ interpreter never wants to use interp_masm version of call_VM
+  #define VIRTUAL
+#else
+  #define VIRTUAL virtual
+#endif
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments,        // the number of arguments to pop after the call
+    Label *retaddr = NULL
+  );
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments,        // the number of arguments to pop after the call
+    Label &retaddr) {
+    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
+  }
+
+  // This is the base routine called by the different versions of call_VM. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+  //
+  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
+  // returns the register which contains the thread upon return. If a thread register has been
+  // specified, the return value will correspond to that register. If no last_java_sp is specified
+  // (noreg) then rsp will be used instead.
+  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
+    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
+    Register java_thread,              // the thread if computed before     ; use noreg otherwise
+    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
+    address  entry_point,              // the entry point
+    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
+    bool     check_exceptions          // whether to check for pending exceptions after return
+  );
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
+
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) { }
+
+  // Biased locking support
+  // lock_reg and obj_reg must be loaded up with the appropriate values.
+  // swap_reg is killed.
+  // tmp_reg must be supplied and must not be rscratch1 or rscratch2
+  // Optional slow case is for implementations (interpreter and C1) which branch to
+  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
+  // Returns offset of first potentially-faulting instruction for null
+  // check info (currently consumed only by C1). If
+  // swap_reg_contains_mark is true then returns -1 as it is assumed
+  // the calling code has already passed any potential faults.
+  int biased_locking_enter(Register lock_reg, Register obj_reg,
+                           Register swap_reg, Register tmp_reg,
+                           bool swap_reg_contains_mark,
+                           Label& done, Label* slow_case = NULL,
+                           BiasedLockingCounters* counters = NULL);
+  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
+
+
+  // Helper functions for statistics gathering.
+  // Unconditional atomic increment.
+  void atomic_incw(Register counter_addr, Register tmp, Register tmp2);
+  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) {
+    lea(tmp1, counter_addr);
+    atomic_incw(tmp1, tmp2, tmp3);
+  }
+  // Load Effective Address
+  void lea(Register r, const Address &a) {
+    InstructionMark im(this);
+    // code_section()->relocate(inst_mark(), a.rspec());
+    relocate(inst_mark(), a.rspec());
+    a.lea(this, r);
+  }
+
+  void addmw(Address a, Register incr, Register scratch) {
+    ldrw(scratch, a);
+    addw(scratch, scratch, incr);
+    strw(scratch, a);
+  }
+
+  // Add constant to memory word
+  void addmw(Address a, int imm, Register scratch) {
+    ldrw(scratch, a);
+    if (imm > 0)
+      addw(scratch, scratch, (unsigned)imm);
+    else
+      subw(scratch, scratch, (unsigned)-imm);
+    strw(scratch, a);
+  }
+
+  // Frame creation and destruction shared between JITs.
+  void build_frame(int framesize);
+  void remove_frame(int framesize);
+
+  virtual void _call_Unimplemented(address call_site) {
+    mov(rscratch2, call_site);
+    haltsim();
+  }
+
+#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
+
+  virtual void notify(int type);
+
+  // aliases defined in AARCH64 spec
+
+
+  template<class T>
+  inline void  cmpw(Register Rd, T imm)  { subsw(zr, Rd, imm); }
+  inline void cmp(Register Rd, unsigned imm)  { subs(zr, Rd, imm); }
+
+  inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); }
+  inline void cmn(Register Rd, unsigned imm) { adds(zr, Rd, imm); }
+
+  void cset(Register Rd, Assembler::Condition cond) {
+    csinc(Rd, zr, zr, ~cond);
+  }
+
+  void csetw(Register Rd, Assembler::Condition cond) {
+    csincw(Rd, zr, zr, ~cond);
+  }
+
+  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
+    csneg(Rd, Rn, Rn, ~cond);
+  }
+  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
+    csnegw(Rd, Rn, Rn, ~cond);
+  }
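+
+  // e.g., cset(r0, Assembler::EQ) expands to csinc(r0, zr, zr, NE),
+  // i.e. r0 = (Z flag set) ? 1 : 0, matching the architectural alias.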
+
+  inline void movw(Register Rd, Register Rn) {
+    if (Rd == sp || Rn == sp) {
+      addw(Rd, Rn, 0U);
+    } else {
+      orrw(Rd, zr, Rn);
+    }
+  }
+  inline void mov(Register Rd, Register Rn) {
+    assert(Rd != r31_sp && Rn != r31_sp, "should be");
+    if (Rd == Rn) {
+    } else if (Rd == sp || Rn == sp) {
+      add(Rd, Rn, 0U);
+    } else {
+      orr(Rd, zr, Rn);
+    }
+  }
+
+  inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
+  inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }
+
+  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); }
+  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); }
+
+  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); }
+  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); }
+
+  inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
+  }
+  inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
+  }
+
+  inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    bfmw(Rd, Rn, lsb, (lsb + width - 1));
+  }
+  inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    bfm(Rd, Rn, lsb , (lsb + width - 1));
+  }
+
+  inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
+  }
+  inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
+  }
+
+  inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    sbfmw(Rd, Rn, lsb, (lsb + width - 1));
+  }
+  inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    sbfm(Rd, Rn, lsb , (lsb + width - 1));
+  }
+
+  inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
+  }
+  inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
+  }
+
+  inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    ubfmw(Rd, Rn, lsb, (lsb + width - 1));
+  }
+  inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
+    ubfm(Rd, Rn, lsb , (lsb + width - 1));
+  }
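+
+  // e.g., ubfx(r0, r1, 8, 8) becomes ubfm(r0, r1, 8, 15) and extracts
+  // bits [15:8] of r1 into the low byte of r0, while bfi(r0, r1, 8, 8)
+  // becomes bfm(r0, r1, 56, 7) and inserts the low byte of r1 into
+  // bits [15:8] of r0.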
+
+  inline void asrw(Register Rd, Register Rn, unsigned imm) {
+    sbfmw(Rd, Rn, imm, 31);
+  }
+
+  inline void asr(Register Rd, Register Rn, unsigned imm) {
+    sbfm(Rd, Rn, imm, 63);
+  }
+
+  inline void lslw(Register Rd, Register Rn, unsigned imm) {
+    ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm));
+  }
+
+  inline void lsl(Register Rd, Register Rn, unsigned imm) {
+    ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm));
+  }
+
+  inline void lsrw(Register Rd, Register Rn, unsigned imm) {
+    ubfmw(Rd, Rn, imm, 31);
+  }
+
+  inline void lsr(Register Rd, Register Rn, unsigned imm) {
+    ubfm(Rd, Rn, imm, 63);
+  }
+
+  inline void rorw(Register Rd, Register Rn, unsigned imm) {
+    extrw(Rd, Rn, Rn, imm);
+  }
+
+  inline void ror(Register Rd, Register Rn, unsigned imm) {
+    extr(Rd, Rn, Rn, imm);
+  }
+
+  inline void sxtbw(Register Rd, Register Rn) {
+    sbfmw(Rd, Rn, 0, 7);
+  }
+  inline void sxthw(Register Rd, Register Rn) {
+    sbfmw(Rd, Rn, 0, 15);
+  }
+  inline void sxtb(Register Rd, Register Rn) {
+    sbfm(Rd, Rn, 0, 7);
+  }
+  inline void sxth(Register Rd, Register Rn) {
+    sbfm(Rd, Rn, 0, 15);
+  }
+  inline void sxtw(Register Rd, Register Rn) {
+    sbfm(Rd, Rn, 0, 31);
+  }
+
+  inline void uxtbw(Register Rd, Register Rn) {
+    ubfmw(Rd, Rn, 0, 7);
+  }
+  inline void uxthw(Register Rd, Register Rn) {
+    ubfmw(Rd, Rn, 0, 15);
+  }
+  inline void uxtb(Register Rd, Register Rn) {
+    ubfm(Rd, Rn, 0, 7);
+  }
+  inline void uxth(Register Rd, Register Rn) {
+    ubfm(Rd, Rn, 0, 15);
+  }
+  inline void uxtw(Register Rd, Register Rn) {
+    ubfm(Rd, Rn, 0, 31);
+  }
+
+  inline void cmnw(Register Rn, Register Rm) {
+    addsw(zr, Rn, Rm);
+  }
+  inline void cmn(Register Rn, Register Rm) {
+    adds(zr, Rn, Rm);
+  }
+
+  inline void cmpw(Register Rn, Register Rm) {
+    subsw(zr, Rn, Rm);
+  }
+  inline void cmp(Register Rn, Register Rm) {
+    subs(zr, Rn, Rm);
+  }
+
+  inline void negw(Register Rd, Register Rn) {
+    subw(Rd, zr, Rn);
+  }
+
+  inline void neg(Register Rd, Register Rn) {
+    sub(Rd, zr, Rn);
+  }
+
+  inline void negsw(Register Rd, Register Rn) {
+    subsw(Rd, zr, Rn);
+  }
+
+  inline void negs(Register Rd, Register Rn) {
+    subs(Rd, zr, Rn);
+  }
+
+  inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
+    addsw(zr, Rn, Rm, kind, shift);
+  }
+  inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
+    adds(zr, Rn, Rm, kind, shift);
+  }
+
+  inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
+    subsw(zr, Rn, Rm, kind, shift);
+  }
+  inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
+    subs(zr, Rn, Rm, kind, shift);
+  }
+
+  inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
+    subw(Rd, zr, Rn, kind, shift);
+  }
+
+  inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
+    sub(Rd, zr, Rn, kind, shift);
+  }
+
+  inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
+    subsw(Rd, zr, Rn, kind, shift);
+  }
+
+  inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
+    subs(Rd, zr, Rn, kind, shift);
+  }
+
+  inline void mnegw(Register Rd, Register Rn, Register Rm) {
+    msubw(Rd, Rn, Rm, zr);
+  }
+  inline void mneg(Register Rd, Register Rn, Register Rm) {
+    msub(Rd, Rn, Rm, zr);
+  }
+
+  inline void mulw(Register Rd, Register Rn, Register Rm) {
+    maddw(Rd, Rn, Rm, zr);
+  }
+  inline void mul(Register Rd, Register Rn, Register Rm) {
+    madd(Rd, Rn, Rm, zr);
+  }
+
+  inline void smnegl(Register Rd, Register Rn, Register Rm) {
+    smsubl(Rd, Rn, Rm, zr);
+  }
+  inline void smull(Register Rd, Register Rn, Register Rm) {
+    smaddl(Rd, Rn, Rm, zr);
+  }
+
+  inline void umnegl(Register Rd, Register Rn, Register Rm) {
+    umsubl(Rd, Rn, Rm, zr);
+  }
+  inline void umull(Register Rd, Register Rn, Register Rm) {
+    umaddl(Rd, Rn, Rm, zr);
+  }
+
+#define WRAP(INSN)                                                            \
+  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {             \
+    if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_A53MAC) && Ra != zr) \
+      nop();                                                                  \
+    Assembler::INSN(Rd, Rn, Rm, Ra);                                          \
+  }
+
+  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
+  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
+#undef WRAP
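+  // Illustrative expansion of the guard above for madd (a sketch of what the
+  // macro generates; the CPU_A53MAC feature test comes from VM_Version):
+  //
+  //   void madd(Register Rd, Register Rn, Register Rm, Register Ra) {
+  //     if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_A53MAC) && Ra != zr)
+  //       nop();                       // separate the accumulating multiply
+  //     Assembler::madd(Rd, Rn, Rm, Ra);
+  //   }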
+
+  // macro assembly operations needed for aarch64
+
+  // first two private routines for loading 32 bit or 64 bit constants
+private:
+
+  void mov_immediate64(Register dst, u_int64_t imm64);
+  void mov_immediate32(Register dst, u_int32_t imm32);
+
+  int push(unsigned int bitset, Register stack);
+  int pop(unsigned int bitset, Register stack);
+
+  void mov(Register dst, Address a);
+
+public:
+  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
+  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
+
+  // Push and pop everything that might be clobbered by a native
+  // runtime call except rscratch1 and rscratch2.  (They are always
+  // scratch, so we don't have to protect them.)  Only save the lower
+  // 64 bits of each vector register.
+  void push_call_clobbered_registers();
+  void pop_call_clobbered_registers();
+
+  // now mov instructions for loading absolute addresses and 32 or
+  // 64 bit integers
+
+  inline void mov(Register dst, address addr)
+  {
+    mov_immediate64(dst, (u_int64_t)addr);
+  }
+
+  inline void mov(Register dst, u_int64_t imm64)
+  {
+    mov_immediate64(dst, imm64);
+  }
+
+  inline void movw(Register dst, u_int32_t imm32)
+  {
+    mov_immediate32(dst, imm32);
+  }
+
+  inline void mov(Register dst, long l)
+  {
+    mov(dst, (u_int64_t)l);
+  }
+
+  inline void mov(Register dst, int i)
+  {
+    mov(dst, (long)i);
+  }
+
+  void movptr(Register r, uintptr_t imm64);
+
+public:
+
+  // Generalized Test Bit And Branch, including a "far" variety which
+  // spans more than 32KiB.
+  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) {
+    assert(cond == EQ || cond == NE, "must be");
+
+    if (far)
+      cond = ~cond;
+
+    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
+    if (cond == Assembler::EQ)
+      branch = &Assembler::tbz;
+    else
+      branch = &Assembler::tbnz;
+
+    if (far) {
+      Label L;
+      (this->*branch)(Rt, bitpos, L);
+      b(dest);
+      bind(L);
+    } else {
+      (this->*branch)(Rt, bitpos, dest);
+    }
+  }
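+  // e.g. (informal usage sketch; labels and registers are illustrative):
+  //   tbr(Assembler::EQ, r2, 5, L_zero);         // emits: tbz r2, #5, L_zero
+  //   tbr(Assembler::NE, r2, 5, L_far, true);    // far form: tbz over an unconditional b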
+
+  // macro instructions for accessing and updating floating point
+  // status register
+  //
+  // FPSR : op1 == 011
+  //        CRn == 0100
+  //        CRm == 0100
+  //        op2 == 001
+
+  inline void get_fpsr(Register reg)
+  {
+    mrs(0b011, 0b0100, 0b0100, 0b001, reg);
+  }
+
+  inline void set_fpsr(Register reg)
+  {
+    msr(0b011, 0b0100, 0b0100, 0b001, reg);
+  }
+
+  inline void clear_fpsr()
+  {
+    msr(0b011, 0b0100, 0b0100, 0b001, zr);
+  }
+
+  // DCZID_EL0: op1 == 011
+  //            CRn == 0000
+  //            CRm == 0000
+  //            op2 == 111
+  inline void get_dczid_el0(Register reg)
+  {
+    mrs(0b011, 0b0000, 0b0000, 0b111, reg);
+  }
+
+  // CTR_EL0:   op1 == 011
+  //            CRn == 0000
+  //            CRm == 0000
+  //            op2 == 001
+  inline void get_ctr_el0(Register reg)
+  {
+    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
+  }
+
+  // idiv variant which deals with MINLONG as dividend and -1 as divisor
+  int corrected_idivl(Register result, Register ra, Register rb,
+                      bool want_remainder, Register tmp = rscratch1);
+  int corrected_idivq(Register result, Register ra, Register rb,
+                      bool want_remainder, Register tmp = rscratch1);
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+
+  virtual void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+
+  static address target_addr_for_insn(address insn_addr, unsigned insn);
+  static address target_addr_for_insn(address insn_addr) {
+    unsigned insn = *(unsigned*)insn_addr;
+    return target_addr_for_insn(insn_addr, insn);
+  }
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+  static int pd_patch_instruction_size (address branch, address target);
+  static void pd_patch_instruction(address branch, address target) {
+    pd_patch_instruction_size (branch, target);
+  }
+  static address pd_call_destination(address branch) {
+    return target_addr_for_insn(branch);
+  }
+#ifndef PRODUCT
+  static void pd_print_patched_instruction(address branch);
+#endif
+
+  static int patch_oop(address insn_addr, address o);
+
+  void emit_trampoline_stub(int insts_call_instruction_offset, address target);
+
+  // The following 4 methods return the offset of the appropriate move instruction
+
+  // Support for fast byte/short loading with zero extension (depending on particular CPU)
+  int load_unsigned_byte(Register dst, Address src);
+  int load_unsigned_short(Register dst, Address src);
+
+  // Support for fast byte/short loading with sign extension (depending on particular CPU)
+  int load_signed_byte(Register dst, Address src);
+  int load_signed_short(Register dst, Address src);
+
+  int load_signed_byte32(Register dst, Address src);
+  int load_signed_short32(Register dst, Address src);
+
+  // Support for sign-extension (hi:lo = extend_sign(lo))
+  void extend_sign(Register hi, Register lo);
+
+  // Load and store values by size and signed-ness
+  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+  // Support for inc/dec with optimal instruction selection depending on value
+
+  // x86_64 aliases an unqualified register/address increment and
+  // decrement to call incrementq and decrementq but also supports
+  // explicitly sized calls to incrementq/decrementq or
+  // incrementl/decrementl
+
+  // for aarch64 the proper convention would be to use
+  // increment/decrement for 64 bit operations and
+  // incrementw/decrementw for 32 bit operations. so when porting
+  // x86_64 code we can leave calls to increment/decrement as is,
+  // replace incrementq/decrementq with increment/decrement and
+  // replace incrementl/decrementl with incrementw/decrementw.
+
+  // n.b. increment/decrement calls with an Address destination will
+  // need to use a scratch register to load the value to be
+  // incremented. increment/decrement calls which add or subtract a
+  // constant value greater than 2^12 will need to use a 2nd scratch
+  // register to hold the constant. so, a register increment/decrement
+  // may trash rscratch2 and an address increment/decrement may trash
+  // rscratch1 and rscratch2.
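+  // e.g. when porting (illustrative; the x86 and aarch64 register names are
+  // placeholders):
+  //   incrementl(rax, 8)  ->  incrementw(r0, 8)   // 32 bit
+  //   incrementq(rax, 8)  ->  increment(r0, 8)    // 64 bit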
+
+  void decrementw(Address dst, int value = 1);
+  void decrementw(Register reg, int value = 1);
+
+  void decrement(Register reg, int value = 1);
+  void decrement(Address dst, int value = 1);
+
+  void incrementw(Address dst, int value = 1);
+  void incrementw(Register reg, int value = 1);
+
+  void increment(Register reg, int value = 1);
+  void increment(Address dst, int value = 1);
+
+
+  // Alignment
+  void align(int modulus);
+
+  // Stack frame creation/removal
+  void enter()
+  {
+    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+    mov(rfp, sp);
+  }
+  void leave()
+  {
+    mov(sp, rfp);
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+  }
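+  // The pair above produces the standard AArch64 frame link, roughly:
+  //   enter():  stp rfp, lr, [sp, #-16]!   then   mov rfp, sp
+  //   leave():  mov sp, rfp                then   ldp rfp, lr, [sp], #16
+  // (assembly shown only as an informal sketch of the generated code)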
+
+  // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
+  // The pointer will be loaded into the thread register.
+  void get_thread(Register thread);
+
+
+  // Support for VM calls
+  //
+  // It is imperative that all calls into the VM are handled via the call_VM macros.
+  // They make sure that the stack linkage is setup correctly. call_VM's correspond
+  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+
+  void call_VM(Register oop_result,
+               address entry_point,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  // Overloadings with last_Java_sp
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               int number_of_arguments = 0,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  // !!! FIXME AARCH64 -- sparc has this but x86 does not !!!
+  void get_vm_result  (Register oop_result, Register thread);
+  void get_vm_result_2(Register metadata_result, Register thread);
+
+  // These always tightly bind to MacroAssembler::call_VM_base
+  // bypassing the virtual implementation
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
+
+  void call_VM_leaf(address entry_point,
+                    int number_of_arguments = 0);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2, Register arg_3);
+
+  // These always tightly bind to MacroAssembler::call_VM_leaf_base
+  // bypassing the virtual implementation
+  void super_call_VM_leaf(address entry_point);
+  void super_call_VM_leaf(address entry_point, Register arg_1);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
+
+  // last Java Frame (fills frame anchor)
+  void set_last_Java_frame(Register last_java_sp,
+                           Register last_java_fp,
+                           address last_java_pc,
+                           Register scratch);
+
+  void set_last_Java_frame(Register last_java_sp,
+                           Register last_java_fp,
+                           Label &last_java_pc,
+                           Register scratch);
+
+  void set_last_Java_frame(Register last_java_sp,
+                           Register last_java_fp,
+                           Register last_java_pc,
+                           Register scratch);
+
+  void reset_last_Java_frame(Register thread, bool clearfp, bool clear_pc);
+
+  // thread in the default location (r15_thread on 64bit)
+  void reset_last_Java_frame(bool clear_fp, bool clear_pc);
+
+  // Stores
+  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
+  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
+
+#ifndef SERIALGC
+
+  void g1_write_barrier_pre(Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+#endif // SERIALGC
+
+  // split store_check(Register obj) to enhance instruction interleaving
+  void store_check_part_1(Register obj);
+  void store_check_part_2(Register obj);
+
+  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
+  void c2bool(Register x);
+
+  // oop manipulations
+  void load_klass(Register dst, Register src);
+  void store_klass(Register dst, Register src);
+  void cmp_klass(Register oop, Register trial_klass, Register tmp);
+
+  void load_heap_oop(Register dst, Address src);
+
+  void load_heap_oop_not_null(Register dst, Address src);
+  void store_heap_oop(Address dst, Register src);
+
+  // Used for storing NULL. All other oop constants should be
+  // stored using routines that take a jobject.
+  void store_heap_oop_null(Address dst);
+
+  void load_prototype_header(Register dst, Register src);
+
+  void store_klass_gap(Register dst, Register src);
+
+  // This dummy is to prevent a call to store_heap_oop from
+  // converting a zero (like NULL) into a Register by giving
+  // the compiler two choices it can't resolve
+
+  void store_heap_oop(Address dst, void* dummy);
+
+  void encode_heap_oop(Register d, Register s);
+  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
+  void decode_heap_oop(Register d, Register s);
+  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
+  void encode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register r);
+  void encode_heap_oop_not_null(Register dst, Register src);
+  void decode_heap_oop_not_null(Register dst, Register src);
+
+  void set_narrow_oop(Register dst, jobject obj);
+
+  // if heap base register is used - reinit it with the correct value
+  void reinit_heapbase();
+
+  DEBUG_ONLY(void verify_heapbase(const char* msg);)
+
+  void push_CPU_state();
+  void pop_CPU_state() ;
+
+  // Round up to a power of two
+  void round_to(Register reg, int modulus);
+
+  // unimplemented
+#if 0
+  // Callee saved registers handling
+  void push_callee_saved_registers();
+  void pop_callee_saved_registers();
+#endif
+
+  // unimplemented
+
+  // allocation
+  void eden_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+  void verify_tlab();
+
+  void incr_allocated_bytes(Register thread,
+                            Register var_size_in_bytes, int con_size_in_bytes,
+                            Register t1 = noreg);
+
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register method_result,
+                               Register scan_temp,
+                               Label& no_such_interface);
+
+  // virtual method calling
+  // n.b. x86 allows RegisterOrConstant for vtable_index
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg and temp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
+  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     bool set_cond_codes = false);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success);
+
+  // unimplemented
+#if 0
+  // method handles (JSR 292)
+  void check_method_handle_type(Register mtype_reg, Register mh_reg,
+                                Register temp_reg,
+                                Label& wrong_method_type);
+  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
+                                  Register temp_reg);
+  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
+#endif
+  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+
+  //----
+#if 0
+  // method handles (JSR 292)
+  void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
+#endif
+
+  // Debugging
+
+  // only if +VerifyOops
+  void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(Address addr, const char * s = "broken oop addr");
+
+  // only if +VerifyFPU
+  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
+
+  // prints msg, dumps registers and stops execution
+  void stop(const char* msg);
+
+  // prints msg and continues
+  void warn(const char* msg);
+
+  static void debug64(char* msg, int64_t pc, int64_t regs[]);
+
+  // unimplemented
+#if 0
+  void os_breakpoint();
+#endif
+
+  void untested()                                { stop("untested"); }
+
+  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
+
+  void should_not_reach_here()                   { stop("should not reach here"); }
+
+  // unimplemented
+#if 0
+  void print_CPU_state();
+#endif
+
+  // Stack overflow checking
+  void bang_stack_with_offset(int offset) {
+    // stack grows down, caller passes positive offset
+    assert(offset > 0, "must bang with negative offset");
+    mov(rscratch2, -offset);
+    str(zr, Address(sp, rscratch2));
+  }
+
+  // Writes to stack successive pages until offset reached to check for
+  // stack overflow + shadow pages.  Also, clobbers tmp
+  void bang_stack_size(Register size, Register tmp);
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset);
+
+  // Support for serializing memory accesses between threads
+  void serialize_memory(Register thread, Register tmp);
+
+  // Arithmetics
+
+  void addptr(const Address &dst, int32_t src);
+
+  // unimplemented
+#if 0
+  void addptr(Address dst, Register src);
+#endif
+
+  void addptr(Register dst, Address src) { Unimplemented(); }
+  // unimplemented
+#if 0
+  void addptr(Register dst, int32_t src);
+  void addptr(Register dst, Register src);
+#endif
+  void addptr(Register dst, RegisterOrConstant src) { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  void andptr(Register dst, int32_t src);
+#endif
+  void andptr(Register src1, Register src2) { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  // renamed to drag out the casting of address to int32_t/intptr_t
+  void cmp32(Register src1, int32_t imm);
+
+  void cmp32(Register src1, Address src2);
+#endif
+
+  void cmpptr(Register src1, Register src2) { Unimplemented(); }
+  void cmpptr(Register src1, Address src2);
+  // void cmpptr(Address src1, Register src2) { Unimplemented(); }
+
+  void cmpptr(Register src1, int32_t src2) { Unimplemented(); }
+  void cmpptr(Address src1, int32_t src2) { Unimplemented(); }
+
+  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
+                  Label &succeed, Label *fail);
+
+  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
+                  Label &succeed, Label *fail);
+
+  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
+
+  void atomic_xchg(Register prev, Register newv, Register addr);
+  void atomic_xchgw(Register prev, Register newv, Register addr);
+
+  void imulptr(Register dst, Register src) { Unimplemented(); }
+
+
+  void negptr(Register dst) { Unimplemented(); }
+
+  void notptr(Register dst) { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  void shlptr(Register dst, int32_t shift);
+#endif
+  void shlptr(Register dst) { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  void shrptr(Register dst, int32_t shift);
+#endif
+  void shrptr(Register dst) { Unimplemented(); }
+
+  void sarptr(Register dst) { Unimplemented(); }
+  void sarptr(Register dst, int32_t src) { Unimplemented(); }
+
+  void subptr(Address dst, int32_t src) { Unimplemented(); }
+
+  void subptr(Register dst, Address src) { Unimplemented(); }
+  // unimplemented
+#if 0
+  void subptr(Register dst, int32_t src);
+  // Force generation of a 4 byte immediate value even if it fits into 8bit
+  void subptr_imm32(Register dst, int32_t src);
+  void subptr(Register dst, Register src);
+#endif
+  void subptr(Register dst, RegisterOrConstant src) { Unimplemented(); }
+
+  void sbbptr(Address dst, int32_t src) { Unimplemented(); }
+  void sbbptr(Register dst, int32_t src) { Unimplemented(); }
+
+  void xchgptr(Register src1, Register src2) { Unimplemented(); }
+  void xchgptr(Register src1, Address src2) { Unimplemented(); }
+
+  void xaddptr(Address src1, Register src2) { Unimplemented(); }
+
+
+
+  // unimplemented
+#if 0
+
+  // Perhaps we should implement this one
+  void lea(Register dst, Address adr) { Unimplemented(); }
+
+  void leal32(Register dst, Address src) { Unimplemented(); }
+
+  void orptr(Register dst, Address src) { Unimplemented(); }
+  void orptr(Register dst, Register src) { Unimplemented(); }
+  void orptr(Register dst, int32_t src) { Unimplemented(); }
+
+  void testptr(Register src, int32_t imm32) {  Unimplemented(); }
+  void testptr(Register src1, Register src2);
+
+  void xorptr(Register dst, Register src) { Unimplemented(); }
+  void xorptr(Register dst, Address src) { Unimplemented(); }
+#endif
+
+  // Calls
+
+  void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
+
+  static bool far_branches() {
+    return ReservedCodeCacheSize > branch_range;
+  }
+
+  // Jumps that can reach anywhere in the code cache.
+  // Trashes tmp.
+  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+
+  static int far_branch_size() {
+    if (far_branches()) {
+      return 3 * 4;  // adrp, add, br
+    } else {
+      return 4;
+    }
+  }
+
+  // Emit the CompiledIC call idiom
+  void ic_call(address entry);
+
+  // Jumps
+
+  // unimplemented
+#if 0
+  // NOTE: these jumps transfer to the effective address of dst NOT
+  // the address contained by dst, because that is more natural
+  // for jumps/calls.
+  void jump(Address dst);
+  void jump_cc(Condition cc, Address dst);
+#endif
+
+  // Floating
+
+  void fadd_s(Address src)        { Unimplemented(); }
+
+  void fldcw(Address src) { Unimplemented(); }
+
+  void fld_s(int index)   { Unimplemented(); }
+  void fld_s(Address src) { Unimplemented(); }
+
+  void fld_d(Address src) { Unimplemented(); }
+
+  void fld_x(Address src) { Unimplemented(); }
+
+  void fmul_s(Address src)        { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  // compute pow(x,y) and exp(x) with x86 instructions. These don't cover
+  // all corner cases and may result in NaN, requiring a fallback to a
+  // runtime call.
+  void fast_pow();
+  void fast_exp();
+#endif
+
+  // computes exp(x). Fallback to runtime call included.
+  void exp_with_fallback(int num_fpu_regs_in_use) { Unimplemented(); }
+  // computes pow(x,y). Fallback to runtime call included.
+  void pow_with_fallback(int num_fpu_regs_in_use) { Unimplemented(); }
+
+public:
+
+  // Data
+
+  Address constant_oop_address(jobject obj);
+  // unimplemented
+#if 0
+  void pushoop(jobject obj);
+#endif
+
+  void movoop(Register dst, jobject obj, bool immediate = false);
+
+  // sign extend as needed from a 32-bit value (l) to a pointer-sized element
+  void movl2ptr(Register dst, Address src) { Unimplemented(); }
+  void movl2ptr(Register dst, Register src) { Unimplemented(); }
+
+  // unimplemented
+#if 0
+  // C2 compiled method's prolog code.
+  void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
+#endif
+
+  // !!! FIXME AARCH64 -- this is not yet in x86 !!!
+  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
+  void kernel_crc32(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3);
+
+#undef VIRTUAL
+
+  // Stack push and pop individual 64 bit registers
+  void push(Register src);
+  void pop(Register dst);
+
+  // push all registers onto the stack
+  void pusha();
+  void popa();
+
+  void repne_scan(Register addr, Register value, Register count,
+                  Register scratch);
+  void repne_scanw(Register addr, Register value, Register count,
+                   Register scratch);
+
+  typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm);
+  typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift);
+
+  // If a constant does not fit in an immediate field, generate some
+  // number of MOV instructions and then perform the operation
+  void wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm,
+                             add_sub_imm_insn insn1,
+                             add_sub_reg_insn insn2);
+  // Separate version which sets the flags
+  void wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm,
+                             add_sub_imm_insn insn1,
+                             add_sub_reg_insn insn2);
+
+#define WRAP(INSN)                                                      \
+  void INSN(Register Rd, Register Rn, unsigned imm) {                   \
+    wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm,                      \
+             enum shift_kind kind, unsigned shift = 0) {                \
+    Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm) {                    \
+    Assembler::INSN(Rd, Rn, Rm);                                        \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
+  }
+
+  WRAP(add) WRAP(addw) WRAP(sub) WRAP(subw)
+
+#undef WRAP
+#define WRAP(INSN)                                                      \
+  void INSN(Register Rd, Register Rn, unsigned imm) {                   \
+    wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm,                      \
+             enum shift_kind kind, unsigned shift = 0) {                \
+    Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm) {                    \
+    Assembler::INSN(Rd, Rn, Rm);                                        \
+  }                                                                     \
+                                                                        \
+  void INSN(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
+  }
+
+  WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw)
+
+  void add(Register Rd, Register Rn, RegisterOrConstant increment);
+  void addw(Register Rd, Register Rn, RegisterOrConstant increment);
+  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
+  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);
+
+  void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
+
+  void tableswitch(Register index, jint lowbound, jint highbound,
+                   Label &jumptable, Label &jumptable_end, int stride = 1) {
+    adr(rscratch1, jumptable);
+    subsw(rscratch2, index, lowbound);
+    subsw(zr, rscratch2, highbound - lowbound);
+    br(Assembler::HS, jumptable_end);
+    add(rscratch1, rscratch1, rscratch2,
+        ext::sxtw, exact_log2(stride * Assembler::instruction_size));
+    br(rscratch1);
+  }
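+  // Hedged usage sketch (labels and the key range are illustrative): emit the
+  // dispatch above, then lay out (highbound - lowbound) branch slots at the
+  // jumptable label, each 'stride' instructions wide:
+  //   Label table, done;
+  //   tableswitch(r0, 0, 4, table, done);
+  //   bind(table);
+  //   ...one branch instruction per key in [0, 4)...
+  //   bind(done);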
+
+  // Form an address from base + offset in Rd.  Rd may or may not
+  // actually be used: you must use the Address that is returned.  It
+  // is up to you to ensure that the shift provided matches the size
+  // of your data.
+  Address form_address(Register Rd, Register base, long byte_offset, int shift);
+
+  // Return true iff an address is within the 48-bit AArch64 address
+  // space.
+  bool is_valid_AArch64_address(address a) {
+    return ((uint64_t)a >> 48) == 0;
+  }
+
+  // Load the base of the cardtable byte map into reg.
+  void load_byte_map_base(Register reg);
+
+  // Prolog generator routines to support switch between x86 code and
+  // generated ARM code
+
+  // routine to generate an x86 prolog for a stub function which
+  // bootstraps into the generated ARM code which directly follows the
+  // stub
+  //
+
+  public:
+  // enum used for aarch64--x86 linkage to define return type of x86 function
+  enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double};
+
+#ifdef BUILTIN_SIM
+  void c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type, address *prolog_ptr = NULL);
+#else
+  void c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type) { }
+#endif
+
+  // special version of call_VM_leaf_base needed for aarch64 simulator
+  // where we need to specify both the gp and fp arg counts and the
+  // return type so that the linkage routine from aarch64 to x86 and
+  // back knows which aarch64 registers to copy to x86 registers and
+  // which x86 result register to copy back to an aarch64 register
+
+  void call_VM_leaf_base1(
+    address  entry_point,             // the entry point
+    int      number_of_gp_arguments,  // the number of gp reg arguments to pass
+    int      number_of_fp_arguments,  // the number of fp reg arguments to pass
+    ret_type type,                    // the return type for the call
+    Label*   retaddr = NULL
+  );
+
+  void ldr_constant(Register dest, const Address &const_addr) {
+    if (NearCpool) {
+      ldr(dest, const_addr);
+    } else {
+      unsigned long offset;
+      adrp(dest, InternalAddress(const_addr.target()), offset);
+      ldr(dest, Address(dest, offset));
+    }
+  }
+
+  address read_polling_page(Register r, address page, relocInfo::relocType rtype);
+  address read_polling_page(Register r, relocInfo::relocType rtype);
+
+  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
+  void update_byte_crc32(Register crc, Register val, Register table);
+  void update_word_crc32(Register crc, Register v, Register tmp,
+        Register table0, Register table1, Register table2, Register table3,
+        bool upper = false);
+
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      Register tmp1, Register tmp2,
+                      Register tmp3, Register tmp4,
+                      int int_cnt1, Register result);
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      Register tmp1);
+  void string_equals(Register str1, Register str2,
+                     Register cnt, Register result,
+                     Register tmp1);
+  void char_arrays_equals(Register ary1, Register ary2,
+                          Register result, Register tmp1);
+  void fill_words(Register base, Register cnt, Register value);
+  void zero_words(Register base, u_int64_t cnt);
+  void zero_words(Register base, Register cnt);
+  void block_zero(Register base, Register cnt, bool is_large = false);
+
+  // ISB may be needed because of a safepoint
+  void maybe_isb() { isb(); }
+};
+
+#ifdef ASSERT
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which is checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+  MacroAssembler* _masm;
+  Label _label;
+
+ public:
+   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+   ~SkipIfEqual();
+};
+
+struct tableswitch {
+  Register _reg;
+  int _insn_index; jint _first_key; jint _last_key;
+  Label _after;
+  Label _branches;
+};
+
+extern "C" void das(uint64_t start, int len);
+
+#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+inline void Assembler::emit_long64(jlong x) {
+  *(jlong*) _code_pos = x;
+  _code_pos += sizeof(jlong);
+  code_section()->set_end(_code_pos);
+}
+
+#ifndef PRODUCT
+inline void MacroAssembler::pd_print_patched_instruction(address branch) { Unimplemented(); }
+#endif // ndef PRODUCT
+
+#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecodeInterpreter.hpp"
+#include "interpreter/bytecodeInterpreter.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#ifdef TARGET_ARCH_aarch64
+# include "interp_masm_aarch64.hpp"
+#endif
+
+#ifdef CC_INTERP
+
+#endif // CC_INTERP (all)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
+
+// Platform specific for C++ based Interpreter
+
+private:
+
+    interpreterState _self_link;          /*  Previous interpreter state  */ /* sometimes points to self??? */
+    address   _result_handler;            /* temp for saving native result handler */
+    intptr_t* _sender_sp;                 /* sender's sp before stack (locals) extension */
+
+    address   _extra_junk1;               /* temp to save on recompiles */
+    address   _extra_junk2;               /* temp to save on recompiles */
+    address   _extra_junk3;               /* temp to save on recompiles */
+    // address dummy_for_native2;         /* a native frame result handler would be here... */
+    // address dummy_for_native1;         /* native result type stored here in a interpreter native frame */
+    address   _extra_junk4;               /* temp to save on recompiles */
+    address   _extra_junk5;               /* temp to save on recompiles */
+    address   _extra_junk6;               /* temp to save on recompiles */
+public:
+                                                         // we have an interpreter frame...
+inline intptr_t* sender_sp() {
+  return _sender_sp;
+}
+
+// The interpreter always has the frame anchor fully set up, so we don't
+// have to do anything when going to the VM from the interpreter. On return
+// we do have to clear the flags in case they were modified, to
+// maintain the stack walking invariants.
+//
+#define SET_LAST_JAVA_FRAME()
+
+#define RESET_LAST_JAVA_FRAME()
+
+/*
+ * Macros for accessing the stack.
+ */
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_ADDR
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+
+// JavaStack Implementation
+
+#define GET_STACK_SLOT(offset)    (*((intptr_t*) &topOfStack[-(offset)]))
+#define STACK_SLOT(offset)    ((address) &topOfStack[-(offset)])
+#define STACK_ADDR(offset)    (*((address *) &topOfStack[-(offset)]))
+#define STACK_INT(offset)     (*((jint*) &topOfStack[-(offset)]))
+#define STACK_FLOAT(offset)   (*((jfloat *) &topOfStack[-(offset)]))
+#define STACK_OBJECT(offset)  (*((oop *) &topOfStack [-(offset)]))
+#define STACK_DOUBLE(offset)  (((VMJavaVal64*) &topOfStack[-(offset)])->d)
+#define STACK_LONG(offset)    (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
+
+#define SET_STACK_SLOT(value, offset)   (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
+#define SET_STACK_ADDR(value, offset)   (*((address *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_INT(value, offset)    (*((jint *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_FLOAT(value, offset)  (*((jfloat *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d =  \
+                                                 ((VMJavaVal64*)(addr))->d)
+#define SET_STACK_LONG(value, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l =  \
+                                                 ((VMJavaVal64*)(addr))->l)
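+// e.g. (informal sketch of the macros above): with topOfStack pointing at the
+// current expression stack top, SET_STACK_INT(res, 1) stores a jint into the
+// slot at topOfStack[-1], and STACK_LONG(2) reads a 64-bit value through the
+// VMJavaVal64 overlay at &topOfStack[-2].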
+// JavaLocals implementation
+
+#define LOCALS_SLOT(offset)    ((intptr_t*)&locals[-(offset)])
+#define LOCALS_ADDR(offset)    ((address)locals[-(offset)])
+#define LOCALS_INT(offset)     ((jint)(locals[-(offset)]))
+#define LOCALS_FLOAT(offset)   (*((jfloat*)&locals[-(offset)]))
+#define LOCALS_OBJECT(offset)  (cast_to_oop(locals[-(offset)]))
+#define LOCALS_DOUBLE(offset)  (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+#define LOCALS_LONG(offset)    (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
+
+#define SET_LOCALS_SLOT(value, offset)    (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
+#define SET_LOCALS_ADDR(value, offset)    (*((address *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_INT(value, offset)     (*((jint *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_FLOAT(value, offset)   (*((jfloat *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_OBJECT(value, offset)  (*((oop *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_DOUBLE(value, offset)  (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
+#define SET_LOCALS_LONG(value, offset)    (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
+                                                  ((VMJavaVal64*)(addr))->d)
+#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
+                                                ((VMJavaVal64*)(addr))->l)
+
+#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
+
+// Inline interpreter functions for AArch64
+
+inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; }
+inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; }
+inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; }
+inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; }
+inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); }
+
+inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; }
+
+inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+
+}
+
+inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) {
+  // x86 can do unaligned copies but not 64bits at a time
+  to[0] = from[0]; to[1] = from[1];
+}
+
+// The long operations depend on compiler support for "long long" on x86
+
+inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
+  return op1 + op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
+  return op1 & op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
+  // QQQ what about check and throw...
+  return op1 / op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
+  return op1 * op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
+  return op1 | op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
+  return op1 - op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
+  return op1 ^ op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
+  return op1 % op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
+  // CVM did this 0x3f mask; is this really needed??? QQQ
+  return ((unsigned long long) op1) >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
+  return op1 >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
+  return op1 << (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
+  return -op;
+}
+
+inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
+  return ~op;
+}
+
+inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) {
+  return (op <= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGez(jlong op) {
+  return (op >= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) {
+  return (op == 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) {
+  return (op1 == op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) {
+  return (op1 != op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) {
+  return (op1 >= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) {
+  return (op1 <= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) {
+  return (op1 < op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) {
+  return (op1 > op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) {
+  return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0);
+}
+
+// Long conversions
+
+inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) {
+  return (jfloat) val;
+}
+
+inline jint BytecodeInterpreter::VMlong2Int(jlong val) {
+  return (jint) val;
+}
+
+// Double Arithmetic
+
+inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
+  return op1 + op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
+  // Divide by zero... QQQ
+  return op1 / op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) {
+  return op1 * op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) {
+  return -op;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) {
+  return fmod(op1, op2);
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) {
+  return op1 - op2;
+}
+
+inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+}
+
+// Double Conversions
+
+inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) {
+  return (jfloat) val;
+}
+
+// Float Conversions
+
+inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
+  return (jdouble) op;
+}
+
+// Integer Arithmetic
+
+inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
+  return op1 + op2;
+}
+
+inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
+  return op1 & op2;
+}
+
+inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
+  else return op1 / op2;
+}
+
+inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
+  return op1 * op2;
+}
+
+inline jint BytecodeInterpreter::VMintNeg(jint op) {
+  return -op;
+}
+
+inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
+  return op1 | op2;
+}
+
+inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
+  else return op1 % op2;
+}
+
+inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
+  return op1 << op2;
+}
+
+inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
+  return op1 >> (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
+  return op1 - op2;
+}
+
+inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
+  return ((juint) op1) >> (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
+  return op1 ^ op2;
+}
+
+inline jdouble BytecodeInterpreter::VMint2Double(jint val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMint2Float(jint val) {
+  return (jfloat) val;
+}
+
+inline jlong BytecodeInterpreter::VMint2Long(jint val) {
+  return (jlong) val;
+}
+
+inline jchar BytecodeInterpreter::VMint2Char(jint val) {
+  return (jchar) val;
+}
+
+inline jshort BytecodeInterpreter::VMint2Short(jint val) {
+  return (jshort) val;
+}
+
+inline jbyte BytecodeInterpreter::VMint2Byte(jint val) {
+  return (jbyte) val;
+}
+
+#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodes_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/bytecodes.hpp"
+
+
+void Bytecodes::pd_initialize() {
+  // No aarch64 specific initialization
+}
+
+
+Bytecodes::Code Bytecodes::pd_base_code_for(Code code) {
+  // No aarch64 specific bytecodes
+  return code;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodes_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
+
+// No aarch64 specific bytecodes
+
+#endif // CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytes_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTES_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTES_AARCH64_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Returns true if the byte ordering used by Java is different from the native byte ordering
+  // of the underlying machine. For example, this is true for Intel x86, but false for Solaris
+  // on Sparc.
+  static inline bool is_Java_byte_ordering_different(){ return true; }
+
+
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // (no special code is needed since AArch64 CPUs can access unaligned data)
+  static inline u2   get_native_u2(address p)         { return *(u2*)p; }
+  static inline u4   get_native_u4(address p)         { return *(u4*)p; }
+  static inline u8   get_native_u8(address p)         { return *(u8*)p; }
+
+  static inline void put_native_u2(address p, u2 x)   { *(u2*)p = x; }
+  static inline void put_native_u4(address p, u4 x)   { *(u4*)p = x; }
+  static inline void put_native_u8(address p, u8 x)   { *(u8*)p = x; }
+
+
+  // Efficient reading and writing of unaligned unsigned data in Java
+  // byte ordering (i.e. big-endian ordering). Byte-order reversal is
+  // needed since AArch64 runs little-endian.
+  static inline u2   get_Java_u2(address p)           { return swap_u2(get_native_u2(p)); }
+  static inline u4   get_Java_u4(address p)           { return swap_u4(get_native_u4(p)); }
+  static inline u8   get_Java_u8(address p)           { return swap_u8(get_native_u8(p)); }
+
+  static inline void put_Java_u2(address p, u2 x)     { put_native_u2(p, swap_u2(x)); }
+  static inline void put_Java_u4(address p, u4 x)     { put_native_u4(p, swap_u4(x)); }
+  static inline void put_Java_u8(address p, u8 x)     { put_native_u8(p, swap_u8(x)); }
+
+
+  // Efficient swapping of byte ordering
+  static inline u2   swap_u2(u2 x);                   // compiler-dependent implementation
+  static inline u4   swap_u4(u4 x);                   // compiler-dependent implementation
+  static inline u8   swap_u8(u8 x);
+};
+
+
+// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base]
+
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "bytes_linux_aarch64.inline.hpp"
+#endif
+
+#endif // CPU_AARCH64_VM_BYTES_AARCH64_HPP
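For intuition about the Java-ordering accessors above: class-file data is big-endian, so on a little-endian AArch64 host get_Java_u2 is a plain unaligned load followed by a byte swap. A self-contained sketch of that round trip, using portable shifts and memcpy instead of the platform-specific helpers that bytes_linux_aarch64.inline.hpp is expected to provide (illustration only):

#include <cassert>
#include <cstdint>
#include <cstring>

// Portable stand-ins for swap_u2/get_native_u2; the real implementations are
// compiler- and OS-specific and live in the included platform header.
static uint16_t swap_u2(uint16_t x) { return (uint16_t)((x >> 8) | (x << 8)); }

static uint16_t get_native_u2(const unsigned char* p) {
  uint16_t v;
  std::memcpy(&v, p, sizeof(v));       // unaligned-safe on any host
  return v;
}

// Java data is stored big-endian, so a little-endian host must byte-reverse
// the native load to recover the Java value.
static uint16_t get_Java_u2(const unsigned char* p) { return swap_u2(get_native_u2(p)); }

int main() {
  const unsigned char bytes[2] = { 0xCA, 0xFE };   // big-endian 0xCAFE
  assert(get_Java_u2(bytes) == 0xCAFE);            // holds on a little-endian host
  return 0;
}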
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+
+#define __ ce->masm()->
+
+float ConversionStub::float_zero = 0.0;
+double ConversionStub::double_zero = 0.0;
+
+static Register as_reg(LIR_Opr op) {
+  return op->is_double_cpu() ? op->as_register_lo() : op->as_register();
+}
+
+void ConversionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  // FIXME: Agh, this is so painful
+
+  __ enter();
+  __ sub(sp, sp, 2 * wordSize);
+  __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+  for (int i = 30; i >= 0; i -= 2) // caller-saved fp registers
+    if (i < 8 || i > 15)
+      __ stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ pre(sp, -2 * wordSize)));
+
+  switch(bytecode()) {
+  case Bytecodes::_f2i:
+    {
+      if (v0 != input()->as_float_reg())
+        __ fmovs(v0, input()->as_float_reg());
+      __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
+                            0, 1, MacroAssembler::ret_type_integral);
+    }
+    break;
+  case Bytecodes::_d2i:
+    {
+      if (v0 != input()->as_double_reg())
+        __ fmovd(v0, input()->as_double_reg());
+      __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
+                            0, 1, MacroAssembler::ret_type_integral);
+    }
+    break;
+  case Bytecodes::_f2l:
+    {
+      if (v0 != input()->as_float_reg())
+        __ fmovs(v0, input()->as_float_reg());
+      __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2l),
+                            0, 1, MacroAssembler::ret_type_integral);
+    }
+    break;
+  case Bytecodes::_d2l:
+    {
+      if (v0 != input()->as_double_reg())
+        __ fmovd(v0, input()->as_double_reg());
+      __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2l),
+                            0, 1, MacroAssembler::ret_type_integral);
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  __ str(r0, Address(rfp, -wordSize));
+
+  for (int i = 0; i < 32; i += 2)
+    if (i < 8 || i > 15)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  __ pop(RegSet::range(r0, r29), sp);
+
+  __ ldr(as_reg(result()), Address(rfp, -wordSize));
+  __ leave();
+
+  __ b(_continuation);
+}
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  ce->store_parameter(_method->as_register(), 1);
+  ce->store_parameter(_bci, 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception)
+  : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+  , _index(index)
+{
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_index->is_cpu_register()) {
+    __ mov(rscratch1, _index->as_register());
+  } else {
+    __ mov(rscratch1, _index->as_jint());
+  }
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL, rscratch2);
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+
+
+// Implementation of NewInstanceStub
+
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id   = stub_id;
+}
+
+
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  __ mov(r3, _klass_reg->as_register());
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must be in r0");
+  __ b(_continuation);
+}
+
+
+// Implementation of NewTypeArrayStub
+
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == r19, "length must be in r19");
+  assert(_klass_reg->as_register() == r3, "klass_reg must be in r3");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must be in r0");
+  __ b(_continuation);
+}
+
+
+// Implementation of NewObjectArrayStub
+
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _result = result;
+  _length = length;
+  _info = new CodeEmitInfo(info);
+}
+
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == r19, "length must be in r19");
+  assert(_klass_reg->as_register() == r3, "klass_reg must be in r3");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must be in r0");
+  __ b(_continuation);
+}
+// Implementation of MonitorAccessStubs
+
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+: MonitorAccessStub(obj_reg, lock_reg)
+{
+  _info = new CodeEmitInfo(info);
+}
+
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_obj_reg->as_register(),  1);
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    // lock_reg was destroyed by fast unlocking attempt => recompute it
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  // note: non-blocking leaf routine => no call info needed
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  __ adr(lr, _continuation);
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
+}
+
+
+// Implementation of patching:
+// - Copy the code at the given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace the original code with a call to the stub
+// At runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing the class, restore the original code and reexecute the instruction
+
+int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) {
+}
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  assert(false, "AArch64 should not use C1 runtime patching");
+}
+
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+  ce->add_call_info_here(_info);
+  DEBUG_ONLY(__ should_not_reach_here());
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id)));
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+
+  __ bind(_entry);
+  // pass the object in a scratch register because all other registers
+  // must be preserved
+  if (_obj->is_cpu_register()) {
+    __ mov(rscratch1, _obj->as_register());
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2);
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  //---------------slow case: call to native-----------------
+  __ bind(_entry);
+  // Figure out where the args should go
+  // This should really convert the IntrinsicID to the methodOop and signature
+  // but I don't know how to do that.
+  //
+  VMRegPair args[5];
+  BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
+  SharedRuntime::java_calling_convention(signature, args, 5, true);
+
+  // push parameters
+  // (src, src_pos, dest, destPos, length)
+  Register r[5];
+  r[0] = src()->as_register();
+  r[1] = src_pos()->as_register();
+  r[2] = dst()->as_register();
+  r[3] = dst_pos()->as_register();
+  r[4] = length()->as_register();
+
+  // next registers will get stored on the stack
+  for (int i = 0; i < 5 ; i++ ) {
+    VMReg r_1 = args[i].first();
+    if (r_1->is_stack()) {
+      int st_off = r_1->reg2stack() * wordSize;
+      __ str (r[i], Address(sp, st_off));
+    } else {
+      assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
+    }
+  }
+
+  ce->align_call(lir_static_call);
+
+  ce->emit_static_call_stub();
+  Address resolve(SharedRuntime::get_resolve_static_call_stub(),
+                  relocInfo::static_call_type);
+  __ trampoline_call(resolve);
+  ce->add_call_info_here(info());
+
+#ifndef PRODUCT
+  __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
+  __ incrementw(Address(rscratch2));
+#endif
+
+  __ b(_continuation);
+}
+
+
+/////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
+  __ bind(_entry);
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+  __ cbz(pre_val_reg, _continuation);
+  ce->store_parameter(pre_val()->as_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ b(_continuation);
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ cbz(new_val_reg, _continuation);
+  ce->store_parameter(addr()->as_pointer_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ b(_continuation);
+}
+
+#endif
+/////////////////////////////////////////////////////////////////////////////
+
+#undef __
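ConversionStub above falls back to SharedRuntime::f2i/f2l/d2i/d2l because Java narrowing conversions define the results for NaN and out-of-range inputs, and the stub conservatively delegates those cases to the shared runtime. A hedged host-side sketch of the f2i rule being enforced (an illustration of the semantics only, not the VM's implementation):

#include <cassert>
#include <cmath>
#include <cstdint>

// Java semantics for (int) applied to a float: NaN maps to 0, and values
// outside the int range clamp to Integer.MIN_VALUE / Integer.MAX_VALUE.
static int32_t java_f2i(float x) {
  if (std::isnan(x))          return 0;
  if (x <= (float)INT32_MIN)  return INT32_MIN;
  if (x >= (float)INT32_MAX)  return INT32_MAX;   // (float)INT32_MAX rounds up to 2^31
  return (int32_t)x;
}

int main() {
  assert(java_f2i(NAN)    == 0);
  assert(java_f2i(1e20f)  == INT32_MAX);
  assert(java_f2i(-1e20f) == INT32_MIN);
  assert(java_f2i(2.9f)   == 2);                  // truncation toward zero
  return 0;
}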
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_Defs_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
+
+// native word offsets from memory address (little endian)
+enum {
+  pd_lo_word_offset_in_bytes = 0,
+  pd_hi_word_offset_in_bytes = BytesPerWord
+};
+
+// explicit rounding operations are required to implement the strictFP mode
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+// FIXME: There are no callee-saved registers
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers,       // number of registers used during code emission
+  pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers,  // number of registers used during code emission
+
+  pd_nof_caller_save_cpu_regs_frame_map = 19 - 2,  // number of registers killed by calls
+  pd_nof_caller_save_fpu_regs_frame_map = 32,  // number of registers killed by calls
+
+  pd_first_callee_saved_reg = 19 - 2,
+  pd_last_callee_saved_reg = 26 - 2,
+
+  pd_last_allocatable_cpu_reg = 16,
+
+  pd_nof_cpu_regs_reg_alloc
+    = pd_last_allocatable_cpu_reg + 1,  // number of registers that are visible to register allocator
+  pd_nof_fpu_regs_reg_alloc = 8,  // number of registers that are visible to register allocator
+
+  pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan
+  pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan
+  pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = 16,
+  pd_first_byte_reg = 0,
+  pd_last_byte_reg = 16,
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+  pd_last_fpu_reg =  pd_first_fpu_reg + 31,
+
+  pd_first_callee_saved_fpu_reg = 8 + pd_first_fpu_reg,
+  pd_last_callee_saved_fpu_reg = 15 + pd_first_fpu_reg,
+};
+
+
+// Encoding of float value in debug info.  This is true on x86 where
+// floats are extended to doubles when stored in the stack, false for
+// AArch64 where floats and doubles are stored in their native form.
+enum {
+  pd_float_saved_as_double = false
+};
+
+#endif // CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
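The "19 - 2" above reads more easily next to the register map set up later in this changeset: r0-r18 are treated as caller-saved minus the two scratch registers r8/r9, leaving the 17 _caller_save_cpu_regs[] entries (indices 0..16) that FrameMap::initialize() fills in. A trivial arithmetic check (illustration only):

#include <cassert>

int main() {
  const int low_regs         = 19;  // r0..r18
  const int scratch_regs     = 2;   // r8, r9 (rscratch1, rscratch2)
  const int caller_save_regs = low_regs - scratch_regs;
  assert(caller_save_regs == 17);   // matches _caller_save_cpu_regs[0..16]
  return 0;
}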
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FpuStackSim.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "utilities/array.hpp"
+#include "utilities/ostream.hpp"
+
+//--------------------------------------------------------
+//               FpuStackSim
+//--------------------------------------------------------
+
+// This class maps the FPU registers to their stack locations; it computes
+// the offsets between individual registers and simulates the FPU stack.
+
+const int EMPTY = -1;
+
+int FpuStackSim::regs_at(int i) const {
+  assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds");
+  return _regs[i];
+}
+
+void FpuStackSim::set_regs_at(int i, int val) {
+  assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds");
+  _regs[i] = val;
+}
+
+void FpuStackSim::dec_stack_size() {
+  _stack_size--;
+  assert(_stack_size >= 0, "FPU stack underflow");
+}
+
+void FpuStackSim::inc_stack_size() {
+  _stack_size++;
+  assert(_stack_size <= FrameMap::nof_fpu_regs, "FPU stack overflow");
+}
+
+FpuStackSim::FpuStackSim(Compilation* compilation)
+ : _compilation(compilation)
+{
+  _stack_size = 0;
+  for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    set_regs_at(i, EMPTY);
+  }
+}
+
+
+void FpuStackSim::pop() {
+  if (TraceFPUStack) { tty->print("FPU-pop "); print(); tty->cr(); }
+  set_regs_at(tos_index(), EMPTY);
+  dec_stack_size();
+}
+
+void FpuStackSim::pop(int rnr) {
+  if (TraceFPUStack) { tty->print("FPU-pop %d", rnr); print(); tty->cr(); }
+  assert(regs_at(tos_index()) == rnr, "rnr is not on TOS");
+  set_regs_at(tos_index(), EMPTY);
+  dec_stack_size();
+}
+
+
+void FpuStackSim::push(int rnr) {
+  if (TraceFPUStack) { tty->print("FPU-push %d", rnr); print(); tty->cr(); }
+  assert(regs_at(stack_size()) == EMPTY, "should be empty");
+  set_regs_at(stack_size(), rnr);
+  inc_stack_size();
+}
+
+
+void FpuStackSim::swap(int offset) {
+  if (TraceFPUStack) { tty->print("FPU-swap %d", offset); print(); tty->cr(); }
+  int t = regs_at(tos_index() - offset);
+  set_regs_at(tos_index() - offset, regs_at(tos_index()));
+  set_regs_at(tos_index(), t);
+}
+
+
+int FpuStackSim::offset_from_tos(int rnr) const {
+  for (int i = tos_index(); i >= 0; i--) {
+    if (regs_at(i) == rnr) {
+      return tos_index() - i;
+    }
+  }
+  assert(false, "FpuStackSim: register not found");
+  BAILOUT_("FpuStackSim: register not found", 0);
+}
+
+
+int FpuStackSim::get_slot(int tos_offset) const {
+  return regs_at(tos_index() - tos_offset);
+}
+
+void FpuStackSim::set_slot(int tos_offset, int rnr) {
+  set_regs_at(tos_index() - tos_offset, rnr);
+}
+
+void FpuStackSim::rename(int old_rnr, int new_rnr) {
+  if (TraceFPUStack) { tty->print("FPU-rename %d %d", old_rnr, new_rnr); print(); tty->cr(); }
+  if (old_rnr == new_rnr)
+    return;
+  bool found = false;
+  for (int i = 0; i < stack_size(); i++) {
+    assert(regs_at(i) != new_rnr, "should not see old occurrences of new_rnr on the stack");
+    if (regs_at(i) == old_rnr) {
+      set_regs_at(i, new_rnr);
+      found = true;
+    }
+  }
+  assert(found, "should have found at least one instance of old_rnr");
+}
+
+
+bool FpuStackSim::contains(int rnr) {
+  for (int i = 0; i < stack_size(); i++) {
+    if (regs_at(i) == rnr) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool FpuStackSim::is_empty() {
+#ifdef ASSERT
+  if (stack_size() == 0) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      assert(regs_at(i) == EMPTY, "must be empty");
+    }
+  }
+#endif
+  return stack_size() == 0;
+}
+
+
+bool FpuStackSim::slot_is_empty(int tos_offset) {
+  return (regs_at(tos_index() - tos_offset) == EMPTY);
+}
+
+
+void FpuStackSim::clear() {
+  if (TraceFPUStack) { tty->print("FPU-clear"); print(); tty->cr(); }
+  for (int i = tos_index(); i >= 0; i--) {
+    set_regs_at(i, EMPTY);
+  }
+  _stack_size = 0;
+}
+
+
+intArray* FpuStackSim::write_state() {
+  intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
+  (*res)[0] = stack_size();
+  for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    (*res)[1 + i] = regs_at(i);
+  }
+  return res;
+}
+
+
+void FpuStackSim::read_state(intArray* fpu_stack_state) {
+  _stack_size = (*fpu_stack_state)[0];
+  for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    set_regs_at(i, (*fpu_stack_state)[1 + i]);
+  }
+}
+
+
+#ifndef PRODUCT
+void FpuStackSim::print() {
+  tty->print(" N=%d[", stack_size());\
+  for (int i = 0; i < stack_size(); i++) {
+    int reg = regs_at(i);
+    if (reg != EMPTY) {
+      tty->print("%d", reg);
+    } else {
+      tty->print("_");
+    }
+  };
+  tty->print(" ]");
+}
+#endif
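The simulator above models an x87-style register stack; AArch64 has flat FP registers, so this file appears to be carried over from the x86 port largely for interface compatibility. For intuition, a stripped-down sketch of the same push/swap/pop bookkeeping with the Compilation and bailout plumbing removed (illustration only, invented type name):

#include <cassert>

// Simplified model of FpuStackSim: regs[i] holds the register number stored in
// stack slot i, and the top of stack is slot stack_size - 1.
struct TinyFpuStack {
  static const int kSlots = 8;
  static const int kEmpty = -1;
  int regs[kSlots];
  int stack_size;

  TinyFpuStack() : stack_size(0) { for (int i = 0; i < kSlots; i++) regs[i] = kEmpty; }

  int  tos_index() const { return stack_size - 1; }
  void push(int rnr)     { regs[stack_size++] = rnr; }
  void pop()             { regs[--stack_size] = kEmpty; }
  void swap(int offset) {                 // exchange TOS with the slot `offset` below it
    int t = regs[tos_index() - offset];
    regs[tos_index() - offset] = regs[tos_index()];
    regs[tos_index()] = t;
  }
};

int main() {
  TinyFpuStack s;
  s.push(3); s.push(7);                   // stack (bottom..top): 3, 7
  s.swap(1);                              // now: 7, 3
  assert(s.regs[s.tos_index()] == 3);
  s.pop();
  assert(s.stack_size == 1 && s.regs[0] == 7);
  return 0;
}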
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
+
+//  Simulates the FPU stack and maintains mapping [fpu-register -> stack offset]
+//  FPU registers are described as numbers from 0..nof_fpu_regs-1
+
+class Compilation;
+
+class FpuStackSim VALUE_OBJ_CLASS_SPEC {
+ private:
+  Compilation* _compilation;
+  int          _stack_size;
+  int          _regs[FrameMap::nof_fpu_regs];
+
+  int tos_index() const                        { return _stack_size - 1; }
+
+  int regs_at(int i) const;
+  void set_regs_at(int i, int val);
+  void dec_stack_size();
+  void inc_stack_size();
+
+  // unified bailout support
+  Compilation*  compilation() const              { return _compilation; }
+  void          bailout(const char* msg) const   { compilation()->bailout(msg); }
+  bool          bailed_out() const               { return compilation()->bailed_out(); }
+
+ public:
+  FpuStackSim(Compilation* compilation);
+  void pop ();
+  void pop (int rnr);                          // rnr must be on tos
+  void push(int rnr);
+  void swap(int offset);                       // exchange tos with tos + offset
+  int offset_from_tos(int rnr) const;          // return the offset of the topmost instance of rnr from TOS
+  int  get_slot(int tos_offset) const;         // return the entry at the given offset from TOS
+  void set_slot(int tos_offset, int rnr);      // set the entry at the given offset from TOS
+  void rename(int old_rnr, int new_rnr);       // rename all instances of old_rnr to new_rnr
+  bool contains(int rnr);                      // debugging support only
+  bool is_empty();
+  bool slot_is_empty(int tos_offset);
+  int stack_size() const                       { return _stack_size; }
+  void clear();
+  intArray* write_state();
+  void read_state(intArray* fpu_stack_state);
+
+  void print() PRODUCT_RETURN;
+};
+
+#endif // CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type));
+  } else if (r_1->is_Register()) {
+    Register reg = r_1->as_Register();
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      Register reg2 = r_2->as_Register();
+#ifdef _LP64
+      assert(reg2 == reg, "must be same register");
+      opr = as_long_opr(reg);
+#else
+      opr = as_long_opr(reg2, reg);
+#endif // _LP64
+    } else if (type == T_OBJECT || type == T_ARRAY) {
+      opr = as_oop_opr(reg);
+    } else {
+      opr = as_opr(reg);
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    int num = r_1->as_FloatRegister()->encoding();
+    if (type == T_FLOAT) {
+      opr = LIR_OprFact::single_fpu(num);
+    } else {
+      opr = LIR_OprFact::double_fpu(num);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  return opr;
+}
+
+LIR_Opr FrameMap::r0_opr;
+LIR_Opr FrameMap::r1_opr;
+LIR_Opr FrameMap::r2_opr;
+LIR_Opr FrameMap::r3_opr;
+LIR_Opr FrameMap::r4_opr;
+LIR_Opr FrameMap::r5_opr;
+LIR_Opr FrameMap::r6_opr;
+LIR_Opr FrameMap::r7_opr;
+LIR_Opr FrameMap::r8_opr;
+LIR_Opr FrameMap::r9_opr;
+LIR_Opr FrameMap::r10_opr;
+LIR_Opr FrameMap::r11_opr;
+LIR_Opr FrameMap::r12_opr;
+LIR_Opr FrameMap::r13_opr;
+LIR_Opr FrameMap::r14_opr;
+LIR_Opr FrameMap::r15_opr;
+LIR_Opr FrameMap::r16_opr;
+LIR_Opr FrameMap::r17_opr;
+LIR_Opr FrameMap::r18_opr;
+LIR_Opr FrameMap::r19_opr;
+LIR_Opr FrameMap::r20_opr;
+LIR_Opr FrameMap::r21_opr;
+LIR_Opr FrameMap::r22_opr;
+LIR_Opr FrameMap::r23_opr;
+LIR_Opr FrameMap::r24_opr;
+LIR_Opr FrameMap::r25_opr;
+LIR_Opr FrameMap::r26_opr;
+LIR_Opr FrameMap::r27_opr;
+LIR_Opr FrameMap::r28_opr;
+LIR_Opr FrameMap::r29_opr;
+LIR_Opr FrameMap::r30_opr;
+
+LIR_Opr FrameMap::rfp_opr;
+LIR_Opr FrameMap::sp_opr;
+
+LIR_Opr FrameMap::receiver_opr;
+
+LIR_Opr FrameMap::r0_oop_opr;
+LIR_Opr FrameMap::r1_oop_opr;
+LIR_Opr FrameMap::r2_oop_opr;
+LIR_Opr FrameMap::r3_oop_opr;
+LIR_Opr FrameMap::r4_oop_opr;
+LIR_Opr FrameMap::r5_oop_opr;
+LIR_Opr FrameMap::r6_oop_opr;
+LIR_Opr FrameMap::r7_oop_opr;
+LIR_Opr FrameMap::r8_oop_opr;
+LIR_Opr FrameMap::r9_oop_opr;
+LIR_Opr FrameMap::r10_oop_opr;
+LIR_Opr FrameMap::r11_oop_opr;
+LIR_Opr FrameMap::r12_oop_opr;
+LIR_Opr FrameMap::r13_oop_opr;
+LIR_Opr FrameMap::r14_oop_opr;
+LIR_Opr FrameMap::r15_oop_opr;
+LIR_Opr FrameMap::r16_oop_opr;
+LIR_Opr FrameMap::r17_oop_opr;
+LIR_Opr FrameMap::r18_oop_opr;
+LIR_Opr FrameMap::r19_oop_opr;
+LIR_Opr FrameMap::r20_oop_opr;
+LIR_Opr FrameMap::r21_oop_opr;
+LIR_Opr FrameMap::r22_oop_opr;
+LIR_Opr FrameMap::r23_oop_opr;
+LIR_Opr FrameMap::r24_oop_opr;
+LIR_Opr FrameMap::r25_oop_opr;
+LIR_Opr FrameMap::r26_oop_opr;
+LIR_Opr FrameMap::r27_oop_opr;
+LIR_Opr FrameMap::r28_oop_opr;
+LIR_Opr FrameMap::r29_oop_opr;
+LIR_Opr FrameMap::r30_oop_opr;
+
+LIR_Opr FrameMap::rscratch1_opr;
+LIR_Opr FrameMap::rscratch2_opr;
+LIR_Opr FrameMap::rscratch1_long_opr;
+LIR_Opr FrameMap::rscratch2_long_opr;
+
+LIR_Opr FrameMap::long0_opr;
+LIR_Opr FrameMap::long1_opr;
+LIR_Opr FrameMap::fpu0_float_opr;
+LIR_Opr FrameMap::fpu0_double_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+//--------------------------------------------------------
+//               FrameMap
+//--------------------------------------------------------
+
+void FrameMap::initialize() {
+  assert(!_init_done, "once");
+
+  int i=0;
+  map_register(i, r0); r0_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r1); r1_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r2); r2_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r3); r3_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r4); r4_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r5); r5_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r6); r6_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r7); r7_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r10); r10_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r11); r11_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r12); r12_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r13); r13_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r14); r14_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r16); r16_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r17); r17_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r18); r18_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r19); r19_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r20); r20_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r21); r21_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r22); r22_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r23); r23_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++;
+
+  map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase
+  map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread
+  map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp
+  map_register(i, r30); r30_opr = LIR_OprFact::single_cpu(i); i++; // lr
+  map_register(i, r31_sp); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp
+  map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++;   // rscratch1
+  map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++;   // rscratch2
+
+  rscratch1_opr = r8_opr;
+  rscratch2_opr = r9_opr;
+  rscratch1_long_opr = LIR_OprFact::double_cpu(r8_opr->cpu_regnr(), r8_opr->cpu_regnr());
+  rscratch2_long_opr = LIR_OprFact::double_cpu(r9_opr->cpu_regnr(), r9_opr->cpu_regnr());
+
+  long0_opr = LIR_OprFact::double_cpu(0, 0);
+  long1_opr = LIR_OprFact::double_cpu(1, 1);
+
+  fpu0_float_opr   = LIR_OprFact::single_fpu(0);
+  fpu0_double_opr  = LIR_OprFact::double_fpu(0);
+
+  _caller_save_cpu_regs[0] = r0_opr;
+  _caller_save_cpu_regs[1] = r1_opr;
+  _caller_save_cpu_regs[2] = r2_opr;
+  _caller_save_cpu_regs[3] = r3_opr;
+  _caller_save_cpu_regs[4] = r4_opr;
+  _caller_save_cpu_regs[5] = r5_opr;
+  _caller_save_cpu_regs[6]  = r6_opr;
+  _caller_save_cpu_regs[7]  = r7_opr;
+  // rscratch1, rscratch2 not included
+  _caller_save_cpu_regs[8] = r10_opr;
+  _caller_save_cpu_regs[9] = r11_opr;
+  _caller_save_cpu_regs[10] = r12_opr;
+  _caller_save_cpu_regs[11] = r13_opr;
+  _caller_save_cpu_regs[12] = r14_opr;
+  _caller_save_cpu_regs[13] = r15_opr;
+  _caller_save_cpu_regs[14] = r16_opr;
+  _caller_save_cpu_regs[15] = r17_opr;
+  _caller_save_cpu_regs[16] = r18_opr;
+
+  for (int i = 0; i < 8; i++) {
+    _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
+  }
+
+  _init_done = true;
+
+  r0_oop_opr = as_oop_opr(r0);
+  r1_oop_opr = as_oop_opr(r1);
+  r2_oop_opr = as_oop_opr(r2);
+  r3_oop_opr = as_oop_opr(r3);
+  r4_oop_opr = as_oop_opr(r4);
+  r5_oop_opr = as_oop_opr(r5);
+  r6_oop_opr = as_oop_opr(r6);
+  r7_oop_opr = as_oop_opr(r7);
+  r8_oop_opr = as_oop_opr(r8);
+  r9_oop_opr = as_oop_opr(r9);
+  r10_oop_opr = as_oop_opr(r10);
+  r11_oop_opr = as_oop_opr(r11);
+  r12_oop_opr = as_oop_opr(r12);
+  r13_oop_opr = as_oop_opr(r13);
+  r14_oop_opr = as_oop_opr(r14);
+  r15_oop_opr = as_oop_opr(r15);
+  r16_oop_opr = as_oop_opr(r16);
+  r17_oop_opr = as_oop_opr(r17);
+  r18_oop_opr = as_oop_opr(r18);
+  r19_oop_opr = as_oop_opr(r19);
+  r20_oop_opr = as_oop_opr(r20);
+  r21_oop_opr = as_oop_opr(r21);
+  r22_oop_opr = as_oop_opr(r22);
+  r23_oop_opr = as_oop_opr(r23);
+  r24_oop_opr = as_oop_opr(r24);
+  r25_oop_opr = as_oop_opr(r25);
+  r26_oop_opr = as_oop_opr(r26);
+  r27_oop_opr = as_oop_opr(r27);
+  r28_oop_opr = as_oop_opr(r28);
+  r29_oop_opr = as_oop_opr(r29);
+  r30_oop_opr = as_oop_opr(r30);
+
+  sp_opr = as_pointer_opr(r31_sp);
+  rfp_opr = as_pointer_opr(rfp);
+
+  VMRegPair regs;
+  BasicType sig_bt = T_OBJECT;
+  SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true);
+  receiver_opr = as_oop_opr(regs.first()->as_Register());
+
+  for (int i = 0; i < nof_caller_save_fpu_regs; i++) {
+    _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
+  }
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  // for an rbp-based address use this:
+  // return Address(rbp, in_bytes(sp_offset) - (framesize() - 2) * 4);
+  return Address(sp, in_bytes(sp_offset));
+}
+
+
+// ----------------mapping-----------------------
+// all mapping is based on rfp addressing, except for simple leaf methods where we access
+// the locals relative to sp (and no frame is built)
+
+
+// Frame for simple leaf methods (quick entries)
+//
+//   +----------+
+//   | ret addr |   <- TOS
+//   +----------+
+//   | args     |
+//   | ......   |
+
+// Frame for standard methods
+//
+//   | .........|  <- TOS
+//   | locals   |
+//   +----------+
+//   |  old fp, |  <- RFP
+//   +----------+
+//   | ret addr |
+//   +----------+
+//   |  args    |
+//   | .........|
+
+
+// For OopMaps, map a local variable or spill index to a VMRegImpl name.
+// This is the offset from sp() in the frame of the slot for the index,
+// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register).
+//
+//           framesize +
+//           stack0         stack0          0  <- VMReg
+//             |              | <registers> |
+//  ...........|..............|.............|
+//      0 1 2 3 x x 4 5 6 ... |                <- local indices
+//      ^           ^        sp()                 ( x x indicate link
+//      |           |                               and return addr)
+//  arguments   non-argument locals
+
+
+VMReg FrameMap::fpu_regname (int n) {
+  // Return the OptoReg name for the fpu stack slot "n"
+  // A spilled fpu stack slot comprises two single-word OptoRegs.
+  return as_FloatRegister(n)->as_VMReg();
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+  return FrameMap::sp_opr;
+}
+
+
+// JSR 292
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  // assert(rfp == rbp_mh_SP_save, "must be same register");
+  return rfp_opr;
+}
+
+
+bool FrameMap::validate_frame() {
+  return true;
+}
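The OopMap comment above describes stack slots as sp-relative slot indices skewed by VMRegImpl::stack0, so names below stack0 denote registers and names at or above it denote frame slots. A tiny worked example of that skew with an invented stack0 value (the real value comes from VMRegImpl):

#include <cassert>

// Invented constant for illustration; register names occupy [0, stack0).
static const int stack0 = 64;

static int stack_slot_to_vmreg(int sp_slot) {
  return stack0 + sp_slot;          // skewed: slot 0 becomes name 64, slot 5 name 69, ...
}

int main() {
  assert(stack_slot_to_vmreg(0) == 64);
  assert(stack_slot_to_vmreg(5) == 69);
  return 0;
}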
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_FrameMap_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
+
+//  On AArch64 the frame looks as follows:
+//
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+//  | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling .
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+
+ public:
+  static const int pd_c_runtime_reserved_arg_size;
+
+  enum {
+    first_available_sp_in_frame = 0,
+    frame_pad_in_bytes = 16,
+    nof_reg_args = 8
+  };
+
+ public:
+  static LIR_Opr receiver_opr;
+
+  static LIR_Opr r0_opr;
+  static LIR_Opr r1_opr;
+  static LIR_Opr r2_opr;
+  static LIR_Opr r3_opr;
+  static LIR_Opr r4_opr;
+  static LIR_Opr r5_opr;
+  static LIR_Opr r6_opr;
+  static LIR_Opr r7_opr;
+  static LIR_Opr r8_opr;
+  static LIR_Opr r9_opr;
+  static LIR_Opr r10_opr;
+  static LIR_Opr r11_opr;
+  static LIR_Opr r12_opr;
+  static LIR_Opr r13_opr;
+  static LIR_Opr r14_opr;
+  static LIR_Opr r15_opr;
+  static LIR_Opr r16_opr;
+  static LIR_Opr r17_opr;
+  static LIR_Opr r18_opr;
+  static LIR_Opr r19_opr;
+  static LIR_Opr r20_opr;
+  static LIR_Opr r21_opr;
+  static LIR_Opr r22_opr;
+  static LIR_Opr r23_opr;
+  static LIR_Opr r24_opr;
+  static LIR_Opr r25_opr;
+  static LIR_Opr r26_opr;
+  static LIR_Opr r27_opr;
+  static LIR_Opr r28_opr;
+  static LIR_Opr r29_opr;
+  static LIR_Opr r30_opr;
+  static LIR_Opr rfp_opr;
+  static LIR_Opr sp_opr;
+
+  static LIR_Opr r0_oop_opr;
+  static LIR_Opr r1_oop_opr;
+  static LIR_Opr r2_oop_opr;
+  static LIR_Opr r3_oop_opr;
+  static LIR_Opr r4_oop_opr;
+  static LIR_Opr r5_oop_opr;
+  static LIR_Opr r6_oop_opr;
+  static LIR_Opr r7_oop_opr;
+  static LIR_Opr r8_oop_opr;
+  static LIR_Opr r9_oop_opr;
+  static LIR_Opr r10_oop_opr;
+  static LIR_Opr r11_oop_opr;
+  static LIR_Opr r12_oop_opr;
+  static LIR_Opr r13_oop_opr;
+  static LIR_Opr r14_oop_opr;
+  static LIR_Opr r15_oop_opr;
+  static LIR_Opr r16_oop_opr;
+  static LIR_Opr r17_oop_opr;
+  static LIR_Opr r18_oop_opr;
+  static LIR_Opr r19_oop_opr;
+  static LIR_Opr r20_oop_opr;
+  static LIR_Opr r21_oop_opr;
+  static LIR_Opr r22_oop_opr;
+  static LIR_Opr r23_oop_opr;
+  static LIR_Opr r24_oop_opr;
+  static LIR_Opr r25_oop_opr;
+  static LIR_Opr r26_oop_opr;
+  static LIR_Opr r27_oop_opr;
+  static LIR_Opr r28_oop_opr;
+  static LIR_Opr r29_oop_opr;
+  static LIR_Opr r30_oop_opr;
+
+  static LIR_Opr rscratch1_opr;
+  static LIR_Opr rscratch2_opr;
+  static LIR_Opr rscratch1_long_opr;
+  static LIR_Opr rscratch2_long_opr;
+
+  static LIR_Opr long0_opr;
+  static LIR_Opr long1_opr;
+  static LIR_Opr fpu0_float_opr;
+  static LIR_Opr fpu0_double_opr;
+
+  static LIR_Opr as_long_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+
+  // VMReg name for spilled physical FPU stack slot n
+  static VMReg fpu_regname (int n);
+
+  static bool is_caller_save_register (LIR_Opr opr) { return true; }
+  static bool is_caller_save_register (Register r) { return true; }
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg()             { return pd_last_cpu_reg;  }
+  static int last_byte_reg()            { return pd_last_byte_reg; }
+
+#endif // CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,2960 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc_interface/collectedHeap.hpp"
+#include "memory/barrierSet.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+NEEDS_CLEANUP // remove these definitions?
+const Register IC_Klass    = rscratch2;   // where the IC klass is cached
+const Register SYNC_header = r0;   // synchronization header
+const Register SHIFT_count = r0;   // where count for shift operations must be
+
+#define __ _masm->
+
+
+static void select_different_registers(Register preserve,
+                                       Register extra,
+                                       Register &tmp1,
+                                       Register &tmp2) {
+  if (tmp1 == preserve) {
+    assert_different_registers(tmp1, tmp2, extra);
+    tmp1 = extra;
+  } else if (tmp2 == preserve) {
+    assert_different_registers(tmp1, tmp2, extra);
+    tmp2 = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2);
+}
+
+
+
+static void select_different_registers(Register preserve,
+                                       Register extra,
+                                       Register &tmp1,
+                                       Register &tmp2,
+                                       Register &tmp3) {
+  if (tmp1 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp1 = extra;
+  } else if (tmp2 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp2 = extra;
+  } else if (tmp3 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp3 = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2, tmp3);
+}
+
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; }
+
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::receiver_opr;
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::as_pointer_opr(receiverOpr()->as_register());
+}
+
+//--------------fpu register translations-----------------------
+
+
+address LIR_Assembler::float_constant(float f) {
+  address const_addr = __ float_constant(f);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+
+address LIR_Assembler::double_constant(double d) {
+  address const_addr = __ double_constant(d);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+address LIR_Assembler::int_constant(jlong n) {
+  address const_addr = __ long_constant(n);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); }
+
+void LIR_Assembler::reset_FPU() { Unimplemented(); }
+
+void LIR_Assembler::fpop() { Unimplemented(); }
+
+void LIR_Assembler::fxch(int i) { Unimplemented(); }
+
+void LIR_Assembler::fld(int i) { Unimplemented(); }
+
+void LIR_Assembler::ffree(int i) { Unimplemented(); }
+
+void LIR_Assembler::breakpoint() { Unimplemented(); }
+
+void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); }
+
+void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); }
+
+bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; }
+//-------------------------------------------
+
+static Register as_reg(LIR_Opr op) {
+  return op->is_double_cpu() ? op->as_register_lo() : op->as_register();
+}
+
+static jlong as_long(LIR_Opr data) {
+  jlong result;
+  switch (data->type()) {
+  case T_INT:
+    result = (data->as_jint());
+    break;
+  case T_LONG:
+    result = (data->as_jlong());
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return result;
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
+  Register base = addr->base()->as_pointer_register();
+  LIR_Opr opr = addr->index();
+  if (opr->is_cpu_register()) {
+    Register index;
+    if (opr->is_single_cpu())
+      index = opr->as_register();
+    else
+      index = opr->as_register_lo();
+    assert(addr->disp() == 0, "must be");
+    switch(opr->type()) {
+      case T_INT:
+        return Address(base, index, Address::sxtw(addr->scale()));
+      case T_LONG:
+        return Address(base, index, Address::lsl(addr->scale()));
+      default:
+        ShouldNotReachHere();
+      }
+  } else  {
+    intptr_t addr_offset = intptr_t(addr->disp());
+    if (Address::offset_ok_for_immed(addr_offset, addr->scale()))
+      return Address(base, addr_offset, Address::lsl(addr->scale()));
+    else {
+      __ mov(tmp, addr_offset);
+      return Address(base, tmp, Address::lsl(addr->scale()));
+    }
+  }
+  return Address();
+}
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  ShouldNotReachHere();
+  return Address();
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  return as_Address(addr, rscratch1);
+}
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  return as_Address(addr, rscratch1);  // Ouch
+  // FIXME: This needs to be much more clever.  See x86.
+}
+
+
+void LIR_Assembler::osr_entry() {
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  ValueStack* entry_state = osr_entry->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // we jump here if osr happens with the interpreter
+  // state set up to continue at the beginning of the
+  // loop that triggered osr - in particular, we have
+  // the following registers set up:
+  //
+  // r2: osr buffer
+  //
+
+  // build frame
+  ciMethod* m = compilation()->method();
+  __ build_frame(initial_frame_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[0..number_of_locks]
+  //
+  // locals is a direct copy of the interpreter frame, so the first slot
+  // in the local array of the osr buffer is the last local from the
+  // interpreter and the last slot is local[0] (the receiver) from the
+  // interpreter
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, and the nth lock slot in the osr buffer is the
+  // 0th lock in the interpreter frame (the method lock if a sync method)
+
+  // Initialize monitors in the compiled activation.
+  //   r2: pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_pointer_register();
+  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1);
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
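+    // Concretely, with L = max_locals() and n = number_of_locks, monitor i's
+    // lock word sits at byte offset (L + 2*(n-1-i)) * BytesPerWord in the OSR
+    // buffer and its object one word above it, which is what slot_offset
+    // computes below.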
+    for (int i = 0; i < number_of_locks; i++) {
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
+#ifdef ASSERT
+      // verify the interpreter's monitor has a non-null object
+      {
+        Label L;
+        __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord));
+        __ cbnz(rscratch1, L);
+        __ stop("locked object is NULL");
+        __ bind(L);
+      }
+#endif
+      __ ldr(r19, Address(OSR_buf, slot_offset + 0));
+      __ str(r19, frame_map()->address_for_monitor_lock(i));
+      __ ldr(r19, Address(OSR_buf, slot_offset + 1*BytesPerWord));
+      __ str(r19, frame_map()->address_for_monitor_object(i));
+    }
+  }
+}
+
+
+// inline cache check; done before the frame is built.
+int LIR_Assembler::check_icache() {
+  Register receiver = FrameMap::receiver_opr->as_register();
+  Register ic_klass = IC_Klass;
+  int start_offset = __ offset();
+  __ inline_cache_check(receiver, ic_klass);
+
+  // if icache check fails, then jump to runtime routine
+  // Note: RECEIVER must still contain the receiver!
+  Label dont;
+  __ br(Assembler::EQ, dont);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+  // We align the verified entry point unless the method body
+  // (including its inline cache check) will fit in a single 64-byte
+  // icache line.
+  if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
+    // force alignment after the cache check.
+    __ align(CodeEntryAlignment);
+  }
+
+  __ bind(dont);
+  return start_offset;
+}
+
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  if (o == NULL) {
+    __ mov(reg, zr);
+  } else {
+    __ movoop(reg, o, /*immediate*/true);
+  }
+}
+
+
+// !!! FIXME AARCH64 -- jdk8 calls patching_id to determine which case
+// it is handling and the only options are load_mirror_id or
+// load_appendix_id neither of which exists for jdk8. the code here
+// assumes that the only case we will see is load_klass_id.
+
+void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
+  address target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+  relocInfo::relocType reloc_type = relocInfo::oop_type;
+
+  __ far_call(RuntimeAddress(target));
+  add_call_info_here(info);
+}
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  deoptimize_trap(info);
+}
+
+
+// This specifies the sp decrement needed to build the frame
+int LIR_Assembler::initial_frame_size_in_bytes() {
+  // if rounding, must let FrameMap know!
+
+  // The frame_map records size in slots (32-bit words)
+
+  // subtract two words to account for return address and link
+  return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word))  * VMRegImpl::stack_slot_size;
+}
+
+
+int LIR_Assembler::emit_exception_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(exception_handler_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  // the exception oop and pc are in r0 and r3, respectively
+  // no other registers need to be preserved, so invalidate them
+  __ invalidate_registers(false, true, true, false, true, true);
+
+  // check that there is really an exception
+  __ verify_not_null_oop(r0);
+
+  // search an exception handler (r0: exception oop, r3: throwing pc)
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path.
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+  if (CommentedAssembly) {
+    _masm->block_comment("Unwind handler");
+  }
+#endif
+
+  int offset = code_offset();
+
+  // Fetch the exception from TLS and clear out exception related thread state
+  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
+
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(r0);
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mov(r19, r0);  // Preserve the exception
+  }
+
+  // Perform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::r0_opr);
+    stub = new MonitorExitStub(FrameMap::r0_opr, true, 0);
+    __ unlock_object(r5, r4, r0, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    __ call_Unimplemented();
+#if 0
+    __ movptr(Address(rsp, 0), rax);
+    __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding());
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
+#endif
+  }
+
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mov(r0, r19);  // Restore the exception
+  }
+
+  // remove the activation and dispatch to the unwind handler
+  __ block_comment("remove_frame and dispatch to the unwind handler");
+  __ remove_frame(initial_frame_size_in_bytes());
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+
+  // Emit the slow path assembly
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+
+int LIR_Assembler::emit_deopt_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(deopt_handler_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
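+  // We jump to the unpack blob rather than call it, so materialize a return
+  // address in lr first; the blob expects lr to identify this deopt site.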
+  __ adr(lr, pc());
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// This is the fast version of java.lang.String.compare; it has no
+// OSR entry and therefore we generate a slow version for OSRs
+void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info)  {
+  __ mov(r2, (address)__FUNCTION__);
+  __ call_Unimplemented();
+}
+
+
+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
+  _masm->code_section()->relocate(adr, relocInfo::poll_type);
+  int pc_offset = code_offset();
+  flush_debug_info(pc_offset);
+  info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
+  if (info->exception_handlers() != NULL) {
+    compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
+  }
+}
+
+// Rather than take a segfault when the polling page is protected,
+// explicitly check for a safepoint in progress and if there is one,
+// fake a call to the handler as if a segfault had been caught.
+void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) {
+  __ mov(rscratch1, SafepointSynchronize::address_of_state());
+  __ ldrb(rscratch1, Address(rscratch1));
+  Label nope, poll;
+  __ cbz(rscratch1, nope);
+  __ block_comment("safepoint");
+  __ enter();
+  __ push(0x3, sp);                // r0 & r1
+  __ push(0x3ffffffc, sp);         // integer registers except lr & sp & r0 & r1
+  __ adr(r0, poll);
+  __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset()));
+  __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ maybe_isb();
+  __ pop(0x3ffffffc, sp);          // integer registers except lr & sp & r0 & r1
+  __ mov(rscratch1, r0);
+  __ pop(0x3, sp);                 // r0 & r1
+  __ leave();
+  __ br(rscratch1);
+  address polling_page(os::get_polling_page());
+  assert(os::is_poll_address(polling_page), "should be");
+  unsigned long off;
+  __ adrp(rscratch1, Address(polling_page, rtype), off);
+  __ bind(poll);
+  if (info)
+    add_debug_info_for_branch(info);  // This isn't just debug info:
+                                      // it's the oop map
+  else
+    __ code_section()->relocate(pc(), rtype);
+  __ ldrw(zr, Address(rscratch1, off));
+  __ bind(nope);
+}
+
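+// Return-path safepoint poll.  With UseCompilerSafepoints this is a bare
+// read of the polling page (the fault taken when the page is protected is
+// handled like any other poll); otherwise poll_for_safepoint above checks
+// the safepoint state explicitly.  relocInfo::poll_return_type tags the
+// poll as a return poll.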
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0");
+  // Pop the stack before the safepoint code
+  __ remove_frame(initial_frame_size_in_bytes());
+  if (UseCompilerSafepoints) {
+    address polling_page(os::get_polling_page());
+    __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type);
+  } else {
+    poll_for_safepoint(relocInfo::poll_return_type);
+  }
+  __ ret(lr);
+}
+
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  address polling_page(os::get_polling_page());
+  if (UseCompilerSafepoints) {
+    guarantee(info != NULL, "Shouldn't be NULL");
+    assert(os::is_poll_address(polling_page), "should be");
+    unsigned long off;
+    __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off);
+    add_debug_info_for_branch(info);  // This isn't just debug info:
+                                      // it's the oop map
+    __ ldrw(zr, Address(rscratch1, off));
+  } else {
+    poll_for_safepoint(relocInfo::poll_type, info);
+  }
+
+  return __ offset();
+}
+
+
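+// Register number 31 encodes either zr or sp depending on the instruction;
+// operands that name the stack pointer arrive here as r31_sp, so translate
+// that alias to the real sp before emitting the move.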
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+  if (from_reg == r31_sp)
+    from_reg = sp;
+  if (to_reg == r31_sp)
+    to_reg = sp;
+  __ mov(to_reg, from_reg);
+}
+
+void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
+
+
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+
+  switch (c->type()) {
+    case T_INT: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ movw(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_ADDRESS: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mov(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_LONG: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mov(dest->as_register_lo(), (intptr_t)c->as_jlong());
+      break;
+    }
+
+    case T_OBJECT: {
+      if (patch_code == lir_patch_none) {
+        jobject2reg(c->as_jobject(), dest->as_register());
+      } else {
+        jobject2reg_with_patching(dest->as_register(), info);
+      }
+      break;
+    }
+
+    case T_FLOAT: {
+      if (__ operand_valid_for_float_immediate(c->as_jfloat())) {
+        __ fmovs(dest->as_float_reg(), (c->as_jfloat()));
+      } else {
+        __ adr(rscratch1, InternalAddress(float_constant(c->as_jfloat())));
+        __ ldrs(dest->as_float_reg(), Address(rscratch1));
+      }
+      break;
+    }
+
+    case T_DOUBLE: {
+      if (__ operand_valid_for_float_immediate(c->as_jdouble())) {
+        __ fmovd(dest->as_double_reg(), (c->as_jdouble()));
+      } else {
+        __ adr(rscratch1, InternalAddress(double_constant(c->as_jdouble())));
+        __ ldrd(dest->as_double_reg(), Address(rscratch1));
+      }
+      break;
+    }
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  LIR_Const* c = src->as_constant_ptr();
+  switch (c->type()) {
+  case T_OBJECT:
+    {
+      if (! c->as_jobject())
+        __ str(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      else {
+        const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL);
+        reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false);
+      }
+    }
+    break;
+  case T_INT:
+  case T_FLOAT:
+    {
+      Register reg = zr;
+      if (c->as_jint_bits() == 0)
+        __ strw(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      else {
+        __ movw(rscratch1, c->as_jint_bits());
+        __ strw(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix()));
+      }
+    }
+    break;
+  case T_LONG:
+  case T_DOUBLE:
+    {
+      Register reg = zr;
+      if (c->as_jlong_bits() == 0)
+        __ str(zr, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                 lo_word_offset_in_bytes));
+      else {
+        __ mov(rscratch1, (intptr_t)c->as_jlong_bits());
+        __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                        lo_word_offset_in_bytes));
+      }
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
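+// Only zero constants are stored directly to memory (asserted per type
+// below): the source register is always zr and only the store width varies
+// with the type.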
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+  assert(src->is_constant(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+  LIR_Address* to_addr = dest->as_address_ptr();
+
+  void (Assembler::* insn)(Register Rt, const Address &adr);
+
+  switch (type) {
+  case T_ADDRESS:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::str;
+    break;
+  case T_LONG:
+    assert(c->as_jlong() == 0, "should be");
+    insn = &Assembler::str;
+    break;
+  case T_INT:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strw;
+    break;
+  case T_OBJECT:
+  case T_ARRAY:
+    assert(c->as_jobject() == 0, "should be");
+    if (UseCompressedOops && !wide) {
+      insn = &Assembler::strw;
+    } else {
+      insn = &Assembler::str;
+    }
+    break;
+  case T_CHAR:
+  case T_SHORT:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strh;
+    break;
+  case T_BOOLEAN:
+  case T_BYTE:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strb;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  if (info) add_debug_info_for_null_check_here(info);
+  (_masm->*insn)(zr, as_Address(to_addr, rscratch1));
+}
+
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  // move between cpu-registers
+  if (dest->is_single_cpu()) {
+    if (src->type() == T_LONG) {
+      // Can do LONG -> OBJECT
+      move_regs(src->as_register_lo(), dest->as_register());
+      return;
+    }
+    assert(src->is_single_cpu(), "must match");
+    if (src->type() == T_OBJECT) {
+      __ verify_oop(src->as_register());
+    }
+    move_regs(src->as_register(), dest->as_register());
+
+  } else if (dest->is_double_cpu()) {
+    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
+      // Surprisingly, we can see a move of a long to T_OBJECT
+      __ verify_oop(src->as_register());
+      move_regs(src->as_register(), dest->as_register_lo());
+      return;
+    }
+    assert(src->is_double_cpu(), "must match");
+    Register f_lo = src->as_register_lo();
+    Register f_hi = src->as_register_hi();
+    Register t_lo = dest->as_register_lo();
+    Register t_hi = dest->as_register_hi();
+    assert(f_hi == f_lo, "must be same");
+    assert(t_hi == t_lo, "must be same");
+    move_regs(f_lo, t_lo);
+
+  } else if (dest->is_single_fpu()) {
+    __ fmovs(dest->as_float_reg(), src->as_float_reg());
+
+  } else if (dest->is_double_fpu()) {
+    __ fmovd(dest->as_double_reg(), src->as_double_reg());
+
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  if (src->is_single_cpu()) {
+    if (type == T_ARRAY || type == T_OBJECT) {
+      __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+      __ verify_oop(src->as_register());
+    } else if (type == T_DOUBLE) {
+      // !!! FIXME AARCH64 -- check if this case is only left because it was an error in the first place!!!
+      __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+    } else {
+      __ strw(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+    }
+
+  } else if (src->is_double_cpu()) {
+    Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
+    __ str(src->as_register_lo(), dest_addr_LO);
+
+  } else if (src->is_single_fpu()) {
+    Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+    __ strs(src->as_float_reg(), dest_addr);
+
+  } else if (src->is_double_fpu()) {
+    Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix());
+    __ strd(src->as_double_reg(), dest_addr);
+
+  } else {
+    ShouldNotReachHere();
+  }
+
+}
+
+
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) {
+  LIR_Address* to_addr = dest->as_address_ptr();
+  PatchingStub* patch = NULL;
+  Register compressed_src = rscratch1;
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(src->as_register());
+
+    if (UseCompressedOops && !wide) {
+      __ encode_heap_oop(compressed_src, src->as_register());
+    } else {
+      compressed_src = src->as_register();
+    }
+  }
+
+  int null_check_here = code_offset();
+  switch (type) {
+    case T_FLOAT: {
+      __ strs(src->as_float_reg(), as_Address(to_addr));
+      break;
+    }
+
+    case T_DOUBLE: {
+      __ strd(src->as_double_reg(), as_Address(to_addr));
+      break;
+    }
+
+    case T_ARRAY:   // fall through
+    case T_OBJECT:  // fall through
+      if (UseCompressedOops && !wide) {
+        __ strw(compressed_src, as_Address(to_addr, rscratch2));
+      } else {
+         __ str(compressed_src, as_Address(to_addr));
+      }
+      break;
+    case T_ADDRESS:
+      __ str(src->as_register(), as_Address(to_addr));
+      break;
+    case T_INT:
+      __ strw(src->as_register(), as_Address(to_addr));
+      break;
+
+    case T_LONG: {
+      __ str(src->as_register_lo(), as_Address_lo(to_addr));
+      break;
+    }
+
+    case T_BYTE:    // fall through
+    case T_BOOLEAN: {
+      __ strb(src->as_register(), as_Address(to_addr));
+      break;
+    }
+
+    case T_CHAR:    // fall through
+    case T_SHORT:
+      __ strh(src->as_register(), as_Address(to_addr));
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+  if (info != NULL) {
+    add_debug_info_for_null_check(null_check_here, info);
+  }
+}
+
+
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  assert(src->is_stack(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  if (dest->is_single_cpu()) {
+    if (type == T_ARRAY || type == T_OBJECT) {
+      __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+      __ verify_oop(dest->as_register());
+      // !!! FIXME AARCH64 -- check if this case is only left because it was an error in the first place!!!
+    } else if (type == T_DOUBLE) {
+      __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    } else {
+      __ ldrw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    }
+
+  } else if (dest->is_double_cpu()) {
+    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
+    __ ldr(dest->as_register_lo(), src_addr_LO);
+
+  } else if (dest->is_single_fpu()) {
+    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
+    __ ldrs(dest->as_float_reg(), src_addr);
+
+  } else if (dest->is_double_fpu()) {
+    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
+    __ ldrd(dest->as_double_reg(), src_addr);
+
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  LIR_Opr temp;
+  if (type == T_LONG)
+    temp = FrameMap::rscratch1_long_opr;
+  else
+    temp = FrameMap::rscratch1_opr;
+
+  stack2reg(src, temp, src->type());
+  reg2stack(temp, dest, dest->type(), false);
+}
+
+
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) {
+  LIR_Address* addr = src->as_address_ptr();
+  LIR_Address* from_addr = src->as_address_ptr();
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (info != NULL) {
+    add_debug_info_for_null_check_here(info);
+  }
+  int null_check_here = code_offset();
+  switch (type) {
+    case T_FLOAT: {
+      __ ldrs(dest->as_float_reg(), as_Address(from_addr));
+      break;
+    }
+
+    case T_DOUBLE: {
+      __ ldrd(dest->as_double_reg(), as_Address(from_addr));
+      break;
+    }
+
+    case T_ARRAY:   // fall through
+    case T_OBJECT:  // fall through
+      if (UseCompressedOops && !wide) {
+        __ ldrw(dest->as_register(), as_Address(from_addr));
+      } else {
+         __ ldr(dest->as_register(), as_Address(from_addr));
+      }
+      break;
+    case T_ADDRESS:
+      __ ldr(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_INT:
+      __ ldrw(dest->as_register(), as_Address(from_addr));
+      break;
+
+    case T_LONG: {
+      __ ldr(dest->as_register_lo(), as_Address_lo(from_addr));
+      break;
+    }
+
+    case T_BYTE:
+      __ ldrsb(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_BOOLEAN: {
+      __ ldrb(dest->as_register(), as_Address(from_addr));
+      break;
+    }
+
+    case T_CHAR:
+      __ ldrh(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_SHORT:
+      __ ldrsh(dest->as_register(), as_Address(from_addr));
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    if (UseCompressedOops && !wide) {
+      __ decode_heap_oop(dest->as_register());
+    }
+    __ verify_oop(dest->as_register());
+  }
+}
+
+
+void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); }
+
+
+void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); }
+
+
+int LIR_Assembler::array_element_size(BasicType type) const {
+  int elem_size = type2aelembytes(type);
+  return exact_log2(elem_size);
+}
+
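+// The only LIR_Op3s expected here are idiv/irem; both are lowered to
+// corrected_idivl, with the boolean selecting remainder (lir_irem) versus
+// quotient (lir_idiv).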
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  Register Rdividend = op->in_opr1()->as_register();
+  Register Rdivisor  = op->in_opr2()->as_register();
+  Register Rscratch  = op->in_opr3()->as_register();
+  Register Rresult   = op->result_opr()->as_register();
+  int divisor = -1;
+
+  /*
+  TODO: For some reason, using the Rscratch that gets passed in is
+  not possible because the register allocator does not see the tmp reg
+  as used, and assigns it the same register as Rdividend. We use
+  rscratch1 instead.
+
+  assert(Rdividend != Rscratch, "");
+  assert(Rdivisor  != Rscratch, "");
+  */
+
+  if (Rdivisor == noreg && is_power_of_2(divisor)) {
+    // convert division by a power of two into some shifts and logical operations
+  }
+
+  if (op->code() == lir_irem) {
+    __ corrected_idivl(Rresult, Rdividend, Rdivisor, true, rscratch1);
+  } else if (op->code() == lir_idiv) {
+    __ corrected_idivl(Rresult, Rdividend, Rdivisor, false, rscratch1);
+  } else
+    ShouldNotReachHere();
+}
+
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+  assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+  if (op->block() != NULL)  _branch_target_blocks.append(op->block());
+  if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+#endif
+
+  if (op->cond() == lir_cond_always) {
+    if (op->info() != NULL) add_debug_info_for_branch(op->info());
+    __ b(*(op->label()));
+  } else {
+    Assembler::Condition acond;
+    if (op->code() == lir_cond_float_branch) {
+      bool is_unordered = (op->ublock() == op->block());
+      // Assembler::EQ does not permit unordered branches, so we add
+      // another branch here.  Likewise, Assembler::NE does not permit
+      // ordered branches.
+      if ((is_unordered && op->cond() == lir_cond_equal)
+          || (!is_unordered && op->cond() == lir_cond_notEqual))
+        __ br(Assembler::VS, *(op->ublock()->label()));
+      switch(op->cond()) {
+      case lir_cond_equal:        acond = Assembler::EQ; break;
+      case lir_cond_notEqual:     acond = Assembler::NE; break;
+      case lir_cond_less:         acond = (is_unordered ? Assembler::LT : Assembler::LO); break;
+      case lir_cond_lessEqual:    acond = (is_unordered ? Assembler::LE : Assembler::LS); break;
+      case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break;
+      case lir_cond_greater:      acond = (is_unordered ? Assembler::HI : Assembler::GT); break;
+      default:                    ShouldNotReachHere();
+      }
+    } else {
+      switch (op->cond()) {
+        case lir_cond_equal:        acond = Assembler::EQ; break;
+        case lir_cond_notEqual:     acond = Assembler::NE; break;
+        case lir_cond_less:         acond = Assembler::LT; break;
+        case lir_cond_lessEqual:    acond = Assembler::LE; break;
+        case lir_cond_greaterEqual: acond = Assembler::GE; break;
+        case lir_cond_greater:      acond = Assembler::GT; break;
+        case lir_cond_belowEqual:   acond = Assembler::LS; break;
+        case lir_cond_aboveEqual:   acond = Assembler::HS; break;
+        default:                         ShouldNotReachHere();
+      }
+    }
+    __ br(acond,*(op->label()));
+  }
+}
+
+
+
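+// Conversions are single instructions except for the float/double ->
+// int/long cases: those clear FPSR, convert, then test the invalid-operation
+// flag; if it is set (NaN or out-of-range input) we branch to the
+// operation's stub, which handles producing the result Java requires.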
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  LIR_Opr src  = op->in_opr();
+  LIR_Opr dest = op->result_opr();
+
+  switch (op->bytecode()) {
+    case Bytecodes::_i2f:
+      {
+        __ scvtfws(dest->as_float_reg(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2d:
+      {
+        __ scvtfwd(dest->as_double_reg(), src->as_register());
+        break;
+      }
+    case Bytecodes::_l2d:
+      {
+        __ scvtfd(dest->as_double_reg(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_l2f:
+      {
+        __ scvtfs(dest->as_float_reg(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_f2d:
+      {
+        __ fcvts(dest->as_double_reg(), src->as_float_reg());
+        break;
+      }
+    case Bytecodes::_d2f:
+      {
+        __ fcvtd(dest->as_float_reg(), src->as_double_reg());
+        break;
+      }
+    case Bytecodes::_i2c:
+      {
+        __ ubfx(dest->as_register(), src->as_register(), 0, 16);
+        break;
+      }
+    case Bytecodes::_i2l:
+      {
+        __ sxtw(dest->as_register_lo(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2s:
+      {
+        __ sxth(dest->as_register(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2b:
+      {
+        __ sxtb(dest->as_register(), src->as_register());
+        break;
+      }
+    case Bytecodes::_l2i:
+      {
+        _masm->block_comment("FIXME: This could be a no-op");
+        __ uxtw(dest->as_register(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_d2l:
+      {
+        Register tmp = op->tmp1()->as_register();
+        __ clear_fpsr();
+        __ fcvtzd(dest->as_register_lo(), src->as_double_reg());
+        __ get_fpsr(tmp);
+        __ tst(tmp, 1); // FPSCR.IOC
+        __ br(Assembler::NE, *(op->stub()->entry()));
+        __ bind(*op->stub()->continuation());
+        break;
+      }
+    case Bytecodes::_f2i:
+      {
+        Register tmp = op->tmp1()->as_register();
+        __ clear_fpsr();
+        __ fcvtzsw(dest->as_register(), src->as_float_reg());
+        __ get_fpsr(tmp);
+        __ tst(tmp, 1); // FPSCR.IOC
+        __ br(Assembler::NE, *(op->stub()->entry()));
+        __ bind(*op->stub()->continuation());
+        break;
+      }
+    case Bytecodes::_f2l:
+      {
+        Register tmp = op->tmp1()->as_register();
+        __ clear_fpsr();
+        __ fcvtzs(dest->as_register_lo(), src->as_float_reg());
+        __ get_fpsr(tmp);
+        __ tst(tmp, 1); // FPSCR.IOC
+        __ br(Assembler::NE, *(op->stub()->entry()));
+        __ bind(*op->stub()->continuation());
+        break;
+      }
+    case Bytecodes::_d2i:
+      {
+        Register tmp = op->tmp1()->as_register();
+        __ clear_fpsr();
+        __ fcvtzdw(dest->as_register(), src->as_double_reg());
+        __ get_fpsr(tmp);
+        __ tst(tmp, 1); // FPSCR.IOC
+        __ br(Assembler::NE, *(op->stub()->entry()));
+        __ bind(*op->stub()->continuation());
+        break;
+      }
+    default: ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  if (op->init_check()) {
+    __ ldrb(rscratch1, Address(op->klass()->as_register(),
+                               instanceKlass::init_state_offset()));
+    __ cmpw(rscratch1, instanceKlass::fully_initialized);
+    add_debug_info_for_null_check_here(op->stub()->info());
+    __ br(Assembler::NE, *op->stub()->entry());
+  }
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *op->stub()->entry());
+  __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len =  op->len()->as_register();
+  __ uxtw(len, len);
+
+  if (UseSlowPath ||
+      (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
+      (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
+    __ b(*op->stub()->entry());
+  } else {
+    Register tmp1 = op->tmp1()->as_register();
+    Register tmp2 = op->tmp2()->as_register();
+    Register tmp3 = op->tmp3()->as_register();
+    if (len == tmp1) {
+      tmp1 = tmp3;
+    } else if (len == tmp2) {
+      tmp2 = tmp3;
+    } else if (len == tmp3) {
+      // everything is ok
+    } else {
+      __ mov(tmp3, len);
+    }
+    __ allocate_array(op->obj()->as_register(),
+                      len,
+                      tmp1,
+                      tmp2,
+                      arrayOopDesc::header_size(op->type()),
+                      array_element_size(op->type()),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
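+// Update the receiver-type rows of a ReceiverTypeData entry: first try to
+// match recv against an existing row and bump that row's counter; failing
+// that, claim the first empty row for recv with an initial count.  Either
+// way we branch to update_done; if every row is taken and none matches, we
+// simply fall through.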
+void LIR_Assembler::type_profile_helper(Register mdo,
+                                        ciMethodData *md, ciProfileData *data,
+                                        Register recv, Label* update_done) {
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    __ ldr(rscratch1, Address(rscratch2));
+    __ cmp(recv, rscratch1);
+    __ br(Assembler::NE, next_test);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
+    __ addptr(data_addr, DataLayout::counter_increment);
+    __ b(*update_done);
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    __ lea(rscratch2,
+           Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    Address recv_addr(rscratch2);
+    __ ldr(rscratch1, recv_addr);
+    __ cbnz(rscratch1, next_test);
+    __ str(recv, recv_addr);
+    __ mov(rscratch1, DataLayout::counter_increment);
+    __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
+    __ str(rscratch1, Address(rscratch2));
+    __ b(*update_done);
+    __ bind(next_test);
+  }
+}
+
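+// Common type-check logic for checkcast/instanceof.  Control leaves through
+// one of the three labels.  When profiling, a null object sets the
+// null_seen flag in the MDO, and the success/failure paths update the
+// receiver rows or decrement the counter before branching.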
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  // we always need a stub for the failure case.
+  CodeStub* stub = op->stub();
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  Register Rtmp1 = noreg;
+
+  // check if it needs to be profiled
+  ciMethodData* md;
+  ciProfileData* data;
+
+  if (op->should_profile()) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    int bci = op->profiled_bci();
+    md = method->method_data_or_null();
+    assert(md != NULL, "Sanity");
+    data = md->bci_to_data(bci);
+    assert(data != NULL,                "need data for type check");
+    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+  }
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = op->should_profile() ? &profile_cast_success : success;
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+
+  if (obj == k_RInfo) {
+    k_RInfo = dst;
+  } else if (obj == klass_RInfo) {
+    klass_RInfo = dst;
+  }
+  if (k->is_loaded() && !UseCompressedOops) {
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo);
+  } else {
+    Rtmp1 = op->tmp3()->as_register();
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
+  }
+
+  assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+  if (op->should_profile()) {
+    Label not_null;
+    __ cbnz(obj, not_null);
+    // Object is null; update MDO and exit
+    Register mdo  = klass_RInfo;
+    __ movoop(mdo, md->constant_encoding());
+    Address data_addr
+      = __ form_address(rscratch2, mdo,
+                        md->byte_offset_of_slot(data, DataLayout::header_offset()),
+                        LogBytesPerWord);
+    int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+    __ ldr(rscratch1, data_addr);
+    __ orr(rscratch1, rscratch1, header_bits);
+    __ str(rscratch1, data_addr);
+    __ b(*obj_is_null);
+    __ bind(not_null);
+  } else {
+    __ cbz(obj, *obj_is_null);
+  }
+
+  if (!k->is_loaded()) {
+    jobject2reg_with_patching(k_RInfo, op->info_for_patch());
+  } else {
+    __ movoop(k_RInfo, k->constant_encoding());
+  }
+  __ verify_oop(obj);
+
+  if (op->fast_check()) {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(rscratch1, obj);
+    __ cmp( rscratch1, k_RInfo);
+
+    __ br(Assembler::NE, *failure_target);
+    // successful cast, fall through to profile or jump
+  } else {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(klass_RInfo, obj);
+    if (k->is_loaded()) {
+      // See if we get an immediate positive hit
+      __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset())));
+      __ cmp(k_RInfo, rscratch1);
+      if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
+        __ br(Assembler::NE, *failure_target);
+        // successful cast, fall through to profile or jump
+      } else {
+        // See if we get an immediate positive hit
+        __ br(Assembler::EQ, *success_target);
+        // check for self
+        __ cmp(klass_RInfo, k_RInfo);
+        __ br(Assembler::EQ, *success_target);
+
+        __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+        __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+        __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+        // result is a boolean
+        __ cbzw(klass_RInfo, *failure_target);
+        // successful cast, fall through to profile or jump
+      }
+    } else {
+      // perform the fast part of the checking logic
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+      // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+      // result is a boolean
+      __ cbz(k_RInfo, *failure_target);
+      // successful cast, fall through to profile or jump
+    }
+  }
+  if (op->should_profile()) {
+    Register mdo  = klass_RInfo, recv = k_RInfo;
+    __ bind(profile_cast_success);
+    __ movoop(mdo, md->constant_encoding());
+    __ load_klass(recv, obj);
+    Label update_done;
+    type_profile_helper(mdo, md, data, recv, success);
+    __ b(*success);
+
+    __ bind(profile_cast_failure);
+    __ movoop(mdo, md->constant_encoding());
+    Address counter_addr
+      = __ form_address(rscratch2, mdo,
+                        md->byte_offset_of_slot(data, CounterData::count_offset()),
+                        LogBytesPerWord);
+    __ ldr(rscratch1, counter_addr);
+    __ sub(rscratch1, rscratch1, DataLayout::counter_increment);
+    __ str(rscratch1, counter_addr);
+    __ b(*failure);
+  }
+  __ b(*success);
+}
+
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    Register value = op->object()->as_register();
+    Register array = op->array()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register Rtmp1 = op->tmp3()->as_register();
+
+    CodeStub* stub = op->stub();
+
+    // check if it needs to be profiled
+    ciMethodData* md;
+    ciProfileData* data;
+
+    if (op->should_profile()) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      int bci = op->profiled_bci();
+      md = method->method_data_or_null();
+      assert(md != NULL, "Sanity");
+      data = md->bci_to_data(bci);
+      assert(data != NULL,                "need data for type check");
+      assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+    }
+    Label profile_cast_success, profile_cast_failure, done;
+    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+    if (op->should_profile()) {
+      Label not_null;
+      __ cbnz(value, not_null);
+      // Object is null; update MDO and exit
+      Register mdo  = klass_RInfo;
+      __ movoop(mdo, md->constant_encoding());
+      Address data_addr
+        = __ form_address(rscratch2, mdo,
+                          md->byte_offset_of_slot(data, DataLayout::header_offset()),
+                          LogBytesPerInt);
+      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+      __ ldrw(rscratch1, data_addr);
+      __ orrw(rscratch1, rscratch1, header_bits);
+      __ strw(rscratch1, data_addr);
+      __ b(done);
+      __ bind(not_null);
+    } else {
+      __ cbz(value, done);
+    }
+
+    add_debug_info_for_null_check_here(op->info_for_exception());
+    __ load_klass(k_RInfo, array);
+    __ load_klass(klass_RInfo, value);
+
+    // get instance klass (it's already uncompressed)
+    __ ldr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset()));
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+    // result is a boolean
+    __ cbzw(k_RInfo, *failure_target);
+    // fall through to the success case
+
+    if (op->should_profile()) {
+      Register mdo  = klass_RInfo, recv = k_RInfo;
+      __ bind(profile_cast_success);
+      __ movoop(mdo, md->constant_encoding());
+      __ load_klass(recv, value);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &done);
+      __ b(done);
+
+      __ bind(profile_cast_failure);
+      __ movoop(mdo, md->constant_encoding());
+      Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+      __ lea(rscratch2, counter_addr);
+      __ ldr(rscratch1, Address(rscratch2));
+      __ sub(rscratch1, rscratch1, DataLayout::counter_increment);
+      __ str(rscratch1, Address(rscratch2));
+      __ b(*stub->entry());
+    }
+
+    __ bind(done);
+  } else if (code == lir_checkcast) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success;
+    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
+    __ bind(success);
+    if (dst != obj) {
+      __ mov(dst, obj);
+    }
+  } else if (code == lir_instanceof) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, &failure, &failure);
+    __ bind(failure);
+    __ mov(dst, zr);
+    __ b(done);
+    __ bind(success);
+    __ mov(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
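+// Compare-and-swap via a load-exclusive/store-exclusive loop.  In effect:
+//
+//   do {
+//     old = load-acquire-exclusive(addr);
+//     if (old != cmpval) break;                 // rscratch1 set non-zero
+//   } while (store-release-exclusive(addr, newval) fails);
+//   full-barrier();
+//
+// so rscratch1 ends up zero exactly when the swap happened.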
+void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
+  Label retry_load, nope;
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+    __ prfm(Address(addr), PSTL1STRM);
+  __ bind(retry_load);
+  __ ldaxrw(rscratch1, addr);
+  __ cmpw(rscratch1, cmpval);
+  __ cset(rscratch1, Assembler::NE);
+  __ br(Assembler::NE, nope);
+  // if we store+flush with no intervening write, rscratch1 will be zero
+  __ stlxrw(rscratch1, newval, addr);
+  // retry so we only ever return after a load fails to compare;
+  // this ensures we don't return a stale value after a failed write.
+  __ cbnzw(rscratch1, retry_load);
+  __ bind(nope);
+  __ membar(__ AnyAny);
+}
+
+void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
+  Label retry_load, nope;
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
+    __ prfm(Address(addr), PSTL1STRM);
+  __ bind(retry_load);
+  __ ldaxr(rscratch1, addr);
+  __ cmp(rscratch1, cmpval);
+  __ cset(rscratch1, Assembler::NE);
+  __ br(Assembler::NE, nope);
+  // if we store+flush with no intervening write, rscratch1 will be zero
+  __ stlxr(rscratch1, newval, addr);
+  // retry so we only ever return after a load fails to compare;
+  // this ensures we don't return a stale value after a failed write.
+  __ cbnz(rscratch1, retry_load);
+  __ bind(nope);
+  __ membar(__ AnyAny);
+}
+
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  assert(VM_Version::supports_cx8(), "wrong machine");
+  Register addr = as_reg(op->addr());
+  Register newval = as_reg(op->new_value());
+  Register cmpval = as_reg(op->cmp_value());
+  Label succeed, fail, around;
+
+  if (op->code() == lir_cas_obj) {
+    if (UseCompressedOops) {
+      Register t1 = op->tmp1()->as_register();
+      assert(op->tmp1()->is_valid(), "must be");
+      __ encode_heap_oop(t1, cmpval);
+      cmpval = t1;
+      __ encode_heap_oop(rscratch2, newval);
+      newval = rscratch2;
+      casw(addr, newval, cmpval);
+    } else {
+      casl(addr, newval, cmpval);
+    }
+  } else if (op->code() == lir_cas_int) {
+    casw(addr, newval, cmpval);
+  } else {
+    casl(addr, newval, cmpval);
+  }
+}
+
+
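+// Conditional move.  When the two inputs are the integer constants 0 and 1
+// the result is just the condition (or its negation) materialized with cset;
+// otherwise both inputs are forced into registers and combined with csel.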
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+
+  Assembler::Condition acond, ncond;
+  switch (condition) {
+  case lir_cond_equal:        acond = Assembler::EQ; ncond = Assembler::NE; break;
+  case lir_cond_notEqual:     acond = Assembler::NE; ncond = Assembler::EQ; break;
+  case lir_cond_less:         acond = Assembler::LT; ncond = Assembler::GE; break;
+  case lir_cond_lessEqual:    acond = Assembler::LE; ncond = Assembler::GT; break;
+  case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break;
+  case lir_cond_greater:      acond = Assembler::GT; ncond = Assembler::LE; break;
+  case lir_cond_belowEqual:   Unimplemented(); break;
+  case lir_cond_aboveEqual:   Unimplemented(); break;
+  default:                    ShouldNotReachHere();
+  }
+
+  assert(result->is_single_cpu() || result->is_double_cpu(),
+         "expect single register for result");
+  if (opr1->is_constant() && opr2->is_constant()
+      && opr1->type() == T_INT && opr2->type() == T_INT) {
+    jint val1 = opr1->as_jint();
+    jint val2 = opr2->as_jint();
+    if (val1 == 0 && val2 == 1) {
+      __ cset(result->as_register(), ncond);
+      return;
+    } else if (val1 == 1 && val2 == 0) {
+      __ cset(result->as_register(), acond);
+      return;
+    }
+  }
+
+  if (opr1->is_constant() && opr2->is_constant()
+      && opr1->type() == T_LONG && opr2->type() == T_LONG) {
+    jlong val1 = opr1->as_jlong();
+    jlong val2 = opr2->as_jlong();
+    if (val1 == 0 && val2 == 1) {
+      __ cset(result->as_register_lo(), ncond);
+      return;
+    } else if (val1 == 1 && val2 == 0) {
+      __ cset(result->as_register_lo(), acond);
+      return;
+    }
+  }
+
+  if (opr1->is_stack()) {
+    stack2reg(opr1, FrameMap::rscratch1_opr, result->type());
+    opr1 = FrameMap::rscratch1_opr;
+  } else if (opr1->is_constant()) {
+    LIR_Opr tmp
+      = opr1->type() == T_LONG ? FrameMap::rscratch1_long_opr : FrameMap::rscratch1_opr;
+    const2reg(opr1, tmp, lir_patch_none, NULL);
+    opr1 = tmp;
+  }
+
+  if (opr2->is_stack()) {
+    stack2reg(opr2, FrameMap::rscratch2_opr, result->type());
+    opr2 = FrameMap::rscratch2_opr;
+  } else if (opr2->is_constant()) {
+    LIR_Opr tmp
+      = opr2->type() == T_LONG ? FrameMap::rscratch2_long_opr : FrameMap::rscratch2_opr;
+    const2reg(opr2, tmp, lir_patch_none, NULL);
+    opr2 = tmp;
+  }
+
+  if (result->type() == T_LONG)
+    __ csel(result->as_register_lo(), opr1->as_register_lo(), opr2->as_register_lo(), acond);
+  else
+    __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond);
+}
+
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+  if (left->is_single_cpu()) {
+    Register lreg = left->as_register();
+    Register dreg = as_reg(dest);
+
+    if (right->is_single_cpu()) {
+      // cpu register - cpu register
+
+      assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT,
+             "should be");
+      Register rreg = right->as_register();
+      switch (code) {
+      case lir_add: __ addw (dest->as_register(), lreg, rreg); break;
+      case lir_sub: __ subw (dest->as_register(), lreg, rreg); break;
+      case lir_mul: __ mulw (dest->as_register(), lreg, rreg); break;
+      default:      ShouldNotReachHere();
+      }
+
+    } else if (right->is_double_cpu()) {
+      Register rreg = right->as_register_lo();
+      // single_cpu + double_cpu: can happen with obj+long
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      switch (code) {
+      case lir_add: __ add(dreg, lreg, rreg); break;
+      case lir_sub: __ sub(dreg, lreg, rreg); break;
+      default: ShouldNotReachHere();
+      }
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      jlong c;
+
+      // FIXME.  This is fugly: we really need to factor all this logic.
+      switch(right->type()) {
+      case T_LONG:
+        c = right->as_constant_ptr()->as_jlong();
+        break;
+      case T_INT:
+      case T_ADDRESS:
+        c = right->as_constant_ptr()->as_jint();
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch(left->type()) {
+      case T_INT:
+        switch (code) {
+        case lir_add: __ addw(dreg, lreg, c); break;
+        case lir_sub: __ subw(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+      case T_OBJECT:
+      case T_ADDRESS:
+        switch (code) {
+        case lir_add: __ add(dreg, lreg, c); break;
+        case lir_sub: __ sub(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_double_cpu()) {
+    Register lreg_lo = left->as_register_lo();
+
+    if (right->is_double_cpu()) {
+      // cpu register - cpu register
+      Register rreg_lo = right->as_register_lo();
+      switch (code) {
+      case lir_add: __ add (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_sub: __ sub (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_mul: __ mul (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false, rscratch1); break;
+      case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true, rscratch1); break;
+      default:
+        ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      Register dreg = as_reg(dest);
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg_lo) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch (code) {
+        case lir_add: __ add(dreg, lreg_lo, c); break;
+        case lir_sub: __ sub(dreg, lreg_lo, c); break;
+        default:
+          ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_fpu()) {
+    assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
+    switch (code) {
+    case lir_add: __ fadds (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_sub: __ fsubs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_mul: __ fmuls (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_div: __ fdivs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    default:
+      ShouldNotReachHere();
+    }
+  } else if (left->is_double_fpu()) {
+    if (right->is_double_fpu()) {
+      // cpu register - cpu register
+      switch (code) {
+      case lir_add: __ faddd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_sub: __ fsubd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_mul: __ fmuld (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_div: __ fdivd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      if (right->is_constant()) {
+        ShouldNotReachHere();
+      }
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_stack() || left->is_address()) {
+    assert(left == dest, "left and dest must be equal");
+    ShouldNotReachHere();
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }
+
+
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+  switch(code) {
+  case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
+  case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
+  default      : ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+
+  assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
+  Register Rleft = left->is_single_cpu() ? left->as_register() :
+                                           left->as_register_lo();
+   if (dst->is_single_cpu()) {
+     Register Rdst = dst->as_register();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, right->as_jint()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   } else {
+     Register Rdst = dst->as_register_lo();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jlong()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   }
+}
+
+
+
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); }
+
+
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  if (opr1->is_constant() && opr2->is_single_cpu()) {
+    // tableswitch
+    Register reg = as_reg(opr2);
+    struct tableswitch &table = switches[opr1->as_constant_ptr()->as_jint()];
+    __ tableswitch(reg, table._first_key, table._last_key, table._branches, table._after);
+  } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) {
+    Register reg1 = as_reg(opr1);
+    if (opr2->is_single_cpu()) {
+      // cpu register - cpu register
+      Register reg2 = opr2->as_register();
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        __ cmp(reg1, reg2);
+      } else {
+        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+        __ cmpw(reg1, reg2);
+      }
+      return;
+    }
+    if (opr2->is_double_cpu()) {
+      // cpu register - cpu register
+      Register reg2 = opr2->as_register_lo();
+      __ cmp(reg1, reg2);
+      return;
+    }
+
+    if (opr2->is_constant()) {
+      jlong imm;
+      switch(opr2->type()) {
+      case T_LONG:
+        imm = opr2->as_constant_ptr()->as_jlong();
+        break;
+      case T_INT:
+      case T_ADDRESS:
+        imm = opr2->as_constant_ptr()->as_jint();
+        break;
+      case T_OBJECT:
+      case T_ARRAY:
+        imm = jlong(opr2->as_constant_ptr()->as_jobject());
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+
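+      // int-sized operands get a 32-bit compare (cmpw), wider ones a 64-bit cmp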
+      if (Assembler::operand_valid_for_add_sub_immediate(imm)) {
+        if (type2aelembytes(opr1->type()) <= 4)
+          __ cmpw(reg1, imm);
+        else
+          __ cmp(reg1, imm);
+        return;
+      } else {
+        __ mov(rscratch1, imm);
+        if (type2aelembytes(opr1->type()) <= 4)
+          __ cmpw(reg1, rscratch1);
+        else
+          __ cmp(reg1, rscratch1);
+        return;
+      }
+    } else
+      ShouldNotReachHere();
+  } else if (opr1->is_single_fpu()) {
+    FloatRegister reg1 = opr1->as_float_reg();
+    assert(opr2->is_single_fpu(), "expect single float register");
+    FloatRegister reg2 = opr2->as_float_reg();
+    __ fcmps(reg1, reg2);
+  } else if (opr1->is_double_fpu()) {
+    FloatRegister reg1 = opr1->as_double_reg();
+    assert(opr2->is_double_fpu(), "expect double float register");
+    FloatRegister reg2 = opr2->as_double_reg();
+    __ fcmpd(reg1, reg2);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    bool is_unordered_less = (code == lir_ucmp_fd2i);
+    if (left->is_single_fpu()) {
+      __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register());
+    } else if (left->is_double_fpu()) {
+      __ float_cmp(false, is_unordered_less ? -1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register());
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (code == lir_cmp_l2i) {
+    Label done;
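+    // dst is preset to -1; unless the comparison is "less than", the csinc
+    // below rewrites it to 0 (equal) or +1 (greater).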
+    __ cmp(left->as_register_lo(), right->as_register_lo());
+    __ mov(dst->as_register(), (u_int64_t)-1L);
+    __ br(Assembler::LT, done);
+    __ csinc(dst->as_register(), zr, zr, Assembler::EQ);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::align_call(LIR_Code code) {  }
+
+
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  __ trampoline_call(Address(op->addr(), rtype));
+  add_call_info(code_offset(), op->info());
+}
+
+
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  __ ic_call(op->addr());
+  add_call_info(code_offset(), op->info());
+}
+
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere();
+}
+
+
+void LIR_Assembler::emit_static_call_stub() {
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+  // !!! FIXME AARCH64
+  // The static stub relocation also tags the methodOop in the code stream.
+  // For jdk7 we have to use movoop and locate the oop in the constant pool;
+  // if we used an immediate instead, patching would fail to update the pool
+  // oop and GC would overwrite the patch with movk/z 0x0000 again.
+  __ movoop(rmethod, (jobject)NULL);
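+  // movptr always emits a fixed-length mov/movk sequence, leaving room for
+  // the branch target below to be patched in later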
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
+
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub();
+}
+
+
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == r0, "must match");
+  assert(exceptionPC->as_register() == r3, "must match");
+
+  // exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers)
+  info->add_register_oop(exceptionOop);
+  Runtime1::StubID unwind_id;
+
+  // get current pc information
+  // pc is only needed if the method has an exception handler, the unwind code does not need it.
+  int pc_for_athrow_offset = __ offset();
+  InternalAddress pc_for_athrow(__ pc());
+  __ adr(exceptionPC->as_register(), pc_for_athrow);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(r0);
+  // search an exception handler (r0: exception oop, r3: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+
+  // FIXME: enough room for two byte trap   ????
+  __ nop();
+}
+
+
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == r0, "must match");
+
+  __ b(_unwind_handler_entry);
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+
+  switch (left->type()) {
+    case T_INT:
+      switch (code) {
+      case lir_shl:  __ lslvw (dreg, lreg, count->as_register()); break;
+      case lir_shr:  __ asrvw (dreg, lreg, count->as_register()); break;
+      case lir_ushr: __ lsrvw (dreg, lreg, count->as_register()); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    case T_LONG:
+    case T_ADDRESS:
+    case T_OBJECT:
+      switch (code) {
+      case lir_shl:  __ lslv (dreg, lreg, count->as_register()); break;
+      case lir_shr:  __ asrv (dreg, lreg, count->as_register()); break;
+      case lir_ushr: __ lsrv (dreg, lreg, count->as_register()); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+
+  switch (left->type()) {
+    case T_INT:
+      switch (code) {
+      case lir_shl:  __ lslw (dreg, lreg, count); break;
+      case lir_shr:  __ asrw (dreg, lreg, count); break;
+      case lir_ushr: __ lsrw (dreg, lreg, count); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    case T_LONG:
+    case T_ADDRESS:
+    case T_OBJECT:
+      switch (code) {
+      case lir_shl:  __ lsl (dreg, lreg, count); break;
+      case lir_shr:  __ asr (dreg, lreg, count); break;
+      case lir_ushr: __ lsr (dreg, lreg, count); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+}
+
+
+void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ str (r, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+void LIR_Assembler::store_parameter(jint c,     int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ mov (rscratch1, c);
+  __ str (rscratch1, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+void LIR_Assembler::store_parameter(jobject o,  int offset_from_rsp_in_words) {
+  ShouldNotReachHere();
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ lea(rscratch1, __ constant_oop_address(o));
+  __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+// This code replaces a call to arraycopy; no exception may
+// be thrown in this code, they must be thrown in the System.arraycopy
+// activation frame; we could save some checks if this would not be the case
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  ciArrayKlass* default_type = op->expected_type();
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length  = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+
+  CodeStub* stub = op->stub();
+  int flags = op->flags();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+  // if we don't know anything, just go through the generic arraycopy
+  if (default_type == NULL // || basic_type == T_OBJECT
+      ) {
+    Label done;
+    assert(src == r1 && src_pos == r2, "mismatch in calling convention");
+
+    // Save the arguments in case the generic arraycopy fails and we
+    // have to fall back to the JNI stub
+    __ stp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+    __ stp(length,  src_pos, Address(sp, 2*BytesPerWord));
+    __ str(src,              Address(sp, 4*BytesPerWord));
+
+    address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+    address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+    // The arguments are in java calling convention so we shift them
+    // to C convention
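+    // each destination register here is distinct from every source still to
+    // be read (see the asserts), so no argument is clobbered before use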
+    assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg0, j_rarg0);
+    assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg1, j_rarg1);
+    assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg2, j_rarg2);
+    assert_different_registers(c_rarg3, j_rarg4);
+    __ mov(c_rarg3, j_rarg3);
+    __ mov(c_rarg4, j_rarg4);
+    if (copyfunc_addr == NULL) { // Use C version if stub was not generated
+      __ mov(rscratch1, RuntimeAddress(C_entry));
+      __ blrt(rscratch1, 5, 0, 1);
+    } else {
+#ifndef PRODUCT
+      if (PrintC1Statistics) {
+        __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
+      }
+#endif
+      __ far_call(RuntimeAddress(copyfunc_addr));
+    }
+
+    __ cbz(r0, *stub->continuation());
+
+    // Reload values from the stack so they are where the stub
+    // expects them.
+    __ ldp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+    __ ldp(length,  src_pos, Address(sp, 2*BytesPerWord));
+    __ ldr(src,              Address(sp, 4*BytesPerWord));
+
+    if (copyfunc_addr != NULL) {
+      // r0 is -1^K where K == partial copied count
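+    // (eon with zr is a bitwise NOT, so the next instruction leaves K in rscratch1)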
+      __ eonw(rscratch1, r0, zr);
+      // adjust length down and src/dst pos up by the partial copied count
+      __ subw(length, length, rscratch1);
+      __ addw(src_pos, src_pos, rscratch1);
+      __ addw(dst_pos, dst_pos, rscratch1);
+    }
+    __ b(*stub->entry());
+
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
+
+  int elem_size = type2aelembytes(basic_type);
+  int shift_amount;
+  int scale = exact_log2(elem_size);
+
+  Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
+  Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
+  Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
+  Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
+
+  // test for NULL
+  if (flags & LIR_OpArrayCopy::src_null_check) {
+    __ cbz(src, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_null_check) {
+    __ cbz(dst, *stub->entry());
+  }
+
+  // If the compiler was not able to prove that exact type of the source or the destination
+  // of the arraycopy is an array type, check at runtime if the source or the destination is
+  // an instance type.
+  if (flags & LIR_OpArrayCopy::type_check) {
+    if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+      __ load_klass(tmp, dst);
+      __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ cmpw(rscratch1, Klass::_lh_neutral_value);
+      __ br(Assembler::GE, *stub->entry());
+    }
+
+    if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+      __ load_klass(tmp, src);
+      __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ cmpw(rscratch1, Klass::_lh_neutral_value);
+      __ br(Assembler::GE, *stub->entry());
+    }
+  }
+
+  // check if negative
+  if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+    __ cmpw(src_pos, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+    __ cmpw(dst_pos, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+
+  if (flags & LIR_OpArrayCopy::length_positive_check) {
+    __ cmpw(length, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    __ addw(tmp, src_pos, length);
+    __ ldrw(rscratch1, src_length_addr);
+    __ cmpw(tmp, rscratch1);
+    __ br(Assembler::HI, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    __ addw(tmp, dst_pos, length);
+    __ ldrw(rscratch1, dst_length_addr);
+    __ cmpw(tmp, rscratch1);
+    __ br(Assembler::HI, *stub->entry());
+  }
+
+  // FIXME: The logic in LIRGenerator::arraycopy_helper clears
+  // length_positive_check if the source of our length operand is an
+  // arraylength.  However, that arraylength might be zero, and the
+  // stub that we're about to call contains an assertion that count != 0.
+  // So we make this check purely in order not to trigger an
+  // assertion failure.
+  __ cbzw(length, *stub->continuation());
+
+  if (flags & LIR_OpArrayCopy::type_check) {
+    // We don't know the array types are compatible
+    if (basic_type != T_OBJECT) {
+      // Simple test for basic type arrays
+      if (UseCompressedOops) {
+        __ ldrw(tmp, src_klass_addr);
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(tmp, src_klass_addr);
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::NE, *stub->entry());
+    } else {
+      // For object arrays, if src is a sub class of dst then we can
+      // safely do the copy.
+      Label cont, slow;
+
+#define PUSH(r1, r2)                                    \
+      stp(r1, r2, __ pre(sp, -2 * wordSize));
+
+#define POP(r1, r2)                                     \
+      ldp(r1, r2, __ post(sp, 2 * wordSize));
+
+      __ PUSH(src, dst);
+
+      __ load_klass(src, src);
+      __ load_klass(dst, dst);
+
+      __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
+
+      __ PUSH(src, dst);
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ POP(src, dst);
+
+      __ cbnz(src, cont);
+
+      __ bind(slow);
+      __ POP(src, dst);
+
+      address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+      if (copyfunc_addr != NULL) { // use stub if available
+        // src is not a sub class of dst so we have to do a
+        // per-element check.
+
+        int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+        if ((flags & mask) != mask) {
+          // At least one of them is known to be an object array; check that the other one is too.
+          assert(flags & mask, "one of the two should be known to be an object array");
+
+          if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+            __ load_klass(tmp, src);
+          } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+            __ load_klass(tmp, dst);
+          }
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
+          Address klass_lh_addr(tmp, lh_offset);
+          jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+          __ ldrw(rscratch1, klass_lh_addr);
+          __ mov(rscratch2, objArray_lh);
+          __ eorw(rscratch1, rscratch1, rscratch2);
+          __ cbnzw(rscratch1, *stub->entry());
+        }
+
+       // Spill because stubs can use any register they like and it's
+       // easier to restore just those that we care about.
+        __ stp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+        __ stp(length,  src_pos, Address(sp, 2*BytesPerWord));
+        __ str(src,              Address(sp, 4*BytesPerWord));
+
+        __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale)));
+        __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
+        assert_different_registers(c_rarg0, dst, dst_pos, length);
+        __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale)));
+        __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
+        assert_different_registers(c_rarg1, dst, length);
+        __ uxtw(c_rarg2, length);
+        assert_different_registers(c_rarg2, dst);
+
+        __ load_klass(c_rarg4, dst);
+        __ ldr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset()));
+        __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
+        __ far_call(RuntimeAddress(copyfunc_addr));
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          Label failed;
+          __ cbnz(r0, failed);
+          __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
+          __ bind(failed);
+        }
+#endif
+
+        __ cbz(r0, *stub->continuation());
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
+        }
+#endif
+        assert_different_registers(dst, dst_pos, length, src_pos, src, r0, rscratch1);
+
+        // Restore previously spilled arguments
+        __ ldp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+        __ ldp(length,  src_pos, Address(sp, 2*BytesPerWord));
+        __ ldr(src,              Address(sp, 4*BytesPerWord));
+
+        // return value is -1^K where K is partial copied count
+        __ eonw(rscratch1, r0, zr);
+        // adjust length down and src/dst pos up by the partial copied count
+        __ subw(length, length, rscratch1);
+        __ addw(src_pos, src_pos, rscratch1);
+        __ addw(dst_pos, dst_pos, rscratch1);
+      }
+
+      __ b(*stub->entry());
+
+      __ bind(cont);
+      __ POP(src, dst);
+    }
+  }
+
+#ifdef ASSERT
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class.  For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type.  For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    Label known_ok, halt;
+    __ movoop(tmp, default_type->constant_encoding());
+#ifdef _LP64
+    if (UseCompressedOops) {
+      __ encode_heap_oop(tmp);
+    }
+#endif
+
+    if (basic_type != T_OBJECT) {
+
+      if (UseCompressedOops) {
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::NE, halt);
+      if (UseCompressedOops) {
+        __ ldrw(rscratch1, src_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, src_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::EQ, known_ok);
+    } else {
+      if (UseCompressedOops) {
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::EQ, known_ok);
+      __ cmp(src, dst);
+      __ br(Assembler::EQ, known_ok);
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
+  }
+#endif
+
+  __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale)));
+  __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
+  assert_different_registers(c_rarg0, dst, dst_pos, length);
+  __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale)));
+  __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
+  assert_different_registers(c_rarg1, dst, length);
+  __ uxtw(c_rarg2, length);
+  assert_different_registers(c_rarg2, dst);
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+  CodeBlob *cb = CodeCache::find_blob(entry);
+  if (cb) {
+    __ far_call(RuntimeAddress(entry));
+  } else {
+    __ call_VM_leaf(entry, 3);
+  }
+
+  __ bind(*stub->continuation());
+}
+
+
+
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register();  // may not be an oop
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  if (!UseFastLocking) {
+    __ b(*op->stub()->entry());
+  } else if (op->code() == lir_lock) {
+    Register scratch = noreg;
+    if (UseBiasedLocking) {
+      scratch = op->scratch_opr()->as_register();
+    }
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    // add debug info for NullPointerException only if one is possible
+    int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
+    if (op->info() != NULL) {
+      add_debug_info_for_null_check(null_check_offset, op->info());
+    }
+    // done
+  } else if (op->code() == lir_unlock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+  } else {
+    Unimplemented();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
+
+  // Update counter for all call types
+  ciMethodData* md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo  = op->mdo()->as_register();
+  __ movoop(mdo, md->constant_encoding());
+  Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the methodDataOop rather than needing to do
+      // dynamic tests on the receiver type
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ addptr(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ movoop(rscratch1, known_klass->constant_encoding());
+          __ lea(rscratch2, recv_addr);
+          __ str(rscratch1, Address(rscratch2));
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ addptr(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ addptr(counter_addr, DataLayout::counter_increment);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call
+    __ addptr(counter_addr, DataLayout::counter_increment);
+  }
+}
+
+
+void LIR_Assembler::emit_delay(LIR_OpDelay*) {
+  Unimplemented();
+}
+
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+  __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
+}
+
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(),  "crc must be register");
+  assert(op->val()->is_single_cpu(),  "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register res = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, res);
+  unsigned long offset;
+  __ adrp(res, ExternalAddress(StubRoutines::crc_table_addr()), offset);
+  if (offset) __ add(res, res, offset);
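+  // res now holds the address of the CRC32 lookup table used by update_byte_crc32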
+
+  __ ornw(crc, zr, crc); // ~crc
+  __ update_byte_crc32(crc, val, res);
+  __ ornw(res, zr, crc); // ~crc
+}
+
+void LIR_Assembler::align_backward_branch_target() {
+}
+
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  if (left->is_single_cpu()) {
+    assert(dest->is_single_cpu(), "expect single result reg");
+    __ negw(dest->as_register(), left->as_register());
+  } else if (left->is_double_cpu()) {
+    assert(dest->is_double_cpu(), "expect double result reg");
+    __ neg(dest->as_register_lo(), left->as_register_lo());
+  } else if (left->is_single_fpu()) {
+    assert(dest->is_single_fpu(), "expect single float result reg");
+    __ fnegs(dest->as_float_reg(), left->as_float_reg());
+  } else {
+    assert(left->is_double_fpu(), "expect double float operand reg");
+    assert(dest->is_double_fpu(), "expect double float result reg");
+    __ fnegd(dest->as_double_reg(), left->as_double_reg());
+  }
+}
+
+
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
+  __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr()));
+}
+
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  assert(!tmp->is_valid(), "don't need temporary");
+
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  if (cb) {
+    __ far_call(RuntimeAddress(dest));
+  } else {
+    __ mov(rscratch1, RuntimeAddress(dest));
+    int len = args->length();
+    int type = 0;
+    if (! result->is_illegal()) {
+      switch (result->type()) {
+      case T_VOID:
+        type = 0;
+        break;
+      case T_INT:
+      case T_LONG:
+      case T_OBJECT:
+        type = 1;
+        break;
+      case T_FLOAT:
+        type = 2;
+        break;
+      case T_DOUBLE:
+        type = 3;
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+    int num_gpargs = 0;
+    int num_fpargs = 0;
+    for (int i = 0; i < args->length(); i++) {
+      LIR_Opr arg = args->at(i);
+      if (arg->type() == T_FLOAT || arg->type() == T_DOUBLE) {
+        num_fpargs++;
+      } else {
+        num_gpargs++;
+      }
+    }
+    __ blrt(rscratch1, num_gpargs, num_fpargs, type);
+  }
+
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+  __ maybe_isb();
+}
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  if (dest->is_address() || src->is_address()) {
+    move_op(src, dest, type, lir_patch_none, info,
+            /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+void LIR_Assembler::membar() {
+  COMMENT("membar");
+  __ membar(MacroAssembler::AnyAny);
+}
+
+void LIR_Assembler::membar_acquire() {
+  __ membar(Assembler::LoadLoad|Assembler::LoadStore);
+}
+
+void LIR_Assembler::membar_release() {
+  __ membar(Assembler::LoadStore|Assembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadload() {
+  __ membar(Assembler::LoadLoad);
+}
+
+void LIR_Assembler::membar_storestore() {
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
+
+void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  __ mov(result_reg->as_register(), rthread);
+}
+
+
+void LIR_Assembler::peephole(LIR_List *lir) {
+  if (tableswitch_count >= max_tableswitches)
+    return;
+
+  /*
+    This finite-state automaton recognizes sequences of compare-and-
+    branch instructions.  We will turn them into a tableswitch.  You
+    could argue that C1 really shouldn't be doing this sort of
+    optimization, but without it the code is really horrible.
+  */
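+  /*
+    The shape recognized is a run of consecutive keys
+        cmp reg, k     ; branch-if-equal L_k
+        cmp reg, k+1   ; branch-if-equal L_(k+1)
+        ...
+    which is rewritten into a single lir_cmp carrying the table index,
+    a label marking the generated branch table, and one unconditional
+    branch per key; comp_op() above then emits the actual tableswitch.
+  */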
+
+  enum { start_s, cmp1_s, beq_s, cmp_s } state;
+  int first_key, last_key = -2147483648;
+  int next_key = 0;
+  int start_insn = -1;
+  int last_insn = -1;
+  Register reg = noreg;
+  LIR_Opr reg_opr;
+  state = start_s;
+
+  LIR_OpList* inst = lir->instructions_list();
+  for (int i = 0; i < inst->length(); i++) {
+    LIR_Op* op = inst->at(i);
+    switch (state) {
+    case start_s:
+      first_key = -1;
+      start_insn = i;
+      switch (op->code()) {
+      case lir_cmp:
+        LIR_Opr opr1 = op->as_Op2()->in_opr1();
+        LIR_Opr opr2 = op->as_Op2()->in_opr2();
+        if (opr1->is_cpu_register() && opr1->is_single_cpu()
+            && opr2->is_constant()
+            && opr2->type() == T_INT) {
+          reg_opr = opr1;
+          reg = opr1->as_register();
+          first_key = opr2->as_constant_ptr()->as_jint();
+          next_key = first_key + 1;
+          state = cmp_s;
+          goto next_state;
+        }
+        break;
+      }
+      break;
+    case cmp_s:
+      switch (op->code()) {
+      case lir_branch:
+        if (op->as_OpBranch()->cond() == lir_cond_equal) {
+          state = beq_s;
+          last_insn = i;
+          goto next_state;
+        }
+      }
+      state = start_s;
+      break;
+    case beq_s:
+      switch (op->code()) {
+      case lir_cmp: {
+        LIR_Opr opr1 = op->as_Op2()->in_opr1();
+        LIR_Opr opr2 = op->as_Op2()->in_opr2();
+        if (opr1->is_cpu_register() && opr1->is_single_cpu()
+            && opr1->as_register() == reg
+            && opr2->is_constant()
+            && opr2->type() == T_INT
+            && opr2->as_constant_ptr()->as_jint() == next_key) {
+          last_key = next_key;
+          next_key++;
+          state = cmp_s;
+          goto next_state;
+        }
+      }
+      }
+      last_key = next_key;
+      state = start_s;
+      break;
+    default:
+      assert(false, "impossible state");
+    }
+    if (state == start_s) {
+      if (first_key < last_key - 5L && reg != noreg) {
+        {
+          // printf("found run register %d starting at insn %d low value %d high value %d\n",
+          //        reg->encoding(),
+          //        start_insn, first_key, last_key);
+          //   for (int i = 0; i < inst->length(); i++) {
+          //     inst->at(i)->print();
+          //     tty->print("\n");
+          //   }
+          //   tty->print("\n");
+        }
+
+        struct tableswitch *sw = &switches[tableswitch_count];
+        sw->_insn_index = start_insn, sw->_first_key = first_key,
+          sw->_last_key = last_key, sw->_reg = reg;
+        inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after));
+        {
+          // Insert the new table of branches
+          int offset = last_insn;
+          for (int n = first_key; n < last_key; n++) {
+            inst->insert_before
+              (last_insn + 1,
+               new LIR_OpBranch(lir_cond_always, T_ILLEGAL,
+                                inst->at(offset)->as_OpBranch()->label()));
+            offset -= 2, i++;
+          }
+        }
+        // Delete all the old compare-and-branch instructions
+        for (int n = first_key; n < last_key; n++) {
+          inst->remove_at(start_insn);
+          inst->remove_at(start_insn);
+        }
+        // Insert the tableswitch instruction
+        inst->insert_before(start_insn,
+                            new LIR_Op2(lir_cmp, lir_cond_always,
+                                        LIR_OprFact::intConst(tableswitch_count),
+                                        reg_opr));
+        inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches));
+        tableswitch_count++;
+      }
+      reg = noreg;
+      last_key = -2147483648;
+    }
+  next_state:
+    ;
+  }
+}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
+  Address addr = as_Address(src->as_address_ptr(), noreg);
+  BasicType type = src->type();
+  bool is_oop = type == T_OBJECT || type == T_ARRAY;
+
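+  // member-function pointers select the 32-bit or 64-bit load-acquire / add /
+  // store-release forms below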
+  void (MacroAssembler::* lda)(Register Rd, Register Ra);
+  void (MacroAssembler::* add)(Register Rd, Register Rn, RegisterOrConstant increment);
+  void (MacroAssembler::* stl)(Register Rs, Register Rt, Register Rn);
+
+  switch(type) {
+  case T_INT:
+    lda = &MacroAssembler::ldaxrw;
+    add = &MacroAssembler::addw;
+    stl = &MacroAssembler::stlxrw;
+    break;
+  case T_LONG:
+    lda = &MacroAssembler::ldaxr;
+    add = &MacroAssembler::add;
+    stl = &MacroAssembler::stlxr;
+    break;
+  case T_OBJECT:
+  case T_ARRAY:
+    if (UseCompressedOops) {
+      lda = &MacroAssembler::ldaxrw;
+      add = &MacroAssembler::addw;
+      stl = &MacroAssembler::stlxrw;
+    } else {
+      lda = &MacroAssembler::ldaxr;
+      add = &MacroAssembler::add;
+      stl = &MacroAssembler::stlxr;
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  switch (code) {
+  case lir_xadd:
+    {
+      RegisterOrConstant inc;
+      Register tmp = as_reg(tmp_op);
+      Register dst = as_reg(dest);
+      if (data->is_constant()) {
+        inc = RegisterOrConstant(as_long(data));
+        assert_different_registers(dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
+      } else {
+        inc = RegisterOrConstant(as_reg(data));
+        assert_different_registers(inc.as_register(), dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
+      }
+      Label again;
+      __ lea(tmp, addr);
+      __ bind(again);
+      (_masm->*lda)(dst, tmp);
+      (_masm->*add)(rscratch1, dst, inc);
+      (_masm->*stl)(rscratch2, rscratch1, tmp);
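+      // the store-exclusive writes 0 to rscratch2 on success; loop if it failed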
+      __ cbnzw(rscratch2, again);
+      break;
+    }
+  case lir_xchg:
+    {
+      Register tmp = tmp_op->as_register();
+      Register obj = as_reg(data);
+      Register dst = as_reg(dest);
+      if (is_oop && UseCompressedOops) {
+        __ encode_heap_oop(rscratch1, obj);
+        obj = rscratch1;
+      }
+      assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
+      Label again;
+      __ lea(tmp, addr);
+      __ bind(again);
+      (_masm->*lda)(dst, tmp);
+      (_masm->*stl)(rscratch2, obj, tmp);
+      __ cbnzw(rscratch2, again);
+      if (is_oop && UseCompressedOops) {
+        __ decode_heap_oop(dst);
+      }
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  __ membar(__ AnyAny);
+}
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
+
+ private:
+
+  int array_element_size(BasicType type) const;
+
+  void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
+
+  // helper functions which check for overflow and set bailout if it
+  // occurs.  They always return a valid embeddable pointer, but in the
+  // bailout case the pointer won't be to unique storage.
+  address float_constant(float f);
+  address double_constant(double d);
+
+  address int_constant(jlong n);
+
+  bool is_literal_address(LIR_Address* addr);
+
+  // When we need to use something other than rscratch1 use this
+  // method.
+  Address as_Address(LIR_Address* addr, Register tmp);
+
+  // Record the type of the receiver in ReceiverTypeData
+  void type_profile_helper(Register mdo,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Label* update_done);
+  void add_debug_info_for_branch(address adr, CodeEmitInfo* info);
+
+  void casw(Register addr, Register newval, Register cmpval);
+  void casl(Register addr, Register newval, Register cmpval);
+
+  void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL);
+
+  static const int max_tableswitches = 20;
+  struct tableswitch switches[max_tableswitches];
+  int tableswitch_count;
+
+  void init() { tableswitch_count = 0; }
+
+  void deoptimize_trap(CodeEmitInfo *info);
+
+public:
+
+  void store_parameter(Register r, int offset_from_esp_in_words);
+  void store_parameter(jint c,     int offset_from_esp_in_words);
+  void store_parameter(jobject c,  int offset_from_esp_in_words);
+
+enum { call_stub_size = 12 * NativeInstruction::instruction_size,
+       exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+       deopt_handler_size = 7 * NativeInstruction::instruction_size };
+
+
+#endif // CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,1429 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Item will be loaded into a byte register; Intel only
+void LIRItem::load_byte_item() {
+  load_item();
+}
+
+
+void LIRItem::load_nonconstant() {
+  LIR_Opr r = value()->operand();
+  if (r->is_constant()) {
+    _result = r;
+  } else {
+    load_item();
+  }
+}
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; }
+LIR_Opr LIRGenerator::exceptionPcOpr()  { return FrameMap::r3_opr; }
+LIR_Opr LIRGenerator::divInOpr()        { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::divOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::remOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::shiftCountOpr()   { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::r0_opr; }
+LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
+
+
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  LIR_Opr opr;
+  switch (type->tag()) {
+    case intTag:     opr = FrameMap::r0_opr;          break;
+    case objectTag:  opr = FrameMap::r0_oop_opr;      break;
+    case longTag:    opr = FrameMap::long0_opr;        break;
+    case floatTag:   opr = FrameMap::fpu0_float_opr;  break;
+    case doubleTag:  opr = FrameMap::fpu0_double_opr;  break;
+
+    case addressTag:
+    default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  LIR_Opr reg = new_register(T_INT);
+  set_vreg_flag(reg, LIRGenerator::byte_reg);
+  return reg;
+}
+
+
+//--------- loading items into registers --------------------------------
+
+
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    return v->type()->as_IntConstant()->value() == 0L;
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0L;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+}
+
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+  // FIXME: Just a guess
+  if (v->type()->as_IntConstant() != NULL) {
+    return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value());
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0L;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+}
+
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; }
+
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return LIR_OprFact::illegalOpr;
+}
+
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  // accumulate fixed displacements
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+  if (index->is_register()) {
+    // apply the shift and accumulate the displacement
+    if (shift > 0) {
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    if (disp != 0) {
+      LIR_Opr tmp = new_pointer_register();
+      if (Assembler::operand_valid_for_add_sub_immediate(disp)) {
+        __ add(index, LIR_OprFact::intptrConst(disp), tmp);
+        index = tmp;
+      } else {
+        __ move(LIR_OprFact::intptrConst(disp), tmp);
+        __ add(tmp, index, tmp);
+        index = tmp;
+      }
+      disp = 0;
+    }
+  } else if (disp != 0 && !Address::offset_ok_for_immed(disp, shift)) {
+    // index is illegal so replace it with the displacement loaded into a register
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(disp), index);
+    disp = 0;
+  }
+
+  // at this point we either have base + index or base + displacement
+  if (disp == 0) {
+    return new LIR_Address(base, index, type);
+  } else {
+    assert(Address::offset_ok_for_immed(disp, 0), "must be");
+    return new LIR_Address(base, disp, type);
+  }
+}
+
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Address* addr;
+  if (index_opr->is_constant()) {
+    addr = new LIR_Address(array_opr,
+                           offset_in_bytes + index_opr->as_jint() * elem_size, type);
+  } else {
+// #ifdef _LP64
+//     if (index_opr->type() == T_INT) {
+//       LIR_Opr tmp = new_register(T_LONG);
+//       __ convert(Bytecodes::_i2l, index_opr, tmp);
+//       index_opr = tmp;
+//     }
+// #endif
+    if (offset_in_bytes) {
+      LIR_Opr tmp = new_pointer_register();
+      __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp);
+      array_opr = tmp;
+      offset_in_bytes = 0;
+    }
+    addr =  new LIR_Address(array_opr,
+                            index_opr,
+                            LIR_Address::scale(type),
+                            offset_in_bytes, type);
+  }
+  if (needs_card_mark) {
+    // This store will need a precise card mark, so go ahead and
+    // compute the full address now instead of computing it once for the
+    // store and again for the card mark.
+    LIR_Opr tmp = new_pointer_register();
+    __ leal(LIR_OprFact::address(addr), tmp);
+    return new LIR_Address(tmp, type);
+  } else {
+    return addr;
+  }
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr r;
+  if (type == T_LONG) {
+    r = LIR_OprFact::longConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(false, x)) {
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else if (type == T_INT) {
+    r = LIR_OprFact::intConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(true, x)) {
+      // This is all rather nasty.  We don't know whether our constant
+      // is required for a logical or an arithmetic operation, so we
+      // don't know what the range of valid values is!!
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  return r;
+}
+
+
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr imm = NULL;
+  switch(addr->type()) {
+  case T_INT:
+    imm = LIR_OprFact::intConst(step);
+    break;
+  case T_LONG:
+    imm = LIR_OprFact::longConst(step);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  LIR_Opr reg = new_register(addr->type());
+  __ load(addr, reg);
+  __ add(reg, imm, reg);
+  __ store(reg, addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr reg = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), reg, info);
+  __ cmp(condition, reg, LIR_OprFact::intConst(c));
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr reg1 = new_register(T_INT);
+  __ load(generate_address(base, disp, type), reg1, info);
+  __ cmp(condition, reg, reg1);
+}
+
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+
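+  // e.g. x * 9 becomes (x << 3) + x, and x * 7 becomes (x << 3) - x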
+  if (is_power_of_2(c - 1)) {
+    __ shift_left(left, exact_log2(c - 1), tmp);
+    __ add(tmp, left, result);
+    return true;
+  } else if (is_power_of_2(c + 1)) {
+    __ shift_left(left, exact_log2(c + 1), tmp);
+    __ sub(tmp, left, result);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+  BasicType type = item->type();
+  __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type));
+}
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_pinned(),"");
+  bool needs_range_check = true;
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object() ||
+                                         x->should_profile());
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  index.load_nonconstant();
+
+  if (use_length) {
+    needs_range_check = x->compute_needs_range_check();
+    if (needs_range_check) {
+      length.set_instruction(x->length());
+      length.load_item();
+    }
+  }
+  if (needs_store_check || x->check_boolean()) {
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // the CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different
+  CodeEmitInfo* range_check_info = state_for(x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // emit array address setup early so it schedules better
+  // FIXME?  No harm in this on aarch64, and it might help
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) {
+      __ cmp(lir_cond_belowEqual, length.result(), index.result());
+      __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // range_check also does the null check
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = new_register(objectType);
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci());
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(value.result(), array_addr, null_check_info);
+    // Seems to be a precise card mark
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  } else {
+    LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
+    __ move(result, array_addr, null_check_info);
+  }
+}
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop
+  LIR_Opr lock = new_register(T_INT);
+  // Need a scratch register for biased locking
+  LIR_Opr scratch = LIR_OprFact::illegalOpr;
+  if (UseBiasedLocking) {
+    scratch = new_register(T_INT);
+  }
+
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x);
+  }
+  // this CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked)
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
+                        x->monitor_no(), info_for_exception, info);
+}
+
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+
+  LIRItem from(x->x(), this);
+  from.load_item();
+  LIR_Opr result = rlock_result(x);
+  __ negate (from.result(), result);
+
+}
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+
+  if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
+    // float remainder is implemented as a direct call into the runtime
+    LIRItem right(x->x(), this);
+    LIRItem left(x->y(), this);
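+    // Note: the names are swapped relative to the operands; 'right' holds the
+    // dividend (x->x()) and 'left' the divisor (x->y()). The moves below place
+    // the dividend in the first C argument and the divisor in the second.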
+
+    BasicTypeList signature(2);
+    if (x->op() == Bytecodes::_frem) {
+      signature.append(T_FLOAT);
+      signature.append(T_FLOAT);
+    } else {
+      signature.append(T_DOUBLE);
+      signature.append(T_DOUBLE);
+    }
+    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+    const LIR_Opr result_reg = result_register_for(x->type());
+    left.load_item_force(cc->at(1));
+    right.load_item();
+
+    __ move(right.result(), cc->at(0));
+
+    address entry;
+    if (x->op() == Bytecodes::_frem) {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+    } else {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+    }
+
+    LIR_Opr result = rlock_result(x);
+    __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
+    __ move(result_reg, result);
+
+    return;
+  }
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  LIRItem* left_arg  = &left;
+  LIRItem* right_arg = &right;
+
+  // Always load right hand side.
+  right.load_item();
+
+  if (!left.is_register())
+    left.load_item();
+
+  LIR_Opr reg = rlock(x);
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+  if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) {
+    tmp = new_register(T_DOUBLE);
+  }
+
+  arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL);
+
+  set_result(x, round_item(reg));
+}
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+
+  // missing test if instr is commutative and if we should swap
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+
+    // the check for division by zero destroys the right operand
+    right.set_destroys_register();
+
+    // check for division by zero (destroys registers of right operand!)
+    CodeEmitInfo* info = state_for(x);
+
+    left.load_item();
+    right.load_item();
+
+    __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+    __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+
+    rlock_result(x);
+    switch (x->op()) {
+    case Bytecodes::_lrem:
+      __ rem (left.result(), right.result(), x->operand());
+      break;
+    case Bytecodes::_ldiv:
+      __ div (left.result(), right.result(), x->operand());
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+
+
+  } else {
+    assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub,
+            "expect lmul, ladd or lsub");
+    // add, sub, mul
+    left.load_item();
+    if (! right.is_register()) {
+      if (x->op() == Bytecodes::_lmul
+          || ! right.is_constant()
+          || ! Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) {
+        right.load_item();
+      } else { // add, sub
+        assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub");
+        // don't load constants to save register
+        right.load_nonconstant();
+      }
+    }
+    rlock_result(x);
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+
+  // Test if instr is commutative and if we should swap
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  LIRItem* left_arg = &left;
+  LIRItem* right_arg = &right;
+  if (x->is_commutative() && left.is_stack() && right.is_register()) {
+    // swap them if left is real stack (or cached) and right is real register (not cached)
+    left_arg = &right;
+    right_arg = &left;
+  }
+
+  left_arg->load_item();
+
+  // do not need to load right, as we can handle stack and constants
+  if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
+
+    right_arg->load_item();
+    rlock_result(x);
+
+    CodeEmitInfo* info = state_for(x);
+    LIR_Opr tmp = new_register(T_INT);
+    __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
+    __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+    info = state_for(x);
+
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+    }
+
+  } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {
+    if (right.is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) {
+      right.load_nonconstant();
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr);
+  } else {
+    assert (x->op() == Bytecodes::_imul, "expect imul");
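+    // A constant multiplier stays un-loaded only when the multiply can be
+    // strength-reduced (a power of two, or one away from a power of two).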
+    if (right.is_constant()) {
+      int c = right.get_jint_constant();
+      if (! is_power_of_2(c) && ! is_power_of_2(c + 1) && ! is_power_of_2(c - 1)) {
+        // Cannot use constant op.
+        right.load_item();
+      } else {
+        right.dont_load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT));
+  }
+}
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag:  do_ArithmeticOp_FPU(x);  return;
+    case longTag:    do_ArithmeticOp_Long(x); return;
+    case intTag:     do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere();
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  rlock_result(x);
+  if (right.is_constant()) {
+    right.dont_load_item();
+
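+    // The shift distance is masked as required by the JVM spec:
+    // low 5 bits for int shifts, low 6 bits for long shifts.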
+    switch (x->op()) {
+    case Bytecodes::_ishl: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ shift_left(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_ishr: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_iushr: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ unsigned_shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lshl: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ shift_left(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lshr: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lushr: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ unsigned_shift_right(left.result(), c, x->operand());
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+  } else {
+    right.load_item();
+    LIR_Opr tmp = new_register(T_INT);
+    switch (x->op()) {
+    case Bytecodes::_ishl: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ shift_left(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_ishr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_iushr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lshl: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ shift_left(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lshr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lushr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+  }
+}
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  rlock_result(x);
+  if (right.is_constant()
+      && ((right.type()->tag() == intTag
+           && Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant()))
+          || (right.type()->tag() == longTag
+              && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant()))))  {
+    right.dont_load_item();
+  } else {
+    right.load_item();
+  }
+  switch (x->op()) {
+  case Bytecodes::_iand:
+  case Bytecodes::_land:
+    __ logical_and(left.result(), right.result(), x->operand()); break;
+  case Bytecodes::_ior:
+  case Bytecodes::_lor:
+    __ logical_or (left.result(), right.result(), x->operand()); break;
+  case Bytecodes::_ixor:
+  case Bytecodes::_lxor:
+    __ logical_xor(left.result(), right.result(), x->operand()); break;
+  default: Unimplemented();
+  }
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  ValueTag tag = x->x()->type()->tag();
+  if (tag == longTag) {
+    left.set_destroys_register();
+  }
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();
+  }
+}
+
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+  assert(x->number_of_arguments() == 4, "wrong type");
+  LIRItem obj   (x->argument_at(0), this);  // object
+  LIRItem offset(x->argument_at(1), this);  // offset of field
+  LIRItem cmp   (x->argument_at(2), this);  // value to compare with field
+  LIRItem val   (x->argument_at(3), this);  // replace field with val if matches cmp
+
+  assert(obj.type()->tag() == objectTag, "invalid type");
+
+  // In 64bit the type can be long, sparc doesn't have this assert
+  // assert(offset.type()->tag() == intTag, "invalid type");
+
+  assert(cmp.type()->tag() == type->tag(), "invalid type");
+  assert(val.type()->tag() == type->tag(), "invalid type");
+
+  // get address of field
+  obj.load_item();
+  offset.load_nonconstant();
+  val.load_item();
+  cmp.load_item();
+
+  LIR_Address* a;
+  if (offset.result()->is_constant()) {
+    jlong c = offset.result()->as_jlong();
+    if ((jlong)((jint)c) == c) {
+      a = new LIR_Address(obj.result(),
+                          (jint)c,
+                          as_BasicType(type));
+    } else {
+      LIR_Opr tmp = new_register(T_LONG);
+      __ move(offset.result(), tmp);
+      a = new LIR_Address(obj.result(),
+                          tmp,
+                          as_BasicType(type));
+    }
+  } else {
+    a = new LIR_Address(obj.result(),
+                        offset.result(),
+                        LIR_Address::times_1,
+                        0,
+                        as_BasicType(type));
+  }
+  LIR_Opr addr = new_pointer_register();
+  __ leal(LIR_OprFact::address(a), addr);
+
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Do the pre-write barrier, if any.
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+
+  LIR_Opr result = rlock_result(x);
+
+  LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+  if (type == objectType)
+    __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT),
+               result);
+  else if (type == intType)
+    __ cas_int(addr, cmp.result(), val.result(), ill, ill);
+  else if (type == longType)
+    __ cas_long(addr, cmp.result(), val.result(), ill, ill);
+  else {
+    ShouldNotReachHere();
+  }
+
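+  // The CAS sequence leaves its status in r8 (rscratch1), 0 denoting success;
+  // XOR with 1 turns it into the Java boolean result.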
+  __ logical_xor(FrameMap::r8_opr, LIR_OprFact::intConst(1), result);
+
+  if (type == objectType) {   // Write-barrier needed for Object fields.
+    // Seems to be precise
+    post_barrier(addr, val.result());
+  }
+}
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  switch (x->id()) {
+    case vmIntrinsics::_dabs:
+    case vmIntrinsics::_dsqrt: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+      LIRItem value(x->argument_at(0), this);
+      value.load_item();
+      LIR_Opr dst = rlock_result(x);
+
+      switch (x->id()) {
+      case vmIntrinsics::_dsqrt: {
+        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      case vmIntrinsics::_dabs: {
+        __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      }
+      break;
+    }
+    case vmIntrinsics::_dlog10: // fall through
+    case vmIntrinsics::_dlog: // fall through
+    case vmIntrinsics::_dsin: // fall through
+    case vmIntrinsics::_dtan: // fall through
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+
+      address runtime_entry = NULL;
+      switch (x->id()) {
+      case vmIntrinsics::_dsin:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+        break;
+      case vmIntrinsics::_dcos:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+        break;
+      case vmIntrinsics::_dtan:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+        break;
+      case vmIntrinsics::_dlog:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+        break;
+      case vmIntrinsics::_dlog10:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+        break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+
+      LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: {
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+  }
+}
+
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Make all state_for calls early since they can emit code
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem src(x->argument_at(0), this);
+  LIRItem src_pos(x->argument_at(1), this);
+  LIRItem dst(x->argument_at(2), this);
+  LIRItem dst_pos(x->argument_at(3), this);
+  LIRItem length(x->argument_at(4), this);
+
+  // operands for arraycopy must use fixed registers, otherwise
+  // LinearScan will fail allocation (because arraycopy always needs a
+  // call)
+
+  // The java calling convention will give us enough registers
+  // so that on the stub side the args will be perfect already.
+  // On the other slow/special case side we call C and the arg
+  // positions are not similar enough to pick one as the best.
+  // Also because the java calling convention is a "shifted" version
+  // of the C convention we can process the java args trivially into C
+  // args without worrying about overwriting during the transfer.
+
+  src.load_item_force     (FrameMap::as_oop_opr(j_rarg0));
+  src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
+  dst.load_item_force     (FrameMap::as_oop_opr(j_rarg2));
+  dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
+  length.load_item_force  (FrameMap::as_opr(j_rarg4));
+
+  LIR_Opr tmp =           FrameMap::as_opr(j_rarg5);
+
+  set_no_result(x);
+
+  int flags;
+  ciArrayKlass* expected_type;
+  arraycopy_helper(x, &flags, &expected_type);
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  assert(UseCRC32Intrinsics, "why are we here?");
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // val is destroyed by update_crc32
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
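+      // updateBytesCRC32 operates on a byte[], so the data starts at the array
+      // base offset; updateByteBufferCRC32 is given a raw address, so no base
+      // offset is added.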
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if (off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+        offset += off.result()->as_jint();
+      }
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+      }
+
+      if (offset) {
+        LIR_Opr tmp = new_pointer_register();
+        __ add(base_op, LIR_OprFact::intConst(offset), tmp);
+        base_op = tmp;
+        offset = 0;
+      }
+
+      LIR_Address* a = new LIR_Address(base_op,
+                                       index,
+                                       LIR_Address::times_1,
+                                       offset,
+                                       T_BYTE);
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      len.load_item_force(cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
+// _i2b, _i2c, _i2s
+void LIRGenerator::do_Convert(Convert* x) {
+  bool needs_stub;
+
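+  // Conversions from float/double to int/long need a ConversionStub to handle
+  // NaN and out-of-range inputs; the remaining conversions are emitted inline
+  // without a slow path.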
+  switch (x->op()) {
+    case Bytecodes::_i2l:
+    case Bytecodes::_l2i:
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+    case Bytecodes::_f2d:
+    case Bytecodes::_d2f:
+    case Bytecodes::_i2f:
+    case Bytecodes::_i2d:
+    case Bytecodes::_l2f:
+    case Bytecodes::_l2d: needs_stub = false;
+      break;
+    case Bytecodes::_f2l:
+    case Bytecodes::_d2l:
+    case Bytecodes::_f2i:
+    case Bytecodes::_d2i: needs_stub = true;
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  LIRItem value(x->value(), this);
+  value.load_item();
+  LIR_Opr input = value.result();
+  LIR_Opr result = rlock(x);
+
+  // arguments of lir_convert
+  LIR_Opr conv_input = input;
+  LIR_Opr conv_result = result;
+  ConversionStub* stub = NULL;
+
+  if (needs_stub) {
+    stub = new ConversionStub(x->op(), conv_input, conv_result);
+  }
+
+  __ convert(x->op(), conv_input, conv_result, stub, new_register(T_INT));
+
+  assert(result->is_virtual(), "result must be virtual register");
+  set_result(x, result);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+#ifndef PRODUCT
+  if (PrintNotLoaded && !x->klass()->is_loaded()) {
+    tty->print_cr("   ###class not loaded at new bci %d", x->printable_bci());
+  }
+#endif
+  CodeEmitInfo* info = state_for(x, x->state());
+  LIR_Opr reg = result_register_for(x->type());
+  new_instance(reg, x->klass(),
+                       FrameMap::r2_oop_opr,
+                       FrameMap::r5_oop_opr,
+                       FrameMap::r4_oop_opr,
+                       LIR_OprFact::illegalOpr,
+                       FrameMap::r3_oop_opr, info);
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item_force(FrameMap::r19_opr);
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r2_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r4_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r5_oop_opr;
+  LIR_Opr tmp4 = reg;
+  LIR_Opr klass_reg = FrameMap::r3_oop_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  __ oop2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+  LIRItem length(x->length(), this);
+  // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
+  // and therefore provide the state before the parameters have been consumed
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info =  state_for(x, x->state_before());
+  }
+
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r2_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r4_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r5_oop_opr;
+  LIR_Opr tmp4 = reg;
+  LIR_Opr klass_reg = FrameMap::r3_oop_opr;
+
+  length.load_item_force(FrameMap::r19_opr);
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+  ciObject* obj = (ciKlass*) ciObjArrayKlass::make(x->klass());
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  jobject2reg_with_patching(klass_reg, obj, patching_info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+  Values* dims = x->dims();
+  int i = dims->length();
+  LIRItemList* items = new LIRItemList(dims->length(), NULL);
+  while (i-- > 0) {
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for(x, x->state());
+
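+  // The dimension sizes are passed to the runtime as a packed array of jints
+  // on the stack; the varargs pointer (r2) is pointed at them below.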
+  i = dims->length();
+  while (i-- > 0) {
+    LIRItem* size = items->at(i);
+    size->load_item();
+
+    store_stack_parameter(size->result(), in_ByteSize(i*4));
+  }
+
+  LIR_Opr reg = result_register_for(x->type());
+  jobject2reg_with_patching(reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::r19_opr;
+  __ move(LIR_OprFact::intConst(x->rank()), rank);
+  LIR_Opr varargs = FrameMap::r2_opr;
+  __ move(FrameMap::sp_opr, varargs);
+  LIR_OprList* args = new LIR_OprList(3);
+  args->append(reg);
+  args->append(rank);
+  args->append(varargs);
+  __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // nothing to do for now
+}
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+    // must do this before locking the destination register as an oop register,
+    // and before the obj is loaded (the latter is for deoptimization)
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+
+  // info for exceptions
+  CodeEmitInfo* info_for_exception = state_for(x);
+
+  CodeStub* stub;
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+  } else {
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  LIR_Opr reg = rlock_result(x);
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
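+  // A third temporary register is needed only when the klass may still have to
+  // be patched in or when compressed oops must be decoded for the comparison.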
+  if (!x->klass()->is_loaded() || UseCompressedOops) {
+    tmp3 = new_register(objectType);
+  }
+  __ checkcast(reg, obj.result(), x->klass(),
+               new_register(objectType), new_register(objectType), tmp3,
+               x->direct_compare(), info_for_exception, patching_info, stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  LIRItem obj(x->obj(), this);
+
+  // result and test object may not be in same register
+  LIR_Opr reg = rlock_result(x);
+  CodeEmitInfo* patching_info = NULL;
+  if ((!x->klass()->is_loaded() || PatchALot)) {
+    // must do this before locking the destination register as an oop register
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  if (!x->klass()->is_loaded() || UseCompressedOops) {
+    tmp3 = new_register(objectType);
+  }
+  __ instanceof(reg, obj.result(), x->klass(),
+                new_register(objectType), new_register(objectType), tmp3,
+                x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_If(If* x) {
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  bool is_safepoint = x->is_safepoint();
+
+  If::Condition cond = x->cond();
+
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;
+  LIRItem* yin = &yitem;
+
+  if (tag == longTag) {
+    // for longs, only conditions "eql", "neq", "lss", "geq" are valid;
+    // mirror for other conditions
+    if (cond == If::gtr || cond == If::leq) {
+      cond = Instruction::mirror(cond);
+      xin = &yitem;
+      yin = &xitem;
+    }
+    xin->set_destroys_register();
+  }
+  xin->load_item();
+
+  if (tag == longTag) {
+    if (yin->is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) {
+      yin->dont_load_item();
+    } else {
+      yin->load_item();
+    }
+  } else if (tag == intTag) {
+    if (yin->is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant()))  {
+      yin->dont_load_item();
+    } else {
+      yin->load_item();
+    }
+  } else {
+    yin->load_item();
+  }
+
+  // add safepoint before generating condition code so it can be recomputed
+  if (x->is_safepoint()) {
+    // increment backedge counter if needed
+    increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
+    __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+  }
+  set_no_result(x);
+
+  LIR_Opr left = xin->result();
+  LIR_Opr right = yin->result();
+
+  __ cmp(lir_cond(cond), left, right);
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  profile_branch(x, cond);
+  move_to_phi(x->state());
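+  // Float/double compares must also supply a target for the unordered case.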
+  if (x->x()->type()->is_float_kind()) {
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());
+}
+
+LIR_Opr LIRGenerator::getThreadPointer() {
+   return FrameMap::as_pointer_opr(rthread);
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); }
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+  __ volatile_store_mem_reg(value, address, info);
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+  __ volatile_load_mem_reg(address, result, info);
+}
+
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  __ load(addr, dst);
+}
+
+
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  if (is_obj) {
+    // Do the pre-write barrier, if any.
+    pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(data, addr);
+    assert(src->is_register(), "must be register");
+    // Seems to be a precise address
+    post_barrier(LIR_OprFact::address(addr), data);
+  } else {
+    __ move(data, addr);
+  }
+}
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  off.load_nonconstant();
+
+  // We can cope with a constant increment in an xadd
+  if (! (x->is_add()
+         && value.is_constant()
+         && can_inline_as_constant(x->value()))) {
+    value.load_item();
+  }
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  LIR_Opr offset = off.result();
+
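+  // If the result register aliases the value to be stored, the exchange would
+  // clobber the new value before it is written, so copy it to a temporary.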
+  if (data == dst) {
+    LIR_Opr tmp = new_register(data->type());
+    __ move(data, tmp);
+    data = tmp;
+  }
+
+  LIR_Address* addr;
+  if (offset->is_constant()) {
+    jlong l = offset->as_jlong();
+    assert((jlong)((jint)l) == l, "offset too large for constant");
+    jint c = (jint)l;
+    addr = new LIR_Address(src.result(), c, type);
+  } else {
+    addr = new LIR_Address(src.result(), offset, type);
+  }
+
+  LIR_Opr tmp = new_register(T_INT);
+  LIR_Opr ptr = LIR_OprFact::illegalOpr;
+
+  if (x->is_add()) {
+    __ xadd(LIR_OprFact::address(addr), data, dst, tmp);
+  } else {
+    if (is_obj) {
+      // Do the pre-write barrier, if any.
+      ptr = new_pointer_register();
+      __ add(src.result(), off.result(), ptr);
+      pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
+    }
+    __ xchg(LIR_OprFact::address(addr), data, dst, tmp);
+    if (is_obj) {
+      post_barrier(ptr, data);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+
+//----------------------------------------------------------------------
+// Allocation of FPU stack slots (Intel x86 only)
+//----------------------------------------------------------------------
+
+void LinearScan::allocate_fpu_stack() {
+  // No FPU stack on AArch64
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_LINEARSCAN_HPP
+#define CPU_AARCH64_VM_C1_LINEARSCAN_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) {
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  return 1;
+}
+
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  return false;
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+  if (assigned_reg < pd_first_callee_saved_reg)
+    return true;
+  if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg)
+    return true;
+  if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg)
+    return true;
+  return false;
+}
+
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // FIXME ??
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+    assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+    _first_reg = pd_first_callee_saved_reg;
+    _last_reg = pd_last_callee_saved_reg;
+    return true;
+  } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || cur->type() == T_ADDRESS) {
+    _first_reg = pd_first_cpu_reg;
+    _last_reg = pd_last_allocatable_cpu_reg;
+    return true;
+  }
+  return false;
+}
+
+
+
+#endif // CPU_AARCH64_VM_C1_LINEARSCAN_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc_interface/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/stubRoutines.hpp"
+
+void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result,
+                                  FloatRegister f0, FloatRegister f1,
+                                  Register result)
+{
+  if (is_float) {
+    fcmps(f0, f1);
+  } else {
+    fcmpd(f0, f1);
+  }
+  if (unordered_result < 0) {
+    // we want -1 for unordered or less than, 0 for equal and 1 for
+    // greater than.
+    cset(result, NE);  // Not equal or unordered
+    cneg(result, result, LT);  // Less than or unordered
+  } else {
+    // we want -1 for less than, 0 for equal and 1 for unordered or
+    // greater than.
+    cset(result, NE);  // Not equal or unordered
+    cneg(result, result, LO);  // Less than
+  }
+}
+
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) {
+  const int aligned_mask = BytesPerWord -1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done, fail;
+  int null_check_offset = -1;
+
+  verify_oop(obj);
+
+  // save object being locked into the BasicObjectLock
+  str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+  if (UseBiasedLocking) {
+    assert(scratch != noreg, "should have scratch register at this point");
+    null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case);
+  } else {
+    null_check_offset = offset();
+  }
+
+  // Load object header
+  ldr(hdr, Address(obj, hdr_offset));
+  // and mark it as unlocked
+  orr(hdr, hdr, markOopDesc::unlocked_value);
+  // save unlocked object header into the displaced header location on the stack
+  str(hdr, Address(disp_hdr, 0));
+  // test if object header is still the same (i.e. unlocked), and if so, store the
+  // displaced header address in the object header - if it is not the same, get the
+  // object header instead
+  lea(rscratch2, Address(obj, hdr_offset));
+  cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthrough*/NULL);
+  // if the object header was the same, we're done
+  // if the object header was not the same, it is now in the hdr register
+  // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
+  //
+  // 1) (hdr & aligned_mask) == 0
+  // 2) sp <= hdr
+  // 3) hdr <= sp + page_size
+  //
+  // these 3 tests can be done by evaluating the following expression:
+  //
+  // (hdr - sp) & (aligned_mask - page_size)
+  //
+  // assuming both the stack pointer and page_size have their least
+  // significant 2 bits cleared and page_size is a power of 2
+  mov(rscratch1, sp);
+  sub(hdr, hdr, rscratch1);
+  ands(hdr, hdr, aligned_mask - os::vm_page_size());
+  // for recursive locking, the result is zero => save it in the displaced header
+  // location (NULL in the displaced hdr location indicates recursive locking)
+  str(hdr, Address(disp_hdr, 0));
+  // otherwise we don't care about the result and handle locking via runtime call
+  cbnz(hdr, slow_case);
+  // done
+  bind(done);
+  if (PrintBiasedLockingStatistics) {
+    lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+    addmw(Address(rscratch2, 0), 1, rscratch1);
+  }
+  return null_check_offset;
+}
+
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int aligned_mask = BytesPerWord -1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done;
+
+  if (UseBiasedLocking) {
+    // load object
+    ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // load displaced header
+  ldr(hdr, Address(disp_hdr, 0));
+  // if the loaded hdr is NULL we had recursive locking
+  // if we had recursive locking, we are done
+  cbz(hdr, done);
+  if (!UseBiasedLocking) {
+    // load object
+    ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object - if the object header is not pointing to
+  // the displaced header, get the object header instead
+  // if the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call
+  if (hdr_offset) {
+    lea(rscratch1, Address(obj, hdr_offset));
+    cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
+  } else {
+    cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
+  }
+  // done
+  bind(done);
+}
+
+
+// Defines obj, preserves var_size_in_bytes
+void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) {
+  if (UseTLAB) {
+    tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+  } else {
+    eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+    incr_allocated_bytes(noreg, var_size_in_bytes, con_size_in_bytes, t1);
+  }
+}
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
+  assert_different_registers(obj, klass, len);
+  if (UseBiasedLocking && !len->is_valid()) {
+    assert_different_registers(obj, klass, len, t1, t2);
+    ldr(t1, Address(klass, Klass::prototype_header_offset()));
+  } else {
+    // This assumes that all prototype bits fit in an int32_t
+    mov(t1, (int32_t)(intptr_t)markOopDesc::prototype());
+  }
+  str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+  if (UseCompressedOops) { // Take care not to kill klass
+    encode_heap_oop_not_null(t1, klass);
+    strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
+  } else {
+    str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+  }
+
+  if (len->is_valid()) {
+    strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
+  } else if (UseCompressedOops) {
+    store_klass_gap(obj, zr);
+  }
+}
+
+// Zero words; len is in bytes
+// Destroys all registers except addr
+// len must be a nonzero multiple of wordSize
+void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
+  assert_different_registers(addr, len, t1, rscratch1, rscratch2);
+
+#ifdef ASSERT
+  { Label L;
+    tst(len, BytesPerWord - 1);
+    br(Assembler::EQ, L);
+    stop("len is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+
+#ifndef PRODUCT
+  block_comment("zero memory");
+#endif
+
+  Label loop;
+  Label entry;
+
+//  Algorithm:
+//
+//    scratch1 = cnt & 7;
+//    cnt -= scratch1;
+//    p += scratch1;
+//    switch (scratch1) {
+//      do {
+//        cnt -= 8;
+//          p[-8] = 0;
+//        case 7:
+//          p[-7] = 0;
+//        case 6:
+//          p[-6] = 0;
+//          // ...
+//        case 1:
+//          p[-1] = 0;
+//        case 0:
+//          p += 8;
+//      } while (cnt);
+//    }
+
+  const int unroll = 8; // Number of str(zr) instructions we'll unroll
+
+  lsr(len, len, LogBytesPerWord);
+  andr(rscratch1, len, unroll - 1);  // tmp1 = cnt % unroll
+  sub(len, len, rscratch1);      // cnt -= unroll
+  // t1 always points to the end of the region we're about to zero
+  add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
+  adr(rscratch2, entry);
+  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
+  br(rscratch2);
+  bind(loop);
+  sub(len, len, unroll);
+  for (int i = -unroll; i < 0; i++)
+    str(zr, Address(t1, i * wordSize));
+  bind(entry);
+  add(t1, t1, unroll * wordSize);
+  cbnz(len, loop);
+}
+
+// preserves obj, destroys len_in_bytes
+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+  Label done;
+  assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
+  assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
+  Register index = len_in_bytes;
+  // index is positive and ptr sized
+  subs(index, index, hdr_size_in_bytes);
+  br(Assembler::EQ, done);
+  // note: for the remaining code to work, index must be a multiple of BytesPerWord
+#ifdef ASSERT
+  { Label L;
+    tst(index, BytesPerWord - 1);
+    br(Assembler::EQ, L);
+    stop("index is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+
+  // Preserve obj
+  if (hdr_size_in_bytes)
+    add(obj, obj, hdr_size_in_bytes);
+  zero_memory(obj, index, t1);
+  if (hdr_size_in_bytes)
+    sub(obj, obj, hdr_size_in_bytes);
+
+  // done
+  bind(done);
+}
+
+
+void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
+  assert_different_registers(obj, t1, t2); // XXX really?
+  assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
+
+  try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
+
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+}
+
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+         "con_size_in_bytes is not multiple of alignment");
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  initialize_header(obj, klass, noreg, t1, t2);
+
+  // clear rest of allocated space
+  const Register index = t2;
+  const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
+  if (var_size_in_bytes != noreg) {
+    mov(index, var_size_in_bytes);
+    initialize_body(obj, index, hdr_size_in_bytes, t1);
+  } else if (con_size_in_bytes <= threshold) {
+    // use explicit null stores
+    int i = hdr_size_in_bytes;
+    if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
+      str(zr, Address(obj, i));
+      i += BytesPerWord;
+    }
+    for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
+      stp(zr, zr, Address(obj, i));
+  } else if (con_size_in_bytes > hdr_size_in_bytes) {
+    block_comment("zero memory");
+    // use loop to null out the fields
+
+    int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
+    mov(index,  words / 8);
+
+    const int unroll = 8; // Number of str(zr) instructions we'll unroll
+    int remainder = words % unroll;
+    lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
+
+    Label entry_point, loop;
+    b(entry_point);
+
+    bind(loop);
+    sub(index, index, 1);
+    for (int i = -unroll; i < 0; i++) {
+      if (-i == remainder)
+        bind(entry_point);
+      str(zr, Address(rscratch1, i * wordSize));
+    }
+    if (remainder == 0)
+      bind(entry_point);
+    add(rscratch1, rscratch1, unroll * wordSize);
+    cbnz(index, loop);
+
+  }
+
+  membar(StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == r0, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) {
+  assert_different_registers(obj, len, t1, t2, klass);
+
+  // determine alignment mask
+  assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+
+  // check for negative or excessive length
+  mov(rscratch1, (int32_t)max_array_allocation_length);
+  cmp(len, rscratch1);
+  br(Assembler::HS, slow_case);
+
+  const Register arr_size = t2; // okay to be the same
+  // align object end
+  mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
+  add(arr_size, arr_size, len, ext::uxtw, f);
+  andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+  try_allocate(obj, arr_size, 0, t1, t2, slow_case);
+
+  initialize_header(obj, klass, len, t1, t2);
+
+  // clear rest of allocated space
+  const Register len_zero = len;
+  initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
+
+  membar(StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == r0, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+  verify_oop(receiver);
+  // explicit NULL check not needed since load from [klass_offset] causes a trap
+  // check against inline cache
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
+  cmp_klass(receiver, iCache, rscratch1);
+}
+
+
+void C1_MacroAssembler::build_frame(int framesize) {
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump.  For this action to be legal we
+  // must ensure that this first instruction is a B, BL, NOP, BKPT,
+  // SVC, HVC, or SMC.  Make it a NOP.
+  nop();
+  // Make sure there is enough stack space for this method's activation.
+  // Note that we do this before doing an enter().
+  generate_stack_overflow_check(framesize);
+  MacroAssembler::build_frame(framesize + 2 * wordSize);
+  if (NotifySimulator) {
+    notify(Assembler::method_entry);
+  }
+}
+
+
+void C1_MacroAssembler::remove_frame(int framesize) {
+  MacroAssembler::remove_frame(framesize + 2 * wordSize);
+  if (NotifySimulator) {
+    notify(Assembler::method_reentry);
+  }
+}
+
+void C1_MacroAssembler::verified_entry() {
+}
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  if (!VerifyOops) return;
+  verify_oop_addr(Address(sp, stack_offset), "oop");
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) return;
+  Label not_null;
+  cbnz(r, not_null);
+  stop("non-null oop required");
+  bind(not_null);
+  verify_oop(r);
+}
+
+void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) {
+#ifdef ASSERT
+  static int nn;
+  if (inv_r0) mov(r0, 0xDEAD);
+  if (inv_r19) mov(r19, 0xDEAD);
+  if (inv_r2) mov(r2, nn++);
+  if (inv_r3) mov(r3, 0xDEAD);
+  if (inv_r4) mov(r4, 0xDEAD);
+  if (inv_r5) mov(r5, 0xDEAD);
+#endif
+}
+#endif // ifndef PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
+
+using MacroAssembler::null_check;
+
+// C1_MacroAssembler contains high-level macros for C1
+
+ private:
+  int _rsp_offset;    // track rsp changes
+  // initialization
+  void pd_init() { _rsp_offset = 0; }
+
+  void zero_memory(Register addr, Register len, Register t1);
+
+ public:
+  void try_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
+  void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
+
+  void float_cmp(bool is_float, int unordered_result,
+                 FloatRegister f0, FloatRegister f1,
+                 Register result);
+
+  // locking
+  // hdr     : must be r0, contents destroyed
+  // obj     : must point to the object to lock, contents preserved
+  // disp_hdr: must point to the displaced header location, contents preserved
+  // scratch : scratch register, contents destroyed
+  // returns code offset at which to add null check debug information
+  int lock_object  (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case);
+
+  // unlocking
+  // hdr     : contents destroyed
+  // obj     : must point to the object to unlock, contents preserved
+  // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed
+  void unlock_object(Register swap, Register obj, Register lock, Label& slow_case);
+
+  void initialize_object(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2                        // temp register
+  );
+
+  // allocation of fixed-size objects
+  // (can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards)
+  // obj        : will contain pointer to allocated object
+  // t1, t2     : scratch registers - contents destroyed
+  // header_size: size of object header in words
+  // object_size: total size of object in words
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case);
+
+  enum {
+    max_array_allocation_length = 0x00FFFFFF
+  };
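+  // (Presumably chosen so that even for 8-byte elements the byte size computed
+  //  on the fast allocation path stays comfortably within a 32-bit value.)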
+
+  // allocation of arrays
+  // obj        : will contain pointer to allocated object
+  // len        : array length in number of elements
+  // t          : scratch register - contents destroyed
+  // header_size: size of object header in words
+  // f          : element scale factor
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case);
+
+  int  rsp_offset() const { return _rsp_offset; }
+  void set_rsp_offset(int n) { _rsp_offset = n; }
+
+  void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
+
+#endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,1349 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "compiler/disassembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/compiledICHolderOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_aarch64.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframe.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, int args_size) {
+  // setup registers
+  assert(!(oop_result1->is_valid() || oop_result2->is_valid()) || oop_result1 != oop_result2, "registers must be different");
+  assert(oop_result1 != rthread && oop_result2 != rthread, "registers must be different");
+  assert(args_size >= 0, "illegal args_size");
+  bool align_stack = false;
+
+  mov(c_rarg0, rthread);
+  set_num_rt_args(0); // Nothing on stack
+
+  Label retaddr;
+  set_last_Java_frame(sp, rfp, retaddr, rscratch1);
+
+  // do the call
+  lea(rscratch1, RuntimeAddress(entry));
+  blrt(rscratch1, args_size + 1, 8, 1);
+  bind(retaddr);
+  int call_offset = offset();
+  // verify callee-saved register
+#ifdef ASSERT
+  push(r0, sp);
+  { Label L;
+    get_thread(r0);
+    cmp(rthread, r0);
+    br(Assembler::EQ, L);
+    stop("StubAssembler::call_RT: rthread not callee saved?");
+    bind(L);
+  }
+  pop(r0, sp);
+#endif
+  reset_last_Java_frame(true, true);
+  maybe_isb();
+
+  // check for pending exceptions
+  { Label L;
+    // check for pending exceptions (java_thread is set upon return)
+    ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    cbz(rscratch1, L);
+    // exception pending => remove activation and forward to exception handler
+    // make sure that the vm_results are cleared
+    if (oop_result1->is_valid()) {
+      str(zr, Address(rthread, JavaThread::vm_result_offset()));
+    }
+    if (oop_result2->is_valid()) {
+      str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
+    }
+    if (frame_size() == no_frame_size) {
+      leave();
+      far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here();
+    } else {
+      far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+    }
+    bind(L);
+  }
+  // get oop results if there are any and reset the values in the thread
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1, rthread);
+  }
+  if (oop_result2->is_valid()) {
+    get_vm_result_2(oop_result2, rthread);
+  }
+  return call_offset;
+}
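+// Note on the return value: call_offset is recorded at 'retaddr', i.e. the pc
+// offset of the instruction following the call, and it is what callers pass to
+// OopMapSet::add_gc_map so the GC can find the register state for this call
+// site (see the stub cases further down which pair call_RT with add_gc_map).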
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1) {
+  mov(c_rarg1, arg1);
+  return call_RT(oop_result1, oop_result2, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2) {
+  if (c_rarg1 == arg2) {
+    if (c_rarg2 == arg1) {
+      mov(rscratch1, arg1);
+      mov(arg1, arg2);
+      mov(arg2, rscratch1);
+    } else {
+      mov(c_rarg2, arg2);
+      mov(c_rarg1, arg1);
+    }
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+  }
+  return call_RT(oop_result1, oop_result2, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2, Register arg3) {
+  // if there is any conflict use the stack
+  if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
+      arg2 == c_rarg1 || arg2 == c_rarg3 ||
+      arg3 == c_rarg1 || arg3 == c_rarg2) {
+    stp(arg3, arg2, Address(pre(sp, -2 * wordSize)));
+    stp(arg1, zr, Address(pre(sp, -2 * wordSize)));
+    ldp(c_rarg1, zr, Address(post(sp, 2 * wordSize)));
+    ldp(c_rarg3, c_rarg2, Address(post(sp, 2 * wordSize)));
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+    mov(c_rarg3, arg3);
+  }
+  return call_RT(oop_result1, oop_result2, entry, 3);
+}
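+// Typical use from a stub (illustrative sketch only; registers and entry
+// points vary per stub -- compare e.g. the new_instance case below, where
+// '__' expands to 'sasm->'):
+//   OopMap* map = save_live_registers(sasm);
+//   int call_offset = __ call_RT(obj, noreg,
+//                                CAST_FROM_FN_PTR(address, new_instance), klass);
+//   oop_maps->add_gc_map(call_offset, map);
+//   restore_live_registers_except_r0(sasm);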
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {
+ private:
+  StubAssembler* _sasm;
+
+ public:
+  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
+  void load_argument(int offset_in_words, Register reg);
+
+  ~StubFrame();
+};
+
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
+  _sasm = sasm;
+  __ set_info(name, must_gc_arguments);
+  __ enter();
+}
+
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+  // rbp, + 0: link
+  //     + 1: return address
+  //     + 2: argument with offset 0
+  //     + 3: argument with offset 1
+  //     + 4: ...
+
+  __ ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord));
+}
+
+
+StubFrame::~StubFrame() {
+  __ leave();
+  __ ret(lr);
+}
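+// Usage sketch (mirrors e.g. the monitorenter case below): constructing a
+// StubFrame emits set_info() and the frame prologue, and its destructor emits
+// leave()/ret(lr), so a stub body only has to load its arguments and make the
+// runtime call:
+//   { StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+//     f.load_argument(1, r0);   // object
+//     f.load_argument(0, r1);   // lock address
+//     ... call_RT / oop map bookkeeping ...
+//   } // frame torn down here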
+
+#undef __
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
+
+// Stack layout for saving/restoring  all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note that users of this frame may well have arguments to some runtime
+// while these values are on the stack. These positions neglect those arguments,
+// but the code in save_live_registers will take the argument count into
+// account.
+//
+
+enum reg_save_layout {
+  reg_save_frame_size = 32 /* float */ + 32 /* integer */
+};
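+// For orientation: 32 float + 32 integer slots is 64 words, i.e. 512 bytes
+// with an 8-byte word, which generate_oop_map() below describes as 128
+// 4-byte VMReg stack slots.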
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to be smart about FP registers.  In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them.  The
+// deopt blob is the only thing which needs to describe FPU registers.
+// In all other cases it should be sufficient to simply save their
+// current value.
+
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];
+static int reg_save_size_in_words;
+static int frame_size_in_bytes = -1;
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
+  int frame_size_in_bytes = reg_save_frame_size * BytesPerWord;
+  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+  int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
+      int sp_offset = cpu_reg_save_offsets[i];
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                r->as_VMReg());
+    }
+  }
+
+  if (save_fpu_registers) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      {
+        int sp_offset = fpu_reg_save_offsets[i];
+        oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                  r->as_VMReg());
+      }
+    }
+  }
+  return oop_map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm,
+                                   bool save_fpu_registers = true) {
+  __ block_comment("save_live_registers");
+
+  __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+
+  if (save_fpu_registers) {
+    for (int i = 30; i >= 0; i -= 2)
+      __ stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ pre(sp, -2 * wordSize)));
+  } else {
+    __ add(sp, sp, -32 * wordSize);
+  }
+
+  return generate_oop_map(sasm, save_fpu_registers);
+}
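+// Layout sketch: the integer registers r0..r29 are pushed first and so sit at
+// the higher addresses, with the 32 FP registers stored in pairs below them;
+// the FP save area therefore starts at the final sp, matching the offsets
+// assigned in Runtime1::initialize_pd() (FP offsets first, then CPU offsets).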
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ pop(RegSet::range(r0, r29), sp);
+}
+
+static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true)  {
+
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ ldp(zr, r1, Address(__ post(sp, 16)));
+  __ pop(RegSet::range(r2, r29), sp);
+}
+
+
+
+void Runtime1::initialize_pd() {
+  int i;
+  int sp_offset = 0;
+
+  // all float registers are saved explicitly
+  assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here");
+  for (i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    fpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in halfwords
+  }
+
+  for (i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    cpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in halfwords
+  }
+}
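+// Note: each saved 64-bit register occupies two 4-byte VMReg stack slots
+// (the "halfwords" mentioned above), which is why sp_offset advances by 2
+// per register.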
+
+
+// target: the entry point of the method that creates and posts the exception oop
+// has_argument: true if the exception needs an argument (passed in rscratch1)
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // make a frame and preserve the caller's caller-save registers
+  OopMap* oop_map = save_live_registers(sasm);
+  int call_offset;
+  if (!has_argument) {
+    call_offset = __ call_RT(noreg, noreg, target);
+  } else {
+    call_offset = __ call_RT(noreg, noreg, target, rscratch1);
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  __ should_not_reach_here();
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+  __ block_comment("generate_handle_exception");
+
+  // incoming parameters
+  const Register exception_oop = r0;
+  const Register exception_pc  = r3;
+  // other registers used in this stub
+
+  // Save registers, if required.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places.  Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found.  Otherwise unwind and dispatch to the caller's
+    // exception handler.
+    oop_map = generate_oop_map(sasm, 1 /*save_fpu_registers*/);
+
+    // load and clear pending exception oop into r0
+    __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset()));
+    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
+
+    // load issuing PC (the return address for this stub) into r3
+    __ ldr(exception_pc, Address(rfp, 1*BytesPerWord));
+
+    // make sure that the vm_results are cleared (may be unnecessary)
+    __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
+    __ str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
+    break;
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id: {
+    // At this point all registers except exception oop (r0) and
+    // exception pc (lr) are dead.
+    const int frame_size = 2 /*fp, return address*/;
+    oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
+    sasm->set_frame_size(frame_size);
+    break;
+  }
+  default:
+    __ should_not_reach_here();
+    break;
+  }
+
+  // verify that only r0 and r3 are valid at this time
+  __ invalidate_registers(false, true, true, false, true, true);
+  // verify that r0 contains a valid exception
+  __ verify_not_null_oop(exception_oop);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are
+  // empty before writing to them
+  Label oop_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
+  __ cbz(rscratch1, oop_empty);
+  __ stop("exception oop already set");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+  __ cbz(rscratch1, pc_empty);
+  __ stop("exception pc already set");
+  __ bind(pc_empty);
+#endif
+
+  // save exception oop and issuing pc into JavaThread
+  // (exception handler will load it from here)
+  __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset()));
+
+  // patch throwing pc into return address (has bci & oop map)
+  __ str(exception_pc, Address(rfp, 1*BytesPerWord));
+
+  // compute the exception handler.
+  // the exception oop and the throwing pc are read from the fields in JavaThread
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // r0: handler address
+  //      will be the deopt blob if the nmethod was deoptimized while we looked up
+  //      the handler, regardless of whether a handler existed in the nmethod.
+
+  // only r0 is valid at this time, all other registers have been destroyed by the runtime call
+  __ invalidate_registers(false, true, true, true, true, true);
+
+  // patch the return address, this stub will directly return to the exception handler
+  __ str(r0, Address(rfp, 1*BytesPerWord));
+
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // Restore the registers that were saved at the beginning.
+    restore_live_registers(sasm, id != handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id:
+    // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
+    // since we do a leave anyway.
+
+    // Pop the return address since we are possibly changing SP (restoring from BP).
+    __ leave();
+
+    // Restore SP from FP if the exception PC is a method handle call site.
+    {
+      Label nope;
+      __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
+      __ cbzw(rscratch1, nope);
+      __ mov(sp, rfp);
+      __ bind(nope);
+    }
+
+    __ ret(lr);  // jump to exception handler
+    break;
+  default:  ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+  // incoming parameters
+  const Register exception_oop = r0;
+  // callee-saved copy of exception_oop during runtime call
+  const Register exception_oop_callee_saved = r19;
+  // other registers used in this stub
+  const Register exception_pc = r3;
+  const Register handler_addr = r1;
+
+  // verify that only r0 is valid at this time
+  __ invalidate_registers(false, true, true, true, true, true);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are empty
+  Label oop_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
+  __ cbz(rscratch1, oop_empty);
+  __ stop("exception oop must be empty");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+  __ cbz(rscratch1, pc_empty);
+  __ stop("exception pc must be empty");
+  __ bind(pc_empty);
+#endif
+
+  // Save our return address because
+  // exception_handler_for_return_address will destroy it.  We also
+  // save exception_oop.
+  __ stp(lr, exception_oop, Address(__ pre(sp, -2 * wordSize)));
+
+  // search the exception handler address of the caller (using the return address)
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr);
+  // r0: exception handler address of the caller
+
+  // Only R0 is valid at this time; all other registers have been
+  // destroyed by the call.
+  __ invalidate_registers(false, true, true, true, false, true);
+
+  // move result of call into correct register
+  __ mov(handler_addr, r0);
+
+  // get throwing pc (= return address).
+  // lr has been destroyed by the call
+  __ ldp(lr, exception_oop, Address(__ post(sp, 2 * wordSize)));
+  __ mov(r3, lr);
+
+  __ verify_not_null_oop(exception_oop);
+
+  {
+    Label foo;
+    __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
+    __ cbzw(rscratch1, foo);
+    __ mov(sp, rfp);
+    __ bind(foo);
+  }
+
+  // continue at exception handler (return address removed)
+  // note: do *not* remove arguments when unwinding the
+  //       activation since the caller assumes having
+  //       all arguments on the stack when entering the
+  //       runtime to determine the exception handler
+  //       (GC happens at call site with arguments!)
+  // r0: exception oop
+  // r3: throwing pc
+  // r1: exception handler
+  __ br(handler_addr);
+}
+
+
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // use the maximum number of runtime arguments here because it is difficult to
+  // distinguish each RT-Call.
+  // Note: This number also affects the RT-Call in generate_handle_exception because
+  //       the oop-map is shared for all calls.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  OopMap* oop_map = save_live_registers(sasm);
+
+  __ mov(c_rarg0, rthread);
+  Label retaddr;
+  __ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
+  // do the call
+  __ lea(rscratch1, RuntimeAddress(target));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ bind(retaddr);
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(__ offset(), oop_map);
+  // verify callee-saved register
+#ifdef ASSERT
+  { Label L;
+    __ get_thread(rscratch1);
+    __ cmp(rthread, rscratch1);
+    __ br(Assembler::EQ, L);
+    __ stop("StubAssembler::call_RT: rthread not callee saved?");
+    __ bind(L);
+  }
+#endif
+  __ reset_last_Java_frame(true, false);
+  __ maybe_isb();
+
+  // check for pending exceptions
+  { Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, L);
+    // exception pending => remove activation and forward to exception handler
+
+    { Label L1;
+      __ cbnz(r0, L1);                                  // have we deoptimized?
+      __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      __ bind(L1);
+    }
+
+    // the deopt blob expects exceptions in the special fields of
+    // JavaThread, so copy and clear pending exception.
+
+    // load and clear pending exception
+    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
+    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
+
+    // check that there is really a valid exception
+    __ verify_not_null_oop(r0);
+
+    // load throwing pc: this is the return address of the stub
+    __ mov(r3, lr);
+
+#ifdef ASSERT
+    // check that fields in JavaThread for exception oop and issuing pc are empty
+    Label oop_empty;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, oop_empty);
+    __ stop("exception oop must be empty");
+    __ bind(oop_empty);
+
+    Label pc_empty;
+    __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+    __ cbz(rscratch1, pc_empty);
+    __ stop("exception pc must be empty");
+    __ bind(pc_empty);
+#endif
+
+    // store exception oop and throwing pc to JavaThread
+    __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
+    __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
+
+    restore_live_registers(sasm);
+
+    __ leave();
+
+    // Forward the exception directly to the deopt blob. We must not blow
+    // any registers and must leave the throwing pc on the stack.  A patch may
+    // have values live in registers, so we use the entry point with the
+    // exception in tls.
+    __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
+
+    __ bind(L);
+  }
+
+
+  // Runtime will return true if the nmethod has been deoptimized during
+  // the patching process. In that case we must do a deopt reexecute instead.
+
+  Label reexecuteEntry, cont;
+
+  __ cbz(r0, cont);                                 // have we deoptimized?
+
+  // Will reexecute. The proper return address is already on the stack; we just restore
+  // registers, pop all of our frame but the return address, and jump to the deopt blob.
+  restore_live_registers(sasm);
+  __ leave();
+  __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+
+  __ bind(cont);
+  restore_live_registers(sasm);
+  __ leave();
+  __ ret(lr);
+
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+
+  const Register exception_oop = r0;
+  const Register exception_pc  = r3;
+
+  // for better readability
+  const bool must_gc_arguments = true;
+  const bool dont_gc_arguments = false;
+
+  // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
+  bool save_fpu_registers = true;
+
+  // stub code & info for the different stubs
+  OopMapSet* oop_maps = NULL;
+  OopMap* oop_map = NULL;
+  switch (id) {
+    {
+    case forward_exception_id:
+      {
+        oop_maps = generate_handle_exception(id, sasm);
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case throw_div0_exception_id:
+      { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+      }
+      break;
+
+    case throw_null_pointer_exception_id:
+      { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+      }
+      break;
+
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        Register klass = r3; // Incoming
+        Register obj   = r0; // Result
+
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+            UseTLAB && FastTLABRefill) {
+          Label slow_path;
+          Register obj_size = r2;
+          Register t1       = r19;
+          Register t2       = r4;
+          assert_different_registers(klass, obj, obj_size, t1, t2);
+
+          __ stp(r5, r19, Address(__ pre(sp, -2 * wordSize)));
+
+          if (id == fast_new_instance_init_check_id) {
+            // make sure the klass is initialized
+            __ ldrb(rscratch1, Address(klass, instanceKlass::init_state_offset()));
+            __ cmpw(rscratch1, instanceKlass::fully_initialized);
+            __ br(Assembler::NE, slow_path);
+          }
+
+#ifdef ASSERT
+          // assert object can be fast path allocated
+          {
+            Label ok, not_ok;
+            __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+            __ cmp(obj_size, 0u);
+            __ br(Assembler::LE, not_ok);  // make sure it's an instance (LH > 0)
+            __ tstw(obj_size, Klass::_lh_instance_slow_path_bit);
+            __ br(Assembler::EQ, ok);
+            __ bind(not_ok);
+            __ stop("assert(can be fast path allocated)");
+            __ should_not_reach_here();
+            __ bind(ok);
+          }
+#endif // ASSERT
+
+          // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          Label retry_tlab, try_eden;
+          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy r3 (klass), returns r5
+
+          __ bind(retry_tlab);
+
+          // get the instance size (size is positive so ldrw is fine for 64bit)
+          __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+
+          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
+
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ret(lr);
+
+          __ bind(try_eden);
+          // get the instance size (size is positive so ldrw is fine for 64bit)
+          __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+
+          __ eden_allocate(obj, obj_size, 0, t1, slow_path);
+          __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1);
+
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ret(lr);
+
+          __ bind(slow_path);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+        __ verify_oop(obj);
+        __ leave();
+        __ ret(lr);
+
+        // r0: new instance
+      }
+
+      break;
+
+    case counter_overflow_id:
+      {
+        Register bci = r0, method = r1;
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        // Retrieve bci
+        __ ldrw(bci, Address(rfp, 2*BytesPerWord));
+        // And a pointer to the methodOop
+        __ ldr(method, Address(rfp, 3*BytesPerWord));
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case new_type_array_id:
+    case new_object_array_id:
+      {
+        Register length   = r19; // Incoming
+        Register klass    = r3; // Incoming
+        Register obj      = r0; // Result
+
+        if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+        } else {
+          __ set_info("new_object_array", dont_gc_arguments);
+        }
+
+#ifdef ASSERT
+        // assert object type is really an array of the proper kind
+        {
+          Label ok;
+          Register t0 = obj;
+          __ ldrw(t0, Address(klass, Klass::layout_helper_offset()));
+          __ asrw(t0, t0, Klass::_lh_array_tag_shift);
+          int tag = ((id == new_type_array_id)
+                     ? Klass::_lh_array_tag_type_value
+                     : Klass::_lh_array_tag_obj_value);
+          __ mov(rscratch1, tag);
+          __ cmpw(t0, rscratch1);
+          __ br(Assembler::EQ, ok);
+          __ stop("assert(is an array klass)");
+          __ should_not_reach_here();
+          __ bind(ok);
+        }
+#endif // ASSERT
+
+        if (UseTLAB && FastTLABRefill) {
+          Register arr_size = r4;
+          Register t1       = r2;
+          Register t2       = r5;
+          Label slow_path;
+          assert_different_registers(length, klass, obj, arr_size, t1, t2);
+
+          // check that array length is small enough for fast path.
+          __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length);
+          __ cmpw(length, rscratch1);
+          __ br(Assembler::HI, slow_path);
+
+          // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          Label retry_tlab, try_eden;
+          const Register thread =
+            __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves r19 & r3, returns rthread
+
+          __ bind(retry_tlab);
+
+          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
+          // since size is positive ldrw does right thing on 64bit
+          __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
+          __ lslvw(arr_size, length, t1);
+          __ ubfx(t1, t1, Klass::_lh_header_size_shift,
+                  exact_log2(Klass::_lh_header_size_mask + 1));
+          __ add(arr_size, arr_size, t1);
+          __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
+          __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
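+          // Worked example (assuming the default 8-byte object alignment,
+          // i.e. MinObjAlignmentInBytesMask == 7): an unaligned arr_size of 27
+          // becomes (27 + 7) & ~7 == 32.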
+
+          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
+
+          __ initialize_header(obj, klass, length, t1, t2);
+          __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
+          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
+          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
+          __ andr(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);       // body start
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ membar(Assembler::StoreStore);
+          __ verify_oop(obj);
+
+          __ ret(lr);
+
+          __ bind(try_eden);
+          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
+          // since size is positive ldrw does right thing on 64bit
+          __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
+          // since size is positive movw does right thing on 64bit
+          __ movw(arr_size, length);
+          __ lslvw(arr_size, length, t1);
+          __ ubfx(t1, t1, Klass::_lh_header_size_shift,
+                  exact_log2(Klass::_lh_header_size_mask + 1));
+          __ add(arr_size, arr_size, t1);
+          __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
+          __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+          __ eden_allocate(obj, arr_size, 0, t1, slow_path);  // preserves arr_size
+          __ incr_allocated_bytes(thread, arr_size, 0, rscratch1);
+
+          __ initialize_header(obj, klass, length, t1, t2);
+          __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
+          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
+          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
+          __ andr(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);       // body start
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ membar(Assembler::StoreStore);
+          __ verify_oop(obj);
+
+          __ ret(lr);
+
+          __ bind(slow_path);
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        int call_offset;
+        if (id == new_type_array_id) {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+        } else {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+        }
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        __ verify_oop(obj);
+        __ leave();
+        __ ret(lr);
+
+        // r0: new array
+      }
+      break;
+
+    case new_multi_array_id:
+      { StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
+        // r0: klass
+        // r19: rank
+        // r2: address of 1st dimension
+        OopMap* map = save_live_registers(sasm);
+        __ mov(c_rarg1, r0);
+        __ mov(c_rarg3, r2);
+        __ mov(c_rarg2, r19);
+        int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), r1, r2, r3);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        // r0: new multi array
+        __ verify_oop(r0);
+      }
+      break;
+
+    case register_finalizer_id:
+      {
+        __ set_info("register_finalizer", dont_gc_arguments);
+
+        // This is called via call_runtime so the arguments
+        // will be placed in C ABI locations
+
+        __ verify_oop(c_rarg0);
+
+        // load the klass and check the has finalizer flag
+        Label register_finalizer;
+        Register t = r5;
+        __ load_klass(t, r0);
+        __ ldrw(t, Address(t, Klass::access_flags_offset()));
+        __ tst(t, JVM_ACC_HAS_FINALIZER);
+        __ br(Assembler::NE, register_finalizer);
+        __ ret(lr);
+
+        __ bind(register_finalizer);
+        __ enter();
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), r0);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        // Now restore all the live registers
+        restore_live_registers(sasm);
+
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case throw_class_cast_exception_id:
+      { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+      }
+      break;
+
+    case throw_incompatible_class_change_error_id:
+      { StubFrame f(sasm, "throw_incompatible_class_change_error", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+      }
+      break;
+
+    case slow_subtype_check_id:
+      {
+        // Typical calling sequence:
+        // __ push(klass_RInfo);  // object klass or other subclass
+        // __ push(sup_k_RInfo);  // array element klass or other superclass
+        // __ bl(slow_subtype_check);
+        // Note that the subclass is pushed first, and is therefore deepest.
+        enum layout {
+          r0_off, r0_off_hi,
+          r2_off, r2_off_hi,
+          r4_off, r4_off_hi,
+          r5_off, r5_off_hi,
+          sup_k_off, sup_k_off_hi,
+          klass_off, klass_off_hi,
+          framesize,
+          result_off = sup_k_off
+        };
+
+        __ set_info("slow_subtype_check", dont_gc_arguments);
+        __ push(RegSet::of(r0, r2, r4, r5), sp);
+
+        // This is called by pushing args and not with C abi
+        // __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
+        // __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
+
+        __ ldp(r4, r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size));
+
+        Label miss;
+        __ check_klass_subtype_slow_path(r4, r0, r2, r5, NULL, &miss);
+
+        // fallthrough on success:
+        __ mov(rscratch1, 1);
+        __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop(RegSet::of(r0, r2, r4, r5), sp);
+        __ ret(lr);
+
+        __ bind(miss);
+        __ str(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop(RegSet::of(r0, r2, r4, r5), sp);
+        __ ret(lr);
+      }
+      break;
+
+    case monitorenter_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorenter_id:
+      {
+        StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(1, r0); // r0: object
+        f.load_argument(0, r1); // r1: lock address
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), r0, r1);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case monitorexit_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorexit_id:
+      {
+        StubFrame f(sasm, "monitorexit", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(0, r0); // r0: lock address
+
+        // note: really a leaf routine but must setup last java sp
+        //       => use call_RT for now (speed can be improved by
+        //       doing last java sp setup manually)
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), r0);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case deoptimize_id:
+      {
+        StubFrame f(sasm, "deoptimize", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        __ leave();
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+      }
+      break;
+
+    case throw_range_check_failed_id:
+      { StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+      }
+      break;
+
+    case unwind_exception_id:
+      { __ set_info("unwind_exception", dont_gc_arguments);
+        // note: no stubframe since we are about to leave the current
+        //       activation and we are calling a leaf VM function only.
+        generate_unwind_exception(sasm);
+      }
+      break;
+
+    case access_field_patching_id:
+      { StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+      }
+      break;
+
+    case load_klass_patching_id:
+      { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+      }
+      break;
+
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      { StubFrame f(sasm, "handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case handle_exception_from_callee_id:
+      { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case throw_index_exception_id:
+      { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+      }
+      break;
+
+    case throw_array_store_exception_id:
+      { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
+        // tos + 0: link
+        //     + 1: return address
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+      }
+      break;
+
+#ifndef SERIALGC
+
+    case g1_pre_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
+        // arg0 : previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ mov(r0, (int)id);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        const Register pre_val = r0;
+        const Register thread = rthread;
+        const Register tmp = rscratch1;
+
+        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_active()));
+
+        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        Label done;
+        Label runtime;
+
+        // Can we store original value in the thread's buffer?
+        __ ldr(tmp, queue_index);
+        __ cbz(tmp, runtime);
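+        // (The SATB queue index is a byte offset that counts down from the
+        // buffer size; zero means the buffer is full, so fall back to the
+        // runtime call.)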
+
+        __ sub(tmp, tmp, wordSize);
+        __ str(tmp, queue_index);
+        __ ldr(rscratch2, buffer);
+        __ add(tmp, tmp, rscratch2);
+        f.load_argument(0, rscratch2);
+        __ str(rscratch2, Address(tmp, 0));
+        __ b(done);
+
+        __ bind(runtime);
+        __ push_call_clobbered_registers();
+        f.load_argument(0, pre_val);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+        __ pop_call_clobbered_registers();
+        __ bind(done);
+      }
+      break;
+    case g1_post_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
+
+        // arg0: store_address
+        Address store_addr(rfp, 2*BytesPerWord);
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+
+        Label done;
+        Label runtime;
+
+        // At this point we know new_value is non-NULL and the new_value crosses regions.
+        // Must check to see if card is already dirty
+
+        const Register thread = rthread;
+
+        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        const Register card_offset = rscratch2;
+        // LR is free here, so we can use it to hold the byte_map_base.
+        const Register byte_map_base = lr;
+
+        assert_different_registers(card_offset, byte_map_base, rscratch1);
+
+        f.load_argument(0, card_offset);
+        __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift);
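+        // (With the default 512-byte cards, card_shift is 9, so this turns the
+        // store address into an index into the card-table byte map.)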
+        __ load_byte_map_base(byte_map_base);
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
+        __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+        __ br(Assembler::EQ, done);
+
+        assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
+
+        __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
+        __ cbzw(rscratch1, done);
+
+        // storing region crossing non-NULL, card is clean.
+        // dirty card and log.
+        __ strb(zr, Address(byte_map_base, card_offset));
+
+        // Convert card offset into an address in card_addr
+        Register card_addr = card_offset;
+        __ add(card_addr, byte_map_base, card_addr);
+
+        __ ldr(rscratch1, queue_index);
+        __ cbz(rscratch1, runtime);
+        __ sub(rscratch1, rscratch1, wordSize);
+        __ str(rscratch1, queue_index);
+
+        // Reuse LR to hold buffer_addr
+        const Register buffer_addr = lr;
+
+        __ ldr(buffer_addr, buffer);
+        __ str(card_addr, Address(buffer_addr, rscratch1));
+        __ b(done);
+
+        __ bind(runtime);
+        __ push_call_clobbered_registers();
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+        __ pop_call_clobbered_registers();
+        __ bind(done);
+
+      }
+      break;
+#endif // !SERIALGC
+
+    default:
+      { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
+        __ mov(r0, (int)id);
+        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
+        __ should_not_reach_here();
+      }
+      break;
+    }
+  }
+  return oop_maps;
+}
+
+#undef __
+
+// Simple helper to see if the caller of a runtime stub which
+// entered the VM has been deoptimized
+
+static bool caller_is_deopted() {
+  JavaThread* thread = JavaThread::current();
+  RegisterMap reg_map(thread, false);
+  frame runtime_frame = thread->last_frame();
+  frame caller_frame = runtime_frame.sender(&reg_map);
+  assert(caller_frame.is_compiled_frame(), "must be compiled");
+  return caller_frame.is_deoptimized_frame();
+}
+
+// AArch64 defines its own version of this routine which simply deopts the
+// relevant code
+
+JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id ))
+{
+  RegisterMap reg_map(thread, false);
+
+  NOT_PRODUCT(_patch_code_slowcase_cnt++;)
+
+  // According to the ARMv8 ARM, "Concurrent modification and
+  // execution of instructions can lead to the resulting instruction
+  // performing any behavior that can be achieved by executing any
+  // sequence of instructions that can be executed from the same
+  // Exception level, except where the instruction before
+  // modification and the instruction after modification is a B, BL,
+  // NOP, BKPT, SVC, HVC, or SMC instruction."
+  //
+  // This effectively makes the games we play when patching
+  // impossible, so when we come across an access that needs
+  // patching we must deoptimize.
+
+  if (TracePatching) {
+    tty->print_cr("Deoptimizing because patch is needed");
+  }
+
+  frame runtime_frame = thread->last_frame();
+  frame caller_frame = runtime_frame.sender(&reg_map);
+
+  // It's possible the nmethod was invalidated in the last
+  // safepoint, but if it's still alive then make it not_entrant.
+  nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
+  if (nm != NULL) {
+    nm->make_not_entrant();
+  }
+
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
+
+  // Return to the now deoptimized frame.
+}
+
+JRT_END
+
+const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c1_globals_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+
+#ifndef TIERED
+define_pd_global(bool, BackgroundCompilation,        true );
+define_pd_global(bool, UseTLAB,                      true );
+define_pd_global(bool, ResizeTLAB,                   true );
+define_pd_global(bool, InlineIntrinsics,             true );
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 false);
+define_pd_global(bool, UseOnStackReplacement,        true );
+define_pd_global(bool, TieredCompilation,            false);
+#ifdef BUILTIN_SIM
+// We compile very aggressively with the builtin simulator because
+// doing so greatly reduces run times and tests more code.
+define_pd_global(intx, CompileThreshold,             150 );
+define_pd_global(intx, BackEdgeThreshold,            500);
+#else
+define_pd_global(intx, CompileThreshold,             1500 );
+define_pd_global(intx, BackEdgeThreshold,            100000);
+#endif
+
+define_pd_global(intx, OnStackReplacePercentage,     933  );
+define_pd_global(intx, FreqInlineSize,               325  );
+define_pd_global(intx, NewSizeThreadIncrease,        4*K  );
+define_pd_global(intx, InitialCodeCacheSize,         160*K);
+define_pd_global(intx, ReservedCodeCacheSize,        32*M );
+define_pd_global(bool, ProfileInterpreter,           false);
+define_pd_global(intx, CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx, CodeCacheMinBlockLength,     1);
+define_pd_global(uintx, PermSize,                    12*M);
+define_pd_global(uintx, MaxPermSize,                 64*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true );
+define_pd_global(uint64_t,MaxRAM,                    1ULL*G);
+define_pd_global(bool, CICompileOSR,                 true );
+#endif // !TIERED
+define_pd_global(bool, UseTypeProfile,               false);
+define_pd_global(bool, RoundFPResults,               true );
+
+define_pd_global(bool, LIRFillDelaySlots,            false);
+define_pd_global(bool, OptimizeSinglePrecision,      true );
+define_pd_global(bool, CSEArrayLength,               false);
+define_pd_global(bool, TwoOperandLIRForm,            false );
+
+define_pd_global(intx, SafepointPollOffset,          0  );
+
+#endif // CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp).  Alpha-sorted.
+
+define_pd_global(bool, BackgroundCompilation,        true);
+define_pd_global(bool, UseTLAB,                      true);
+define_pd_global(bool, ResizeTLAB,                   true);
+define_pd_global(bool, CICompileOSR,                 true);
+define_pd_global(bool, InlineIntrinsics,             true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 true);
+define_pd_global(bool, UseOnStackReplacement,        true);
+#ifdef CC_INTERP
+define_pd_global(bool, ProfileInterpreter,           false);
+#else
+define_pd_global(bool, ProfileInterpreter,           true);
+#endif // CC_INTERP
+// !!! FIXME AARCH64 -- check that we do need this rather than false
+define_pd_global(bool, TieredCompilation,            trueInTiered);
+define_pd_global(intx, CompileThreshold,             10000);
+define_pd_global(intx, BackEdgeThreshold,            100000);
+
+define_pd_global(intx, OnStackReplacePercentage,     140);
+define_pd_global(intx, ConditionalMoveLimit,         3);
+define_pd_global(intx, FLOATPRESSURE,                64);
+define_pd_global(intx, FreqInlineSize,               325);
+define_pd_global(intx, INTPRESSURE,                  25);
+define_pd_global(intx, InteriorEntryAlignment,       16);
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, LoopUnrollLimit,              60);
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(intx, InitialCodeCacheSize,         2496*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(intx, CodeCacheExpansionSize,       64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t,MaxRAM,                    128ULL*G);
+define_pd_global(intx, RegisterCostAreaRatio,        16000);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole,                 false);
+define_pd_global(bool, UseCISCSpill,                 true);
+define_pd_global(bool, OptoScheduling,               false);
+define_pd_global(bool, OptoBundling,                 false);
+
+define_pd_global(intx, ReservedCodeCacheSize,        48*M);
+define_pd_global(uintx,CodeCacheMinBlockLength,      4);
+
+// Heap related flags
+define_pd_global(uintx,PermSize,    ScaleForWordSize(16*M));
+define_pd_global(uintx,MaxPermSize, ScaleForWordSize(64*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+#endif // CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
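InitialCodeCacheSize above is noted as an integral multiple of CodeCacheExpansionSize;
with these defaults the ratio is exactly 39. A small stand-alone check of that
relationship (a sketch only, using C++11 static_assert, not part of the patch):

    // Sketch: 2496K expands in whole 64K steps, so the initial size is
    // an exact multiple of the expansion size.
    static const long K_ = 1024;
    static_assert((2496 * K_) % (64 * K_) == 0,
                  "InitialCodeCacheSize should be a multiple of CodeCacheExpansionSize");
    static_assert((2496 * K_) / (64 * K_) == 39, "ratio of the defaults above");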
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/c2_init_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// processor dependent initialization for aarch64
+
+void Compile::pd_compiler2_init() {
+  guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
+  // QQQ presumably all 64-bit CPUs support this. Seems like the ifdef could
+  // simply be left out.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/codeBuffer_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
+#define CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
+
+private:
+  void pd_initialize() {}
+
+public:
+  void flush_bundle(bool start_new_bundle) {}
+
+#endif // CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/compile_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Name space for methods with platform dependent extensions of compile
+// (this is friend of compile).
+
+#ifndef CPU_AARCH64_VM_COMPILE_AARCH64_HPP
+#define CPU_AARCH64_VM_COMPILE_AARCH64_HPP
+
+class Compile;
+
+class PdCompile {
+public:
+  static void pd_post_matching_hook(Compile* C) { }  // empty on aarch64
+};
+
+#endif // CPU_AARCH64_VM_COMPILE_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/copy_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_COPY_AARCH64_HPP
+#define CPU_AARCH64_VM_COPY_AARCH64_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "copy_linux_aarch64.inline.hpp"
+#endif
+
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong  v  = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif // CPU_AARCH64_VM_COPY_AARCH64_HPP
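pd_fill_to_words above widens the 32-bit fill value into a 64-bit pattern so each
store covers a full heap word. A stand-alone sketch of the same widening, with
hypothetical names and fixed-width types instead of the HotSpot typedefs (not part
of the patch):

    #include <stdint.h>
    #include <stdio.h>

    // Mirrors the shift-and-or in pd_fill_to_words: the 32-bit value ends up
    // in both halves of the 64-bit word that is actually stored.
    int main() {
      uint32_t value = 0xDEADBEEFu;
      uint64_t v = ((uint64_t)value << 32) | value;
      printf("%016llx\n", (unsigned long long)v);  // prints deadbeefdeadbeef
      return 0;
    }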
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
+#define CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
+
+ protected:
+
+  void generate_more_monitors();
+  void generate_deopt_handling();
+
+#endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/cpustate_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,592 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef _CPU_STATE_H
+#define _CPU_STATE_H
+
+#include <sys/types.h>
+
+/*
+ * symbolic names used to identify general registers which also match
+ * the registers indices in machine code
+ *
+ * We have 32 general registers which can be read/written as 32 bit or
+ * 64 bit sources/sinks and are appropriately referred to as Wn or Xn
+ * in the assembly code.  Some instructions mix these access modes
+ * (e.g. ADD X0, X1, W2) so the implementation of the instruction
+ * needs to *know* which type of read or write access is required.
+ */
+enum GReg {
+  R0,
+  R1,
+  R2,
+  R3,
+  R4,
+  R5,
+  R6,
+  R7,
+  R8,
+  R9,
+  R10,
+  R11,
+  R12,
+  R13,
+  R14,
+  R15,
+  R16,
+  R17,
+  R18,
+  R19,
+  R20,
+  R21,
+  R22,
+  R23,
+  R24,
+  R25,
+  R26,
+  R27,
+  R28,
+  R29,
+  R30,
+  R31,
+  // and now the aliases
+  RSCRATCH1=R8,
+  RSCRATCH2=R9,
+  RMETHOD=R12,
+  RESP=R20,
+  RDISPATCH=R21,
+  RBCP=R22,
+  RLOCALS=R24,
+  RMONITORS=R25,
+  RCPOOL=R26,
+  RHEAPBASE=R27,
+  RTHREAD=R28,
+  FP = R29,
+  LR = R30,
+  SP = R31,
+  ZR = R31
+};
+
+/*
+ * symbolic names used to refer to floating point registers which also
+ * match the registers indices in machine code
+ *
+ * We have 32 FP registers which can be read/written as 8, 16, 32, 64
+ * and 128 bit sources/sinks and are appropriately referred to as Bn,
+ * Hn, Sn, Dn and Qn in the assembly code. Some instructions mix these
+ * access modes (e.g. FCVT S0, D0) so the implementation of the
+ * instruction needs to *know* which type of read or write access is
+ * required.
+ */
+
+enum VReg {
+  V0,
+  V1,
+  V2,
+  V3,
+  V4,
+  V5,
+  V6,
+  V7,
+  V8,
+  V9,
+  V10,
+  V11,
+  V12,
+  V13,
+  V14,
+  V15,
+  V16,
+  V17,
+  V18,
+  V19,
+  V20,
+  V21,
+  V22,
+  V23,
+  V24,
+  V25,
+  V26,
+  V27,
+  V28,
+  V29,
+  V30,
+  V31,
+};
+
+/**
+ * all the different integer bit patterns for the components of a
+ * general register are overlaid here using a union so as to allow all
+ * reading and writing of the desired bits.
+ *
+ * n.b. the ARM spec says that when you write a 32 bit register you
+ * are supposed to write the low 32 bits and zero the high 32
+ * bits. But we don't actually have to care about this because Java
+ * will only ever consume the 32 bit value as a 64 bit quantity after
+ * an explicit extend.
+ */
+union GRegisterValue
+{
+  int8_t s8;
+  int16_t s16;
+  int32_t s32;
+  int64_t s64;
+  u_int8_t u8;
+  u_int16_t u16;
+  u_int32_t u32;
+  u_int64_t u64;
+};
+
+class GRegister
+{
+public:
+  GRegisterValue value;
+};
+
+/*
+ * float registers provide for storage of a single, double or quad
+ * word format float in the same register. single floats are not
+ * paired within each double register as per 32 bit arm. instead each
+ * 128 bit register Vn embeds the bits for Sn, and Dn in the lower
+ * quarter and half, respectively, of the bits for Qn.
+ *
+ * The upper bits can also be accessed as single or double floats by
+ * the float vector operations using indexing e.g. V1.D[1], V1.S[3]
+ * etc and, for SIMD operations using a horrible index range notation.
+ *
+ * The spec also talks about accessing float registers as half words
+ * and bytes with Hn and Bn providing access to the low 16 and 8 bits
+ * of Vn but it is not really clear what these bits represent. We can
+ * probably ignore this for Java anyway. However, we do need to access
+ * the raw bits at 32 and 64 bit resolution to load to/from integer
+ * registers.
+ */
+
+union FRegisterValue
+{
+  float s;
+  double d;
+  long double q;
+  // eventually we will need to be able to access the data as a vector
+  // the integral array elements allow us to access the bits in s, d,
+  // q, vs and vd at an appropriate level of granularity
+  u_int8_t vb[16];
+  u_int16_t vh[8];
+  u_int32_t vw[4];
+  u_int64_t vx[2];
+  float vs[4];
+  double vd[2];
+};
+
+class FRegister
+{
+public:
+  FRegisterValue value;
+};
+
+/*
+ * CPSR register -- this does not exist as a directly accessible
+ * register but we need to store the flags so we can implement
+ * flag-setting and flag-testing operations
+ *
+ * we can possibly use injected x86 asm to report the outcome of flag
+ * setting operations. if so we will need to grab the flags
+ * immediately after the operation in order to ensure we don't lose
+ * them because of the actions of the simulator. so we still need
+ * somewhere to store the condition codes.
+ */
+
+class CPSRRegister
+{
+public:
+  u_int32_t value;
+
+/*
+ * condition register bit select values
+ *
+ * the order of bits here is important because some of
+ * the flag setting conditional instructions employ a
+ * bit field to populate the flags when a false condition
+ * bypasses execution of the operation and we want to
+ * be able to assign the flags register using the
+ * supplied value.
+ */
+
+  enum CPSRIdx {
+    V_IDX,
+    C_IDX,
+    Z_IDX,
+    N_IDX
+  };
+
+  enum CPSRMask {
+    V = 1 << V_IDX,
+    C = 1 << C_IDX,
+    Z = 1 << Z_IDX,
+    N = 1 << N_IDX
+  };
+
+  static const int CPSR_ALL_FLAGS = (V | C | Z | N);
+};
+
+// auxiliary function to assemble the relevant bits from
+// the x86 EFLAGS register into an ARM CPSR value
+
+#define X86_V_IDX 11
+#define X86_C_IDX 0
+#define X86_Z_IDX 6
+#define X86_N_IDX 7
+
+#define X86_V (1 << X86_V_IDX)
+#define X86_C (1 << X86_C_IDX)
+#define X86_Z (1 << X86_Z_IDX)
+#define X86_N (1 << X86_N_IDX)
+
+inline u_int32_t convertX86Flags(u_int32_t x86flags)
+{
+  u_int32_t flags;
+  // set N flag
+  flags = ((x86flags & X86_N) >> X86_N_IDX);
+  // shift then or in Z flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_Z) >> X86_Z_IDX);
+  // shift then or in C flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_C) >> X86_C_IDX);
+  // shift then or in V flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_V) >> X86_V_IDX);
+
+  return flags;
+}
+
+inline u_int32_t convertX86FlagsFP(u_int32_t x86flags)
+{
+  // x86 flags set by fcomi(x,y) are ZF:PF:CF
+  // (yes, that's PF for parity, WTF?)
+  // where
+  // 0) 0:0:0 means x > y
+  // 1) 0:0:1 means x < y
+  // 2) 1:0:0 means x = y
+  // 3) 1:1:1 means x and y are unordered
+  // note that we don't have to check PF so
+  // we really have a simple 2-bit case switch
+  // the corresponding ARM64 flags settings
+  //  in hi->lo bit order are
+  // 0) --C-
+  // 1) N---
+  // 2) -ZC-
+  // 3) --CV
+
+  static u_int32_t armFlags[] = {
+      0b0010,
+      0b1000,
+      0b0110,
+      0b0011
+  };
+  // pick out the ZF and CF bits
+  u_int32_t zc = ((x86flags & X86_Z) >> X86_Z_IDX);
+  zc <<= 1;
+  zc |= ((x86flags & X86_C) >> X86_C_IDX);
+
+  return armFlags[zc];
+}
+
+/*
+ * FPSR register -- floating point status register
+ *
+ * this register includes IDC, IXC, UFC, OFC, DZC, IOC and QC bits,
+ * and the floating point N, Z, C, V bits but the latter are unused in
+ * aarch64 mode. the sim ignores QC for now.
+ *
+ * bit positions are as per the ARMv7 FPSCR register
+ *
+ * IDC :  7 ==> Input Denormal (cumulative exception bit)
+ * IXC :  4 ==> Inexact
+ * UFC :  3 ==> Underflow
+ * OFC :  2 ==> Overflow
+ * DZC :  1 ==> Division by Zero
+ * IOC :  0 ==> Invalid Operation
+ */
+
+class FPSRRegister
+{
+public:
+  u_int32_t value;
+  // indices for bits in the FPSR register value
+  enum FPSRIdx {
+    IO_IDX = 0,
+    DZ_IDX = 1,
+    OF_IDX = 2,
+    UF_IDX = 3,
+    IX_IDX = 4,
+    ID_IDX = 7
+  };
+  // corresponding bits as numeric values
+  enum FPSRMask {
+    IO = (1 << IO_IDX),
+    DZ = (1 << DZ_IDX),
+    OF = (1 << OF_IDX),
+    UF = (1 << UF_IDX),
+    IX = (1 << IX_IDX),
+    ID = (1 << ID_IDX)
+  };
+  static const int FPSR_ALL_FPSRS = (IO | DZ | OF | UF | IX | ID);
+};
+
+// debugger support
+
+enum PrintFormat
+{
+  FMT_DECIMAL,
+  FMT_HEX,
+  FMT_SINGLE,
+  FMT_DOUBLE,
+  FMT_QUAD,
+  FMT_MULTI
+};
+
+/*
+ * model of the registers and other state associated with the cpu
+ */
+class CPUState
+{
+  friend class AArch64Simulator;
+private:
+  // this is the PC of the instruction being executed
+  u_int64_t pc;
+  // this is the PC of the instruction to be executed next
+  // it is defaulted to pc + 4 at instruction decode but
+  // execute may reset it
+
+  u_int64_t nextpc;
+  GRegister gr[33];             // extra register at index 32 is used
+                                // to hold zero value
+  FRegister fr[32];
+  CPSRRegister cpsr;
+  FPSRRegister fpsr;
+
+public:
+
+  CPUState() {
+    gr[20].value.u64 = 0;  // establish initial condition for
+                           // checkAssertions()
+    trace_counter = 0;
+  }
+
+  // General Register access macros
+
+  // only xreg or xregs can be used as an lvalue in order to update a
+  // register. this ensures that the top part of a register is always
+  // assigned when it is written by the sim.
+
+  inline u_int64_t &xreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u64;
+    } else {
+      return gr[reg].value.u64;
+    }
+  }
+
+  inline int64_t &xregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s64;
+    } else {
+      return gr[reg].value.s64;
+    }
+  }
+
+  inline u_int32_t wreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u32;
+    } else {
+      return gr[reg].value.u32;
+    }
+  }
+
+  inline int32_t wregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s32;
+    } else {
+      return gr[reg].value.s32;
+    }
+  }
+
+  inline u_int32_t hreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u16;
+    } else {
+      return gr[reg].value.u16;
+    }
+  }
+
+  inline int32_t hregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s16;
+    } else {
+      return gr[reg].value.s16;
+    }
+  }
+
+  inline u_int32_t breg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u8;
+    } else {
+      return gr[reg].value.u8;
+    }
+  }
+
+  inline int32_t bregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s8;
+    } else {
+      return gr[reg].value.s8;
+    }
+  }
+
+  // FP Register access macros
+
+  // all non-vector accessors return a reference so we can both read
+  // and assign
+
+  inline float &sreg(VReg reg) {
+    return fr[reg].value.s;
+  }
+
+  inline double &dreg(VReg reg) {
+    return fr[reg].value.d;
+  }
+
+  inline long double &qreg(VReg reg) {
+    return fr[reg].value.q;
+  }
+
+  // all vector register accessors return a pointer
+
+  inline float *vsreg(VReg reg) {
+    return &fr[reg].value.vs[0];
+  }
+
+  inline double *vdreg(VReg reg) {
+    return &fr[reg].value.vd[0];
+  }
+
+  inline u_int8_t *vbreg(VReg reg) {
+    return &fr[reg].value.vb[0];
+  }
+
+  inline u_int16_t *vhreg(VReg reg) {
+    return &fr[reg].value.vh[0];
+  }
+
+  inline u_int32_t *vwreg(VReg reg) {
+    return &fr[reg].value.vw[0];
+  }
+
+  inline u_int64_t *vxreg(VReg reg) {
+    return &fr[reg].value.vx[0];
+  }
+
+  union GRegisterValue prev_sp, prev_fp;
+
+  static const int trace_size = 256;
+  u_int64_t trace_buffer[trace_size];
+  int trace_counter;
+
+  bool checkAssertions()
+  {
+    // Make sure that SP is 16-aligned
+    // Also make sure that ESP is above SP.
+    // We don't care about checking ESP if it is null, i.e. it hasn't
+    // been used yet.
+    if (gr[31].value.u64 & 0x0f) {
+      asm volatile("nop");
+      return false;
+    }
+    return true;
+  }
+
+  // pc register accessors
+
+  // this instruction can be used to fetch the current PC
+  u_int64_t getPC();
+  // instead of setting the current PC directly you can
+  // first set the next PC (either absolute or PC-relative)
+  // and later copy the next PC into the current PC
+  // this supports a default increment by 4 at instruction
+  // fetch with an optional reset by control instructions
+  u_int64_t getNextPC();
+  void setNextPC(u_int64_t next);
+  void offsetNextPC(int64_t offset);
+  // install nextpc as current pc
+  void updatePC();
+
+  // this instruction can be used to save the next PC to LR
+  // just before installing a branch PC
+  inline void saveLR() { gr[LR].value.u64 = nextpc; }
+
+  // cpsr register accessors
+  u_int32_t getCPSRRegister();
+  void setCPSRRegister(u_int32_t flags);
+  // read a specific subset of the flags as a bit pattern
+  // mask should be composed using elements of enum CPSRRegister::CPSRMask
+  u_int32_t getCPSRBits(u_int32_t mask);
+  // assign a specific subset of the flags as a bit pattern
+  // mask and value should be composed using elements of enum CPSRRegister::CPSRMask
+  void setCPSRBits(u_int32_t mask, u_int32_t value);
+  // test the value of a single flag returned as 1 or 0
+  u_int32_t testCPSR(CPSRRegister::CPSRIdx idx);
+  // set a single flag
+  void setCPSR(CPSRRegister::CPSRIdx idx);
+  // clear a single flag
+  void clearCPSR(CPSRRegister::CPSRIdx idx);
+  // utility method to set ARM CPSR flags from an x86 bit mask generated by integer arithmetic
+  void setCPSRRegisterFromX86(u_int64_t x86Flags);
+  // utility method to set ARM CPSR flags from an x86 bit mask generated by floating compare
+  void setCPSRRegisterFromX86FP(u_int64_t x86Flags);
+
+  // fpsr register accessors
+  u_int32_t getFPSRRegister();
+  void setFPSRRegister(u_int32_t flags);
+  // read a specific subset of the fpsr bits as a bit pattern
+  // mask should be composed using elements of enum FPSRRegister::FPSRMask
+  u_int32_t getFPSRBits(u_int32_t mask);
+  // assign a specific subset of the flags as a bit pattern
+  // mask and value should be composed using elements of enum FPSRRegister::FPSRMask
+  void setFPSRBits(u_int32_t mask, u_int32_t value);
+  // test the value of a single flag returned as 1 or 0
+  u_int32_t testFPSR(FPSRRegister::FPSRIdx idx);
+  // set a single flag
+  void setFPSR(FPSRRegister::FPSRIdx idx);
+  // clear a single flag
+  void clearFPSR(FPSRRegister::FPSRIdx idx);
+
+  // debugger support
+  void printPC(int pending, const char *trailing = "\n");
+  void printInstr(u_int32_t instr, void (*dasm)(u_int64_t), const char *trailing = "\n");
+  void printGReg(GReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
+  void printVReg(VReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
+  void printCPSR(const char *trailing = "\n");
+  void printFPSR(const char *trailing = "\n");
+  void dumpState();
+};
+
+#endif // ifndef _CPU_STATE_H
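The spare slot gr[32] referenced by the accessors above is what distinguishes R31
as the zero register from R31 as SP: the r31_is_sp flag routes the access either to
gr[31] or to the spare slot. A usage sketch, assuming the header above is on the
include path (not part of the patch):

    #include "cpustate_aarch64.hpp"

    // Writes through xreg(R31, 1) land in gr[31] (the SP slot); with
    // r31_is_sp == 0 the same register number is redirected to gr[32].
    static u_int64_t read_r31_both_ways(CPUState &cpu) {
      cpu.xreg(R31, 1) = 0x7ffff000;       // treated as SP
      u_int64_t as_sp = cpu.xreg(R31, 1);  // reads back 0x7ffff000
      u_int64_t as_zr = cpu.xreg(R31, 0);  // reads the spare gr[32] slot instead
      return as_sp + as_zr;
    }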
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/debug_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/top.hpp"
+
+void pd_ps(frame f) {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/decode_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef _DECODE_H
+#define _DECODE_H
+
+#include <sys/types.h>
+#include "cpustate_aarch64.hpp"
+
+// bitfield immediate expansion helper
+
+extern int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
+                                    u_int32_t imms, u_int64_t &bimm);
+
+
+/*
+ * codes used in conditional instructions
+ *
+ * these are passed to conditional operations to identify which
+ * condition to test for
+ */
+enum CondCode {
+  EQ = 0b0000, // meaning Z == 1
+  NE = 0b0001, // meaning Z == 0
+  HS = 0b0010, // meaning C == 1
+  CS = HS,
+  LO = 0b0011, // meaning C == 0
+  CC = LO,
+  MI = 0b0100, // meaning N == 1
+  PL = 0b0101, // meaning N == 0
+  VS = 0b0110, // meaning V == 1
+  VC = 0b0111, // meaning V == 0
+  HI = 0b1000, // meaning C == 1 && Z == 0
+  LS = 0b1001, // meaning !(C == 1 && Z == 0)
+  GE = 0b1010, // meaning N == V
+  LT = 0b1011, // meaning N != V
+  GT = 0b1100, // meaning Z == 0 && N == V
+  LE = 0b1101, // meaning !(Z == 0 && N == V)
+  AL = 0b1110, // meaning ANY
+  NV = 0b1111  // ditto
+};
+
+/*
+ * certain addressing modes for load require pre or post writeback of
+ * the computed address to a base register
+ */
+enum WriteBack {
+  Post = 0,
+  Pre = 1
+};
+
+/*
+ * certain addressing modes for load require an offset to
+ * be optionally scaled so the decode needs to pass that
+ * through to the execute routine
+ */
+enum Scaling {
+  Unscaled = 0,
+  Scaled = 1
+};
+
+/*
+ * when we do have to scale we do so by shifting, using
+ * log2(bytes in data element) as the shift count,
+ * which is why offsets never need scaling when loading
+ * bytes.
+ */
+enum ScaleShift {
+  ScaleShift16 = 1,
+  ScaleShift32 = 2,
+  ScaleShift64 = 3,
+  ScaleShift128 = 4
+};
+
+/*
+ * one of the addressing modes for load requires a 32-bit register
+ * value to be either zero- or sign-extended; for these instructions
+ * UXTW or SXTW should be passed
+ *
+ * arithmetic register data processing operations can optionally
+ * extend a portion of the second register value; for these
+ * instructions the value supplied must identify the portion of the
+ * register which is to be zero- or sign-extended
+ */
+enum Extension {
+  UXTB = 0,
+  UXTH = 1,
+  UXTW = 2,
+  UXTX = 3,
+  SXTB = 4,
+  SXTH = 5,
+  SXTW = 6,
+  SXTX = 7
+};
+
+/*
+ * arithmetic and logical register data processing operations
+ * optionally perform a shift on the second register value
+ */
+enum Shift {
+  LSL = 0,
+  LSR = 1,
+  ASR = 2,
+  ROR = 3
+};
+
+/*
+ * bit twiddling helpers for instruction decode
+ */
+
+// 32 bit mask with bits [hi,...,lo] set
+
+static inline u_int32_t mask32(int hi = 31, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1 << nbits) - 1) << lo;
+}
+
+static inline u_int64_t mask64(int hi = 63, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1L << nbits) - 1) << lo;
+}
+
+// pick bits [hi,...,lo] from val
+static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (val & mask32(hi, lo));
+}
+
+// pick bits [hi,...,lo] from val
+static inline u_int64_t pick64(u_int64_t val, int hi = 63, int lo = 0)
+{
+  return (val & mask64(hi, lo));
+}
+
+// pick bits [hi,...,lo] from val and shift to [(hi-(newlo - lo)),newlo]
+static inline u_int32_t pickshift32(u_int32_t val, int hi = 31,
+                                    int lo = 0, int newlo = 0)
+{
+  u_int32_t bits = pick32(val, hi, lo);
+  if (lo < newlo) {
+    return (bits << (newlo - lo));
+  } else {
+    return (bits >> (lo - newlo));
+  }
+}
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (pick32(val, hi, lo) >> lo);
+}
+
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0)
+{
+  return (pick64(val, hi, lo) >> lo);
+}
+
+/*
+ * decode registers, immediates and constants of various types
+ */
+
+static inline GReg greg(u_int32_t val, int lo)
+{
+  return (GReg)pickbits32(val, lo + 4, lo);
+}
+
+static inline VReg vreg(u_int32_t val, int lo)
+{
+  return (VReg)pickbits32(val, lo + 4, lo);
+}
+
+static inline u_int32_t uimm(u_int32_t val, int hi, int lo)
+{
+  return pickbits32(val, hi, lo);
+}
+
+static inline int32_t simm(u_int32_t val, int hi = 31, int lo = 0) {
+  union {
+    u_int32_t u;
+    int32_t n;
+  };
+
+  u = val << (31 - hi);
+  n = n >> (31 - hi + lo);
+  return n;
+}
+
+static inline int64_t simm(u_int64_t val, int hi = 63, int lo = 0) {
+  union {
+    u_int64_t u;
+    int64_t n;
+  };
+
+  u = val << (63 - hi);
+  n = n >> (63 - hi + lo);
+  return n;
+}
+
+static inline Shift shift(u_int32_t val, int lo)
+{
+  return (Shift)pickbits32(val, lo+1, lo);
+}
+
+static inline Extension extension(u_int32_t val, int lo)
+{
+  return (Extension)pickbits32(val, lo+2, lo);
+}
+
+static inline Scaling scaling(u_int32_t val, int lo)
+{
+  return (Scaling)pickbits32(val, lo, lo);
+}
+
+static inline WriteBack writeback(u_int32_t val, int lo)
+{
+  return (WriteBack)pickbits32(val, lo, lo);
+}
+
+static inline CondCode condcode(u_int32_t val, int lo)
+{
+  return (CondCode)pickbits32(val, lo+3, lo);
+}
+
+/*
+ * operation decode
+ */
+// bits [28,25] are the primary dispatch vector
+
+static inline u_int32_t dispatchGroup(u_int32_t val)
+{
+  return pickshift32(val, 28, 25, 0);
+}
+
+/*
+ * the 16 possible values for bits [28,25] identified by tags which
+ * map them to the 5 main instruction groups LDST, DPREG, ADVSIMD,
+ * BREXSYS and DPIMM.
+ *
+ * An extra group PSEUDO is included in one of the unallocated ranges
+ * for simulator-specific pseudo-instructions.
+ */
+enum DispatchGroup {
+  GROUP_PSEUDO_0000,
+  GROUP_UNALLOC_0001,
+  GROUP_UNALLOC_0010,
+  GROUP_UNALLOC_0011,
+  GROUP_LDST_0100,
+  GROUP_DPREG_0101,
+  GROUP_LDST_0110,
+  GROUP_ADVSIMD_0111,
+  GROUP_DPIMM_1000,
+  GROUP_DPIMM_1001,
+  GROUP_BREXSYS_1010,
+  GROUP_BREXSYS_1011,
+  GROUP_LDST_1100,
+  GROUP_DPREG_1101,
+  GROUP_LDST_1110,
+  GROUP_ADVSIMD_1111
+};
+
+// bits [31, 29] of a Pseudo are the secondary dispatch vector
+
+static inline u_int32_t dispatchPseudo(u_int32_t val)
+{
+  return pickshift32(val, 31, 29, 0);
+}
+
+/*
+ * the 8 possible values for bits [31,29] in a Pseudo Instruction.
+ * Bits [28,25] are always 0000.
+ */
+
+enum DispatchPseudo {
+  PSEUDO_UNALLOC_000, // unallocated
+  PSEUDO_UNALLOC_001, // ditto
+  PSEUDO_UNALLOC_010, // ditto
+  PSEUDO_UNALLOC_011, // ditto
+  PSEUDO_UNALLOC_100, // ditto
+  PSEUDO_UNALLOC_101, // ditto
+  PSEUDO_CALLOUT_110, // CALLOUT -- bits [24,0] identify call/ret sig
+  PSEUDO_HALT_111     // HALT -- bits [24, 0] identify halt code
+};
+
+// bits [25, 23] of a DPImm are the secondary dispatch vector
+
+static inline u_int32_t dispatchDPImm(u_int32_t instr)
+{
+  return pickshift32(instr, 25, 23, 0);
+}
+
+/*
+ * the 8 possible values for bits [25,23] in a Data Processing Immediate
+ * Instruction. Bits [28,25] are always 100_.
+ */
+
+enum DispatchDPImm {
+  DPIMM_PCADR_000,  // PC-rel-addressing
+  DPIMM_PCADR_001,  // ditto
+  DPIMM_ADDSUB_010,  // Add/Subtract (immediate)
+  DPIMM_ADDSUB_011, // ditto
+  DPIMM_LOG_100,    // Logical (immediate)
+  DPIMM_MOV_101,    // Move Wide (immediate)
+  DPIMM_BITF_110,   // Bitfield
+  DPIMM_EXTR_111    // Extract
+};
+
+// bits [29,28:26] of a LS are the secondary dispatch vector
+
+static inline u_int32_t dispatchLS(u_int32_t instr)
+{
+  return (pickshift32(instr, 29, 28, 1) |
+          pickshift32(instr, 26, 26, 0));
+}
+
+/*
+ * the 8 possible values for bits [29,28:26] in a Load/Store
+ * Instruction. Bits [28,25] are always _1_0
+ */
+
+enum DispatchLS {
+  LS_EXCL_000,    // Load/store exclusive (includes some unallocated)
+  LS_ADVSIMD_001, // AdvSIMD load/store (various -- includes some unallocated)
+  LS_LIT_010,     // Load register literal (includes some unallocated)
+  LS_LIT_011,     // ditto
+  LS_PAIR_100,    // Load/store register pair (various)
+  LS_PAIR_101,    // ditto
+  LS_OTHER_110,   // other load/store formats
+  LS_OTHER_111    // ditto
+};
+
+// bits [28:24:21] of a DPReg are the secondary dispatch vector
+
+static inline u_int32_t dispatchDPReg(u_int32_t instr)
+{
+  return (pickshift32(instr, 28, 28, 2) |
+          pickshift32(instr, 24, 24, 1) |
+          pickshift32(instr, 21, 21, 0));
+}
+
+/*
+ * the 8 possible values for bits [28:24:21] in a Data Processing
+ * Register Instruction. Bits [28,25] are always _101
+ */
+
+enum DispatchDPReg {
+  DPREG_LOG_000,     // Logical (shifted register)
+  DPREG_LOG_001,     // ditto
+  DPREG_ADDSHF_010,  // Add/subtract (shifted register)
+  DPREG_ADDEXT_011,  // Add/subtract (extended register)
+  DPREG_ADDCOND_100, // Add/subtract (with carry) AND
+                     // Cond compare/select AND
+                     // Data Processing (1/2 source)
+  DPREG_UNALLOC_101, // Unallocated
+  DPREG_3SRC_110, // Data Processing (3 source)
+  DPREG_3SRC_111  // Data Processing (3 source)
+};
+
+// bits [31,29] of a BrExSys are the secondary dispatch vector
+
+static inline u_int32_t dispatchBrExSys(u_int32_t instr)
+{
+  return pickbits32(instr, 31, 29);
+}
+
+/*
+ * the 8 possible values for bits [31,29] in a Branch/Exception/System
+ * Instruction. Bits [28,25] are always 101_
+ */
+
+enum DispatchBr {
+  BR_IMM_000,     // Unconditional branch (immediate)
+  BR_IMMCMP_001,  // Compare & branch (immediate) AND
+                  // Test & branch (immediate)
+  BR_IMMCOND_010, // Conditional branch (immediate) AND Unallocated
+  BR_UNALLOC_011, // Unallocated
+  BR_IMM_100,     // Unconditional branch (immediate)
+  BR_IMMCMP_101,  // Compare & branch (immediate) AND
+                  // Test & branch (immediate)
+  BR_REG_110,     // Unconditional branch (register) AND System AND
+                  // Excn gen AND Unallocated
+  BR_UNALLOC_111  // Unallocated
+};
+
+/*
+ * TODO still need to provide secondary decode and dispatch for
+ * AdvSIMD instructions with instr[28,25] = 0111 or 1111
+ */
+
+#endif // ifndef _DECODE_H
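The dispatch helpers above are intended to be chained: the primary group comes from
bits [28,25], a group-specific secondary dispatch narrows the format, and the
greg/vreg helpers pull out register fields. A short sketch using 0x8b020020, which
encodes "ADD X0, X1, X2" (example encoding chosen for illustration; not part of the
patch):

    #include <stdio.h>
    #include "decode_aarch64.hpp"

    int main() {
      u_int32_t instr = 0x8b020020;
      u_int32_t group = dispatchGroup(instr);  // bits [28,25] = 0b0101 -> GROUP_DPREG_0101
      u_int32_t sub   = dispatchDPReg(instr);  // bits 28:24:21 = 0b010 -> DPREG_ADDSHF_010
      GReg rd = greg(instr, 0);                // R0
      GReg rn = greg(instr, 5);                // R1
      GReg rm = greg(instr, 16);               // R2
      printf("group=%u sub=%u rd=%d rn=%d rm=%d\n", group, sub, (int)rd, (int)rn, (int)rm);
      return 0;
    }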
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/depChecker_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
+#include "depChecker_aarch64.hpp"
+
+// Nothing to do on aarch64
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/depChecker_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
+#define CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
+
+// Nothing to do on aarch64
+
+#endif // CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/disassembler_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
+
+  static int pd_instruction_alignment() {
+    return 1;
+  }
+
+  static const char* pd_cpu_opts() {
+    return "";
+  }
+
+#endif // CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/dump_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "memory/compactingPermGenGen.hpp"
+#include "memory/generation.inline.hpp"
+#include "memory/space.inline.hpp"
+
+
+
+// Generate the self-patching vtable method:
+//
+// This method will be called (as any other Klass virtual method) with
+// the Klass itself as the first argument.  Example:
+//
+//      oop obj;
+//      int size = obj->klass()->klass_part()->oop_size(this);
+//
+// for which the virtual method call is Klass::oop_size();
+//
+// The dummy method is called with the Klass object as the first
+// operand, and an object as the second argument.
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no relationship
+// to the original method which the caller intended. Also, there need
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtbl_list_size' original Klass objects.
+
+#define __ masm->
+
+void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list,
+                                                   void** vtable,
+                                                   char** md_top,
+                                                   char* md_end,
+                                                   char** mc_top,
+                                                   char* mc_end) {
+
+#ifdef BUILTIN_SIM
+
+  // Write a dummy word to the writable shared metaspace, for filling
+  // in later with the address of aarch64_prolog().
+  address *prolog_ptr = (address*)*md_top;
+  *(intptr_t *)(*md_top) = (intptr_t)0;
+  (*md_top) += sizeof(intptr_t);
+#endif
+
+  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
+  *(intptr_t *)(*md_top) = vtable_bytes;
+  *md_top += sizeof(intptr_t);
+  void** dummy_vtable = (void**)*md_top;
+  *vtable = dummy_vtable;
+  *md_top += vtable_bytes;
+
+  // Get ready to generate dummy methods.
+
+  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+  Label common_code;
+  for (int i = 0; i < vtbl_list_size; ++i) {
+    for (int j = 0; j < num_virtuals; ++j) {
+      dummy_vtable[num_virtuals * i + j] = (void*)masm->pc();
+
+      // Load rscratch1 with a value indicating vtable/offset pair.
+      // -- bits[ 7..0]  (8 bits) which virtual method in table?
+      // -- bits[12..8]  (5 bits) which virtual method table?
+      // -- must fit in 13-bit instruction immediate field.
+#ifdef BUILTIN_SIM
+      __ c_stub_prolog(8, 0, MacroAssembler::ret_type_integral, prolog_ptr);
+#endif
+      __ mov(rscratch1, (i << 8) + j);
+      __ b(common_code);
+    }
+  }
+
+  __ bind(common_code);
+
+  // Expecting to be called with "thiscall" conventions -- the arguments
+  // are on the stack and the "this" pointer is in c_rarg0. In addition, rscratch1
+  // was set (above) to the offset of the method in the table.
+
+  Register tmp0 = r10, tmp1 = r11;       // AAPCS64 temporary registers
+  __ enter();
+  __ lsr(tmp0, rscratch1, 8);            // isolate vtable identifier.
+  __ mov(tmp1, (address)vtbl_list);      // address of list of vtable pointers.
+  __ ldr(tmp1, Address(tmp1, tmp0, Address::lsl(LogBytesPerWord))); // get correct vtable pointer.
+  __ str(tmp1, Address(c_rarg0));        // update vtable pointer in obj.
+  __ add(rscratch1, tmp1, rscratch1, ext::uxtb, LogBytesPerWord); // address of real method pointer.
+  __ ldr(rscratch1, Address(rscratch1)); // get real method pointer.
+  __ blrt(rscratch1, 8, 0, 1);           // jump to the real method.
+  __ leave();
+  __ ret(lr);
+
+
+  // !!! FIXME AARCH64 -- what is the equivalent for the jdk8 code's
+  //  MetaspaceShared::relocate_vtbl_list??? do we need any such???
+
+  *mc_top = (char*)__ pc();
+}
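The dummy-method stubs above pack the vtable/method pair into rscratch1 as
(i << 8) + j, and the common code unpacks it with a logical shift and a
uxtb-extended add. The same packing in plain arithmetic (hypothetical values,
not part of the patch):

    #include <stdio.h>

    // Low 8 bits select the virtual method, bits [12..8] select the vtable,
    // matching the encoding comment in generate_vtable_methods.
    int main() {
      int i = 3, j = 7;                  // vtable 3, method 7 (illustrative)
      unsigned packed = (i << 8) + j;    // value moved into rscratch1
      unsigned table  = packed >> 8;     // mirrors "lsr tmp0, rscratch1, 8"
      unsigned method = packed & 0xff;   // mirrors the uxtb extension on the add
      printf("packed=0x%x table=%u method=%u\n", packed, table, method);
      return 0;
    }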
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,843 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/os.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif
+
+
+// Profiling/safepoint support
+
+bool frame::safe_for_sender(JavaThread *thread) {
+  address   sp = (address)_sp;
+  address   fp = (address)_fp;
+  address   unextended_sp = (address)_unextended_sp;
+
+  // consider stack guards when trying to determine "safe" stack pointers
+  static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0;
+  size_t usable_stack_size = thread->stack_size() - stack_guard_size;
+
+  // sp must be within the usable part of the stack (not in guards)
+  bool sp_safe = (sp < thread->stack_base()) &&
+                 (sp >= thread->stack_base() - usable_stack_size);
+
+
+  if (!sp_safe) {
+    return false;
+  }
+
+  // unextended sp must be within the stack and above or equal sp
+  bool unextended_sp_safe = (unextended_sp < thread->stack_base()) &&
+                            (unextended_sp >= sp);
+
+  if (!unextended_sp_safe) {
+    return false;
+  }
+
+  // an fp must be within the stack and above (but not equal) sp
+  // the second check on fp + return_addr_offset is added to handle the situation where fp is -1
+  bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base())));
+
+  // We know sp/unextended_sp are safe; only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt
+  // to construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get into frame construction code
+
+  if (_cb != NULL ) {
+
+    // First check if frame is complete and tester is reliable
+    // Unfortunately we can only check frame completeness for runtime stubs and nmethods;
+    // other generic buffer blobs are more problematic, so we just assume they are
+    // ok. Adapter blobs never have a frame complete and are never ok.
+
+    if (!_cb->is_frame_complete_at(_pc)) {
+      if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+        return false;
+      }
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!_cb->code_contains(_pc)) {
+      return false;
+    }
+
+    // Entry frame checks
+    if (is_entry_frame()) {
+      // an entry frame must have a valid fp.
+
+      if (!fp_safe) return false;
+
+      // Validate the JavaCallWrapper an entry frame must have
+
+      address jcw = (address)entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > fp);
+
+      return jcw_safe;
+
+    }
+
+    intptr_t* sender_sp = NULL;
+    intptr_t* sender_unextended_sp = NULL;
+    address   sender_pc = NULL;
+    intptr_t* saved_fp =  NULL;
+
+    if (is_interpreted_frame()) {
+      // fp must be safe
+      if (!fp_safe) {
+        return false;
+      }
+
+      sender_pc = (address) this->fp()[return_addr_offset];
+      // for interpreted frames, the value below is the sender "raw" sp,
+      // which can be different from the sender unextended sp (the sp seen
+      // by the sender) because of current frame local variables
+      sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+      sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset];
+      saved_fp = (intptr_t*) this->fp()[link_offset];
+
+    } else {
+      // must be some sort of compiled/runtime frame
+      // fp does not have to be safe (although it could be checked for c1?)
+
+      // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc
+      if (_cb->frame_size() <= 0) {
+        return false;
+      }
+
+      sender_sp = _unextended_sp + _cb->frame_size();
+      sender_unextended_sp = sender_sp;
+      sender_pc = (address) *(sender_sp-1);
+      // Note: frame::sender_sp_offset is only valid for compiled frame
+      saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset);
+    }
+
+
+    // If the potential sender is the interpreter then we can do some more checking
+    if (Interpreter::contains(sender_pc)) {
+
+      // fp is always saved in a recognizable place in any code we generate. However
+      // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp
+      // is really a frame pointer.
+
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+      return sender.is_interpreted_frame_valid(thread);
+
+    }
+
+    // We must always be able to find a recognizable pc
+    CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+    if (sender_pc == NULL ||  sender_blob == NULL) {
+      return false;
+    }
+
+    // Could be a zombie method
+    if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
+      return false;
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!sender_blob->code_contains(sender_pc)) {
+      return false;
+    }
+
+    // We should never be able to see an adapter if the current frame is something from code cache
+    if (sender_blob->is_adapter_blob()) {
+      return false;
+    }
+
+    // Could be the call_stub
+    if (StubRoutines::returns_to_call_stub(sender_pc)) {
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+      // Validate the JavaCallWrapper an entry frame must have
+      address jcw = (address)sender.entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp());
+
+      return jcw_safe;
+    }
+
+    if (sender_blob->is_nmethod()) {
+      nmethod* nm = sender_blob->as_nmethod_or_null();
+      if (nm != NULL) {
+        if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+          return false;
+        }
+      }
+    }
+
+    // If the frame size is 0 (or less) something is bad, because every nmethod has a
+    // non-zero frame size: the return address counts against the callee's frame.
+
+    if (sender_blob->frame_size() <= 0) {
+      assert(!sender_blob->is_nmethod(), "should count return address at least");
+      return false;
+    }
+
+    // We should never be able to see anything here except an nmethod. If something in the
+    // code cache (current frame) is called by an entity within the code cache that entity
+    // should not be anything but the call stub (already covered), the interpreter (already covered)
+    // or an nmethod.
+
+    if (!sender_blob->is_nmethod()) {
+      return false;
+    }
+
+    // Could put some more validation for the potential non-interpreted sender
+    // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+    // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+    // We've validated the potential sender that would be created
+    return true;
+  }
+
+  // Must be native-compiled frame. Since sender will try and use fp to find
+  // linkages it must be safe
+
+  if (!fp_safe) {
+    return false;
+  }
+
+  // The pc we would fetch must be non-zero (a zero return address marks the oldest frame)
+
+  if ( (address) this->fp()[return_addr_offset] == NULL) return false;
+
+
+  // could try and do some more potential verification of native frame if we could think of some...
+
+  return true;
+
+}
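+
+// A minimal, self-contained sketch (not HotSpot code, all names local to the
+// sketch) of the sender arithmetic used above for compiled frames: the
+// caller's sp is the callee's unextended sp plus the callee's frame size,
+// the return pc sits one word below that sp, and the saved fp sits
+// sender_sp_offset (2) words below it.
+//
+//   #include <cassert>
+//   #include <cstdint>
+//
+//   int main() {
+//     const int sender_sp_offset = 2;            // matches frame_aarch64.hpp
+//     intptr_t stack[8] = {0};                   // pretend stack
+//     intptr_t* callee_unextended_sp = &stack[0];
+//     const int callee_frame_size = 4;           // words, as _cb->frame_size() reports
+//     intptr_t* sender_sp = callee_unextended_sp + callee_frame_size;
+//     sender_sp[-1] = 0x1234;                    // return pc pushed by the call
+//     sender_sp[-sender_sp_offset] = 0x5678;     // caller's fp saved by the prologue
+//     assert(*(sender_sp - 1) == 0x1234);                 // sender_pc recovery
+//     assert(*(sender_sp - sender_sp_offset) == 0x5678);  // saved_fp recovery
+//     return 0;
+//   }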
+
+void frame::patch_pc(Thread* thread, address pc) {
+  address* pc_addr = &(((address*) sp())[-1]);
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
+                  pc_addr, *pc_addr, pc);
+  }
+  // Either the return address is the original one or we are going to
+  // patch in the same address that's already there.
+  assert(_pc == *pc_addr || pc == *pc_addr, "must be");
+  *pc_addr = pc;
+  _cb = CodeCache::find_blob(pc);
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    assert(original_pc == _pc, "expected original PC to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // leave _pc as is
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+bool frame::is_interpreted_frame() const  {
+  return Interpreter::contains(pc());
+}
+
+int frame::frame_size(RegisterMap* map) const {
+  frame sender = this->sender(map);
+  return sender.sp() - sp();
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // convert offset to index to deal with tsi
+  int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+  // Entry frame's arguments are always in relation to unextended_sp()
+  return &unextended_sp()[index];
+}
+
+// sender_sp
+#ifdef CC_INTERP
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  // QQQ why does this specialized method exist if frame::sender_sp() does the same thing?
+  // seems odd, and if we always know interpreted vs. non-interpreted then sender_sp() is really
+  // doing too much work.
+  return get_interpreterState()->sender_sp();
+}
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return get_interpreterState()->monitor_base();
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  return (BasicObjectLock*) get_interpreterState()->stack_base();
+}
+
+#else // CC_INTERP
+
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
+  // make sure the pointer points inside the frame
+  assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer");
+  assert((intptr_t*) result < fp(),  "monitor end should be strictly below the frame pointer");
+  return result;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+  *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
+}
+
+// Used by template based interpreter deoptimization
+void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
+  *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
+}
+#endif // CC_INTERP
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
+  assert(map != NULL, "map must be set");
+  // Java frame called from C; skip all C frames and return top C
+  // frame of that chunk as the sender
+  JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+  assert(!entry_frame_is_first(), "next Java fp must be non zero");
+  assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+  map->clear();
+  assert(map->include_argument_oops(), "should be set by clear");
+  if (jfa->last_Java_pc() != NULL ) {
+    frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+    return fr;
+  }
+  frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
+  return fr;
+}
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP.  The unextended SP might also be the saved SP
+// for MethodHandle call sites.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+  frame fr;
+
+  // This is ugly, but it's better than changing {get,set}_original_pc
+  // to take an SP value as argument.  And it's only a debugging
+  // method anyway.
+  fr._unextended_sp = unextended_sp;
+
+  address original_pc = nm->get_original_pc(&fr);
+  assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
+  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+  // If we are returning to a compiled MethodHandle call site, the
+  // saved_fp will in fact be a saved value of the unextended SP.  The
+  // simplest way to tell whether we are returning to such a call site
+  // is as follows:
+
+  nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
+  if (sender_nm != NULL) {
+    // If the sender PC is a deoptimization point, get the original
+    // PC.  For MethodHandle call site the unextended_sp is stored in
+    // saved_fp.
+    if (sender_nm->is_deopt_mh_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
+      _unextended_sp = _fp;
+    }
+    else if (sender_nm->is_deopt_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
+    }
+    else if (sender_nm->is_method_handle_return(_pc)) {
+      _unextended_sp = _fp;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+  // The interpreter and compiler(s) always save fp in a known
+  // location on entry. We must record where that location is
+  // so that if fp was live on callout from c2 we can find
+  // the saved copy no matter what it called.
+
+  // Since the interpreter always saves fp, if we record where it is then
+  // we don't have to always save fp on entry and exit to c2 compiled
+  // code; saving it on entry will be enough.
+  map->set_location(rfp->as_VMReg(), (address) link_addr);
+  // this is weird: "H" ought to be at a higher address, however the
+  // oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+  // XXXX make this go away
+  if (true) {
+    map->set_location(rfp->as_VMReg()->next(), (address) link_addr);
+  }
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_interpreter_frame
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+  // SP is the raw SP from the sender after adapter or interpreter
+  // extension.
+  intptr_t* sender_sp = this->sender_sp();
+
+  // This is the sp before any possible extension (adapter/locals).
+  intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+#ifdef COMPILER2
+  if (map->update_map()) {
+    update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
+  }
+#endif // COMPILER2
+
+  return frame(sender_sp, unextended_sp, link(), sender_pc());
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_compiled_frame
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+  // We cannot rely upon the last fp having been saved to the thread
+  // in C2 code, but it will have been pushed onto the stack, so we
+  // have to find it relative to the unextended sp.
+
+  assert(_cb->frame_size() >= 0, "must have non-negative frame size");
+  intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
+  intptr_t* unextended_sp = l_sender_sp;
+
+  // the return_address is always the word on the stack
+  address sender_pc = (address) *(l_sender_sp-1);
+
+  intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset);
+
+  // assert (sender_sp() == l_sender_sp, "should be");
+  // assert (*saved_fp_addr == link(), "should be");
+
+  if (map->update_map()) {
+    // Tell GC to use argument oopmaps for some runtime stubs that need it.
+    // For C1, the runtime stub might not have oop maps, so set this flag
+    // outside of update_register_map.
+    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    if (_cb->oop_maps() != NULL) {
+      OopMapSet::update_register_map(this, map);
+    }
+
+    // Since the prolog does the save and restore of the frame pointer there is no oopmap
+    // for it, so we must fill in its location as if there were an oopmap entry,
+    // since if our caller was compiled code there could be live jvm state in it.
+    update_map_with_saved_link(map, saved_fp_addr);
+  }
+
+  return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
+//------------------------------------------------------------------------------
+// frame::sender
+frame frame::sender(RegisterMap* map) const {
+  // Default is we don't have to follow them. The sender_for_xxx will
+  // update it accordingly.
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame())
+    return sender_for_entry_frame(map);
+  if (is_interpreted_frame())
+    return sender_for_interpreter_frame(map);
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+  // This test looks odd: why is it not is_compiled_frame() ?  That's
+  // because stubs also have OOP maps.
+  if (_cb != NULL) {
+    return sender_for_compiled_frame(map);
+  }
+
+  // Must be native-compiled frame, i.e. the marshaling code for native
+  // methods that exists in the core system.
+  return frame(sender_sp(), link(), sender_pc());
+}
+
+bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
+  assert(is_interpreted_frame(), "must be interpreter frame");
+  methodOop method = interpreter_frame_method();
+  // When unpacking an optimized frame the frame pointer is
+  // adjusted with:
+  int diff = (method->max_locals() - method->size_of_parameters()) *
+             Interpreter::stackElementWords;
+  return _fp == (fp - diff);
+}
+
+void frame::pd_gc_epilog() {
+  // nothing done here now
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+// QQQ
+#ifdef CC_INTERP
+#else
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  // These are reasonable sanity checks
+  if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+    return false;
+  }
+  // These are hacks to keep us out of trouble.
+  // The problem with these is that they mask other problems
+  if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
+    return false;
+  }
+
+  // do some validation of frame elements
+
+  // first the method
+
+  methodOop m = *interpreter_frame_method_addr();
+
+  // validate the method we'd find in this potential sender
+  if (!Universe::heap()->is_valid_method(m)) return false;
+
+  // stack frames shouldn't be much larger than max_stack elements;
+  // this test requires the use of unextended_sp, which is the sp as seen by
+  // the current frame, and not sp, which is the "raw" sp that could point
+  // further because of local variables of the callee method inserted after
+  // method arguments
+  if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
+    return false;
+  }
+
+  // validate bci/bcx
+
+  intptr_t  bcx    = interpreter_frame_bcx();
+  if (m->validate_bci_from_bcx(bcx) < 0) {
+    return false;
+  }
+
+  // validate constantPoolCacheOop
+
+  constantPoolCacheOop cp = *interpreter_frame_cache_addr();
+  if (cp == NULL ||
+      !Space::is_aligned(cp) ||
+      !Universe::heap()->is_permanent((void*)cp)) return false;
+
+  // validate locals
+
+  address locals =  (address) *interpreter_frame_locals_addr();
+
+  if (locals > thread->stack_base() || locals < (address) fp()) return false;
+
+  // We'd have to be pretty unlucky to be misled at this point
+
+#endif // CC_INTERP
+  return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+#ifdef CC_INTERP
+  // Needed for JVMTI. The result should always be in the
+  // interpreterState object
+  interpreterState istate = get_interpreterState();
+#endif // CC_INTERP
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  methodOop method = interpreter_frame_method();
+  BasicType type = method->result_type();
+
+  intptr_t* tos_addr;
+  if (method->is_native()) {
+    // TODO : ensure AARCH64 does the same as Intel here i.e. push v0 then r0
+    // Prior to calling into the runtime to report the method_exit the possible
+    // return value is pushed to the native stack. If the result is a jfloat/jdouble
+    // then ST0 is saved before EAX/EDX. See the note in generate_native_result
+    tos_addr = (intptr_t*)sp();
+    if (type == T_FLOAT || type == T_DOUBLE) {
+      // This is times two because we do a push(ltos) after pushing XMM0
+      // and that takes two interpreter stack slots.
+      tos_addr += 2 * Interpreter::stackElementWords;
+    }
+  } else {
+    tos_addr = (intptr_t*)interpreter_frame_tos_address();
+  }
+
+  switch (type) {
+    case T_OBJECT  :
+    case T_ARRAY   : {
+      oop obj;
+      if (method->is_native()) {
+#ifdef CC_INTERP
+        obj = istate->_oop_temp;
+#else
+        obj = (oop) at(interpreter_frame_oop_temp_offset);
+#endif // CC_INTERP
+      } else {
+        oop* obj_p = (oop*)tos_addr;
+        obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
+      }
+      assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+      *oop_result = obj;
+      break;
+    }
+    case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
+    case T_BYTE    : value_result->b = *(jbyte*)tos_addr; break;
+    case T_CHAR    : value_result->c = *(jchar*)tos_addr; break;
+    case T_SHORT   : value_result->s = *(jshort*)tos_addr; break;
+    case T_INT     : value_result->i = *(jint*)tos_addr; break;
+    case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+    case T_FLOAT   : {
+        value_result->f = *(jfloat*)tos_addr;
+      break;
+    }
+    case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+    case T_VOID    : /* Nothing to do */ break;
+    default        : ShouldNotReachHere();
+  }
+
+  return type;
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+  return &interpreter_frame_tos_address()[index];
+}
+
+#ifndef PRODUCT
+
+#define DESCRIBE_FP_OFFSET(name) \
+  values.describe(frame_no, fp() + frame::name##_offset, #name)
+
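+// For illustration, DESCRIBE_FP_OFFSET(interpreter_frame_method) expands to
+//   values.describe(frame_no, fp() + frame::interpreter_frame_method_offset,
+//                   "interpreter_frame_method");
+// i.e. each use below labels one fp-relative slot of the interpreter frame.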
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {
+    DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_method);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mdx);
+    DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+    DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+    DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
+    DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  }
+}
+#endif
+
+intptr_t *frame::initial_deoptimization_info() {
+  // Not used on aarch64, but we must return something.
+  return NULL;
+}
+
+intptr_t* frame::real_fp() const {
+  if (_cb != NULL) {
+    // use the frame size if valid
+    int size = _cb->frame_size();
+    if (size > 0) {
+      return unextended_sp() + size;
+    }
+  }
+  // else rely on fp()
+  assert(! is_compiled_frame(), "unknown compiled frame size");
+  return fp();
+}
+
+#undef DESCRIBE_FP_OFFSET
+
+#define DESCRIBE_FP_OFFSET(name)                                        \
+  {                                                                     \
+    unsigned long *p = (unsigned long *)fp;                             \
+    printf("0x%016lx 0x%016lx %s\n", (unsigned long)(p + frame::name##_offset), \
+           p[frame::name##_offset], #name);                             \
+  }
+
+static __thread unsigned long nextfp;
+static __thread unsigned long nextpc;
+static __thread unsigned long nextsp;
+static __thread RegisterMap *reg_map;
+
+static void printbc(methodOop m, intptr_t bcx) {
+  const char *name;
+  char buf[16];
+  if (m->validate_bci_from_bcx(bcx) < 0
+      || !m->contains((address)bcx)) {
+    name = "???";
+    snprintf(buf, sizeof buf, "(bad)");
+  } else {
+    int bci = m->bci_from((address)bcx);
+    snprintf(buf, sizeof buf, "%d", bci);
+    name = Bytecodes::name(m->code_at(bci));
+  }
+  ResourceMark rm;
+  printf("%s : %s ==> %s\n", m->name_and_sig_as_C_string(), buf, name);
+}
+
+void internal_pf(unsigned long sp, unsigned long fp, unsigned long pc, unsigned long bcx) {
+  if (! fp)
+    return;
+
+  DESCRIBE_FP_OFFSET(return_addr);
+  DESCRIBE_FP_OFFSET(link);
+  DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_method);
+  DESCRIBE_FP_OFFSET(interpreter_frame_mdx);
+  DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+  DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+  DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
+  DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  unsigned long *p = (unsigned long *)fp;
+
+  // We want to see all frames, native and Java.  For compiled and
+  // interpreted frames we have special information that allows us to
+  // unwind them; for everything else we assume that the native frame
+  // pointer chain is intact.
+  frame this_frame((intptr_t*)sp, (intptr_t*)fp, (address)pc);
+  if (this_frame.is_compiled_frame() ||
+      this_frame.is_interpreted_frame()) {
+    frame sender = this_frame.sender(reg_map);
+    nextfp = (unsigned long)sender.fp();
+    nextpc = (unsigned long)sender.pc();
+    nextsp = (unsigned long)sender.unextended_sp();
+  } else {
+    nextfp = p[frame::link_offset];
+    nextpc = p[frame::return_addr_offset];
+    nextsp = (unsigned long)&p[frame::sender_sp_offset];
+  }
+
+  if (bcx == -1ul)
+    bcx = p[frame::interpreter_frame_bcx_offset];
+
+  if (Interpreter::contains((address)pc)) {
+    methodOop m = (methodOop)p[frame::interpreter_frame_method_offset];
+    if (m && m->is_method()) {
+      printbc(m, bcx);
+    } else
+      printf("not a methodOop\n");
+  } else {
+    CodeBlob *cb = CodeCache::find_blob((address)pc);
+    if (cb != NULL) {
+      if (cb->is_nmethod()) {
+        ResourceMark rm;
+        nmethod* nm = (nmethod*)cb;
+        printf("nmethod %s\n", nm->method()->name_and_sig_as_C_string());
+      } else if (cb->name()) {
+        printf("CodeBlob %s\n", cb->name());
+      }
+    }
+  }
+}
+
+extern "C" void npf() {
+  CodeBlob *cb = CodeCache::find_blob((address)nextpc);
+  // C2 does not always chain the frame pointers even when it could, instead
+  // preferring to use fixed offsets from SP, so a simple leave() does
+  // not work.  Instead, it adds the frame size to SP and then pops FP and
+  // LR.  We have to do the same thing to get a good call chain.
+  if (cb && cb->frame_size())
+    nextfp = nextsp + wordSize * (cb->frame_size() - 2);
+  internal_pf (nextsp, nextfp, nextpc, -1);
+}
+
+extern "C" void pf(unsigned long sp, unsigned long fp, unsigned long pc,
+                   unsigned long bcx, unsigned long thread) {
+  RegisterMap map((JavaThread*)thread, false);
+  if (!reg_map) {
+    reg_map = (RegisterMap*)os::malloc(sizeof map, mtNone);
+  }
+  memcpy(reg_map, &map, sizeof map);
+  {
+    CodeBlob *cb = CodeCache::find_blob((address)pc);
+    if (cb && cb->frame_size())
+      fp = sp + wordSize * (cb->frame_size() - 2);
+  }
+  internal_pf(sp, fp, pc, bcx);
+}
+
+// support for printing out where we are in a Java method
+// needs to be passed current fp and bcp register values
+// prints method name, bc index and bytecode name
+extern "C" void pm(unsigned long fp, unsigned long bcx) {
+  DESCRIBE_FP_OFFSET(interpreter_frame_method);
+  unsigned long *p = (unsigned long *)fp;
+  methodOop m = (methodOop)p[frame::interpreter_frame_method_offset];
+  printbc(m, bcx);
+}
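+
+// A hypothetical debugger session using the helpers above (a sketch only:
+// x29 as the frame pointer follows the AArch64 ABI, and jt stands for a
+// JavaThread* obtained earlier in the session -- both are assumptions here,
+// not something this file defines):
+//
+//   (gdb) call pf((unsigned long)$sp, (unsigned long)$x29, (unsigned long)$pc, -1, (unsigned long)jt)
+//   (gdb) call npf()   // repeat to walk further up the recovered call chain
+//
+// pm() additionally wants the raw bytecode pointer for the frame, so it is
+// most useful when that value is already at hand.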
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_FRAME_AARCH64_HPP
+#define CPU_AARCH64_VM_FRAME_AARCH64_HPP
+
+#include "runtime/synchronizer.hpp"
+#include "utilities/top.hpp"
+
+// A frame represents a physical stack frame (an activation).  Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+//    [expression stack      ] * <- sp
+
+//    [monitors[0]           ]   \
+//     ...                        | monitor block size = k
+//    [monitors[k-1]         ]   /
+//    [frame initial esp     ] ( == &monitors[0], initially here)       initial_sp_offset
+//    [byte code index/pointer]                  = bcx()                bcx_offset
+
+//    [pointer to locals     ]                   = locals()             locals_offset
+//    [constant pool cache   ]                   = cache()              cache_offset
+
+//    [methodData            ]                   = mdp()                mdx_offset
+//    [methodOop             ]                   = method()             method_offset
+
+//    [last esp              ]                   = last_sp()            last_sp_offset
+//    [old stack pointer     ]                     (sender_sp)          sender_sp_offset
+
+//    [old frame pointer     ]   <- fp           = link()
+//    [return pc             ]
+
+//    [last sp               ]
+//    [oop temp              ]                     (only for native calls)
+
+//    [locals and parameters ]
+//                               <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C++ interpreter ----------------------------------------
+//
+// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run)
+//
+//                             <- SP (current esp/rsp)
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    ...                        BytecodeInterpreter::run local variables
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    [old frame pointer       ]   fp [ BytecodeInterpreter::run's ebp/rbp ]
+//    [return pc               ]  (return to frame manager)
+//    [interpreter_state*      ]  (arg to BytecodeInterpreter::run)   --------------
+//    [expression stack        ] <- last_Java_sp                           |
+//    [...                     ] * <- interpreter_state.stack              |
+//    [expression stack        ] * <- interpreter_state.stack_base         |
+//    [monitors                ]   \                                       |
+//     ...                          | monitor block size                   |
+//    [monitors                ]   / <- interpreter_state.monitor_base     |
+//    [struct interpretState   ] <-----------------------------------------|
+//    [return pc               ] (return to callee of frame manager [1])
+//    [locals and parameters   ]
+//                               <- sender sp
+
+// [1] When the c++ interpreter calls a new method it returns to the frame
+//     manager which allocates a new frame on the stack. In that case there
+//     is no real callee of this newly allocated frame. The frame manager is
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
+//     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
+//     with a fake interpreter_state* parameter to make it easy to debug
+//     nested calls.
+
+// Note that contrary to the layout for the assembly interpreter the
+// expression stack allocated for the C++ interpreter is full sized.
+// However this is not as bad as it seems as the interpreter frame_manager
+// will truncate the unused space on successive method calls.
+//
+// ------------------------------ C++ interpreter ----------------------------------------
+
+ public:
+  enum {
+    pc_return_offset                                 =  0,
+    // All frames
+    link_offset                                      =  0,
+    return_addr_offset                               =  1,
+    sender_sp_offset                                 =  2,
+
+#ifndef CC_INTERP
+
+    // Interpreter frames
+    interpreter_frame_oop_temp_offset                =  3, // for native calls only
+
+    interpreter_frame_sender_sp_offset               = -1,
+    // outgoing sp before a call to an invoked method
+    interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
+    interpreter_frame_method_offset                  = interpreter_frame_last_sp_offset - 1,
+    interpreter_frame_mdx_offset                     = interpreter_frame_method_offset - 1,
+    interpreter_frame_cache_offset                   = interpreter_frame_mdx_offset - 1,
+    interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
+    interpreter_frame_bcx_offset                     = interpreter_frame_locals_offset - 1,
+    interpreter_frame_initial_sp_offset              = interpreter_frame_bcx_offset - 1,
+
+    interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset,
+    interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
+
+#endif // CC_INTERP
+
+    // Entry frames
+    // n.b. these values are determined by the layout defined in
+    // stubGenerator for the Java call stub
+    entry_frame_after_call_words                     = 27,
+    entry_frame_call_wrapper_offset                  = -8,
+
+    // we don't need a save area
+    arg_reg_save_area_bytes                          =  0,
+
+    // TODO - check that this is still correct
+    // Native frames
+
+    native_frame_initial_param_offset                =  2
+
+  };
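+
+  // A small standalone sketch (not part of this class, all names local to
+  // the sketch) of how the negative, fp-relative offsets above are meant to
+  // be read: slot N lives N words from fp, so the methodOop slot of an
+  // interpreter frame sits three words below fp.
+  //
+  //   #include <cassert>
+  //   #include <cstdint>
+  //
+  //   int main() {
+  //     const int interpreter_frame_method_offset = -3;     // as in the enum above
+  //     intptr_t stack[16] = {0};
+  //     intptr_t* fake_fp = &stack[8];
+  //     fake_fp[interpreter_frame_method_offset] = 0xcafe;  // "store" the method
+  //     assert(*(fake_fp + interpreter_frame_method_offset) == 0xcafe);
+  //     return 0;
+  //   }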
+
+  intptr_t ptr_at(int offset) const {
+    return *ptr_at_addr(offset);
+  }
+
+  void ptr_at_put(int offset, intptr_t value) {
+    *ptr_at_addr(offset) = value;
+  }
+
+ private:
+  // an additional field beyond _sp and _pc:
+  intptr_t*   _fp; // frame pointer
+  // The interpreter and adapters will extend the frame of the caller.
+  // Since oopMaps are based on the sp of the caller before extension
+  // we need to know that value. However in order to compute the address
+  // of the return address we need the real "raw" sp. Since sparc already
+  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+  // original sp we use that convention.
+
+  intptr_t*     _unextended_sp;
+  void adjust_unextended_sp();
+
+  intptr_t* ptr_at_addr(int offset) const {
+    return (intptr_t*) addr_at(offset);
+  }
+
+#ifdef ASSERT
+  // Used in frame::sender_for_{interpreter,compiled}_frame
+  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
+  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
+    verify_deopt_original_pc(nm, unextended_sp, true);
+  }
+#endif
+
+ public:
+  // Constructors
+
+  frame(intptr_t* sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* fp);
+
+  // accessors for the instance variables
+  // Note: not necessarily the real 'frame pointer' (see real_fp)
+  intptr_t*   fp() const { return _fp; }
+
+  inline address* sender_pc_addr() const;
+
+  // return address of param, zero origin index.
+  inline address* native_param_addr(int idx) const;
+
+  // expression stack tos if we are nested in a java call
+  intptr_t* interpreter_frame_last_sp() const;
+
+  // helper to update a map with the callee-saved frame pointer (rfp)
+  static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+#ifndef CC_INTERP
+  // deoptimization support
+  void interpreter_frame_set_last_sp(intptr_t* sp);
+#endif // CC_INTERP
+
+#ifdef CC_INTERP
+  inline interpreterState get_interpreterState() const;
+#endif // CC_INTERP
+
+#endif // CPU_AARCH64_VM_FRAME_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
+
+// Inline functions for AArch64 frames:
+
+// Constructors:
+
+inline frame::frame() {
+  _pc = NULL;
+  _sp = NULL;
+  _unextended_sp = NULL;
+  _fp = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+static int spin;
+
+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+#ifndef PRODUCT
+  if (fp)
+    if (sp > fp || (fp - sp > 0x100000))
+      for(;;)
+        asm("nop");
+#endif
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+  _sp = sp;
+  _unextended_sp = unextended_sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod");
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = (address)(sp[-1]);
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+// unlucky the junk value could point to a zombie method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+// call a specialized frame constructor instead of this one.
+  // Then we could use the assert below. However this assert is of somewhat dubious
+  // value.
+  // assert(_pc != NULL, "no pc?");
+
+  _cb = CodeCache::find_blob(_pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  bool ret =  sp() == other.sp()
+              && unextended_sp() == other.unextended_sp()
+              && fp() == other.fp()
+              && pc() == other.pc();
+  assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
+  return ret;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Relationals on frames, based on frame ids
+// Return true if the frame is younger (more recent activation) than the frame represented by id
+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() < id ; }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const   { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() > id ; }
+
+
+
+inline intptr_t* frame::link() const              { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+inline void      frame::set_link(intptr_t* addr)  { *(intptr_t **)addr_at(link_offset) = addr; }
+
+
+inline intptr_t* frame::unextended_sp() const     { return _unextended_sp; }
+
+// Return address:
+
+inline address* frame::sender_pc_addr()      const { return (address*) addr_at( return_addr_offset); }
+inline address  frame::sender_pc()           const { return *sender_pc_addr(); }
+
+// return address of param, zero origin index.
+inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); }
+
+#ifdef CC_INTERP
+
+inline interpreterState frame::get_interpreterState() const {
+  return ((interpreterState)addr_at( -((int)sizeof(BytecodeInterpreter))/wordSize ));
+}
+
+inline intptr_t*    frame::sender_sp()        const {
+  // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames?
+  if (is_interpreted_frame()) {
+    assert(false, "should never happen");
+    return get_interpreterState()->sender_sp();
+  } else {
+    return            addr_at(sender_sp_offset);
+  }
+}
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_locals);
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_bcp);
+}
+
+
+// Constant pool cache
+
+inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_constants);
+}
+
+// Method
+
+inline methodOop* frame::interpreter_frame_method_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_method);
+}
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_mdx);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  assert(is_interpreted_frame(), "wrong frame type");
+  return get_interpreterState()->_stack + 1;
+}
+
+#else /* asm interpreter */
+inline intptr_t*    frame::sender_sp()        const { return            addr_at(   sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+  return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_bcx_offset);
+}
+
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_mdx_offset);
+}
+
+
+
+// Constant pool cache
+
+inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const {
+  return (constantPoolCacheOop*)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline methodOop* frame::interpreter_frame_method_addr() const {
+  return (methodOop*)addr_at(interpreter_frame_method_offset);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  intptr_t* last_sp = interpreter_frame_last_sp();
+  if (last_sp == NULL) {
+    return sp();
+  } else {
+    // sp() may have been extended or shrunk by an adapter.  At least
+    // check that we don't fall behind the legal region.
+    // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+    assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+    return last_sp;
+  }
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
+
+#endif /* CC_INTERP */
+
+inline int frame::pd_oop_map_offset_adjustment() const {
+  return 0;
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  return BasicObjectLock::size();
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
+  return monitor_end-1;
+}
+
+
+inline jint frame::interpreter_frame_expression_stack_direction() { return -1; }
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);
+}
+
+
+// Compiled frames
+
+inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - local_index + (local_index < nof_args ? 1: -1));
+}
+
+inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors);
+}
+
+inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1);
+}
+
+inline bool frame::volatile_across_calls(Register reg) {
+  return true;
+}
+
+
+
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  oop* result_adr = (oop *)map->location(r0->as_VMReg());
+  guarantee(result_adr != NULL, "bad register save location");
+
+  return (*result_adr);
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  oop* result_adr = (oop *)map->location(r0->as_VMReg());
+  guarantee(result_adr != NULL, "bad register save location");
+
+  *result_adr = obj;
+}
+
+#endif // CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
+#define CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
+
+const int StackAlignmentInBytes  = 16;
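+// (The AArch64 procedure call standard requires the stack pointer to be
+// 16-byte aligned, hence 16 here.)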
+
+// The maximum B/BL offset range on AArch64 is 128MB.
+#undef CODE_CACHE_DEFAULT_LIMIT
+#define CODE_CACHE_DEFAULT_LIMIT (128*M)
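+
+// A minimal sketch (not part of this header) of the arithmetic behind the
+// 128MB figure: B/BL carry a signed 26-bit word offset, i.e. 2^25 words of
+// reach in either direction, and each instruction word is 4 bytes.
+//
+//   #include <cassert>
+//   #include <cstdint>
+//
+//   int main() {
+//     const int64_t words_each_way = int64_t(1) << 25;   // signed imm26 reach
+//     const int64_t bytes_each_way = words_each_way * 4; // 4-byte instructions
+//     assert(bytes_each_way == 128 * 1024 * 1024);       // +/- 128MB
+//     return 0;
+//   }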
+
+#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, ConvertSleepToYield,      true);
+define_pd_global(bool, ShareVtableStubs,         true);
+define_pd_global(bool, CountInterpCalls,         true);
+define_pd_global(bool, NeedsDeoptSuspend,        false); // only register window machines need this
+
+define_pd_global(bool, ImplicitNullChecks,       true);  // Generate code for implicit null checks
+define_pd_global(bool, UncommonNullCast,         true);  // Uncommon-trap NULLs passed to check cast
+
+// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
+// assign a different value for C2 without touching a number of files. Use
+// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
+// c1 doesn't have this problem because the fix to 4858033 assures us
+// that the vep is aligned at CodeEntryAlignment, whereas c2 only aligns
+// the uep; the vep doesn't get real alignment but just slops on by,
+// only assured that the entry instruction meets the 5 byte size requirement.
+define_pd_global(intx, CodeEntryAlignment,       64);
+define_pd_global(intx, OptoLoopAlignment,        16);
+define_pd_global(intx, InlineFrequencyCount,     100);
+
+define_pd_global(intx, StackYellowPages, 2);
+define_pd_global(intx, StackRedPages, 1);
+
+define_pd_global(intx, StackShadowPages, 6 DEBUG_ONLY(+5));
+
+define_pd_global(intx, PreInflateSpin,           10);
+
+define_pd_global(bool, RewriteBytecodes,     true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, UseMembar,            true);
+
+// GC Ergo Flags
+define_pd_global(intx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
+
+#if defined(COMPILER1) || defined(COMPILER2)
+define_pd_global(intx, InlineSmallCode,          1000);
+#endif
+
+#ifdef BUILTIN_SIM
+#define UseBuiltinSim           true
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                        \
+  develop(bool, TrapBasedNullChecks, false,                             \
+          "Not supported on this platform.")                            \
+                                                                        \
+  develop(bool, TrapBasedRangeChecks, false,                            \
+          "Not supported on this platform.")                            \
+                                                                        \
+  product(bool, NotifySimulator, UseBuiltinSim,                         \
+         "tell the AArch64 sim where we are in method code")            \
+                                                                        \
+  product(bool, UseSimulatorCache, false,                               \
+         "tell sim to cache memory updates until exclusive op occurs")  \
+                                                                        \
+  product(bool, DisableBCCheck, true,                                   \
+          "tell sim not to invoke bccheck callback")                    \
+                                                                        \
+  product(bool, NearCpool, true,                                        \
+         "constant pool is close to instructions")                      \
+                                                                        \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler handles")
+
+// Don't attempt to use Neon on builtin sim until builtin sim supports it
+#define UseNeon false
+#define UseCRC32 false
+
+#else
+#define UseBuiltinSim           false
+#define NotifySimulator         false
+#define UseSimulatorCache       false
+#define DisableBCCheck          true
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                        \
+  develop(bool, TrapBasedNullChecks, false,                             \
+          "Not supported on this platform.")                            \
+                                                                        \
+  develop(bool, TrapBasedRangeChecks, false,                            \
+          "Not supported on this platform.")                            \
+                                                                        \
+  product(bool, NearCpool, true,                                        \
+         "constant pool is close to instructions")                      \
+                                                                        \
+  product(bool, UseNeon, false,                                         \
+          "Use Neon for CRC32 computation")                             \
+  product(bool, UseCRC32, false,                                        \
+          "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, UseBlockZeroing, true,                                  \
+          "Use DC ZVA for block zeroing")                               \
+  product(intx, BlockZeroingLowLimit, 256,                              \
+          "Minimum size in bytes when block zeroing will be used")      \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler handles")
+
+#endif
+
+#endif // CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icBuffer_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/oop.inline.hpp"
+#include "oops/oop.inline2.hpp"
+
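+// The stub assembled below is an ldr of the cached oop, a jump to the entry
+// point and a 64-bit literal (two instruction slots).  The two sizes here
+// presumably reflect that jump: a single branch when the entry point is
+// reachable directly (4 slots in total) versus a far-jump sequence when it
+// is not (6 slots).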
+int InlineCacheBuffer::ic_stub_code_size() {
+  return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size;
+}
+
+#define __ masm->
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, oop cached_oop, address entry_point) {
+  ResourceMark rm;
+  CodeBuffer      code(code_begin, ic_stub_code_size());
+  MacroAssembler* masm            = new MacroAssembler(&code);
+  // note: even though the code contains an embedded oop, we do not need reloc info
+  // because
+  // (1) the oop is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+  assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop");
+
+  address start = __ pc();
+  Label l;
+
+  __ ldr(rscratch2, l);
+  __ far_jump(ExternalAddress(entry_point));
+  __ bind(l);
+  __ emit_long64((int64_t)cached_oop);
+  ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+  assert(__ pc() - start == ic_stub_code_size(), "must be");
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  NativeJump* jump = nativeJump_at(code_begin + 4);
+  return jump->jump_destination();
+}
+
+
+oop InlineCacheBuffer::ic_buffer_cached_oop(address code_begin) {
+  // The word containing the cached value is at the end of this IC buffer
+  uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
+  return (oop)*p;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "runtime/icache.hpp"
+
+extern void aarch64TestHook();
+
+void ICacheStubGenerator::generate_icache_flush(
+                ICache::flush_icache_stub_t* flush_icache_stub) {
+  // Give anyone who calls this a surprise
+  *flush_icache_stub = (ICache::flush_icache_stub_t)NULL;
+}
+
+void ICache::initialize() {
+  aarch64TestHook();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ICACHE_AARCH64_HPP
+#define CPU_AARCH64_VM_ICACHE_AARCH64_HPP
+
+// Interface for updating the instruction cache.  Whenever the VM
+// modifies code, part of the processor instruction cache potentially
+// has to be flushed.
+
+class ICache : public AbstractICache {
+ public:
+  static void initialize();
+  static void invalidate_word(address addr) {
+    __clear_cache((char *)addr, (char *)(addr + 3));
+  }
+  static void invalidate_range(address start, int nbytes) {
+    __clear_cache((char *)start, (char *)(start + nbytes));
+  }
+};
+
+#endif // CPU_AARCH64_VM_ICACHE_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/immediate_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include "decode_aarch64.hpp"
+#include "immediate_aarch64.hpp"
+
+// there are at most 2^13 possible logical immediate encodings
+// however, some combinations of immr and imms are invalid
+static const unsigned  LI_TABLE_SIZE = (1 << 13);
+
+static int li_table_entry_count;
+
+// for forward lookup we just use a direct array lookup
+// and assume that the client has supplied a valid encoding
+// table[encoding] = immediate
+static u_int64_t LITable[LI_TABLE_SIZE];
+
+// for reverse lookup we need a sparse map so we store a table of
+// immediate and encoding pairs sorted by immediate value
+
+struct li_pair {
+  u_int64_t immediate;
+  u_int32_t encoding;
+};
+
+static struct li_pair InverseLITable[LI_TABLE_SIZE];
+
+// comparator to sort entries in the inverse table
+int compare_immediate_pair(const void *i1, const void *i2)
+{
+  struct li_pair *li1 = (struct li_pair *)i1;
+  struct li_pair *li2 = (struct li_pair *)i2;
+  if (li1->immediate < li2->immediate) {
+    return -1;
+  }
+  if (li1->immediate > li2->immediate) {
+    return 1;
+  }
+  return 0;
+}
+
+// helper functions used by expandLogicalImmediate
+
+// for i = 1, ..., N: result<i-1> = 1; all other bits are zero
+static inline u_int64_t ones(int N)
+{
+  return (N == 64 ? (u_int64_t)-1UL : ((1UL << N) - 1));
+}
+
+// result<0> = val<N>
+static inline u_int64_t pickbit(u_int64_t val, int N)
+{
+  return pickbits64(val, N, N);
+}
+
+
+// SPEC bits(M*N) Replicate(bits(M) x, integer N);
+// this is just an educated guess
+
+u_int64_t replicate(u_int64_t bits, int nbits, int count)
+{
+  u_int64_t result = 0;
+  // nbits may be 64 in which case we want mask to be -1
+  u_int64_t mask = ones(nbits);
+  for (int i = 0; i < count ; i++) {
+    result <<= nbits;
+    result |= (bits & mask);
+  }
+  return result;
+}
+
+// this function writes the expanded immediate into the supplied bimm
+// reference and returns a boolean to indicate success (1) or failure (0),
+// because an illegal encoding must be treated as an UNALLOC instruction
+
+// construct a 64 bit immediate value for a logical immediate operation
+int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
+                            u_int32_t imms, u_int64_t &bimm)
+{
+  int len;                  // ought to be <= 6
+  u_int32_t levels;         // 6 bits
+  u_int32_t tmask_and;      // 6 bits
+  u_int32_t wmask_and;      // 6 bits
+  u_int32_t tmask_or;       // 6 bits
+  u_int32_t wmask_or;       // 6 bits
+  u_int64_t imm64;          // 64 bits
+  u_int64_t tmask, wmask;   // 64 bits
+  u_int32_t S, R, diff;     // 6 bits?
+
+  if (immN == 1) {
+    len = 6; // looks like 7 given the spec above but this cannot be!
+  } else {
+    len = 0;
+    u_int32_t val = (~imms & 0x3f);
+    for (int i = 5; i > 0; i--) {
+      if (val & (1 << i)) {
+        len = i;
+        break;
+      }
+    }
+    if (len < 1) {
+      return 0;
+    }
+    // for valid inputs leading 1s in immr must be less than leading
+    // zeros in imms
+    int len2 = 0;                   // ought to be < len
+    u_int32_t val2 = (~immr & 0x3f);
+    for (int i = 5; i > 0; i--) {
+      if (!(val2 & (1 << i))) {
+        len2 = i;
+        break;
+      }
+    }
+    if (len2 >= len) {
+      return 0;
+    }
+  }
+
+  levels = (1 << len) - 1;
+
+  if ((imms & levels) == levels) {
+    return 0;
+  }
+
+  S = imms & levels;
+  R = immr & levels;
+
+ // 6 bit arithmetic!
+  diff = S - R;
+  tmask_and = (diff | ~levels) & 0x3f;
+  tmask_or = (diff & levels) & 0x3f;
+  tmask = 0xffffffffffffffffULL;
+
+  for (int i = 0; i < 6; i++) {
+    int nbits = 1 << i;
+    u_int64_t and_bit = pickbit(tmask_and, i);
+    u_int64_t or_bit = pickbit(tmask_or, i);
+    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
+    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
+    u_int64_t and_bits_top = (and_bits_sub << nbits) | ones(nbits);
+    u_int64_t or_bits_top = (0 << nbits) | or_bits_sub;
+
+    tmask = ((tmask
+              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
+             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
+  }
+
+  wmask_and = (immr | ~levels) & 0x3f;
+  wmask_or = (immr & levels) & 0x3f;
+
+  wmask = 0;
+
+  for (int i = 0; i < 6; i++) {
+    int nbits = 1 << i;
+    u_int64_t and_bit = pickbit(wmask_and, i);
+    u_int64_t or_bit = pickbit(wmask_or, i);
+    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
+    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
+    u_int64_t and_bits_top = (ones(nbits) << nbits) | and_bits_sub;
+    u_int64_t or_bits_top = (or_bits_sub << nbits) | 0;
+
+    wmask = ((wmask
+              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
+             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
+  }
+
+  if (diff & (1U << 6)) {
+    imm64 = tmask & wmask;
+  } else {
+    imm64 = tmask | wmask;
+  }
+
+
+  bimm = imm64;
+  return 1;
+}
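// Worked example (illustrative; expected values taken from the Arm ARM
// DecodeBitMasks pseudocode rather than verified against this code):
//
//   u_int64_t bimm = 0;
//   int ok = expandLogicalImmediate(/* immN */ 1, /* immr */ 0,
//                                   /* imms */ 0x07, bimm);
//   // N=1, imms=0b000111 encodes a run of eight low-order ones, so the
//   // expected result is ok == 1 and bimm == 0xffULL.  Using immr = 4
//   // instead rotates that run right within the 64-bit element and should
//   // give 0xf00000000000000fULL.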
+
+// constructor to initialise the lookup tables
+
+static void initLITables() __attribute__ ((constructor));
+static void initLITables()
+{
+  li_table_entry_count = 0;
+  for (unsigned index = 0; index < LI_TABLE_SIZE; index++) {
+    u_int32_t N = uimm(index, 12, 12);
+    u_int32_t immr = uimm(index, 11, 6);
+    u_int32_t imms = uimm(index, 5, 0);
+    if (expandLogicalImmediate(N, immr, imms, LITable[index])) {
+      InverseLITable[li_table_entry_count].immediate = LITable[index];
+      InverseLITable[li_table_entry_count].encoding = index;
+      li_table_entry_count++;
+    }
+  }
+  // now sort the inverse table
+  qsort(InverseLITable, li_table_entry_count,
+        sizeof(InverseLITable[0]), compare_immediate_pair);
+}
+
+// public APIs provided for logical immediate lookup and reverse lookup
+
+u_int64_t logical_immediate_for_encoding(u_int32_t encoding)
+{
+  return LITable[encoding];
+}
+
+u_int32_t encoding_for_logical_immediate(u_int64_t immediate)
+{
+  struct li_pair pair;
+  struct li_pair *result;
+
+  pair.immediate = immediate;
+
+  result = (struct li_pair *)
+    bsearch(&pair, InverseLITable, li_table_entry_count,
+            sizeof(InverseLITable[0]), compare_immediate_pair);
+
+  if (result) {
+    return result->encoding;
+  }
+
+  return 0xffffffff;
+}
+
+// floating point immediates are encoded in 8 bits
+// fpimm[7] = sign bit
+// fpimm[6:4] = signed exponent
+// fpimm[3:0] = fraction (assuming leading 1)
+// i.e. F = s * 1.f * 2^(e - b)
+
+u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp)
+{
+  union {
+    float fpval;
+    double dpval;
+    u_int64_t val;
+  };
+
+  u_int32_t s, e, f;
+  s = (imm8 >> 7 ) & 0x1;
+  e = (imm8 >> 4) & 0x7;
+  f = imm8 & 0xf;
+  // the fp value is s * n/16 * 2^r where n is 16+f (r is the signed exponent)
+  fpval = (16.0 + f) / 16.0;
+  // n.b. exponent is signed
+  if (e < 4) {
+    int epos = e;
+    for (int i = 0; i <= epos; i++) {
+      fpval *= 2.0;
+    }
+  } else {
+    int eneg = 7 - e;
+    for (int i = 0; i < eneg; i++) {
+      fpval /= 2.0;
+    }
+  }
+
+  if (s) {
+    fpval = -fpval;
+  }
+  if (is_dp) {
+    dpval = (double)fpval;
+  }
+  return val;
+}
+
+u_int32_t encoding_for_fp_immediate(float immediate)
+{
+  // given a float which is of the form
+  //
+  //     s * n/16 * 2^r
+  //
+  // where n is 16+f and imm1:s, imm4:f, simm3:r
+  // return the imm8 result [s:r:f]
+  //
+
+  union {
+    float fpval;
+    u_int32_t val;
+  };
+  fpval = immediate;
+  u_int32_t s, r, f, res;
+  // sign bit is 31
+  s = (val >> 31) & 0x1;
+  // exponent is bits 30-23 but we only want the bottom 3 bits
+  // strictly we ought to check that bits 30-25 are
+  // either all 1s or all 0s
+  r = (val >> 23) & 0x7;
+  // fraction is bits 22-0
+  f = (val >> 19) & 0xf;
+  res = (s << 7) | (r << 4) | f;
+  return res;
+}
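// Worked example of the encoding above (illustrative):
//   1.0f : sign 0, exponent bits 0x7f (low 3 bits = 7), fraction 0
//          -> imm8 = (0 << 7) | (7 << 4) | 0 = 0x70
//   2.0f : sign 0, exponent bits 0x80 (low 3 bits = 0), fraction 0
//          -> imm8 = (0 << 7) | (0 << 4) | 0 = 0x00
// so encoding_for_fp_immediate(1.0f) should return 0x70, and
// fp_immediate_for_encoding(0x70, /* is_dp */ 1) should return
// 0x3ff0000000000000, the raw bits of (double)1.0.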
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/immediate_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef _IMMEDIATE_H
+#define _IMMEDIATE_H
+
+#include <sys/types.h>
+
+/*
+ * functions to map backwards and forwards between logical or floating
+ * point immediates and their corresponding encodings. the mapping
+ * from encoding to immediate is required by the simulator. the reverse
+ * mapping is required by the OpenJDK assembler.
+ *
+ * a logical immediate value supplied to or returned from a map lookup
+ * is always 64 bits. this is sufficient for looking up 32 bit
+ * immediates or their encodings since a 32 bit immediate has the same
+ * encoding as the 64 bit immediate produced by concatenating the
+ * immediate with itself.
+ *
+ * a logical immediate encoding is 13 bits N:immr:imms (3 fields of
+ * widths 1:6:6 -- see the arm spec). they appear as bits [22:10] of a
+ * logical immediate instruction. encodings are supplied and returned
+ * as 32 bit values. if a given 13 bit immediate has no corresponding
+ * encoding then a map lookup will return 0xffffffff.
+ */
+
+u_int64_t logical_immediate_for_encoding(u_int32_t encoding);
+u_int32_t encoding_for_logical_immediate(u_int64_t immediate);
+u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp);
+u_int32_t encoding_for_fp_immediate(float immediate);
+
+#endif // _IMMEDIATE_H
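// Minimal usage sketch (illustrative; the example immediate is assumed to be
// encodable as a 16-bit element of eight ones replicated across 64 bits):
//
//   u_int32_t enc = encoding_for_logical_immediate(0x00ff00ff00ff00ffULL);
//   if (enc != 0xffffffff) {                   // 0xffffffff means "not encodable"
//     u_int64_t imm = logical_immediate_for_encoding(enc);
//     // imm round-trips to 0x00ff00ff00ff00ffULL by construction of the tables
//   }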
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,1503 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interp_masm_aarch64.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/sharedRuntime.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "thread_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "thread_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "thread_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "thread_bsd.inline.hpp"
+#endif
+
+
+void InterpreterMacroAssembler::narrow(Register result) {
+
+  // Get method->_constMethod->_result_type
+  ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
+  ldr(rscratch1, Address(rscratch1, methodOopDesc::const_offset()));
+  ldrb(rscratch1, Address(rscratch1, constMethodOopDesc::result_type_offset()));
+
+  Label done, notBool, notByte, notChar;
+
+  // common case first
+  cmpw(rscratch1, T_INT);
+  br(Assembler::EQ, done);
+
+  // mask integer result to narrower return type.
+  cmpw(rscratch1, T_BOOLEAN);
+  br(Assembler::NE, notBool);
+  andw(result, result, 0x1);
+  b(done);
+
+  bind(notBool);
+  cmpw(rscratch1, T_BYTE);
+  br(Assembler::NE, notByte);
+  sbfx(result, result, 0, 8);
+  b(done);
+
+  bind(notByte);
+  cmpw(rscratch1, T_CHAR);
+  br(Assembler::NE, notChar);
+  ubfx(result, result, 0, 16);  // truncate upper 16 bits
+  b(done);
+
+  bind(notChar);
+  sbfx(result, result, 0, 16);     // sign-extend short
+
+  // Nothing to do for T_INT
+  bind(done);
+}
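// A minimal C sketch of the narrowing performed above, assuming HotSpot's
// BasicType constants (illustrative only):
//
//   static jint narrow_result(jint r, BasicType t) {
//     switch (t) {
//     case T_BOOLEAN: return r & 1;        // andw(result, result, 0x1)
//     case T_BYTE:    return (jbyte)  r;   // sbfx(result, result, 0, 8)
//     case T_CHAR:    return (jchar)  r;   // ubfx(result, result, 0, 16)
//     case T_SHORT:   return (jshort) r;   // sbfx(result, result, 0, 16)
//     default:        return r;            // T_INT: nothing to do
//     }
//   }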
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
+  if (JvmtiExport::can_pop_frame()) {
+    Label L;
+    // Initiate popframe handling only if it is not already being
+    // processed.  If the flag has the popframe_processing bit set, it
+    // means that this code is called *during* popframe handling - we
+    // don't want to reenter.
+    // This method is only called just after the call into the vm in
+    // call_VM_base, so the arg registers are available.
+    ldrw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
+    tstw(rscratch1, JavaThread::popframe_pending_bit);
+    br(Assembler::EQ, L);
+    tstw(rscratch1, JavaThread::popframe_processing_bit);
+    br(Assembler::NE, L);
+    // Call Interpreter::remove_activation_preserving_args_entry() to get the
+    // address of the same-named entrypoint in the generated interpreter code.
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+    br(r0);
+    bind(L);
+  }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+  ldr(r2, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+  const Address tos_addr(r2, JvmtiThreadState::earlyret_tos_offset());
+  const Address oop_addr(r2, JvmtiThreadState::earlyret_oop_offset());
+  const Address val_addr(r2, JvmtiThreadState::earlyret_value_offset());
+  switch (state) {
+    case atos: ldr(r0, oop_addr);
+               str(zr, oop_addr);
+               verify_oop(r0, state);               break;
+    case ltos: ldr(r0, val_addr);                   break;
+    case btos:                                   // fall through
+    case ztos:                                   // fall through
+    case ctos:                                   // fall through
+    case stos:                                   // fall through
+    case itos: ldrw(r0, val_addr);                  break;
+    case ftos: ldrs(v0, val_addr);                  break;
+    case dtos: ldrd(v0, val_addr);                  break;
+    case vtos: /* nothing to do */                  break;
+    default  : ShouldNotReachHere();
+  }
+  // Clean up tos value in the thread object
+  movw(rscratch1, (int) ilgl);
+  strw(rscratch1, tos_addr);
+  strw(zr, val_addr);
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
+  if (JvmtiExport::can_force_early_return()) {
+    Label L;
+    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+    cbz(rscratch1, L); // if (thread->jvmti_thread_state() == NULL) exit;
+
+    // Initiate earlyret handling only if it is not already being processed.
+    // If the flag has the earlyret_processing bit set, it means that this code
+    // is called *during* earlyret handling - we don't want to reenter.
+    ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_state_offset()));
+    cmpw(rscratch1, JvmtiThreadState::earlyret_pending);
+    br(Assembler::NE, L);
+
+    // Call Interpreter::remove_activation_early_entry() to get the address of the
+    // same-named entrypoint in the generated interpreter code.
+    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+    ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_tos_offset()));
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), rscratch1);
+    br(r0);
+    bind(L);
+  }
+}
+
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
+  Register reg,
+  int bcp_offset) {
+  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+  ldrh(reg, Address(rbcp, bcp_offset));
+  rev16(reg, reg);
+}
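// Illustrative example: bytecode operands are stored big-endian, so for
// operand bytes 0x12 0x34 at rbcp + bcp_offset the little-endian ldrh loads
// 0x3412 and the rev16 restores the intended index 0x1234.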
+
+void InterpreterMacroAssembler::get_dispatch() {
+  unsigned long offset;
+  adrp(rdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset);
+  lea(rdispatch, Address(rdispatch, offset));
+}
+
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       size_t index_size) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (index_size == sizeof(u2)) {
+    load_unsigned_short(index, Address(rbcp, bcp_offset));
+  } else if (index_size == sizeof(u4)) {
+    assert(EnableInvokeDynamic, "giant index used only for JSR 292");
+    ldrw(index, Address(rbcp, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
+    eonw(index, index, zr);  // convert to plain index
+  } else if (index_size == sizeof(u1)) {
+    assert(EnableInvokeDynamic, "tiny index used only for JSR 292");
+    load_unsigned_byte(index, Address(rbcp, bcp_offset));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Return
+// Rindex: index into constant pool
+// Rcache: address of cache entry - ConstantPoolCache::base_offset()
+//
+// A caller must add ConstantPoolCache::base_offset() to Rcache to get
+// the true address of the cache entry.
+//
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
+                                                           Register index,
+                                                           int bcp_offset,
+                                                           size_t index_size) {
+  assert_different_registers(cache, index);
+  assert_different_registers(cache, rcpool);
+  get_cache_index_at_bcp(index, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // convert from field index to ConstantPoolCacheEntry
+  // aarch64 already has the cache in rcpool so there is no need to
+  // install it in cache. instead we pre-add the indexed offset to
+  // rcpool and return it in cache. All clients of this method need to
+  // be modified accordingly.
+  add(cache, rcpool, index, Assembler::LSL, 5);
+}
+
+
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register index,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  // We use a 32-bit load here since the layout of 64-bit words on
+  // little-endian machines allows us to do so.
+  // n.b. unlike x86 cache already includes the index offset
+  lea(bytecode, Address(cache,
+                         constantPoolCacheOopDesc::base_offset()
+                         + ConstantPoolCacheEntry::indices_offset()));
+  ldarw(bytecode, bytecode);
+  const int shift_count = (1 + byte_no) * BitsPerByte;
+  ubfx(bytecode, bytecode, shift_count, BitsPerByte);
+}
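// Illustrative note (field layout as assumed here): the _indices word keeps
// the constant pool index in its low 16 bits and the two resolved bytecodes
// in bytes 2 and 3, so byte_no == 1 extracts bits 16..23
// ((1 + 1) * BitsPerByte == 16) and byte_no == 2 extracts bits 24..31.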
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+                                                               Register tmp,
+                                                               int bcp_offset,
+                                                               size_t index_size) {
+  assert(cache != tmp, "must use different register");
+  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // convert from field index to ConstantPoolCacheEntry index
+  // and from word offset to byte offset
+  ldr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize));
+  // skip past the header
+  add(cache, cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
+  add(cache, cache, tmp, Assembler::LSL, 2 + LogBytesPerWord);  // construct pointer to cache entry
+}
+
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
+// subtype of super_klass.
+//
+// Args:
+//      r0: superklass
+//      Rsub_klass: subklass
+//
+// Kills:
+//      r2, r5
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Label& ok_is_subtype) {
+  assert(Rsub_klass != r0, "r0 holds superklass");
+  assert(Rsub_klass != r2, "r2 holds 2ndary super array length");
+  assert(Rsub_klass != r5, "r5 holds 2ndary super array scan ptr");
+
+  // Profile the not-null value's klass.
+  profile_typecheck(r2, Rsub_klass, r5); // blows r2, reloads r5
+
+  // Do the check.
+  check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2
+
+  // Profile the failure of the check.
+  profile_typecheck_failed(r2); // blows r2
+}
+
+// Java Expression Stack
+
+void InterpreterMacroAssembler::pop_ptr(Register r) {
+  ldr(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) {
+  ldrw(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_l(Register r) {
+  ldr(r, post(esp, 2 * Interpreter::stackElementSize));
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) {
+  str(r, pre(esp, -wordSize));
+ }
+
+void InterpreterMacroAssembler::push_i(Register r) {
+  str(r, pre(esp, -wordSize));
+}
+
+void InterpreterMacroAssembler::push_l(Register r) {
+  str(r, pre(esp, 2 * -wordSize));
+}
+
+void InterpreterMacroAssembler::pop_f(FloatRegister r) {
+  ldrs(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister r) {
+  ldrd(r, post(esp, 2 * Interpreter::stackElementSize));
+}
+
+void InterpreterMacroAssembler::push_f(FloatRegister r) {
+  strs(r, pre(esp, -wordSize));
+}
+
+void InterpreterMacroAssembler::push_d(FloatRegister r) {
+  strd(r, pre(esp, 2 * -wordSize));
+}
+
+void InterpreterMacroAssembler::pop(TosState state) {
+  switch (state) {
+  case atos: pop_ptr();                 break;
+  case btos:
+  case ztos:
+  case ctos:
+  case stos:
+  case itos: pop_i();                   break;
+  case ltos: pop_l();                   break;
+  case ftos: pop_f();                   break;
+  case dtos: pop_d();                   break;
+  case vtos: /* nothing to do */        break;
+  default:   ShouldNotReachHere();
+  }
+  verify_oop(r0, state);
+}
+
+void InterpreterMacroAssembler::push(TosState state) {
+  verify_oop(r0, state);
+  switch (state) {
+  case atos: push_ptr();                break;
+  case btos:
+  case ztos:
+  case ctos:
+  case stos:
+  case itos: push_i();                  break;
+  case ltos: push_l();                  break;
+  case ftos: push_f();                  break;
+  case dtos: push_d();                  break;
+  case vtos: /* nothing to do */        break;
+  default  : ShouldNotReachHere();
+  }
+}
+
+// Helpers for swap and dup
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
+  ldr(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) {
+  str(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
+  // set sender sp
+  mov(r13, sp);
+  // record last_sp
+  str(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
+  prepare_to_jump_from_interpreted();
+
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+    // interp_only is an int, on little endian it is sufficient to test the byte only
+    // Is a cmpl faster?
+    ldr(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
+    cbz(rscratch1, run_compiled_code);
+    ldr(rscratch1, Address(method, methodOopDesc::interpreter_entry_offset()));
+    br(rscratch1);
+    bind(run_compiled_code);
+  }
+
+  ldr(rscratch1, Address(method, methodOopDesc::from_interpreted_offset()));
+  br(rscratch1);
+}
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts.  amd64 does not do this.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+    dispatch_next(state, step);
+}
+
+void InterpreterMacroAssembler::dispatch_base(TosState state,
+                                              address* table,
+                                              bool verifyoop) {
+  if (VerifyActivationFrameSize) {
+    Unimplemented();
+  }
+  if (verifyoop) {
+    verify_oop(r0, state);
+  }
+  if (table == Interpreter::dispatch_table(state)) {
+    addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
+    ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3)));
+  } else {
+    mov(rscratch2, (address)table);
+    ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
+  }
+  br(rscratch2);
+}
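// Illustrative address computation for the fast path above: with rscratch1
// holding the next bytecode,
//
//   target = *(address*)(rdispatch
//                        + (Interpreter::distance_from_dispatch_table(state)
//                           + bytecode) * 8)
//
// i.e. a single 64-bit load indexed off the rdispatch base set up by
// get_dispatch().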
+
+void InterpreterMacroAssembler::dispatch_only(TosState state) {
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state), false);
+}
+
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+  // load next bytecode
+  ldrb(rscratch1, Address(pre(rbcp, step)));
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+  // load current bytecode
+  ldrb(rscratch1, Address(rbcp, 0));
+  dispatch_base(state, table);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//    If throw_monitor_exception
+//       throws IllegalMonitorStateException
+//    Else if install_monitor_exception
+//       installs IllegalMonitorStateException
+//    Else
+//       no error processing
+void InterpreterMacroAssembler::remove_activation(
+        TosState state,
+        bool throw_monitor_exception,
+        bool install_monitor_exception,
+        bool notify_jvmdi) {
+  // Note: Registers r3 and xmm0 may be in use for the
+  // result check if this is a synchronized method
+  Label unlocked, unlock, no_unlock;
+
+  // get the value of _do_not_unlock_if_synchronized into r3
+  const Address do_not_unlock_if_synchronized(rthread,
+    in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  ldrb(r3, do_not_unlock_if_synchronized);
+  strb(zr, do_not_unlock_if_synchronized); // reset the flag
+
+ // get method access flags
+  ldr(r1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
+  ldr(r2, Address(r1, methodOopDesc::access_flags_offset()));
+  tst(r2, JVM_ACC_SYNCHRONIZED);
+  br(Assembler::EQ, unlocked);
+
+  // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+  // is set.
+  cbnz(r3, no_unlock);
+
+  // unlock monitor
+  push(state); // save result
+
+  // BasicObjectLock will be first in list, since this is a
+  // synchronized method. However, need to check that the object has
+  // not been unlocked by an explicit monitorexit bytecode.
+  const Address monitor(rfp, frame::interpreter_frame_initial_sp_offset *
+                        wordSize - (int) sizeof(BasicObjectLock));
+  // We use c_rarg1 so that if we go slow path it will be the correct
+  // register for unlock_object to pass to VM directly
+  lea(c_rarg1, monitor); // address of first monitor
+
+  ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+  cbnz(r0, unlock);
+
+  pop(state);
+  if (throw_monitor_exception) {
+    // Entry already unlocked, need to throw exception
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+    should_not_reach_here();
+  } else {
+    // Monitor already unlocked during a stack unroll. If requested,
+    // install an illegal_monitor_state_exception.  Continue with
+    // stack unrolling.
+    if (install_monitor_exception) {
+      call_VM(noreg, CAST_FROM_FN_PTR(address,
+                     InterpreterRuntime::new_illegal_monitor_state_exception));
+    }
+    b(unlocked);
+  }
+
+  bind(unlock);
+  unlock_object(c_rarg1);
+  pop(state);
+
+  // Check for block-structured locking (i.e., that all locked
+  // objects have been unlocked)
+  bind(unlocked);
+
+  // r0: Might contain return value
+
+  // Check that all monitors are unlocked
+  {
+    Label loop, exception, entry, restart;
+    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+    const Address monitor_block_top(
+        rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    const Address monitor_block_bot(
+        rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+    bind(restart);
+    // We use c_rarg1 so that if we go slow path it will be the correct
+    // register for unlock_object to pass to VM directly
+    ldr(c_rarg1, monitor_block_top); // points to current entry, starting
+                                     // with top-most entry
+    lea(r19, monitor_block_bot);  // points to word before bottom of
+                                  // monitor block
+    b(entry);
+
+    // Entry already locked, need to throw exception
+    bind(exception);
+
+    if (throw_monitor_exception) {
+      // Throw exception
+      MacroAssembler::call_VM(noreg,
+                              CAST_FROM_FN_PTR(address, InterpreterRuntime::
+                                   throw_illegal_monitor_state_exception));
+      should_not_reach_here();
+    } else {
+      // Stack unrolling. Unlock object and install illegal_monitor_exception.
+      // Unlock does not block, so don't have to worry about the frame.
+      // We don't have to preserve c_rarg1 since we are going to throw an exception.
+
+      push(state);
+      unlock_object(c_rarg1);
+      pop(state);
+
+      if (install_monitor_exception) {
+        call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                        InterpreterRuntime::
+                                        new_illegal_monitor_state_exception));
+      }
+
+      b(restart);
+    }
+
+    bind(loop);
+    // check if current entry is used
+    ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+    cbnz(rscratch1, exception);
+
+    add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry
+    bind(entry);
+    cmp(c_rarg1, r19); // check if bottom reached
+    br(Assembler::NE, loop); // if not at bottom then check this entry
+  }
+
+  bind(no_unlock);
+
+  // jvmti support
+  if (notify_jvmdi) {
+    notify_method_exit(state, NotifyJVMTI);    // preserve TOSCA
+  } else {
+    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
+  }
+
+  // remove activation
+  // get sender esp
+  ldr(esp,
+      Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
+  // remove frame anchor
+  leave();
+  // If we're returning to interpreted code we will shortly be
+  // adjusting SP to allow some space for ESP.  If we're returning to
+  // compiled code the saved sender SP was saved in sender_sp, so this
+  // restores it.
+  andr(sp, esp, -16);
+}
+
+#endif // CC_INTERP
+
+// Lock object
+//
+// Args:
+//      c_rarg1: BasicObjectLock to be used for locking
+//
+// Kills:
+//      r0
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterMacroAssembler::lock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg = r0;
+    const Register tmp = c_rarg2;
+    const Register obj_reg = c_rarg3; // Will contain the oop
+
+    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+    const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+    const int mark_offset = lock_offset +
+                            BasicLock::displaced_header_offset_in_bytes();
+
+    Label slow_case;
+
+    // Load object pointer into obj_reg %c_rarg3
+    ldr(obj_reg, Address(lock_reg, obj_offset));
+
+    if (UseBiasedLocking) {
+      biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
+    }
+
+    // Load (object->mark() | 1) into swap_reg
+    ldr(rscratch1, Address(obj_reg, 0));
+    orr(swap_reg, rscratch1, 1);
+
+    // Save (object->mark() | 1) into BasicLock's displaced header
+    str(swap_reg, Address(lock_reg, mark_offset));
+
+    assert(lock_offset == 0,
+           "displached header must be first word in BasicObjectLock");
+
+    Label fail;
+    if (PrintBiasedLockingStatistics) {
+      Label fast;
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
+      bind(fast);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  rscratch2, rscratch1, tmp);
+      b(done);
+      bind(fail);
+    } else {
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+    }
+
+    // Test if the oopMark is an obvious stack pointer, i.e.,
+    //  1) (mark & 7) == 0, and
+    //  2) rsp <= mark < rsp + os::pagesize()
+    //
+    // These 3 tests can be done by evaluating the following
+    // expression: ((mark - rsp) & (7 - os::vm_page_size())),
+    // assuming both stack pointer and pagesize have their
+    // least significant 3 bits clear.
+    // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg
+    // NOTE2: aarch64 does not like to subtract sp from rn so take a
+    // copy
+    mov(rscratch1, sp);
+    sub(swap_reg, swap_reg, rscratch1);
+    ands(swap_reg, swap_reg, (unsigned long)(7 - os::vm_page_size()));
+
+    // Save the test result, for recursive case, the result is zero
+    str(swap_reg, Address(lock_reg, mark_offset));
+
+    if (PrintBiasedLockingStatistics) {
+      br(Assembler::NE, slow_case);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  rscratch2, rscratch1, tmp);
+    }
+    br(Assembler::EQ, done);
+
+    bind(slow_case);
+
+    // Call the runtime routine for slow case
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+
+    bind(done);
+  }
+}
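// Worked example of the stack-lock test above (illustrative, assuming a 4K
// page): 7 - os::vm_page_size() == 7 - 4096 == -4089, i.e. the 64-bit mask
// 0xfffffffffffff007.  (mark - sp) & mask is zero exactly when the low three
// bits of mark - sp are clear (8-byte aligned) and no bit >= 12 is set, i.e.
// 0 <= mark - sp < 4096, so the mark word points into the page just above
// this thread's stack pointer and the lock is a recursive stack lock.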
+
+
+// Unlocks an object. Used in monitorexit bytecode and
+// remove_activation.  Throws an IllegalMonitorException if object is
+// not locked by current thread.
+//
+// Args:
+//      c_rarg1: BasicObjectLock for lock
+//
+// Kills:
+//      r0
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterMacroAssembler::unlock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1");
+
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg   = r0;
+    const Register header_reg = c_rarg2;  // Will contain the old oopMark
+    const Register obj_reg    = c_rarg3;  // Will contain the oop
+
+    save_bcp(); // Save in case of exception
+
+    // Convert from BasicObjectLock structure to object and BasicLock
+    // structure Store the BasicLock address into %r0
+    lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+
+    // Load oop into obj_reg(%c_rarg3)
+    ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    // Free entry
+    str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    if (UseBiasedLocking) {
+      biased_locking_exit(obj_reg, header_reg, done);
+    }
+
+    // Load the old header from BasicLock structure
+    ldr(header_reg, Address(swap_reg,
+                            BasicLock::displaced_header_offset_in_bytes()));
+
+    // Test for recursion
+    cbz(header_reg, done);
+
+    // Atomic swap back the old header
+    cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+
+    // Call the runtime routine for slow case.
+    str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+
+    bind(done);
+
+    restore_bcp();
+  }
+}
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
+                                                         Label& zero_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ldr(mdp, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize));
+  cbz(mdp, zero_continue);
+}
+
+// Set the method data pointer for the current bcp.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Label set_mdp;
+  stp(r0, r1, Address(pre(sp, -2 * wordSize)));
+
+  // Test MDO to avoid the call if it is NULL.
+  ldr(r0, Address(rmethod, in_bytes(methodOopDesc::method_data_offset())));
+  cbz(r0, set_mdp);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rmethod, rbcp);
+  // r0: mdi
+  // mdo is guaranteed to be non-zero here, we checked for it before the call.
+  ldr(r1, Address(rmethod, in_bytes(methodOopDesc::method_data_offset())));
+  lea(r1, Address(r1, in_bytes(methodDataOopDesc::data_offset())));
+  add(r0, r1, r0);
+  str(r0, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize));
+  bind(set_mdp);
+  ldp(r0, r1, Address(post(sp, 2 * wordSize)));
+}
+
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+  Label verify_continue;
+  stp(r0, r1, Address(pre(sp, -2 * wordSize)));
+  stp(r2, r3, Address(pre(sp, -2 * wordSize)));
+  test_method_data_pointer(r3, verify_continue); // If mdp is zero, continue
+  get_method(r1);
+
+  // If the mdp is valid, it will point to a DataLayout header which is
+  // consistent with the bcp.  The converse is highly probable also.
+  ldrsh(r2, Address(r3, in_bytes(DataLayout::bci_offset())));
+  ldr(rscratch1, Address(r1, methodOopDesc::const_offset()));
+  add(r2, r2, rscratch1, Assembler::LSL);
+  lea(r2, Address(r2, constMethodOopDesc::codes_offset()));
+  cmp(r2, rbcp);
+  br(Assembler::EQ, verify_continue);
+  // r1: method
+  // rbcp: bcp // rbcp == 22
+  // r3: mdp
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
+               r1, rbcp, r3);
+  bind(verify_continue);
+  ldp(r2, r3, Address(post(sp, 2 * wordSize)));
+  ldp(r0, r1, Address(post(sp, 2 * wordSize)));
+#endif // ASSERT
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
+                                                int constant,
+                                                Register value) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Address data(mdp_in, constant);
+  str(value, data);
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      int constant,
+                                                      bool decrement) {
+  increment_mdp_data_at(mdp_in, noreg, constant, decrement);
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      Register reg,
+                                                      int constant,
+                                                      bool decrement) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // %%% this does 64-bit counters; at best it is wasting space and
+  // at worst it is a rare bug when counters overflow
+
+  assert_different_registers(rscratch2, rscratch1, mdp_in, reg);
+
+  Address addr1(mdp_in, constant);
+  Address addr2(rscratch2, reg, Address::lsl(0));
+  Address &addr = addr1;
+  if (reg != noreg) {
+    lea(rscratch2, addr1);
+    addr = addr2;
+  }
+
+  if (decrement) {
+    // Decrement the register.  Set condition codes.
+    // Intel does this
+    // addptr(data, (int32_t) -DataLayout::counter_increment);
+    // If the decrement causes the counter to overflow, stay negative
+    // Label L;
+    // jcc(Assembler::negative, L);
+    // addptr(data, (int32_t) DataLayout::counter_increment);
+    // so we do this
+    ldr(rscratch1, addr);
+    subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment);
+    Label L;
+    br(Assembler::LO, L);       // skip store if counter underflow
+    str(rscratch1, addr);
+    bind(L);
+  } else {
+    assert(DataLayout::counter_increment == 1,
+           "flow-free idiom only works with 1");
+    // Intel does this
+    // Increment the register.  Set carry flag.
+    // addptr(data, DataLayout::counter_increment);
+    // If the increment causes the counter to overflow, pull back by 1.
+    // sbbptr(data, (int32_t)0);
+    // so we do this
+    ldr(rscratch1, addr);
+    adds(rscratch1, rscratch1, DataLayout::counter_increment);
+    Label L;
+    br(Assembler::CS, L);       // skip store if counter overflow
+    str(rscratch1, addr);
+    bind(L);
+  }
+}
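// Equivalent C sketch of the carry-checked bump used above (illustrative):
//
//   static uint64_t bump_counter(uint64_t c) {
//     uint64_t n = c + DataLayout::counter_increment;  // adds ... sets C on wrap
//     return (n < c) ? c : n;                          // br(CS, L) skips the store
//   }
//
// so the counter saturates at its maximum instead of wrapping to zero; the
// decrement path mirrors this with br(LO, L) so it never wraps below zero.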
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+                                                int flag_byte_constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  int header_offset = in_bytes(DataLayout::header_offset());
+  int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant);
+  // Set the flag
+  ldr(rscratch1, Address(mdp_in, header_offset));
+  orr(rscratch1, rscratch1, header_bits);
+  str(rscratch1, Address(mdp_in, header_offset));
+}
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+                                                 int offset,
+                                                 Register value,
+                                                 Register test_value_out,
+                                                 Label& not_equal_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  if (test_value_out == noreg) {
+    ldr(rscratch1, Address(mdp_in, offset));
+    cmp(value, rscratch1);
+  } else {
+    // Put the test value into a register, so caller can use it:
+    ldr(test_value_out, Address(mdp_in, offset));
+    cmp(value, test_value_out);
+  }
+  br(Assembler::NE, not_equal_continue);
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ldr(rscratch1, Address(mdp_in, offset_of_disp));
+  add(mdp_in, mdp_in, rscratch1, LSL);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     Register reg,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  lea(rscratch1, Address(mdp_in, offset_of_disp));
+  ldr(rscratch1, Address(rscratch1, reg, Address::lsl(0)));
+  add(mdp_in, mdp_in, rscratch1, LSL);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
+                                                       int constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  add(mdp_in, mdp_in, (unsigned)constant);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // save/restore across call_VM
+  stp(zr, return_bci, Address(pre(sp, -2 * wordSize)));
+  call_VM(noreg,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+          return_bci);
+  ldp(zr, return_bci, Address(post(sp, 2 * wordSize)));
+}
+
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
+                                                     Register bumped_count) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    // Otherwise, assign to mdp
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the taken count.
+    // We inline increment_mdp_data_at to return bumped_count in a register
+    //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
+    Address data(mdp, in_bytes(JumpData::taken_offset()));
+    ldr(bumped_count, data);
+    assert(DataLayout::counter_increment == 1,
+            "flow-free idiom only works with 1");
+    // Intel does this to catch overflow
+    // addptr(bumped_count, DataLayout::counter_increment);
+    // sbbptr(bumped_count, 0);
+    // so we do this
+    adds(bumped_count, bumped_count, DataLayout::counter_increment);
+    Label L;
+    br(Assembler::CS, L);       // skip store if counter overflow
+    str(bumped_count, data);
+    bind(L);
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the not taken count.
+    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
+    // The method data pointer needs to be updated to correspond to
+    // the next bytecode
+    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+                                                     Register mdp,
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    Label skip_receiver_profile;
+    if (receiver_can_be_null) {
+      Label not_null;
+      cbnz(receiver, not_null);
+      // We are making a call.  Increment the count for null receiver.
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+      b(skip_receiver_profile);
+      bind(not_null);
+    }
+
+    // Record the receiver type.
+    record_klass_in_profile(receiver, mdp, reg2, true);
+    bind(skip_receiver_profile);
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows.  At the same time, it remembers
+// the location of the first empty row.  (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree.  Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+                                        Register receiver, Register mdp,
+                                        Register reg2, int start_row,
+                                        Label& done, bool is_virtual_call) {
+  if (TypeProfileWidth == 0) {
+    if (is_virtual_call) {
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+    }
+    return;
+  }
+
+  int last_row = VirtualCallData::row_limit() - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the receiver and for null.
+  // Take any of three different outcomes:
+  //   1. found receiver => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    Label next_test;
+    bool test_for_null_also = (row == start_row);
+
+    // See if the receiver is receiver[n].
+    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+    test_mdp_data_at(mdp, recvr_offset, receiver,
+                     (test_for_null_also ? reg2 : noreg),
+                     next_test);
+    // (Reg2 now contains the receiver from the CallData.)
+
+    // The receiver is receiver[n].  Increment count[n].
+    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    increment_mdp_data_at(mdp, count_offset);
+    b(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      Label found_null;
+      // Failed the equality check on receiver[n]...  Test for null.
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        if (is_virtual_call) {
+          cbz(reg2, found_null);
+          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Increment total counter to indicate polymorphic case.
+          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+          b(done);
+          bind(found_null);
+        } else {
+          cbz(reg2, done);
+        }
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      cbz(reg2, found_null);
+
+      // Put all the "Case 3" tests here.
+      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+
+      // Found a null.  Keep searching for a matching receiver,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching receiver, but we
+  // observed the receiver[start_row] is NULL.
+
+  // Fill in the receiver field and increment the count.
+  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+  set_mdp_data_at(mdp, recvr_offset, receiver);
+  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  mov(reg2, DataLayout::counter_increment);
+  set_mdp_data_at(mdp, count_offset, reg2);
+  if (start_row > 0) {
+    b(done);
+  }
+}
+
+// Example state machine code for three profile rows:
+//   // main copy of decision tree, rooted at row[0]
+//   if (row[0].rec == rec) { row[0].incr(); goto done; }
+//   if (row[0].rec != NULL) {
+//     // inner copy of decision tree, rooted at row[1]
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[1].rec != NULL) {
+//       // degenerate decision tree, rooted at row[2]
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       if (row[2].rec != NULL) { count.incr(); goto done; } // overflow
+//       row[2].init(rec); goto done;
+//     } else {
+//       // remember row[1] is empty
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       row[1].init(rec); goto done;
+//     }
+//   } else {
+//     // remember row[0] is empty
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[2].rec == rec) { row[2].incr(); goto done; }
+//     row[0].init(rec); goto done;
+//   }
+//   done:
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register mdp, Register reg2,
+                                                        bool is_virtual_call) {
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;
+
+  record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call);
+
+  bind (done);
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci,
+                                            Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+    uint row;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the total ret count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    for (row = 0; row < RetData::row_limit(); row++) {
+      Label next_test;
+
+      // See if return_bci is equal to bci[n]:
+      test_mdp_data_at(mdp,
+                       in_bytes(RetData::bci_offset(row)),
+                       return_bci, noreg,
+                       next_test);
+
+      // return_bci is equal to bci[n].  Increment the count.
+      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+      // The method data pointer needs to be updated to reflect the new target.
+      update_mdp_by_offset(mdp,
+                           in_bytes(RetData::bci_displacement_offset(row)));
+      b(profile_continue);
+      bind(next_test);
+    }
+
+    update_mdp_for_ret(return_bci);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+  if (ProfileInterpreter && TypeProfileCasts) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    int count_offset = in_bytes(CounterData::count_offset());
+    // Back up the address, since we have already bumped the mdp.
+    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+    // *Decrement* the counter.  We expect to see zero or small negatives.
+    increment_mdp_data_at(mdp, count_offset, true);
+
+    bind (profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+      // Record the object type.
+      record_klass_in_profile(klass, mdp, reg2, false);
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the default case count
+    increment_mdp_data_at(mdp,
+                          in_bytes(MultiBranchData::default_count_offset()));
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp,
+                         in_bytes(MultiBranchData::
+                                  default_displacement_offset()));
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+                                                    Register mdp,
+                                                    Register reg2) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Build the base: (index * in_bytes(MultiBranchData::per_case_size()))
+    //                 + in_bytes(MultiBranchData::case_array_offset())
+    movw(reg2, in_bytes(MultiBranchData::per_case_size()));
+    movw(rscratch1, in_bytes(MultiBranchData::case_array_offset()));
+    Assembler::maddw(index, index, reg2, rscratch1);
+
+    // Update the case count
+    increment_mdp_data_at(mdp,
+                          index,
+                          in_bytes(MultiBranchData::relative_count_offset()));
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp,
+                         index,
+                         in_bytes(MultiBranchData::
+                                  relative_displacement_offset()));
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
+  if (state == atos) {
+    MacroAssembler::verify_oop(reg);
+  }
+}
+
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; }
+#endif // !CC_INTERP
+
+
+void InterpreterMacroAssembler::notify_method_entry() {
+  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to
+  // track stack depth.  If it is possible to enter interp_only_mode, we add
+  // the code to check whether the event should be sent.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label L;
+    ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
+    cbzw(r3, L);
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::post_method_entry));
+    bind(L);
+  }
+
+  {
+    SkipIfEqual skip(this, &DTraceMethodProbes, false);
+    get_method(c_rarg1);
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+                 rthread, c_rarg1);
+  }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    get_method(c_rarg1);
+    call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      rthread, c_rarg1);
+  }
+}
+
+
+void InterpreterMacroAssembler::notify_method_exit(
+    TosState state, NotifyMethodExitMode mode) {
+  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to
+  // track stack depth.  If it is possible to enter interp_only_mode, we add
+  // the code to check whether the event should be sent.
+  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+    Label L;
+    // Note: frame::interpreter_frame_result has a dependency on how the
+    // method result is saved across the call to post_method_exit. If this
+    // is changed then the interpreter_frame_result implementation will
+    // need to be updated too.
+
+    // For the c++ interpreter the result is always stored at a known location
+    // in the frame; the template interpreter will leave it on the top of the stack.
+    NOT_CC_INTERP(push(state);)
+    ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
+    cbz(r3, L);
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+    bind(L);
+    NOT_CC_INTERP(pop(state));
+  }
+
+  {
+    SkipIfEqual skip(this, &DTraceMethodProbes, false);
+    NOT_CC_INTERP(push(state));
+    get_method(c_rarg1);
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+                 rthread, c_rarg1);
+    NOT_CC_INTERP(pop(state));
+  }
+}
+
+
+// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+                                                        int increment, int mask,
+                                                        Register scratch, bool preloaded,
+                                                        Condition cond, Label* where) {
+  if (!preloaded) {
+    ldrw(scratch, counter_addr);
+  }
+  add(scratch, scratch, increment);
+  strw(scratch, counter_addr);
+  ands(scratch, scratch, mask);
+  br(cond, *where);
+}
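+
+// Editorial sketch, not generated code: assuming counter_addr holds the
+// profiling/invocation counter word and mask selects its low-order bits,
+// the sequence above behaves roughly like the following C logic:
+//
+//   int c = preloaded ? scratch : *counter_addr;  // counter may be preloaded
+//   c += increment;
+//   *counter_addr = c;
+//   if (/* (c & mask) compared against 0 per `cond` */) goto *where;
+//
+// Callers use this to take a slow path (e.g. counter overflow / OSR checks)
+// only when the masked counter value satisfies the condition.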
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
+                                                  int number_of_arguments) {
+  // interpreter specific
+  //
+  // Note: No need to save/restore rbcp & rlocals pointer since these
+  //       are callee-saved registers and no blocking / GC can happen
+  //       in leaf calls.
+  // Also no need to restore the method register.
+#ifdef ASSERT
+  {
+    Label L;
+    ldr(rscratch1, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+    cbz(rscratch1, L);
+    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
+         " last_sp != NULL");
+    bind(L);
+  }
+#endif /* ASSERT */
+  // super call
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result,
+                                             Register java_thread,
+                                             Register last_java_sp,
+                                             address  entry_point,
+                                             int      number_of_arguments,
+                                             bool     check_exceptions) {
+  // interpreter specific
+  //
+  // Note: Could avoid restoring locals ptr (callee-saved); however, it doesn't
+  //       really make a difference for these runtime calls, since they are
+  //       slow anyway. Btw., bcp must be saved/restored since it may change
+  //       due to GC.
+  // assert(java_thread == noreg, "not expecting a precomputed java thread");
+  save_bcp();
+#ifdef ASSERT
+  {
+    Label L;
+    ldr(rscratch1, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+    cbz(rscratch1, L);
+    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
+         " last_sp != NULL");
+    bind(L);
+  }
+#endif /* ASSERT */
+  // super call
+  MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
+                               entry_point, number_of_arguments,
+                               check_exceptions);
+  // interpreter specific
+  // method oop may have moved so reload from interpreter stack frame
+  get_method(rmethod);
+  restore_bcp();
+  restore_locals();
+  // reload the constant pool cache in case a PermGen GC moved it
+  restore_constant_pool_cache();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP
+#define CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP
+
+#include "assembler_aarch64.inline.hpp"
+#include "interpreter/invocationCounter.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+
+class InterpreterMacroAssembler: public MacroAssembler {
+#ifndef CC_INTERP
+ protected:
+  using MacroAssembler::call_VM_leaf_base;
+
+  // Interpreter-specific versions of call_VM_leaf_base and call_VM_base
+  virtual void call_VM_leaf_base(address entry_point,
+                                 int number_of_arguments);
+
+  virtual void call_VM_base(Register oop_result,
+                            Register java_thread,
+                            Register last_java_sp,
+                            address  entry_point,
+                            int number_of_arguments,
+                            bool check_exceptions);
+
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  // base routine for all dispatches
+  void dispatch_base(TosState state, address* table, bool verifyoop = true);
+#endif // CC_INTERP
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+
+  void load_earlyret_value(TosState state);
+
+#ifdef CC_INTERP
+  void save_bcp()                                          { /*  not needed in c++ interpreter and harmless */ }
+  void restore_bcp()                                       { /*  not needed in c++ interpreter and harmless */ }
+
+  // Helpers for runtime call arguments/results
+  void get_method(Register reg);
+
+#else
+
+  // Interpreter-specific registers
+  void save_bcp() {
+    str(rbcp, Address(rfp, frame::interpreter_frame_bcx_offset * wordSize));
+  }
+
+  void restore_bcp() {
+    ldr(rbcp, Address(rfp, frame::interpreter_frame_bcx_offset * wordSize));
+  }
+
+  void restore_locals() {
+    ldr(rlocals, Address(rfp, frame::interpreter_frame_locals_offset * wordSize));
+  }
+
+  void restore_constant_pool_cache() {
+    ldr(rcpool, Address(rfp, frame::interpreter_frame_cache_offset * wordSize));
+  }
+
+  void get_dispatch();
+
+  // Helpers for runtime call arguments/results
+  void get_method(Register reg) {
+    ldr(reg, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
+  }
+
+  void get_const(Register reg) {
+    get_method(reg);
+    ldr(reg, Address(reg, in_bytes(methodOopDesc::const_offset())));
+  }
+
+  void get_constant_pool(Register reg) {
+    get_const(reg);
+    ldr(reg, Address(reg, in_bytes(constMethodOopDesc::constants_offset())));
+  }
+
+  void get_constant_pool_cache(Register reg) {
+    get_constant_pool(reg);
+    ldr(reg, Address(reg, constantPoolOopDesc::cache_offset_in_bytes()));
+  }
+
+  void get_cpool_and_tags(Register cpool, Register tags) {
+    get_constant_pool(cpool);
+    ldr(tags, Address(cpool, constantPoolOopDesc::tags_offset_in_bytes()));
+  }
+
+  void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+  void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
+
+  void pop_ptr(Register r = r0);
+  void pop_i(Register r = r0);
+  void pop_l(Register r = r0);
+  void pop_f(FloatRegister r = v0);
+  void pop_d(FloatRegister r = v0);
+  void push_ptr(Register r = r0);
+  void push_i(Register r = r0);
+  void push_l(Register r = r0);
+  void push_f(FloatRegister r = v0);
+  void push_d(FloatRegister r = v0);
+
+  void pop(Register r) { ((MacroAssembler*)this)->pop(r); }
+
+  void push(Register r) { ((MacroAssembler*)this)->push(r); }
+
+  void pop(TosState state); // transition vtos -> state
+  void push(TosState state); // transition state -> vtos
+
+  void pop(RegSet regs, Register stack) { ((MacroAssembler*)this)->pop(regs, stack); }
+  void push(RegSet regs, Register stack) { ((MacroAssembler*)this)->push(regs, stack); }
+
+  void empty_expression_stack() {
+    ldr(esp, Address(rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+    // NULL last_sp until next java call
+    str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  }
+
+  // Helpers for swap and dup
+  void load_ptr(int n, Register val);
+  void store_ptr(int n, Register val);
+
+  // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+  // a subtype of super_klass.
+  void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
+
+  // Dispatching
+  void dispatch_prolog(TosState state, int step = 0);
+  void dispatch_epilog(TosState state, int step = 0);
+  // dispatch via rscratch1
+  void dispatch_only(TosState state);
+  // dispatch normal table via rscratch1 (assume rscratch1 is loaded already)
+  void dispatch_only_normal(TosState state);
+  void dispatch_only_noverify(TosState state);
+  // load rscratch1 from [rbcp + step] and dispatch via rscratch1
+  void dispatch_next(TosState state, int step = 0);
+  // load rscratch1 from [rbcp] and dispatch via rscratch1 and table
+  void dispatch_via (TosState state, address* table);
+
+  // jump to an invoked target
+  void prepare_to_jump_from_interpreted();
+  void jump_from_interpreted(Register method, Register temp);
+
+
+  // Returning from interpreted functions
+  //
+  // Removes the current activation (incl. unlocking of monitors)
+  // and sets up the return address.  This code is also used for
+  // exception unwinding. In that case, we do not want to throw
+  // IllegalMonitorStateExceptions, since that might get us into an
+  // infinite rethrow exception loop.
+  // Additionally, this code is used for popFrame and earlyReturn.
+  // In the popFrame case we want to skip throwing an exception,
+  // installing an exception, and notifying jvmdi.
+  // In the earlyReturn case we only want to skip throwing an exception
+  // and installing an exception.
+  void remove_activation(TosState state,
+                         bool throw_monitor_exception = true,
+                         bool install_monitor_exception = true,
+                         bool notify_jvmdi = true);
+#endif // CC_INTERP
+
+  // FIXME: Give us a valid frame at a null check.
+  virtual void null_check(Register reg, int offset = -1) {
+// #ifdef ASSERT
+//     save_bcp();
+//     set_last_Java_frame(esp, rfp, (address) pc());
+// #endif
+    MacroAssembler::null_check(reg, offset);
+// #ifdef ASSERT
+//     reset_last_Java_frame(true, false);
+// #endif
+  }
+
+  // Object locking
+  void lock_object  (Register lock_reg);
+  void unlock_object(Register lock_reg);
+
+#ifndef CC_INTERP
+
+  // Interpreter profiling operations
+  void set_method_data_pointer_for_bcp();
+  void test_method_data_pointer(Register mdp, Label& zero_continue);
+  void verify_method_data_pointer();
+
+  void set_mdp_data_at(Register mdp_in, int constant, Register value);
+  void increment_mdp_data_at(Address data, bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, int constant,
+                             bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
+                             bool decrement = false);
+  void increment_mask_and_jump(Address counter_addr,
+                               int increment, int mask,
+                               Register scratch, bool preloaded,
+                               Condition cond, Label* where);
+  void set_mdp_flag_at(Register mdp_in, int flag_constant);
+  void test_mdp_data_at(Register mdp_in, int offset, Register value,
+                        Register test_value_out,
+                        Label& not_equal_continue);
+
+  void record_klass_in_profile(Register receiver, Register mdp,
+                               Register reg2, bool is_virtual_call);
+  void record_klass_in_profile_helper(Register receiver, Register mdp,
+                                      Register reg2, int start_row,
+                                      Label& done, bool is_virtual_call);
+
+  void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+  void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
+  void update_mdp_by_constant(Register mdp_in, int constant);
+  void update_mdp_for_ret(Register return_bci);
+
+  // narrow int return value
+  void narrow(Register result);
+
+  void profile_taken_branch(Register mdp, Register bumped_count);
+  void profile_not_taken_branch(Register mdp);
+  void profile_call(Register mdp);
+  void profile_final_call(Register mdp);
+  void profile_virtual_call(Register receiver, Register mdp,
+                            Register scratch2,
+                            bool receiver_can_be_null = false);
+  void profile_ret(Register return_bci, Register mdp);
+  void profile_null_seen(Register mdp);
+  void profile_typecheck(Register mdp, Register klass, Register scratch);
+  void profile_typecheck_failed(Register mdp);
+  void profile_switch_default(Register mdp);
+  void profile_switch_case(Register index_in_scratch, Register mdp,
+                           Register scratch2);
+
+  // Debugging
+  // only if +VerifyOops && state == atos
+  void verify_oop(Register reg, TosState state = atos);
+  // only if +VerifyFPU  && (state == ftos || state == dtos)
+  void verify_FPU(int stack_depth, TosState state = ftos);
+
+#endif // !CC_INTERP
+
+  typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+  // support for jvmti/dtrace
+  void notify_method_entry();
+  void notify_method_exit(TosState state, NotifyMethodExitMode mode);
+
+  virtual void _call_Unimplemented(address call_site) {
+    save_bcp();
+    set_last_Java_frame(esp, rfp, (address) pc(), rscratch1);
+    MacroAssembler::_call_Unimplemented(call_site);
+  }
+};
+
+#endif // CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP
+#define CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP
+
+
+// Generation of Interpreter
+//
+  friend class AbstractInterpreterGenerator;
+
+protected:
+
+  void bang_stack_shadow_pages(bool native_call);
+
+private:
+
+  address generate_normal_entry(bool synchronized);
+  address generate_native_entry(bool synchronized);
+  address generate_abstract_entry(void);
+  address generate_math_entry(AbstractInterpreter::MethodKind kind);
+  void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
+  address generate_empty_entry(void);
+  address generate_accessor_entry(void);
+  address generate_Reference_get_entry();
+  address generate_CRC32_update_entry();
+  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+  void lock_method(void);
+  void generate_stack_overflow_check(void);
+
+  void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
+  void generate_counter_overflow(Label* do_continue);
+
+#endif // CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interpreterRT_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,429 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/signature.hpp"
+
+#define __ _masm->
+
+// Implementation of SignatureHandlerGenerator
+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rlocals; }
+Register InterpreterRuntime::SignatureHandlerGenerator::to()   { return sp; }
+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rscratch1; }
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
+
+  switch (_num_int_args) {
+  case 0:
+    __ ldr(c_rarg1, src);
+    _num_int_args++;
+    break;
+  case 1:
+    __ ldr(c_rarg2, src);
+    _num_int_args++;
+    break;
+  case 2:
+    __ ldr(c_rarg3, src);
+    _num_int_args++;
+    break;
+  case 3:
+    __ ldr(c_rarg4, src);
+    _num_int_args++;
+    break;
+  case 4:
+    __ ldr(c_rarg5, src);
+    _num_int_args++;
+    break;
+  case 5:
+    __ ldr(c_rarg6, src);
+    _num_int_args++;
+    break;
+  case 6:
+    __ ldr(c_rarg7, src);
+    _num_int_args++;
+    break;
+  default:
+    __ ldr(r0, src);
+    __ str(r0, Address(to(), _stack_offset));
+    _stack_offset += wordSize;
+    _num_int_args++;
+    break;
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
+
+  switch (_num_int_args) {
+  case 0:
+    __ ldr(c_rarg1, src);
+    _num_int_args++;
+    break;
+  case 1:
+    __ ldr(c_rarg2, src);
+    _num_int_args++;
+    break;
+  case 2:
+    __ ldr(c_rarg3, src);
+    _num_int_args++;
+    break;
+  case 3:
+    __ ldr(c_rarg4, src);
+    _num_int_args++;
+    break;
+  case 4:
+    __ ldr(c_rarg5, src);
+    _num_int_args++;
+    break;
+  case 5:
+    __ ldr(c_rarg6, src);
+    _num_int_args++;
+    break;
+  case 6:
+    __ ldr(c_rarg7, src);
+    _num_int_args++;
+    break;
+  default:
+    __ ldr(r0, src);
+    __ str(r0, Address(to(), _stack_offset));
+    _stack_offset += wordSize;
+    _num_int_args++;
+    break;
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
+
+  if (_num_fp_args < Argument::n_float_register_parameters_c) {
+    __ ldrs(as_FloatRegister(_num_fp_args++), src);
+  } else {
+    __ ldrw(r0, src);
+    __ strw(r0, Address(to(), _stack_offset));
+    _stack_offset += wordSize;
+    _num_fp_args++;
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
+
+  if (_num_fp_args < Argument::n_float_register_parameters_c) {
+    __ ldrd(as_FloatRegister(_num_fp_args++), src);
+  } else {
+    __ ldr(r0, src);
+    __ str(r0, Address(to(), _stack_offset));
+    _stack_offset += wordSize;
+    _num_fp_args++;
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+
+  switch (_num_int_args) {
+  case 0:
+    assert(offset() == 0, "argument register 1 can only be (non-null) receiver");
+    __ add(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset()));
+    _num_int_args++;
+    break;
+  case 1:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg2, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg2, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+  case 2:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg3, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg3, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+  case 3:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg4, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg4, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+  case 4:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg5, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg5, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+  case 5:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg6, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg6, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+  case 6:
+    {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ mov(c_rarg7, 0);
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbz(temp(), L);
+      __ mov(c_rarg7, r0);
+      __ bind(L);
+      _num_int_args++;
+      break;
+    }
+ default:
+   {
+      __ add(r0, from(), Interpreter::local_offset_in_bytes(offset()));
+      __ ldr(temp(), r0);
+      Label L;
+      __ cbnz(temp(), L);
+      __ mov(r0, zr);
+      __ bind(L);
+      __ str(r0, Address(to(), _stack_offset));
+      _stack_offset += wordSize;
+      _num_int_args++;
+      break;
+   }
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+  // generate code to handle arguments
+  iterate(fingerprint);
+
+  // set the call format
+  // n.b. allow extra 1 for the JNI_Env in c_rarg0
+  unsigned int call_format = ((_num_int_args + 1) << 6) | (_num_fp_args << 2);
+
+  switch (method()->result_type()) {
+  case T_VOID:
+    call_format |= MacroAssembler::ret_type_void;
+    break;
+  case T_FLOAT:
+    call_format |= MacroAssembler::ret_type_float;
+    break;
+  case T_DOUBLE:
+    call_format |= MacroAssembler::ret_type_double;
+    break;
+  default:
+    call_format |= MacroAssembler::ret_type_integral;
+    break;
+  }
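+
+  // Editorial worked example (illustration only, not part of the generated
+  // code): for a static native method jint f(jint, jdouble), iterate() leaves
+  // _num_int_args == 2 (the pre-counted mirror/receiver slot plus the jint)
+  // and _num_fp_args == 1, so call_format becomes
+  //   ((2 + 1) << 6) | (1 << 2) | ret_type_integral
+  // i.e. three integer/pointer arguments counting the JNIEnv*, one FP
+  // argument, and the ret_type_* bits saying how the result is returned.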
+
+  // store the call format in the method
+  __ movw(r0, call_format);
+  __ str(r0, Address(rmethod, methodOopDesc::call_format_offset()));
+
+  // return result handler
+  __ lea(r0, ExternalAddress(Interpreter::result_handler(method()->result_type())));
+  __ ret(lr);
+
+  __ flush();
+}
+
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
+
+
+class SlowSignatureHandler
+  : public NativeSignatureIterator {
+ private:
+  address   _from;
+  intptr_t* _to;
+  intptr_t* _int_args;
+  intptr_t* _fp_args;
+  intptr_t* _fp_identifiers;
+  unsigned int _num_int_args;
+  unsigned int _num_fp_args;
+
+  virtual void pass_int()
+  {
+    jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+
+    if (_num_int_args < Argument::n_int_register_parameters_c-1) {
+      *_int_args++ = from_obj;
+      _num_int_args++;
+    } else {
+      *_to++ = from_obj;
+      _num_int_args++;
+    }
+  }
+
+  virtual void pass_long()
+  {
+    intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+    _from -= 2*Interpreter::stackElementSize;
+
+    if (_num_int_args < Argument::n_int_register_parameters_c-1) {
+      *_int_args++ = from_obj;
+      _num_int_args++;
+    } else {
+      *_to++ = from_obj;
+      _num_int_args++;
+    }
+  }
+
+  virtual void pass_object()
+  {
+    intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+
+    if (_num_int_args < Argument::n_int_register_parameters_c-1) {
+      *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t)from_addr;
+      _num_int_args++;
+    } else {
+      *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr;
+      _num_int_args++;
+    }
+  }
+
+  virtual void pass_float()
+  {
+    jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+
+    if (_num_fp_args < Argument::n_float_register_parameters_c) {
+      *_fp_args++ = from_obj;
+      _num_fp_args++;
+    } else {
+      *_to++ = from_obj;
+      _num_fp_args++;
+    }
+  }
+
+  virtual void pass_double()
+  {
+    intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+    _from -= 2*Interpreter::stackElementSize;
+
+    if (_num_fp_args < Argument::n_float_register_parameters_c) {
+      *_fp_args++ = from_obj;
+      *_fp_identifiers |= (1 << _num_fp_args); // mark as double
+      _num_fp_args++;
+    } else {
+      *_to++ = from_obj;
+      _num_fp_args++;
+    }
+  }
+
+ public:
+  SlowSignatureHandler(methodHandle method, address from, intptr_t* to)
+    : NativeSignatureIterator(method)
+  {
+    _from = from;
+    _to   = to;
+
+    _int_args = to - (method->is_static() ? 16 : 17);
+    _fp_args =  to - 8;
+    _fp_identifiers = to - 9;
+    *(int*) _fp_identifiers = 0;
+    _num_int_args = (method->is_static() ? 1 : 0);
+    _num_fp_args = 0;
+  }
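+
+  // Editorial note -- this mapping is read off the stub in
+  // AbstractInterpreterGenerator::generate_slow_signature_handler
+  // (interpreter_aarch64.cpp), so treat it as a description of that code
+  // rather than a separate spec: `to` is the c_rarg3 value passed in,
+  // pointing just past the 18-word scratch area the stub carved out below it.
+  // Relative to `to`, the integer register slots start at to-17 (to-16 for
+  // static methods, whose first slot is left unused per the stub's layout
+  // comment), the float/double tag word sits at to-9, and the eight FP
+  // register slots occupy to-8 .. to-1, which is where the offsets above point.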
+
+  // n.b. allow extra 1 for the JNI_Env in c_rarg0
+  unsigned int get_call_format()
+  {
+    unsigned int call_format = ((_num_int_args + 1) << 6) | (_num_fp_args << 2);
+
+    switch (method()->result_type()) {
+    case T_VOID:
+      call_format |= MacroAssembler::ret_type_void;
+      break;
+    case T_FLOAT:
+      call_format |= MacroAssembler::ret_type_float;
+      break;
+    case T_DOUBLE:
+      call_format |= MacroAssembler::ret_type_double;
+      break;
+    default:
+      call_format |= MacroAssembler::ret_type_integral;
+      break;
+    }
+
+    return call_format;
+  }
+};
+
+
+IRT_ENTRY(address,
+          InterpreterRuntime::slow_signature_handler(JavaThread* thread,
+                                                     methodOopDesc* method,
+                                                     intptr_t* from,
+                                                     intptr_t* to))
+  methodHandle m(thread, (methodOop)method);
+  assert(m->is_native(), "sanity check");
+
+  // handle arguments
+  SlowSignatureHandler ssh(m, (address)from, to);
+  ssh.iterate(UCONST64(-1));
+
+  // set the call format
+  method->set_call_format(ssh.get_call_format());
+
+  // return result handler
+  return Interpreter::result_handler(m->result_type());
+IRT_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interpreterRT_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP
+#define CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP
+
+#include "memory/allocation.hpp"
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+  MacroAssembler* _masm;
+  unsigned int _call_format;
+  unsigned int _num_fp_args;
+  unsigned int _num_int_args;
+  int _stack_offset;
+
+  void pass_int();
+  void pass_long();
+  void pass_float();
+  void pass_double();
+  void pass_object();
+
+ public:
+  // Creation
+  SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+    _masm = new MacroAssembler(buffer);
+    _num_int_args = (method->is_static() ? 1 : 0);
+    _num_fp_args = 0;
+    _stack_offset = 0;
+  }
+
+  // Code generation
+  void generate(uint64_t fingerprint);
+
+  // Code generation support
+  static Register from();
+  static Register to();
+  static Register temp();
+};
+
+#endif // CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interpreter_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+#define __ _masm->
+
+
+address AbstractInterpreterGenerator::generate_slow_signature_handler() {
+  address entry = __ pc();
+
+  __ andr(esp, esp, -16);
+  __ mov(c_rarg3, esp);
+  // rmethod
+  // rlocals
+  // c_rarg3: first stack arg - wordSize
+
+  // adjust sp
+  __ sub(sp, c_rarg3, 18 * wordSize);
+  __ str(lr, Address(__ pre(sp, -2 * wordSize)));
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::slow_signature_handler),
+             rmethod, rlocals, c_rarg3);
+
+  // r0: result handler
+
+  // Stack layout:
+  // rsp: return address           <- sp
+  //      1 garbage
+  //      8 integer args (if static first is unused)
+  //      1 float/double identifiers
+  //      8 double args
+  //        stack args              <- esp
+  //        garbage
+  //        expression stack bottom
+  //        bcp (NULL)
+  //        ...
+
+  // Restore LR
+  __ ldr(lr, Address(__ post(sp, 2 * wordSize)));
+
+  // Do FP first so we can use c_rarg3 as temp
+  __ ldrw(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers
+
+  for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
+    const FloatRegister r = as_FloatRegister(i);
+
+    Label d, done;
+
+    __ tbnz(c_rarg3, i, d);
+    __ ldrs(r, Address(sp, (10 + i) * wordSize));
+    __ b(done);
+    __ bind(d);
+    __ ldrd(r, Address(sp, (10 + i) * wordSize));
+    __ bind(done);
+  }
+
+  // c_rarg0 contains the result from the call of
+  // InterpreterRuntime::slow_signature_handler so we don't touch it
+  // here.  It will be loaded with the JNIEnv* later.
+  __ ldr(c_rarg1, Address(sp, 1 * wordSize));
+  for (int i = c_rarg2->encoding(); i <= c_rarg7->encoding(); i += 2) {
+    Register rm = as_Register(i), rn = as_Register(i+1);
+    __ ldp(rm, rn, Address(sp, i * wordSize));
+  }
+
+  __ add(sp, sp, 18 * wordSize);
+  __ ret(lr);
+
+  return entry;
+}
+
+
+//
+// Various method entries
+//
+
+address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+  // rmethod: methodOop
+  // r13: sender sp
+  // esp: args
+
+  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
+
+  // These don't need a safepoint check because they aren't virtually
+  // callable. We won't enter these intrinsics from compiled code.
+  // If in the future we added an intrinsic which was virtually callable
+  // we'd have to worry about how to safepoint so that this code is used.
+
+  // mathematical functions inlined by compiler
+  // (interpreter must provide identical implementation
+  // in order to avoid monotonicity bugs when switching
+  // from interpreter to compiler in the middle of some
+  // computation)
+  //
+  // stack:
+  //        [ arg ] <-- esp
+  //        [ arg ]
+  // retaddr in lr
+
+  address entry_point = NULL;
+  Register continuation = lr;
+  switch (kind) {
+  case Interpreter::java_lang_math_abs:
+    entry_point = __ pc();
+    __ ldrd(v0, Address(esp));
+    __ fabsd(v0, v0);
+    __ mov(sp, r13); // Restore caller's SP
+    break;
+  case Interpreter::java_lang_math_sqrt:
+    entry_point = __ pc();
+    __ ldrd(v0, Address(esp));
+    __ fsqrtd(v0, v0);
+    __ mov(sp, r13);
+    break;
+  case Interpreter::java_lang_math_sin :
+  case Interpreter::java_lang_math_cos :
+  case Interpreter::java_lang_math_tan :
+  case Interpreter::java_lang_math_log :
+  case Interpreter::java_lang_math_log10 :
+  case Interpreter::java_lang_math_exp :
+    entry_point = __ pc();
+    __ ldrd(v0, Address(esp));
+    __ mov(sp, r13);
+    __ mov(r19, lr);
+    continuation = r19;  // The first callee-saved register
+    generate_transcendental_entry(kind, 1);
+    break;
+  case Interpreter::java_lang_math_pow :
+    entry_point = __ pc();
+    __ mov(r19, lr);
+    continuation = r19;
+    __ ldrd(v0, Address(esp, 2 * Interpreter::stackElementSize));
+    __ ldrd(v1, Address(esp));
+    __ mov(sp, r13);
+    generate_transcendental_entry(kind, 2);
+    break;
+  default:
+    ;
+  }
+  if (entry_point) {
+    __ br(continuation);
+  }
+
+  return entry_point;
+}
+
+  // double trigonometrics and transcendentals
+  // static jdouble dsin(jdouble x);
+  // static jdouble dcos(jdouble x);
+  // static jdouble dtan(jdouble x);
+  // static jdouble dlog(jdouble x);
+  // static jdouble dlog10(jdouble x);
+  // static jdouble dexp(jdouble x);
+  // static jdouble dpow(jdouble x, jdouble y);
+
+void InterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) {
+  address fn;
+  switch (kind) {
+  case Interpreter::java_lang_math_sin :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+    break;
+  case Interpreter::java_lang_math_cos :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+    break;
+  case Interpreter::java_lang_math_tan :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+    break;
+  case Interpreter::java_lang_math_log :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+    break;
+  case Interpreter::java_lang_math_log10 :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+    break;
+  case Interpreter::java_lang_math_exp :
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+    break;
+  case Interpreter::java_lang_math_pow :
+    fpargs = 2;
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  const int gpargs = 0, rtype = 3;
+  __ mov(rscratch1, fn);
+  __ blrt(rscratch1, gpargs, fpargs, rtype);
+}
+
+// Abstract method entry
+// Attempt to execute abstract method. Throw exception
+address InterpreterGenerator::generate_abstract_entry(void) {
+  // rmethod: methodOop
+  // r13: sender SP
+
+  address entry_point = __ pc();
+
+  // abstract method entry
+
+  //  pop return address, reset last_sp to NULL
+  __ empty_expression_stack();
+  __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+
+  // throw exception
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                             InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  return entry_point;
+}
+
+
+// Empty method, generate a very fast return.
+
+address InterpreterGenerator::generate_empty_entry(void) {
+  // rmethod: methodOop
+  // r13: sender sp must set sp to this value on return
+
+  if (!UseFastEmptyMethods) {
+    return NULL;
+  }
+
+  address entry_point = __ pc();
+
+  // If we need a safepoint check, generate full interpreter entry.
+  Label slow_path;
+  {
+    unsigned long offset;
+    assert(SafepointSynchronize::_not_synchronized == 0,
+           "SafepointSynchronize::_not_synchronized");
+    __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
+    __ ldrw(rscratch2, Address(rscratch2, offset));
+    __ cbnz(rscratch2, slow_path);
+  }
+
+  // do nothing for empty methods (do not even increment invocation counter)
+  // Code: _return
+  // _return
+  // return w/o popping parameters
+  __ mov(sp, r13); // Restore caller's SP
+  __ br(lr);
+
+  __ bind(slow_path);
+  (void) generate_normal_entry(false);
+  return entry_point;
+
+}
+
+void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
+
+  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
+  // the days we had adapter frames. When we deoptimize a situation where a
+  // compiled caller calls a compiled callee, the caller will have registers it
+  // expects to survive the call to the callee. If we deoptimize the callee, the
+  // only way we can restore these registers is to have the oldest interpreter
+  // frame that we create restore these values. That is what this routine
+  // will accomplish.
+
+  // At the moment we have modified c2 to not have any callee save registers
+  // so this problem does not exist and this routine is just a place holder.
+
+  assert(f->is_interpreted_frame(), "must be interpreted");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interpreter_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP
+#define CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP
+
+ public:
+
+  // Offset from esp (which points to the last stack element)
+  static int expr_offset_in_bytes(int i) { return stackElementSize * i; }
+
+  // Stack index relative to tos (which points at value)
+  static int expr_index_at(int i)        { return stackElementWords * i; }
+
+  // Already negated by c++ interpreter
+  static int local_index_at(int i) {
+    assert(i <= 0, "local direction already negated");
+    return stackElementWords * i;
+  }
+
+#endif // CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP
+#define CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP
+
+private:
+
+  // FP value associated with _last_Java_sp:
+  intptr_t* volatile        _last_Java_fp;           // pointer is volatile not what it points to
+
+public:
+  // Each arch must define reset, save, restore
+  // These are used by objects that only care about:
+  //  1 - initializing a new state (thread creation, javaCalls)
+  //  2 - saving a current state (javaCalls)
+  //  3 - restoring an old state (javaCalls)
+
+  void clear(void) {
+    // clearing _last_Java_sp must be first
+    _last_Java_sp = NULL;
+    OrderAccess::release();
+    _last_Java_fp = NULL;
+    _last_Java_pc = NULL;
+  }
+
+  void copy(JavaFrameAnchor* src) {
+    _last_Java_fp = src->_last_Java_fp;
+    _last_Java_pc = src->_last_Java_pc;
+    // Must be last so profiler will always see valid frame if has_last_frame() is true
+    OrderAccess::release();
+    _last_Java_sp = src->_last_Java_sp;
+  }
+
+  // Always walkable
+  bool walkable(void) { return true; }
+  // Never anything to do since we are always walkable and can find the return addresses
+  void make_walkable(JavaThread* thread) { }
+
+  intptr_t* last_Java_sp(void) const             { return _last_Java_sp; }
+
+  address last_Java_pc(void)                     { return _last_Java_pc; }
+
+private:
+
+  static ByteSize last_Java_fp_offset()          { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
+
+public:
+
+  void set_last_Java_sp(intptr_t* sp)            { _last_Java_sp = sp; }
+
+  intptr_t*   last_Java_fp(void)                     { return _last_Java_fp; }
+  // Assert (last_Java_sp == NULL || fp == NULL)
+  void set_last_Java_fp(intptr_t* fp)                { _last_Java_fp = fp; }
+
+#endif // CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "runtime/safepoint.hpp"
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*wordSize
+
+// Instead of issuing a LoadLoad barrier we create an address
+// dependency between loads; this might be more efficient.
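+//
+// Editorial sketch of that idea (illustration only): for an incoming object
+// pointer x and the previously loaded safepoint counter c,
+//
+//   robj = (x ^ c) ^ c;   // == x, but now data-dependent on c
+//
+// so the hardware must complete the counter load before the load through
+// robj, giving the ordering a LoadLoad barrier would otherwise provide.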
+
+
+// Common register usage:
+// r0/v0:      result
+// c_rarg0:    jni env
+// c_rarg1:    obj
+// c_rarg2:    jfield id
+
+static const Register robj          = r3;
+static const Register rcounter      = r4;
+static const Register roffset       = r5;
+static const Register rcounter_addr = r6;
+static const Register result        = r7;
+
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+  const char *name;
+  switch (type) {
+    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
+    case T_BYTE:    name = "jni_fast_GetByteField";    break;
+    case T_CHAR:    name = "jni_fast_GetCharField";    break;
+    case T_SHORT:   name = "jni_fast_GetShortField";   break;
+    case T_INT:     name = "jni_fast_GetIntField";     break;
+    case T_LONG:    name = "jni_fast_GetLongField";    break;
+    case T_FLOAT:   name = "jni_fast_GetFloatField";   break;
+    case T_DOUBLE:  name = "jni_fast_GetDoubleField";  break;
+    default:        ShouldNotReachHere();
+  }
+  ResourceMark rm;
+  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
+  CodeBuffer cbuf(blob);
+  MacroAssembler* masm = new MacroAssembler(&cbuf);
+  address fast_entry = __ pc();
+
+  Label slow;
+
+  unsigned long offset;
+  __ adrp(rcounter_addr,
+          SafepointSynchronize::safepoint_counter_addr(), offset);
+  Address safepoint_counter_addr(rcounter_addr, offset);
+  __ ldrw(rcounter, safepoint_counter_addr);
+  __ andw(rscratch1, rcounter, 1);
+  __ cbnzw(rscratch1, slow);
+  __ eor(robj, c_rarg1, rcounter);
+  __ eor(robj, robj, rcounter);               // obj, since
+                                              // robj ^ rcounter ^ rcounter == robj
+                                              // robj is address dependent on rcounter.
+  __ ldr(robj, Address(robj, 0));             // *obj
+  __ lsr(roffset, c_rarg2, 2);                // offset
+
+  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+  speculative_load_pclist[count] = __ pc();   // Used by the segfault handler
+  switch (type) {
+    case T_BOOLEAN: __ ldrb    (result, Address(robj, roffset)); break;
+    case T_BYTE:    __ ldrsb   (result, Address(robj, roffset)); break;
+    case T_CHAR:    __ ldrh    (result, Address(robj, roffset)); break;
+    case T_SHORT:   __ ldrsh   (result, Address(robj, roffset)); break;
+    case T_FLOAT:   __ ldrw    (result, Address(robj, roffset)); break;
+    case T_INT:     __ ldrsw   (result, Address(robj, roffset)); break;
+    case T_DOUBLE:
+    case T_LONG:    __ ldr     (result, Address(robj, roffset)); break;
+    default:        ShouldNotReachHere();
+  }
+
+  // counter_addr is address dependent on result.
+  __ eor(rcounter_addr, rcounter_addr, result);
+  __ eor(rcounter_addr, rcounter_addr, result);
+  __ ldrw(rscratch1, safepoint_counter_addr);
+  __ cmpw(rcounter, rscratch1);
+  __ br (Assembler::NE, slow);
+
+  switch (type) {
+    case T_FLOAT:   __ fmovs(v0, result); break;
+    case T_DOUBLE:  __ fmovd(v0, result); break;
+    default:        __ mov(r0, result);   break;
+  }
+  __ ret(lr);
+
+  slowcase_entry_pclist[count++] = __ pc();
+  __ bind(slow);
+  address slow_case_addr;
+  switch (type) {
+    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
+    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
+    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
+    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
+    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
+    case T_LONG:    slow_case_addr = jni_GetLongField_addr();    break;
+    case T_FLOAT:   slow_case_addr = jni_GetFloatField_addr();   break;
+    case T_DOUBLE:  slow_case_addr = jni_GetDoubleField_addr();  break;
+    default:        ShouldNotReachHere();
+  }
+
+  {
+    __ enter();
+    __ lea(rscratch1, ExternalAddress(slow_case_addr));
+    __ blr(rscratch1);
+    __ maybe_isb();
+    __ leave();
+    __ ret(lr);
+  }
+  __ flush ();
+
+  return fast_entry;
+}
+
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+  return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+  return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+  return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+  return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+  return generate_fast_get_int_field0(T_LONG);
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+  return generate_fast_get_int_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+  return generate_fast_get_int_field0(T_DOUBLE);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/jniTypes_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
+#define CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "prims/jni.h"
+
+// This file holds platform-dependent routines used to write primitive jni
+// types to the array of arguments passed into JavaCalls::call
+
+class JNITypes : AllStatic {
+  // These functions write a java primitive type (in native format)
+  // to a java stack slot array to be passed as an argument to JavaCalls::call.
+  // I.e., they are functionally 'push' operations if they have a 'pos'
+  // formal parameter.  Note that jlong's and jdouble's are written
+  // _in reverse_ of the order in which they appear in the interpreter
+  // stack.  This is because call stubs (see stubGenerator_sparc.cpp)
+  // reverse the argument list constructed by JavaCallArguments (see
+  // javaCalls.hpp).
+
+public:
+  // Ints are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_int(jint  from, intptr_t *to)           { *(jint *)(to +   0  ) =  from; }
+  static inline void    put_int(jint  from, intptr_t *to, int& pos) { *(jint *)(to + pos++) =  from; }
+  static inline void    put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+  // Longs are stored in native format in one JavaCallArgument slot at
+  // *(to+1).
+  static inline void put_long(jlong  from, intptr_t *to) {
+    *(jlong*) (to + 1) = from;
+  }
+
+  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
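+
+  // Illustrative sketch (not part of the original change): packing an int
+  // followed by a long with the routines above fills the slot array as
+  //
+  //   intptr_t slots[3];
+  //   int pos = 0;
+  //   JNITypes::put_int(42, slots, pos);    // writes slots[0], pos -> 1
+  //   JNITypes::put_long(7L, slots, pos);   // writes slots[1 + 1], pos -> 3
+  //
+  // i.e. the long's value lands in the upper of its two slots, matching the
+  // "written in reverse" convention described at the top of the class.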
+
+  // Oops are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_obj(oop  from, intptr_t *to)           { *(oop *)(to +   0  ) =  from; }
+  static inline void    put_obj(oop  from, intptr_t *to, int& pos) { *(oop *)(to + pos++) =  from; }
+  static inline void    put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
+
+  // Floats are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to +   0  ) =  from;  }
+  static inline void    put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
+  static inline void    put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+#undef _JNI_SLOT_OFFSET
+#define _JNI_SLOT_OFFSET 1
+  // Doubles are stored in native word format in one JavaCallArgument
+  // slot at *(to+1).
+  static inline void put_double(jdouble  from, intptr_t *to) {
+    *(jdouble*) (to + 1) = from;
+  }
+
+  static inline void put_double(jdouble  from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack.
+  // No need to worry about alignment on AArch64: the slots are word aligned.
+  static inline jint    get_int   (intptr_t *from) { return *(jint *)   from; }
+  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + _JNI_SLOT_OFFSET); }
+  static inline oop     get_obj   (intptr_t *from) { return *(oop *)    from; }
+  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *) from; }
+  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
+#undef _JNI_SLOT_OFFSET
+};
+
+#endif // CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/jni_aarch64.h	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef _JAVASOFT_JNI_MD_H_
+#define _JAVASOFT_JNI_MD_H_
+
+#if defined(SOLARIS) || defined(LINUX) || defined(_ALLBSD_SOURCE)
+
+
+#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2)))
+  #define JNIEXPORT     __attribute__((visibility("default")))
+  #define JNIIMPORT     __attribute__((visibility("default")))
+#else
+  #define JNIEXPORT
+  #define JNIIMPORT
+#endif
+
+  #define JNICALL
+  typedef int jint;
+#if defined(_LP64) && !defined(__APPLE__)
+  typedef long jlong;
+#else
+  /*
+   * On _LP64 __APPLE__ "long" and "long long" are both 64 bits,
+   * but we use the "long long" typedef to avoid complaints from
+   * the __APPLE__ compiler about fprintf formats.
+   */
+  typedef long long jlong;
+#endif
+
+#else
+  #define JNIEXPORT __declspec(dllexport)
+  #define JNIIMPORT __declspec(dllimport)
+  #define JNICALL __stdcall
+
+  typedef int jint;
+  typedef __int64 jlong;
+#endif
+
+typedef signed char jbyte;
+
+#endif /* !_JAVASOFT_JNI_MD_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "prims/methodHandles.hpp"
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+static RegisterOrConstant constant(int value) {
+  return RegisterOrConstant(value);
+}
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
+  if (VerifyMethodHandles)
+    verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(),
+                 "MH argument is a Class");
+  __ load_heap_oop(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));
+}
+
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, err_msg("%s should be nonzero", xname));
+  return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
+
+#ifdef ASSERT
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj, KlassHandle klass,
+                                 const char* error_message) {
+  oop* klass_addr = klass.raw_value();
+  assert(klass_addr >= SystemDictionaryHandles::Object_klass().raw_value() &&
+         klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
+         "must be one of the SystemDictionaryHandles");
+  Register temp = rscratch2;
+  Register temp2 = rscratch1; // used by MacroAssembler::cmpptr
+  Label L_ok, L_bad;
+  BLOCK_COMMENT("verify_klass {");
+  __ verify_oop(obj);
+  __ cbz(obj, L_bad);
+  __ push(RegSet::of(temp, temp2), sp);
+  __ load_klass(temp, obj);
+  __ cmpptr(temp, ExternalAddress((address) klass_addr));
+  __ br(Assembler::EQ, L_ok);
+  intptr_t super_check_offset = klass->super_check_offset();
+  __ ldr(temp, Address(temp, super_check_offset));
+  __ cmpptr(temp, ExternalAddress((address) klass_addr));
+  __ br(Assembler::EQ, L_ok);
+  __ pop(RegSet::of(temp, temp2), sp);
+  __ bind(L_bad);
+  __ stop(error_message);
+  __ BIND(L_ok);
+  __ pop(RegSet::of(temp, temp2), sp);
+  BLOCK_COMMENT("} verify_klass");
+}
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {  }
+
+#endif //ASSERT
+
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                            bool for_compiler_entry) {
+  assert(method == rmethod, "interpreter calling convention");
+  __ verify_oop(method);
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+
+    __ ldrw(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
+    __ cbzw(rscratch1, run_compiled_code);
+    __ ldr(rscratch1, Address(method, methodOopDesc::interpreter_entry_offset()));
+    __ br(rscratch1);
+    __ BIND(run_compiled_code);
+  }
+
+  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
+                                                     methodOopDesc::from_interpreted_offset();
+  __ ldr(rscratch1, Address(method, entry_offset));
+  __ br(rscratch1);
+}
+
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2,
+                                        bool for_compiler_entry) {
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2);
+  assert(recv != noreg, "required register");
+  assert(method_temp == rmethod, "required register for loading method");
+
+  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  // the following assumes that a methodOop is normally compressed in the vmtarget field:
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())));
+  __ verify_oop(method_temp);
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // make sure recv is already on stack
+    __ load_sized_value(temp2,
+                        Address(method_temp, methodOopDesc::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    Label L;
+    __ ldr(rscratch1, __ argument_address(temp2, -1));
+    __ cmp(recv, rscratch1);
+    __ br(Assembler::EQ, L);
+    __ ldr(r0, __ argument_address(temp2, -1));
+    __ hlt(0);
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+// Code generation
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    __ hlt(0);           // empty stubs make SG sick
+    return NULL;
+  }
+
+  // r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // rmethod: methodOop
+  // r3: argument locator (parameter slot count, added to rsp)
+  // r1: used as temp to hold mh or receiver
+  // r0, r11: garbage temps, blown away
+  Register argp   = r3;   // argument list ptr, live on error paths
+  Register temp   = r0;
+  Register mh     = r1;   // MH receiver; dies quickly and is recycled
+
+  // here's where control starts out:
+  __ align(CodeEntryAlignment);
+  address entry_point = __ pc();
+
+  if (VerifyMethodHandles) {
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ ldrb(rscratch1, Address(rmethod, methodOopDesc::intrinsic_id_offset_in_bytes()));
+    __ cmp(rscratch1, (int) iid);
+    __ br(Assembler::EQ, L);
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad methodOopDesc::intrinsic_id");
+    }
+    __ hlt(0);
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task:  Find out how big the argument list is.
+  Address r3_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ load_sized_value(argp,
+                        Address(rmethod, methodOopDesc::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    r3_first_arg_addr = __ argument_address(argp, -1);
+  } else {
+    DEBUG_ONLY(argp = noreg);
+  }
+
+  if (!is_signature_polymorphic_static(iid)) {
+    __ ldr(mh, r3_first_arg_addr);
+    DEBUG_ONLY(argp = noreg);
+  }
+
+  // r3_first_arg_addr is live!
+
+  trace_method_handle_interpreter_entry(_masm, iid);
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry);
+
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ ldr(recv = r2, r3_first_arg_addr);
+    }
+    DEBUG_ONLY(argp = noreg);
+    Register rmember = rmethod;  // MemberName ptr; incoming method ptr is dead now
+    __ pop(rmember);             // extract last argument
+    generate_method_handle_dispatch(_masm, iid, recv, rmember, not_for_compiler_entry);
+  }
+
+  return entry_point;
+}
+
+
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+  Register temp1 = r10;
+  Register temp2 = r11;
+  Register temp3 = r14;  // r13 is live by this point: it contains the sender SP
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
+    assert_different_registers(temp1,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp2,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp3,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+  }
+
+  assert_different_registers(temp1, temp2, temp3, receiver_reg);
+  assert_different_registers(temp1, temp2, temp3, member_reg);
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.vmentry.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, rmethod, temp1, for_compiler_entry);
+
+  } else {
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
+
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      __ verify_oop(receiver_reg);
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_oop(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_oop(temp1_recv_klass);
+      }
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(temp2_defc, member_clazz);
+        load_klass_from_Class(_masm, temp2_defc);
+        __ verify_oop(temp2_defc);
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
+        // If we get here, the type check failed!
+        __ hlt(0);
+        // __ STOP("receiver class disagrees with MemberName.clazz");
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
+    }
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the trailing argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+    //  r13 - interpreter linkage (if interpreted)  ??? FIXME
+    //  r1 ... r0 - compiler arguments (if compiled)
+
+    Label L_incompatible_class_change_error;
+    switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ load_heap_oop(rmethod, member_vmtarget);
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ load_heap_oop(rmethod, member_vmtarget);
+      break;
+
+    case vmIntrinsics::_linkToVirtual:
+    {
+      // same as TemplateTable::invokevirtual,
+      // minus the CP setup and profiling:
+
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // pick out the vtable index from the MemberName, and then we can discard it:
+      Register temp2_index = temp2;
+      __ ldr(temp2_index, member_vmindex);
+
+      if (VerifyMethodHandles) {
+        Label L_index_ok;
+        __ cmpw(temp2_index, 0U);
+        __ br(Assembler::GE, L_index_ok);
+        __ hlt(0);
+        __ BIND(L_index_ok);
+      }
+
+      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+      // get target methodOop & entry point
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, rmethod);
+      break;
+    }
+
+    case vmIntrinsics::_linkToInterface:
+    {
+      // same as TemplateTable::invokeinterface
+      // (minus the CP setup and profiling, with different argument motion)
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+      __ load_heap_oop(temp3_intf, member_clazz);
+      load_klass_from_Class(_masm, temp3_intf);
+      __ verify_oop(temp3_intf);
+
+      Register rindex = rmethod;
+      __ ldr(rindex, member_vmindex);
+      if (VerifyMethodHandles) {
+        Label L;
+        __ cmpw(rindex, 0U);
+        __ br(Assembler::GE, L);
+        __ hlt(0);
+        __ bind(L);
+      }
+
+      // given intf, index, and recv klass, dispatch to the implementation method
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // note: next two args must be the same:
+                                 rindex, rmethod,
+                                 temp2,
+                                 L_incompatible_class_change_error);
+      break;
+    }
+
+    default:
+      fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      break;
+    }
+
+    // live at this point:  rmethod, r13 (if interpreted)
+
+    // After figuring out which concrete method to call, jump into it.
+    // Note that this works in the interpreter with no data motion.
+    // But the compiled version will require that r2_recv be shifted out.
+    __ verify_oop(rmethod);
+    jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry);
+    if (iid == vmIntrinsics::_linkToInterface) {
+      __ bind(L_incompatible_class_change_error);
+      __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+    }
+  }
+}
+
+#ifndef PRODUCT
+void trace_method_handle_stub(const char* adaptername,
+                              oop mh,
+                              intptr_t* saved_regs,
+                              intptr_t* entry_sp) {  }
+
+// The stub wraps the arguments in a struct on the stack to avoid
+// dealing with the different calling conventions for passing 6
+// arguments.
+struct MethodHandleStubArguments {
+  const char* adaptername;
+  oopDesc* mh;
+  intptr_t* saved_regs;
+  intptr_t* entry_sp;
+};
+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) {  }
+
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {  }
+#endif //PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/methodHandles_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+// Adapters
+static unsigned int adapter_code_size() {
+  return 32000 DEBUG_ONLY(+ 120000);
+}
+
+public:
+
+  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
+
+  static void verify_klass(MacroAssembler* _masm,
+                           Register obj, KlassHandle klass,
+                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) {
+    verify_klass(_masm, mh_reg, SystemDictionaryHandles::MethodHandle_klass(),
+                 "reference is a MH");
+  }
+
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+  // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+  // Takes care of special dispatch from single stepping too.
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2,
+                                  bool for_compiler_entry);
+
+  static Register saved_last_sp_register() {
+    // Should be in sharedRuntime, not here.
+    return noreg;
+  }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+void NativeCall::verify() { ; }
+
+address NativeCall::destination() const {
+  address addr = (address)this;
+  address destination = instruction_address() + displacement();
+
+  // Do we use a trampoline stub for this call?
+  CodeBlob* cb = CodeCache::find_blob_unsafe(addr);   // Otherwise we get an assertion if the nmethod is a zombie.
+  assert(cb && cb->is_nmethod(), "sanity");
+  nmethod *nm = (nmethod *)cb;
+  if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+    // Yes we do, so get the destination from the trampoline stub.
+    const address trampoline_stub_addr = destination;
+    destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
+  }
+
+  return destination;
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+//
+// Add parameter assert_lock to switch off assertion
+// during code generation, where no patching lock is needed.
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
+  assert(!assert_lock ||
+         (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
+         "concurrent code patching");
+
+  ResourceMark rm;
+  int code_size = NativeInstruction::instruction_size;
+  address addr_call = addr_at(0);
+  assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
+
+  // Patch the constant in the call's trampoline stub.
+  address trampoline_stub_addr = get_trampoline();
+  if (trampoline_stub_addr != NULL) {
+    assert (! is_NativeCallTrampolineStub_at(dest), "chained trampolines");
+    nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
+  }
+
+  // Patch the call.
+  if (Assembler::reachable_from_branch_at(addr_call, dest)) {
+    set_destination(dest);
+  } else {
+    assert (trampoline_stub_addr != NULL, "we need a trampoline");
+    set_destination(trampoline_stub_addr);
+  }
+
+  ICache::invalidate_range(addr_call, instruction_size);
+}
+
+address NativeCall::get_trampoline() {
+  address call_addr = addr_at(0);
+
+  CodeBlob *code = CodeCache::find_blob(call_addr);
+  assert(code != NULL, "Could not find the containing code blob");
+
+  address bl_destination
+    = MacroAssembler::pd_call_destination(call_addr);
+  if (code->content_contains(bl_destination) &&
+      is_NativeCallTrampolineStub_at(bl_destination))
+    return bl_destination;
+
+  // If the code blob is not an nmethod, this is because we get here from the
+  // CodeBlob constructor, which is called from within the nmethod constructor.
+  return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
+}
+
+// Inserts a native call instruction at a given pc
+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
+
+//-------------------------------------------------------------------
+
+void NativeMovConstReg::verify() {
+  // make sure the code pattern is actually a mov reg64, imm64 instruction sequence
+}
+
+intptr_t NativeMovConstReg::data() const {
+  // das(uint64_t(instruction_address()),2);
+  address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+  if (maybe_cpool_ref(instruction_address())) {
+    return *(intptr_t*)addr;
+  } else {
+    return (intptr_t)addr;
+  }
+}
+
+void NativeMovConstReg::set_data(intptr_t x) {
+  if (maybe_cpool_ref(instruction_address())) {
+    address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+    *(intptr_t*)addr = x;
+  } else {
+    MacroAssembler::pd_patch_instruction(instruction_address(), (address)x);
+    ICache::invalidate_range(instruction_address(), instruction_size);
+  }
+};
+
+void NativeMovConstReg::print() {
+  tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
+                instruction_address(), data());
+}
+
+//-------------------------------------------------------------------
+
+address NativeMovRegMem::instruction_address() const      { return addr_at(instruction_offset); }
+
+int NativeMovRegMem::offset() const  {
+  address pc = instruction_address();
+  unsigned insn = *(unsigned*)pc;
+  if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
+    address addr = MacroAssembler::target_addr_for_insn(pc);
+    return *addr;
+  } else {
+    return (int)(intptr_t)MacroAssembler::target_addr_for_insn(instruction_address());
+  }
+}
+
+void NativeMovRegMem::set_offset(int x) {
+  address pc = instruction_address();
+  unsigned insn = *(unsigned*)pc;
+  if (maybe_cpool_ref(pc)) {
+    address addr = MacroAssembler::target_addr_for_insn(pc);
+    *(long*)addr = x;
+  } else {
+    MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x));
+    ICache::invalidate_range(instruction_address(), instruction_size);
+  }
+}
+
+void NativeMovRegMem::verify() {
+#ifdef ASSERT
+  address dest = MacroAssembler::target_addr_for_insn(instruction_address());
+#endif
+}
+
+//--------------------------------------------------------------------------------
+
+void NativeJump::verify() { ; }
+
+
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
+}
+
+
+address NativeJump::jump_destination() const          {
+  address dest = MacroAssembler::target_addr_for_insn(instruction_address());
+
+  // We use jump to self as the unresolved address which the inline
+  // cache code (and relocs) know about
+
+  // return -1 if jump to self
+  dest = (dest == (address) this) ? (address) -1 : dest;
+  return dest;
+}
+
+void NativeJump::set_jump_destination(address dest) {
+  // We use jump to self as the unresolved address which the inline
+  // cache code (and relocs) know about
+  if (dest == (address) -1)
+    dest = instruction_address();
+
+  int bytes = MacroAssembler::pd_patch_instruction_size(instruction_address(), dest);
+  ICache::invalidate_range(instruction_address(), bytes);
+};
+
+//-------------------------------------------------------------------
+
+bool NativeInstruction::is_safepoint_poll() {
+  // a safepoint_poll is implemented in two steps as either
+  //
+  // adrp(reg, polling_page);
+  // ldr(zr, [reg, #offset]);
+  //
+  // or
+  //
+  // mov(reg, polling_page);
+  // ldr(zr, [reg, #offset]);
+  //
+  // However, we cannot rely on the polling page address load always
+  // directly preceding the read from the page.  C1 does that, but C2
+  // has to do the load and the read as two independent instruction
+  // generation steps.  That's because with a single macro sequence the
+  // generic C2 code can only add the oop map before the mov/adrp, while
+  // the trap handler expects an oop map to be associated with the
+  // load.  With the load scheduled as a prior step the oop map goes
+  // where it is needed.
+  //
+  // So all we can do here is check that the marked instruction is a
+  // load word to zr.
+  return is_ldrw_to_zr(address(this));
+}
+
+bool NativeInstruction::is_adrp_at(address instr) {
+  unsigned insn = *(unsigned*)instr;
+  return (Instruction_aarch64::extract(insn, 31, 24) & 0b10011111) == 0b10010000;
+}
+
+bool NativeInstruction::is_ldr_literal_at(address instr) {
+  unsigned insn = *(unsigned*)instr;
+  return (Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000;
+}
+
+bool NativeInstruction::is_ldrw_to_zr(address instr) {
+  unsigned insn = *(unsigned*)instr;
+  return (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
+          Instruction_aarch64::extract(insn, 4, 0) == 0b11111);
+}
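+
+// Note (illustrative, not in the original change): the pattern above matches
+// an LDR (immediate, unsigned offset) of a 32-bit value whose destination
+// register field (bits 4:0) is 31, i.e. an instruction of the form
+//   ldr wzr, [Xn, #imm]
+// which is exactly the second half of the safepoint poll sequences listed
+// in is_safepoint_poll() above.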
+
+bool NativeInstruction::is_movz() {
+  return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b10100101;
+}
+
+bool NativeInstruction::is_movk() {
+  return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101;
+}
+
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+  return uint_at(0) == 0xd4bbd5a1; // dcps1 #0xdead
+}
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  *(juint*)code_pos = 0xd4bbd5a1; // dcps1 #0xdead
+}
+
+// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie)
+
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+  assert(nativeInstruction_at(verified_entry)->is_jump_or_nop()
+         || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
+         "Aarch64 cannot replace non-jump with jump");
+
+  // Patch this nmethod atomically.
+  if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
+    ptrdiff_t disp = dest - verified_entry;
+    guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
+
+    unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
+    *(unsigned int*)verified_entry = insn;
+  } else {
+    // We use an illegal instruction for marking a method as
+    // not_entrant or zombie.
+    NativeIllegalInstruction::insert(verified_entry);
+  }
+
+  ICache::invalidate_range(verified_entry, instruction_size);
+}
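+
+// Note (illustrative, not in the original change): the guarantee above limits
+// the displacement to +/- 2^27 bytes (+/- 128 MB), which is the reach of an
+// unconditional B/BL immediate on AArch64; entries further away fall back to
+// the SIGILL-based not_entrant/zombie marking.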
+
+void NativeGeneralJump::verify() {  }
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+  CodeBuffer cb(code_pos, instruction_size);
+  MacroAssembler a(&cb);
+
+  a.movptr(rscratch1, (uintptr_t)entry);
+  a.br(rscratch1);
+
+  ICache::invalidate_range(code_pos, instruction_size);
+}
+
+// MT-safe patching of a long jump instruction.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  ShouldNotCallThis();
+}
+
+bool NativeInstruction::is_dtrace_trap() { return false; }
+
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
+  return ptr_at(data_offset);
+}
+
+void NativeCallTrampolineStub::set_destination(address new_destination) {
+  set_ptr_at(data_offset, new_destination);
+  OrderAccess::release();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
+#define CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
+
+#include "asm/assembler.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+#include "utilities/top.hpp"
+
+// We have interfaces for the following instructions:
+// - NativeInstruction
+// - - NativeCall
+// - - NativeMovConstReg
+// - - NativeMovConstRegPatching
+// - - NativeMovRegMem
+// - - NativeMovRegMemPatching
+// - - NativeJump
+// - - NativeIllegalInstruction
+// - - NativeGeneralJump
+// - - NativeReturn
+// - - NativeReturnX (return with argument)
+// - - NativePushConst
+// - - NativeTstRegMem
+
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+class NativeInstruction VALUE_OBJ_CLASS_SPEC {
+  friend class Relocation;
+  friend bool is_NativeCallTrampolineStub_at(address);
+ public:
+  enum { instruction_size = 4 };
+  inline bool is_nop();
+  bool is_dtrace_trap();
+  inline bool is_illegal();
+  inline bool is_return();
+  bool is_jump();
+  inline bool is_jump_or_nop();
+  inline bool is_cond_jump();
+  bool is_safepoint_poll();
+  inline bool is_mov_literal64();
+  bool is_movz();
+  bool is_movk();
+  bool is_sigill_zombie_not_entrant();
+
+ protected:
+  address addr_at(int offset) const    { return address(this) + offset; }
+
+  s_char sbyte_at(int offset) const    { return *(s_char*) addr_at(offset); }
+  u_char ubyte_at(int offset) const    { return *(u_char*) addr_at(offset); }
+
+  jint int_at(int offset) const        { return *(jint*) addr_at(offset); }
+  juint uint_at(int offset) const      { return *(juint*) addr_at(offset); }
+
+  address ptr_at(int offset) const    { return *(address*) addr_at(offset); }
+
+  oop  oop_at (int offset) const       { return *(oop*) addr_at(offset); }
+
+
+  void set_char_at(int offset, char c)        { *addr_at(offset) = (u_char)c; }
+  void set_int_at(int offset, jint  i)        { *(jint*)addr_at(offset) = i; }
+  void set_uint_at(int offset, jint  i)       { *(juint*)addr_at(offset) = i; }
+  void set_ptr_at (int offset, address  ptr)  { *(address*) addr_at(offset) = ptr; }
+  void set_oop_at (int offset, oop  o)        { *(oop*) addr_at(offset) = o; }
+
+  // This doesn't really do anything on AArch64, but it is the place where
+  // cache invalidation belongs, generically:
+  void wrote(int offset);
+
+ public:
+
+  // unit test stuff
+  static void test() {}                 // override for testing
+
+  inline friend NativeInstruction* nativeInstruction_at(address address);
+
+  static bool is_adrp_at(address instr);
+  static bool is_ldr_literal_at(address instr);
+  static bool is_ldrw_to_zr(address instr);
+
+  static bool maybe_cpool_ref(address instr) {
+    return is_adrp_at(instr) || is_ldr_literal_at(instr);
+  }
+};
+
+inline NativeInstruction* nativeInstruction_at(address address) {
+  return (NativeInstruction*)address;
+}
+
+// The natural type of an AArch64 instruction is uint32_t
+inline NativeInstruction* nativeInstruction_at(uint32_t *address) {
+  return (NativeInstruction*)address;
+}
+
+inline NativeCall* nativeCall_at(address address);
+// The NativeCall is an abstraction for accessing/manipulating native bl
+// instructions (used to manipulate inline caches, primitive & dll calls, etc.).
+
+class NativeCall: public NativeInstruction {
+ public:
+  enum Aarch64_specific_constants {
+    instruction_size            =    4,
+    instruction_offset          =    0,
+    displacement_offset         =    0,
+    return_address_offset       =    4
+  };
+
+  enum { cache_line_size = BytesPerWord };  // conservative estimate!
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(return_address_offset); }
+  int   displacement() const                { return (int_at(displacement_offset) << 6) >> 4; }
+  address displacement_address() const      { return addr_at(displacement_offset); }
+  address return_address() const            { return addr_at(return_address_offset); }
+  address destination() const;
+  void  set_destination(address dest)       {
+    int offset = dest - instruction_address();
+    unsigned int insn = 0b100101 << 26;
+    assert((offset & 3) == 0, "should be");
+    offset >>= 2;
+    offset &= (1 << 26) - 1; // mask off insn part
+    insn |= offset;
+    set_int_at(displacement_offset, insn);
+    ICache::invalidate_range(instruction_address(), instruction_size);
+  }
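+
+  // Worked example (illustrative, not in the original change): for a bl whose
+  // imm26 field is 0x0000004, displacement() computes
+  //   (insn << 6) >> 4  ==  sign_extend(imm26) * 4  ==  0x10,
+  // and set_destination() performs the inverse, packing
+  //   ((dest - pc) >> 2) & 0x3ffffff
+  // back into the low 26 bits under the 0b100101 bl opcode.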
+
+  void  verify_alignment()                       { ; }
+  void  verify();
+  void  print();
+
+  // Creation
+  inline friend NativeCall* nativeCall_at(address address);
+  inline friend NativeCall* nativeCall_before(address return_address);
+
+  static bool is_call_at(address instr) {
+    const uint32_t insn = (*(uint32_t*)instr);
+    return (insn >> 26) == 0b100101;
+  }
+
+  static bool is_call_before(address return_address) {
+    return is_call_at(return_address - NativeCall::return_address_offset);
+  }
+
+  // MT-safe patching of a call instruction.
+  static void insert(address code_pos, address entry);
+
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+
+  // Similar to replace_mt_safe, but just changes the destination.  The
+  // important thing is that free-running threads are able to execute
+  // this call instruction at all times.  If the call is an immediate BL
+  // instruction we can simply rely on atomicity of 32-bit writes to
+  // make sure other threads will see no intermediate states.
+
+  // We cannot rely on locks here, since the free-running threads must run at
+  // full speed.
+  //
+  // Used in the runtime linkage of calls; see class CompiledIC.
+  // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+
+  // The parameter assert_lock disables the assertion during code generation.
+  void set_destination_mt_safe(address dest, bool assert_lock = true);
+
+  address get_trampoline();
+};
+
+inline NativeCall* nativeCall_at(address address) {
+  NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset);
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
+
+inline NativeCall* nativeCall_before(address return_address) {
+  NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset);
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
+
+// An interface for accessing/manipulating native mov reg, imm instructions.
+// (used to manipulate inlined 64-bit data calls, etc.)
+class NativeMovConstReg: public NativeInstruction {
+ public:
+  enum Aarch64_specific_constants {
+    instruction_size            =    3 * 4, // movz, movk, movk.  See movptr().
+    instruction_offset          =    0,
+    displacement_offset         =    0,
+  };
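+
+  // Illustrative example (not in the original change): movptr() materialises
+  // a 48-bit constant such as 0x123456789abc with the three instructions
+  // counted in instruction_size above:
+  //   movz x8, #0x9abc
+  //   movk x8, #0x5678, lsl #16
+  //   movk x8, #0x1234, lsl #32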
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  {
+    if (nativeInstruction_at(instruction_address())->is_movz())
+      // Assume movz, movk, movk
+      return addr_at(instruction_size);
+    else if (is_adrp_at(instruction_address()))
+      return addr_at(2*4);
+    else if (is_ldr_literal_at(instruction_address()))
+      return(addr_at(4));
+    assert(false, "Unknown instruction in NativeMovConstReg");
+    return NULL;
+  }
+
+  intptr_t data() const;
+  void  set_data(intptr_t x);
+
+  void flush() {
+    if (! maybe_cpool_ref(instruction_address())) {
+      ICache::invalidate_range(instruction_address(), instruction_size);
+    }
+  }
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test() {}
+
+  // Creation
+  inline friend NativeMovConstReg* nativeMovConstReg_at(address address);
+  inline friend NativeMovConstReg* nativeMovConstReg_before(address address);
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address address) {
+  NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+inline NativeMovConstReg* nativeMovConstReg_before(address address) {
+  NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+class NativeMovConstRegPatching: public NativeMovConstReg {
+ private:
+  friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) {
+    NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset);
+#ifdef ASSERT
+    test->verify();
+#endif
+    return test;
+  }
+};
+
+// An interface for accessing/manipulating native moves of the form:
+//      mov[b/w/l/q] [reg + offset], reg   (instruction_code_reg2mem)
+//      mov[b/w/l/q] reg, [reg+offset]     (instruction_code_mem2reg)
+//      mov[s/z]x[w/b/q] [reg + offset], reg
+//      fld_s  [reg+offset]
+//      fld_d  [reg+offset]
+//      fstp_s [reg + offset]
+//      fstp_d [reg + offset]
+//      mov_literal64  scratch,<pointer> ; mov[b/w/l/q] 0(scratch),reg | mov[b/w/l/q] reg,0(scratch)
+//
+// Warning: These routines must be able to handle any instruction sequences
+// that are generated as a result of the load/store byte,word,long
+// macros.  For example: The load_unsigned_byte instruction generates
+// an xor reg,reg inst prior to generating the movb instruction.  This
+// class must skip the xor instruction.
+
+class NativeMovRegMem: public NativeInstruction {
+  enum AArch64_specific_constants {
+    instruction_size            =    4,
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    next_instruction_offset     =    4
+  };
+
+ public:
+  // helper
+  int instruction_start() const;
+
+  address instruction_address() const;
+
+  address next_instruction_address() const;
+
+  int   offset() const;
+
+  void  set_offset(int x);
+
+  void  add_offset_in_bytes(int add_offset)     { set_offset ( ( offset() + add_offset ) ); }
+
+  void verify();
+  void print ();
+
+  // unit test stuff
+  static void test() {}
+
+ private:
+  inline friend NativeMovRegMem* nativeMovRegMem_at (address address);
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at (address address) {
+  NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+class NativeMovRegMemPatching: public NativeMovRegMem {
+ private:
+  friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { Unimplemented(); return 0; }
+};
+
+// An interface for accessing/manipulating native leal instruction of form:
+//        leal reg, [reg + offset]
+
+class NativeLoadAddress: public NativeMovRegMem {
+  static const bool has_rex = true;
+  static const int rex_size = 1;
+ public:
+
+  void verify();
+  void print ();
+
+  // unit test stuff
+  static void test() {}
+};
+
+class NativeJump: public NativeInstruction {
+ public:
+  enum AArch64_specific_constants {
+    instruction_size            =    4,
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    next_instruction_offset     =    4
+  };
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+  address jump_destination() const;
+  void set_jump_destination(address dest);
+
+  // Creation
+  inline friend NativeJump* nativeJump_at(address address);
+
+  void verify();
+
+  // Unit testing stuff
+  static void test() {}
+
+  // Insertion of native jump instruction
+  static void insert(address code_pos, address entry);
+  // MT-safe insertion of native jump at verified method entry
+  static void check_verified_entry_alignment(address entry, address verified_entry);
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+inline NativeJump* nativeJump_at(address address) {
+  NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset);
+#ifdef ASSERT
+  jump->verify();
+#endif
+  return jump;
+}
+
+class NativeGeneralJump: public NativeJump {
+public:
+  enum AArch64_specific_constants {
+    instruction_size            =    4 * 4,
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    next_instruction_offset     =    4 * 4
+  };
+  static void insert_unconditional(address code_pos, address entry);
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+  static void verify();
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address address) {
+  NativeGeneralJump* jump = (NativeGeneralJump*)(address);
+  debug_only(jump->verify();)
+  return jump;
+}
+
+class NativePopReg : public NativeInstruction {
+ public:
+  // Insert a pop instruction
+  static void insert(address code_pos, Register reg);
+};
+
+
+class NativeIllegalInstruction: public NativeInstruction {
+ public:
+  // Insert illegal opcode at a specific address
+  static void insert(address code_pos);
+};
+
+// return instruction that does not pop values off the stack
+class NativeReturn: public NativeInstruction {
+ public:
+};
+
+// return instruction that does pop values off the stack
+class NativeReturnX: public NativeInstruction {
+ public:
+};
+
+// Simple test vs memory
+class NativeTstRegMem: public NativeInstruction {
+ public:
+};
+
+inline bool NativeInstruction::is_nop()         {
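+  // 0xd503201f is the architectural AArch64 NOP encoding (HINT #0).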
+  uint32_t insn = *(uint32_t*)addr_at(0);
+  return insn == 0xd503201f;
+}
+
+inline bool NativeInstruction::is_jump() {
+  uint32_t insn = *(uint32_t*)addr_at(0);
+
+  if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
+    // Unconditional branch (immediate)
+    return true;
+  } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
+    // Conditional branch (immediate)
+    return true;
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
+    // Compare & branch (immediate)
+    return true;
+  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
+    // Test & branch (immediate)
+    return true;
+  } else
+    return false;
+}
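+
+// Illustrative example (not exhaustive): 0x14000001 encodes "b .+4", and
+// Instruction_aarch64::extract(0x14000001, 30, 26) == 0b00101, so is_jump()
+// returns true; 0x54000000 ("b.eq .") matches the 0b0101010 conditional case.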
+
+inline bool NativeInstruction::is_jump_or_nop() {
+  return is_nop() || is_jump();
+}
+
+// Call trampoline stubs.
+class NativeCallTrampolineStub : public NativeInstruction {
+ public:
+
+  enum AArch64_specific_constants {
+    instruction_size            =    4 * 4,
+    instruction_offset          =    0,
+    data_offset                 =    2 * 4,
+    next_instruction_offset     =    4 * 4
+  };
+
+  address destination(nmethod *nm = NULL) const;
+  void set_destination(address new_destination);
+  ptrdiff_t destination_offset() const;
+};
+
+inline bool is_NativeCallTrampolineStub_at(address addr) {
+  // Ensure that the stub is exactly
+  //      ldr   xscratch1, L
+  //      br    xscratch1
+  // L:
+  uint32_t *i = (uint32_t *)addr;
+  return i[0] == 0x58000048 && i[1] == 0xd61f0100;
+}
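+
+// Decoding the two constants above for reference: 0x58000048 is
+// "ldr x8, .+8" (a 64-bit literal load of the word at data_offset into
+// rscratch1 == x8) and 0xd61f0100 is "br x8".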
+
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
+  assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found");
+  return (NativeCallTrampolineStub*)addr;
+}
+
+#endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/registerMap_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP
+#define CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP
+
+// machine-dependent implementation for register maps
+  friend class frame;
+
+ private:
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  // Since there is none, we just return NULL.
+  // See registerMap_sparc.hpp for an example of grabbing registers
+  // from register save areas of a standard layout.
+  address pd_location(VMReg reg) const { return NULL; }
+
+  // no PD state to clear or copy:
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/register_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_aarch64.hpp"
+
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1;
+
+const int ConcreteRegisterImpl::max_fpr
+  = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1);
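+
+// Each register is described by two 32-bit VMReg slots (see
+// ConcreteRegisterImpl::number_of_registers in register_aarch64.hpp), hence
+// the "<< 1" above: with 32 registers of each kind, max_gpr == 64 and
+// max_fpr == 128.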
+
+const char* RegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
+    "rscratch1", "rscratch2",
+    "r10", "r11", "r12", "r13", "r14", "r15", "r16",
+    "r17", "r18", "r19",
+    "resp", "rdispatch", "rbcp", "r23", "rlocals", "rmonitors", "rcpool", "rheapbase",
+    "rthread", "rfp", "lr", "sp"
+  };
+  return is_valid() ? names[encoding()] : "noreg";
+}
+
+const char* FloatRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+  };
+  return is_valid() ? names[encoding()] : "noreg";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/register_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_REGISTER_AARCH64_HPP
+#define CPU_AARCH64_VM_REGISTER_AARCH64_HPP
+
+#include "asm/register.hpp"
+#include "vm_version_aarch64.hpp"
+
+class VMRegImpl;
+typedef VMRegImpl* VMReg;
+
+// Use Register as shortcut
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+inline Register as_Register(int encoding) {
+  return (Register)(intptr_t) encoding;
+}
+
+class RegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers      = 32,
+    number_of_byte_registers = 32
+  };
+
+  // derived registers, offsets, and addresses
+  Register successor() const                          { return as_Register(encoding() + 1); }
+
+  // construction
+  inline friend Register as_Register(int encoding);
+
+  VMReg as_VMReg();
+
+  // accessors
+  int   encoding() const                         { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  bool  is_valid() const                         { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+  bool  has_byte_register() const                { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; }
+  const char* name() const;
+  int   encoding_nocheck() const                 { return (intptr_t)this; }
+
+  // Return the bit which represents this register.  This is intended
+  // to be ORed into a bitmask: for usage see class RegSet below.
+  unsigned long bit(bool should_set = true) const { return should_set ? 1UL << encoding() : 0; }
+};
+
+// The integer registers of the aarch64 architecture
+
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+
+CONSTANT_REGISTER_DECLARATION(Register, r0,    (0));
+CONSTANT_REGISTER_DECLARATION(Register, r1,    (1));
+CONSTANT_REGISTER_DECLARATION(Register, r2,    (2));
+CONSTANT_REGISTER_DECLARATION(Register, r3,    (3));
+CONSTANT_REGISTER_DECLARATION(Register, r4,    (4));
+CONSTANT_REGISTER_DECLARATION(Register, r5,    (5));
+CONSTANT_REGISTER_DECLARATION(Register, r6,    (6));
+CONSTANT_REGISTER_DECLARATION(Register, r7,    (7));
+CONSTANT_REGISTER_DECLARATION(Register, r8,    (8));
+CONSTANT_REGISTER_DECLARATION(Register, r9,    (9));
+CONSTANT_REGISTER_DECLARATION(Register, r10,  (10));
+CONSTANT_REGISTER_DECLARATION(Register, r11,  (11));
+CONSTANT_REGISTER_DECLARATION(Register, r12,  (12));
+CONSTANT_REGISTER_DECLARATION(Register, r13,  (13));
+CONSTANT_REGISTER_DECLARATION(Register, r14,  (14));
+CONSTANT_REGISTER_DECLARATION(Register, r15,  (15));
+CONSTANT_REGISTER_DECLARATION(Register, r16,  (16));
+CONSTANT_REGISTER_DECLARATION(Register, r17,  (17));
+CONSTANT_REGISTER_DECLARATION(Register, r18,  (18));
+CONSTANT_REGISTER_DECLARATION(Register, r19,  (19));
+CONSTANT_REGISTER_DECLARATION(Register, r20,  (20));
+CONSTANT_REGISTER_DECLARATION(Register, r21,  (21));
+CONSTANT_REGISTER_DECLARATION(Register, r22,  (22));
+CONSTANT_REGISTER_DECLARATION(Register, r23,  (23));
+CONSTANT_REGISTER_DECLARATION(Register, r24,  (24));
+CONSTANT_REGISTER_DECLARATION(Register, r25,  (25));
+CONSTANT_REGISTER_DECLARATION(Register, r26,  (26));
+CONSTANT_REGISTER_DECLARATION(Register, r27,  (27));
+CONSTANT_REGISTER_DECLARATION(Register, r28,  (28));
+CONSTANT_REGISTER_DECLARATION(Register, r29,  (29));
+CONSTANT_REGISTER_DECLARATION(Register, r30,  (30));
+
+CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31));
+CONSTANT_REGISTER_DECLARATION(Register, zr,  (32));
+CONSTANT_REGISTER_DECLARATION(Register, sp,  (33));
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+inline FloatRegister as_FloatRegister(int encoding) {
+  return (FloatRegister)(intptr_t) encoding;
+}
+
+// The implementation of floating point registers for the architecture
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers = 32
+  };
+
+  // construction
+  inline friend FloatRegister as_FloatRegister(int encoding);
+
+  VMReg as_VMReg();
+
+  // derived registers, offsets, and addresses
+  FloatRegister successor() const                          { return as_FloatRegister(encoding() + 1); }
+
+  // accessors
+  int   encoding() const                          { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  int   encoding_nocheck() const                         { return (intptr_t)this; }
+  bool  is_valid() const                          { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+  const char* name() const;
+
+};
+
+// The float registers of the AARCH64 architecture
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v0     , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v1     , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v2     , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v3     , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v4     , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v5     , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v6     , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v7     , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v8     , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v9     , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v10    , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v11    , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v12    , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v13    , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v14    , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v15    , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v16    , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v17    , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v18    , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v19    , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v20    , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v21    , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v22    , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v23    , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v24    , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v25    , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v26    , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v27    , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v28    , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v29    , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v30    , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, v31    , (31));
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+  // A big enough number for C2: all the registers plus flags
+  // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+  // There is no requirement that any ordering here matches any ordering c2 gives
+  // its optoregs.
+
+    number_of_registers = (2 * RegisterImpl::number_of_registers +
+                           2 * FloatRegisterImpl::number_of_registers +
+                           1) // flags
+  };
+
+  // added to make it compile
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+// A set of registers
+class RegSet {
+  uint32_t _bitset;
+
+  RegSet(uint32_t bitset) : _bitset(bitset) { }
+
+public:
+
+  RegSet() : _bitset(0) { }
+
+  RegSet(Register r1) : _bitset(r1->bit()) { }
+
+  RegSet operator+(const RegSet aSet) const {
+    RegSet result(_bitset | aSet._bitset);
+    return result;
+  }
+
+  RegSet operator-(const RegSet aSet) const {
+    RegSet result(_bitset & ~aSet._bitset);
+    return result;
+  }
+
+  RegSet &operator+=(const RegSet aSet) {
+    *this = *this + aSet;
+    return *this;
+  }
+
+  static RegSet of(Register r1) {
+    return RegSet(r1);
+  }
+
+  static RegSet of(Register r1, Register r2) {
+    return of(r1) + r2;
+  }
+
+  static RegSet of(Register r1, Register r2, Register r3) {
+    return of(r1, r2) + r3;
+  }
+
+  static RegSet of(Register r1, Register r2, Register r3, Register r4) {
+    return of(r1, r2, r3) + r4;
+  }
+
+  static RegSet range(Register start, Register end) {
+    uint32_t bits = ~0;
+    bits <<= start->encoding();
+    bits <<= 31 - end->encoding();
+    bits >>= 31 - end->encoding();
+
+    return RegSet(bits);
+  }
+
+  uint32_t bits() const { return _bitset; }
+};
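+
+// Illustrative usage (a sketch, not taken from the original source):
+//   RegSet saved = RegSet::range(r19, r28) + RegSet::of(r0, r1);
+//   uint32_t mask = saved.bits();   // one bit per register encoding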
+
+#endif // CPU_AARCH64_VM_REGISTER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/register_definitions_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/register.hpp"
+#include "register_aarch64.hpp"
+# include "interp_masm_aarch64.hpp"
+
+REGISTER_DEFINITION(Register, r0);
+REGISTER_DEFINITION(Register, r1);
+REGISTER_DEFINITION(Register, r2);
+REGISTER_DEFINITION(Register, r3);
+REGISTER_DEFINITION(Register, r4);
+REGISTER_DEFINITION(Register, r5);
+REGISTER_DEFINITION(Register, r6);
+REGISTER_DEFINITION(Register, r7);
+REGISTER_DEFINITION(Register, r8);
+REGISTER_DEFINITION(Register, r9);
+REGISTER_DEFINITION(Register, r10);
+REGISTER_DEFINITION(Register, r11);
+REGISTER_DEFINITION(Register, r12);
+REGISTER_DEFINITION(Register, r13);
+REGISTER_DEFINITION(Register, r14);
+REGISTER_DEFINITION(Register, r15);
+REGISTER_DEFINITION(Register, r16);
+REGISTER_DEFINITION(Register, r17);
+REGISTER_DEFINITION(Register, r18);
+REGISTER_DEFINITION(Register, r19);
+REGISTER_DEFINITION(Register, r20);
+REGISTER_DEFINITION(Register, r21);
+REGISTER_DEFINITION(Register, r22);
+REGISTER_DEFINITION(Register, r23);
+REGISTER_DEFINITION(Register, r24);
+REGISTER_DEFINITION(Register, r25);
+REGISTER_DEFINITION(Register, r26);
+REGISTER_DEFINITION(Register, r27);
+REGISTER_DEFINITION(Register, r28);
+REGISTER_DEFINITION(Register, r29);
+REGISTER_DEFINITION(Register, r30);
+REGISTER_DEFINITION(Register, sp);
+
+REGISTER_DEFINITION(FloatRegister, v0);
+REGISTER_DEFINITION(FloatRegister, v1);
+REGISTER_DEFINITION(FloatRegister, v2);
+REGISTER_DEFINITION(FloatRegister, v3);
+REGISTER_DEFINITION(FloatRegister, v4);
+REGISTER_DEFINITION(FloatRegister, v5);
+REGISTER_DEFINITION(FloatRegister, v6);
+REGISTER_DEFINITION(FloatRegister, v7);
+REGISTER_DEFINITION(FloatRegister, v8);
+REGISTER_DEFINITION(FloatRegister, v9);
+REGISTER_DEFINITION(FloatRegister, v10);
+REGISTER_DEFINITION(FloatRegister, v11);
+REGISTER_DEFINITION(FloatRegister, v12);
+REGISTER_DEFINITION(FloatRegister, v13);
+REGISTER_DEFINITION(FloatRegister, v14);
+REGISTER_DEFINITION(FloatRegister, v15);
+REGISTER_DEFINITION(FloatRegister, v16);
+REGISTER_DEFINITION(FloatRegister, v17);
+REGISTER_DEFINITION(FloatRegister, v18);
+REGISTER_DEFINITION(FloatRegister, v19);
+REGISTER_DEFINITION(FloatRegister, v20);
+REGISTER_DEFINITION(FloatRegister, v21);
+REGISTER_DEFINITION(FloatRegister, v22);
+REGISTER_DEFINITION(FloatRegister, v23);
+REGISTER_DEFINITION(FloatRegister, v24);
+REGISTER_DEFINITION(FloatRegister, v25);
+REGISTER_DEFINITION(FloatRegister, v26);
+REGISTER_DEFINITION(FloatRegister, v27);
+REGISTER_DEFINITION(FloatRegister, v28);
+REGISTER_DEFINITION(FloatRegister, v29);
+REGISTER_DEFINITION(FloatRegister, v30);
+REGISTER_DEFINITION(FloatRegister, v31);
+
+REGISTER_DEFINITION(Register, zr);
+
+REGISTER_DEFINITION(Register, c_rarg0);
+REGISTER_DEFINITION(Register, c_rarg1);
+REGISTER_DEFINITION(Register, c_rarg2);
+REGISTER_DEFINITION(Register, c_rarg3);
+REGISTER_DEFINITION(Register, c_rarg4);
+REGISTER_DEFINITION(Register, c_rarg5);
+REGISTER_DEFINITION(Register, c_rarg6);
+REGISTER_DEFINITION(Register, c_rarg7);
+
+REGISTER_DEFINITION(FloatRegister, c_farg0);
+REGISTER_DEFINITION(FloatRegister, c_farg1);
+REGISTER_DEFINITION(FloatRegister, c_farg2);
+REGISTER_DEFINITION(FloatRegister, c_farg3);
+REGISTER_DEFINITION(FloatRegister, c_farg4);
+REGISTER_DEFINITION(FloatRegister, c_farg5);
+REGISTER_DEFINITION(FloatRegister, c_farg6);
+REGISTER_DEFINITION(FloatRegister, c_farg7);
+
+REGISTER_DEFINITION(Register, j_rarg0);
+REGISTER_DEFINITION(Register, j_rarg1);
+REGISTER_DEFINITION(Register, j_rarg2);
+REGISTER_DEFINITION(Register, j_rarg3);
+REGISTER_DEFINITION(Register, j_rarg4);
+REGISTER_DEFINITION(Register, j_rarg5);
+REGISTER_DEFINITION(Register, j_rarg6);
+REGISTER_DEFINITION(Register, j_rarg7);
+
+REGISTER_DEFINITION(FloatRegister, j_farg0);
+REGISTER_DEFINITION(FloatRegister, j_farg1);
+REGISTER_DEFINITION(FloatRegister, j_farg2);
+REGISTER_DEFINITION(FloatRegister, j_farg3);
+REGISTER_DEFINITION(FloatRegister, j_farg4);
+REGISTER_DEFINITION(FloatRegister, j_farg5);
+REGISTER_DEFINITION(FloatRegister, j_farg6);
+REGISTER_DEFINITION(FloatRegister, j_farg7);
+
+REGISTER_DEFINITION(Register, rscratch1);
+REGISTER_DEFINITION(Register, rscratch2);
+REGISTER_DEFINITION(Register, esp);
+REGISTER_DEFINITION(Register, rdispatch);
+REGISTER_DEFINITION(Register, rcpool);
+REGISTER_DEFINITION(Register, rmonitors);
+REGISTER_DEFINITION(Register, rlocals);
+REGISTER_DEFINITION(Register, rmethod);
+REGISTER_DEFINITION(Register, rbcp);
+
+REGISTER_DEFINITION(Register, lr);
+REGISTER_DEFINITION(Register, rfp);
+REGISTER_DEFINITION(Register, rthread);
+REGISTER_DEFINITION(Register, rheapbase);
+
+REGISTER_DEFINITION(Register, r31_sp);
+
+// TODO : x86 uses rbp to save SP in method handle code
+// we may need to do the same with fp
+// REGISTER_DEFINITION(Register, rbp_mh_SP_save)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+  if (verify_only) {
+    return;
+  }
+
+  int bytes;
+
+  switch(type()) {
+  case relocInfo::oop_type:
+    {
+      oop_Relocation *reloc = (oop_Relocation *)this;
+      if (NativeInstruction::is_ldr_literal_at(addr())) {
+        address constptr = (address)code()->oop_addr_at(reloc->oop_index());
+        bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr);
+        assert(*(address*)constptr == x, "error in oop relocation");
+      } else {
+        bytes = MacroAssembler::patch_oop(addr(), x);
+      }
+    }
+    break;
+  default:
+    bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
+    break;
+  }
+  ICache::invalidate_range(addr(), bytes);
+}
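+
+// Note on the oop_type case above (summarising the code, not adding behaviour):
+// if the site is an ldr from the constant pool, the instruction is re-pointed
+// at the nmethod's oop slot for this index, which must already hold x;
+// otherwise the oop is encoded in a mov-immediate sequence and
+// MacroAssembler::patch_oop rewrites it in place.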
+
+address Relocation::pd_call_destination(address orig_addr) {
+  assert(is_call(), "should be a call here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline) {
+      return nativeCallTrampolineStub_at(trampoline)->destination();
+    }
+  }
+  if (orig_addr != NULL) {
+    address new_addr = MacroAssembler::pd_call_destination(orig_addr);
+    // If call is branch to self, don't try to relocate it, just leave it
+    // as branch to self. This happens during code generation if the code
+    // buffer expands. It will be relocated to the trampoline above once
+    // code generation is complete.
+    new_addr = (new_addr == orig_addr) ? addr() : new_addr;
+    return new_addr;
+  }
+  return MacroAssembler::pd_call_destination(addr());
+}
+
+
+void Relocation::pd_set_call_destination(address x) {
+  assert(is_call(), "should be a call here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline) {
+      nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+      return;
+    }
+  }
+  MacroAssembler::pd_patch_instruction(addr(), x);
+  assert(pd_call_destination(addr()) == x, "fail in reloc");
+}
+
+address* Relocation::pd_address_in_code() {
+  return (address*)(addr() + 8);
+}
+
+
+address Relocation::pd_get_address_from_code() {
+  return MacroAssembler::pd_call_destination(addr());
+}
+
+// !!! FIXME AARCH64 - just made next 3 methods up from Intel code so check them
+
+int Relocation::pd_breakpoint_size() {
+  // minimum breakpoint size, in short words
+  return NativeInstruction::instruction_size / sizeof(short);
+}
+
+void Relocation::pd_swap_in_breakpoint(address x, short* instrs, int instrlen) {
+  Untested("pd_swap_in_breakpoint");
+  if (instrs != NULL) {
+    assert(instrlen * sizeof(short) == NativeInstruction::instruction_size, "enough instrlen in reloc. data");
+    for (int i = 0; i < instrlen; i++) {
+      instrs[i] = ((short*)x)[i];
+    }
+  }
+  NativeIllegalInstruction::insert(x);
+}
+
+
+void Relocation::pd_swap_out_breakpoint(address x, short* instrs, int instrlen) {
+  Untested("pd_swap_out_breakpoint");
+  assert(NativeInstruction::instruction_size == sizeof(short), "right address unit for update");
+  NativeInstruction* ni = nativeInstruction_at(x);
+  *(short*)ni->addr_at(0) = instrs[0];
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+  if (NativeInstruction::maybe_cpool_ref(addr())) {
+    address old_addr = old_addr_for(addr(), src, dest);
+    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
+  }
+}
+
+void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest)  {
+  if (NativeInstruction::maybe_cpool_ref(addr())) {
+    address old_addr = old_addr_for(addr(), src, dest);
+    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP
+#define CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP
+
+  // machine-dependent parts of class relocInfo
+ private:
+  enum {
+    // Intel instructions are byte-aligned.
+    // FIXME for AARCH64
+    offset_unit        =  1,
+    format_width       =  2
+  };
+
+#endif // CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/runtime_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#ifdef COMPILER2
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/vmreg.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "opto/runtime.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#endif
+
+
+// This file should really contain the code for generating the OptoRuntime
+// exception_blob. However, that code uses SimpleRuntimeFrame, which currently
+// only exists in sharedRuntime_x86_64.cpp. Once there is a sharedRuntime_<arch>.hpp
+// file and SimpleRuntimeFrame can move there, the exception_blob code will
+// move here, where it belongs.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,3127 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/compiledICHolderOop.hpp"
+#include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
+
+#define __ masm->
+
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
+class SimpleRuntimeFrame {
+
+  public:
+
+  // Most of the runtime stubs have this simple frame layout.
+  // This class exists to make the layout shared in one place.
+  // Offsets are for compiler stack slots, which are jints.
+  enum layout {
+    // The frame sender code expects that rbp will be in the "natural" place and
+    // will override any oopMap setting for it. We must therefore force the layout
+    // so that it agrees with the frame sender code.
+    // we don't expect any arg reg save area so aarch64 asserts that
+    // frame::arg_reg_save_area_bytes == 0
+    rbp_off = 0,
+    rbp_off2,
+    return_off, return_off2,
+    framesize
+  };
+};
+
+// FIXME -- this is used by C1
+class RegisterSaver {
+ public:
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+  static void restore_live_registers(MacroAssembler* masm);
+
+  // Offsets into the register save area
+  // Used by deoptimization when it is managing result register
+  // values on its own
+
+  static int r0_offset_in_bytes(void)    { return (32 + r0->encoding()) * wordSize; }
+  static int reg_offset_in_bytes(Register r)    { return r0_offset_in_bytes() + r->encoding() * wordSize; }
+  static int rmethod_offset_in_bytes(void)    { return reg_offset_in_bytes(rmethod); }
+  static int rscratch1_offset_in_bytes(void)    { return (32 + rscratch1->encoding()) * wordSize; }
+  static int v0_offset_in_bytes(void)   { return 0; }
+  static int return_offset_in_bytes(void) { return (32 /* floats*/ + 31 /* gregs*/) * wordSize; }
+
+  // During deoptimization only the result registers need to be restored,
+  // all the other values have already been extracted.
+  static void restore_result_registers(MacroAssembler* masm);
+
+    // Capture info about frame layout
+  enum layout {
+                fpu_state_off = 0,
+                fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
+                // The frame sender code expects that rfp will be in
+                // the "natural" place and will override any oopMap
+                // setting for it. We must therefore force the layout
+                // so that it agrees with the frame sender code.
+                r0_off = fpu_state_off+FPUStateSizeInWords,
+                rfp_off = r0_off + 30 * 2,
+                return_off = rfp_off + 2,      // slot for return address
+                reg_save_size = return_off + 2};
+
+};
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
+                                     reg_save_size*BytesPerInt, 16);
+  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
+  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
+  // The caller will allocate additional_frame_words
+  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
+  // CodeBlob frame size is in words.
+  int frame_size_in_words = frame_size_in_bytes / wordSize;
+  *total_frame_words = frame_size_in_words;
+
+  // Save Integer and Float registers.
+
+  __ enter();
+  __ push_CPU_state();
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+
+  for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
+    Register r = as_Register(i);
+    if (r < rheapbase && r != rscratch1 && r != rscratch2) {
+      int sp_offset = 2 * (i + 32); // SP offsets are in 4-byte words,
+                                    // register slots are 8 bytes
+                                    // wide, 32 floating-point
+                                    // registers
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                r->as_VMReg());
+    }
+  }
+
+  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+    FloatRegister r = as_FloatRegister(i);
+    int sp_offset = 2 * i;
+    oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                              r->as_VMReg());
+  }
+
+  return oop_map;
+}
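+
+// Consistency note (illustrative): the 32 eight-byte float save slots precede
+// the general registers, so r0 lands at 2 * (0 + 32) = 64 four-byte stack
+// slots, i.e. 256 bytes -- matching r0_offset_in_bytes() == (32 + 0) * wordSize.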
+
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+  __ pop_CPU_state();
+  __ leave();
+}
+
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+
+  // Just restore result register. Only used by deoptimization. By
+  // now any callee save register that needs to be restored to a c2
+  // caller of the deoptee has been extracted into the vframeArray
+  // and will be stuffed into the c2i adapter we create for later
+  // restoration so only result registers need to be restored here.
+
+  // Restore fp result register
+  __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
+  // Restore integer result register
+  __ ldr(r0, Address(sp, r0_offset_in_bytes()));
+
+  // Pop all of the register save area off the stack
+  __ add(sp, sp, round_to(return_offset_in_bytes(), 16));
+}
+
+// Is vector's size (in bytes) bigger than a size saved by default?
+// 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
+bool SharedRuntime::is_wide_vector(int size) {
+  return size > 16;
+}
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no ABI restrictions. Since we must observe ABI restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset_in(VMReg r) {
+  // Account for saved rfp and lr
+  // This should really be in_preserve_stack_slots
+  return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
+}
+
+static int reg2offset_out(VMReg r) {
+  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
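+
+// Example (illustrative): an incoming argument in stack slot 0 maps to byte
+// offset (0 + 4) * VMRegImpl::stack_slot_size == 16 via reg2offset_in(),
+// i.e. just past the two saved words (rfp and lr) at the frame base.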
+
+template <class T> static const T& min (const T& a, const T& b) {
+  return (a > b) ? b : a;
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
+// quantities.  Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot, 0(sp),
+// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
+// values up to RegisterImpl::number_of_registers are the 64-bit
+// integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words,
+// which are 64-bit.  The OUTPUTS are in 32-bit units.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static JNI
+// methods with small numbers of arguments without having to shuffle
+// the arguments at all. Since we control the Java ABI we ought to at
+// least get some advantage out of it.
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+
+  // Create the mapping between argument positions and
+  // registers.
+  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
+    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7
+  };
+  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+
+  uint int_args = 0;
+  uint fp_args = 0;
+  uint stk_args = 0; // inc by 2 each time
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return round_to(stk_args, 2);
+}
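+
+// Worked example (a sketch, not from the original source): for arguments
+// (int, long, float), sig_bt is {T_INT, T_LONG, T_VOID, T_FLOAT}; the loop
+// above assigns the int to j_rarg0 (set1), the long to j_rarg1 (set2), skips
+// the T_VOID half, and assigns the float to j_farg0, so stk_args stays 0 and
+// the function returns 0.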
+
+// Patch the caller's callsite with entry to compiled code if it exists.
+static void patch_callers_callsite(MacroAssembler *masm) {
+  Label L;
+  __ verify_oop(rmethod);
+  __ ldr(rscratch1, Address(rmethod, in_bytes(methodOopDesc::code_offset())));
+  __ cbz(rscratch1, L);
+
+  __ enter();
+  __ push_CPU_state();
+
+  // VM needs caller's callsite
+  // VM needs target method
+  // This needs to be a long call since we will relocate this adapter to
+  // the codeBuffer and it may not reach
+
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+
+  __ mov(c_rarg0, rmethod);
+  __ mov(c_rarg1, lr);
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
+  __ blrt(rscratch1, 2, 0, 0);
+  __ maybe_isb();
+  __ membar(Assembler::LoadLoad | Assembler::LoadStore);
+
+  __ pop_CPU_state();
+  // restore sp
+  __ leave();
+  __ bind(L);
+}
+
+static void gen_c2i_adapter(MacroAssembler *masm,
+                            int total_args_passed,
+                            int comp_args_on_stack,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs,
+                            Label& skip_fixup) {
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all.  We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one).  Check for a
+  // compiled target.  If there is one, we need to patch the caller's call.
+  patch_callers_callsite(masm);
+
+  __ bind(skip_fixup);
+
+  int words_pushed = 0;
+
+  // Since all args are passed on the stack, total_args_passed *
+  // Interpreter::stackElementSize is the space we need.
+
+  int extraspace = total_args_passed * Interpreter::stackElementSize;
+
+  __ mov(r13, sp);
+
+  // stack is aligned, keep it that way
+  extraspace = round_to(extraspace, 2*wordSize);
+
+  if (extraspace)
+    __ sub(sp, sp, extraspace);
+
+  // Now write the args into the outgoing interpreter space
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // offset to start parameters
+    int st_off   = (total_args_passed - i - 1) * Interpreter::stackElementSize;
+    int next_off = st_off - Interpreter::stackElementSize;
+
+    // Say 4 args:
+    // i   st_off
+    // 0   32 T_LONG
+    // 1   24 T_VOID
+    // 2   16 T_OBJECT
+    // 3    8 T_BOOL
+    // -    0 return address
+    //
+    // However, to make things extra confusing: because we can fit a long/double in
+    // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
+    // leaves one slot empty and only stores to a single slot. In this case the
+    // slot that is occupied is the T_VOID slot. See, I said it was confusing.
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // memory to memory use rscratch1
+      int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
+                    + extraspace
+                    + words_pushed * wordSize);
+      if (!r_2->is_valid()) {
+        // sign extend??
+        __ ldrw(rscratch1, Address(sp, ld_off));
+        __ str(rscratch1, Address(sp, st_off));
+
+      } else {
+
+        __ ldr(rscratch1, Address(sp, ld_off));
+
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // ld_off == LSW, ld_off+wordSize == MSW
+          // st_off == MSW, next_off == LSW
+          __ str(rscratch1, Address(sp, next_off));
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mov(rscratch1, 0xdeadffffdeadaaaaul);
+          __ str(rscratch1, Address(sp, st_off));
+#endif /* ASSERT */
+        } else {
+          __ str(rscratch1, Address(sp, st_off));
+        }
+      }
+    } else if (r_1->is_Register()) {
+      Register r = r_1->as_Register();
+      if (!r_2->is_valid()) {
+        // must be only an int (or less) so move only 32 bits to slot
+        // why not sign extend??
+        __ str(r, Address(sp, st_off));
+      } else {
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // long/double in gpr
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mov(rscratch1, 0xdeadffffdeadaaabul);
+          __ str(rscratch1, Address(sp, st_off));
+#endif /* ASSERT */
+          __ str(r, Address(sp, next_off));
+        } else {
+          __ str(r, Address(sp, st_off));
+        }
+      }
+    } else {
+      assert(r_1->is_FloatRegister(), "");
+      if (!r_2->is_valid()) {
+        // only a float, use just part of the slot
+        __ strs(r_1->as_FloatRegister(), Address(sp, st_off));
+      } else {
+#ifdef ASSERT
+        // Overwrite the unused slot with known junk
+        __ mov(rscratch1, 0xdeadffffdeadaaacul);
+        __ str(rscratch1, Address(sp, st_off));
+#endif /* ASSERT */
+        __ strd(r_1->as_FloatRegister(), Address(sp, next_off));
+      }
+    }
+  }
+
+  __ mov(esp, sp); // Interp expects args on caller's expression stack
+
+  __ ldr(rscratch1, Address(rmethod, in_bytes(methodOopDesc::interpreter_entry_offset())));
+  __ br(rscratch1);
+}
+
+
+static void gen_i2c_adapter(MacroAssembler *masm,
+                            int total_args_passed,
+                            int comp_args_on_stack,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+
+  // Note: r13 contains the senderSP on entry. We must preserve it since
+  // we may do a i2c -> c2i transition if we lose a race where compiled
+  // code goes non-entrant while we get args ready.
+
+  // In addition we use r13 to locate all the interpreter args because
+  // we must align the stack to 16 bytes.
+
+  // Adapters are frameless.
+
+  // An i2c adapter is frameless because the *caller* frame, which is
+  // interpreted, routinely repairs its own esp (from
+  // interpreter_frame_last_sp), even if a callee has modified the
+  // stack pointer.  It also recalculates and aligns sp.
+
+  // A c2i adapter is frameless because the *callee* frame, which is
+  // interpreted, routinely repairs its caller's sp (from sender_sp,
+  // which is set up via the senderSP register).
+
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+#if 0
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, rax, r11,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2ce ");
+#endif
+  }
+
+  // Cut-out for having no stack args.
+  int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+  if (comp_args_on_stack) {
+    __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
+    __ andr(sp, rscratch1, -16);
+  }
+
+  // Will jump to the compiled code just as if compiled code was doing it.
+  // Pre-load the register-jump target early, to schedule it better.
+  __ ldr(rscratch1, Address(rmethod, in_bytes(methodOopDesc::from_compiled_offset())));
+
+  // Now generate the shuffle code.
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Pick up 0, 1 or 2 words from SP+offset.
+
+    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+            "scrambled load targets?");
+    // Load in argument order going down.
+    int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
+    // Point to interpreter value (vs. tag)
+    int next_off = ld_off - Interpreter::stackElementSize;
+    //
+    //
+    //
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // Convert stack slot to an SP offset (+ wordSize to account for return address )
+      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
+      if (!r_2->is_valid()) {
+        // sign extend???
+        __ ldrsw(rscratch2, Address(esp, ld_off));
+        __ str(rscratch2, Address(sp, st_off));
+      } else {
+        //
+        // We are using two optoregs. This can be either T_OBJECT,
+        // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
+        // two slots but only uses one for the T_LONG or T_DOUBLE case,
+        // so we must adjust where to pick up the data to match the
+        // interpreter.
+        //
+        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
+        // are accessed as negative so LSW is at LOW address
+
+        // ld_off is MSW so get LSW
+        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
+                           next_off : ld_off;
+        __ ldr(rscratch2, Address(esp, offset));
+        // st_off is LSW (i.e. reg.first())
+        __ str(rscratch2, Address(sp, st_off));
+      }
+    } else if (r_1->is_Register()) {  // Register argument
+      Register r = r_1->as_Register();
+      if (r_2->is_valid()) {
+        //
+        // We are using two VMRegs. This can be either T_OBJECT,
+        // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
+        // two slots but only uses one for the T_LONG or T_DOUBLE case,
+        // so we must adjust where to pick up the data to match the
+        // interpreter.
+
+        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
+                           next_off : ld_off;
+
+        // this can be a misaligned move
+        __ ldr(r, Address(esp, offset));
+      } else {
+        // sign extend and use a full word?
+        __ ldrw(r, Address(esp, ld_off));
+      }
+    } else {
+      if (!r_2->is_valid()) {
+        __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
+      } else {
+        __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
+      }
+    }
+  }
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the VM will find it there should this case occur.
+
+  __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
+
+  __ br(rscratch1);
+}
+
+#ifdef BUILTIN_SIM
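+// Build a human-readable adapter name for the simulator from the Java
+// signature.  For example (following the mapping in the switch below), an
+// (int, long, Object) signature would produce "i2c(ILO)".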
+static void generate_i2c_adapter_name(char *result, int total_args_passed, const BasicType *sig_bt)
+{
+  strcpy(result, "i2c(");
+  int idx = 4;
+  for (int i = 0; i < total_args_passed; i++) {
+    switch(sig_bt[i]) {
+    case T_BOOLEAN:
+      result[idx++] = 'Z';
+      break;
+    case T_CHAR:
+      result[idx++] = 'C';
+      break;
+    case T_FLOAT:
+      result[idx++] = 'F';
+      break;
+    case T_DOUBLE:
+      assert((i < (total_args_passed - 1)) && (sig_bt[i+1] == T_VOID),
+             "double must be followed by void");
+      i++;
+      result[idx++] = 'D';
+      break;
+    case T_BYTE:
+      result[idx++] = 'B';
+      break;
+    case T_SHORT:
+      result[idx++] = 'S';
+      break;
+    case T_INT:
+      result[idx++] = 'I';
+      break;
+    case T_LONG:
+      assert((i < (total_args_passed - 1)) && (sig_bt[i+1] == T_VOID),
+             "long must be followed by void");
+      i++;
+      result[idx++] = 'L';
+      break;
+    case T_OBJECT:
+      result[idx++] = 'O';
+      break;
+    case T_ARRAY:
+      result[idx++] = '[';
+      break;
+    case T_ADDRESS:
+      result[idx++] = 'P';
+      break;
+    case T_NARROWOOP:
+      result[idx++] = 'N';
+      break;
+    default:
+      result[idx++] = '?';
+      break;
+    }
+  }
+  result[idx++] = ')';
+  result[idx] = '\0';
+}
+#endif
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            int comp_args_on_stack,
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  address i2c_entry = __ pc();
+#ifdef BUILTIN_SIM
+  char *name = NULL;
+  AArch64Simulator *sim = NULL;
+  size_t len = 65536;
+  if (NotifySimulator) {
+    name = NEW_C_HEAP_ARRAY(char, len, mtInternal);
+  }
+
+  if (name) {
+    generate_i2c_adapter_name(name, total_args_passed, sig_bt);
+    sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+    sim->notifyCompile(name, i2c_entry);
+  }
+#endif
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+
+  Label ok;
+
+  Register holder = rscratch2;
+  Register receiver = j_rarg0;
+  Register tmp = r10;  // A call-clobbered register not used for arg passing
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter.  On entry we know rmethod holds the methodOop during calls
+  // to the interpreter.  The args start out packed in the compiled layout.  They
+  // need to be unpacked into the interpreter layout.  This will almost always
+  // require some stack space.  We grow the current (compiled) stack, then repack
+  // the args.  We finally end in a jump to the generic interpreter entry point.
+  // On exit from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
+
+  {
+    __ block_comment("c2i_unverified_entry {");
+    __ load_klass(rscratch1, receiver);
+    __ ldr(tmp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
+    __ cmp(rscratch1, tmp);
+    __ ldr(rmethod, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
+    __ br(Assembler::EQ, ok);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+    __ bind(ok);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ ldr(rscratch1, Address(rmethod, in_bytes(methodOopDesc::code_offset())));
+    __ cbz(rscratch1, skip_fixup);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ block_comment("} c2i_unverified_entry");
+  }
+
+  address c2i_entry = __ pc();
+
+#ifdef BUILTIN_SIM
+  if (name) {
+    name[0] = 'c';
+    name[2] = 'i';
+    sim->notifyCompile(name, c2i_entry);
+    FREE_C_HEAP_ARRAY(char, name, mtInternal);
+  }
+#endif
+
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                         VMRegPair *regs,
+                                         VMRegPair *regs2,
+                                         int total_args_passed) {
+  assert(regs2 == NULL, "not needed on aarch64");
+  // We return the number of VMRegImpl stack slots we need to reserve for all
+  // the arguments NOT counting out_preserve_stack_slots.
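+  // Illustrative outcome (a sketch): with a signature of ten T_INT arguments,
+  // the first eight land in c_rarg0..c_rarg7 and the last two go to stack
+  // slots 0 and 2, so (ignoring the _WIN64 shadow area) this returns 4.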
+
+    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
+      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5,  c_rarg6,  c_rarg7
+    };
+    static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
+      c_farg0, c_farg1, c_farg2, c_farg3,
+      c_farg4, c_farg5, c_farg6, c_farg7
+    };
+
+    uint int_args = 0;
+    uint fp_args = 0;
+    uint stk_args = 0; // inc by 2 each time
+
+    for (int i = 0; i < total_args_passed; i++) {
+      switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        if (int_args < Argument::n_int_register_parameters_c) {
+          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+#ifdef _WIN64
+          fp_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_LONG:
+        assert(sig_bt[i + 1] == T_VOID, "expecting half");
+        // fall through
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+        if (int_args < Argument::n_int_register_parameters_c) {
+          regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+#ifdef _WIN64
+          fp_args++;
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_FLOAT:
+        if (fp_args < Argument::n_float_register_parameters_c) {
+          regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+#ifdef _WIN64
+          int_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i + 1] == T_VOID, "expecting half");
+        if (fp_args < Argument::n_float_register_parameters_c) {
+          regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+#ifdef _WIN64
+          int_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_VOID: // Halves of longs and doubles
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+#ifdef _WIN64
+  // The Windows ABI requires that we always allocate enough stack space
+  // for 4 64-bit registers to be stored down.
+  if (stk_args < 8) {
+    stk_args = 8;
+  }
+#endif // _WIN64
+
+  return stk_args;
+}
+
+// Do we need to convert ints to longs for c calls?
+bool SharedRuntime::c_calling_convention_requires_ints_as_longs() {
+  return false;
+}
+
+// On 64-bit we will store integer-like items to the stack as
+// 64-bit items (SPARC ABI) even though Java would only store
+// 32 bits for a parameter.  On 32-bit it will simply be 32 bits,
+// so this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
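+// For example (a sketch of the cases below): a jint that arrives in a
+// caller stack slot and must end up in a register is loaded with ldrsw,
+// i.e. sign-extended to 64 bits on the way in.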
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
+      __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
+    } else {
+      // stack to reg
+      __ ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    // Do we really have to sign extend???
+    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
+    __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
+  } else {
+    if (dst.first() != src.first()) {
+      __ sxtw(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+// An oop arg. Must pass a handle not the oop itself
+static void object_move(MacroAssembler* masm,
+                        OopMap* map,
+                        int oop_handle_offset,
+                        int framesize_in_slots,
+                        VMRegPair src,
+                        VMRegPair dst,
+                        bool is_receiver,
+                        int* receiver_offset) {
+
+  // must pass a handle. First figure out the location we use as a handle
+
+  Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register();
+
+  // See if the oop is NULL; if it is, we need no handle
+
+  if (src.first()->is_stack()) {
+
+    // Oop is already on the stack as an argument
+    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+    if (is_receiver) {
+      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
+    }
+
+    __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
+    __ lea(rHandle, Address(rfp, reg2offset_in(src.first())));
+    // conditionally move a NULL
+    __ cmp(rscratch1, zr);
+    __ csel(rHandle, zr, rHandle, Assembler::EQ);
+  } else {
+
+    // Oop is in a register; we must store it to the space we reserve
+    // on the stack for oop handles and pass a handle if the oop is non-NULL
+
+    const Register rOop = src.first()->as_Register();
+    int oop_slot;
+    if (rOop == j_rarg0)
+      oop_slot = 0;
+    else if (rOop == j_rarg1)
+      oop_slot = 1;
+    else if (rOop == j_rarg2)
+      oop_slot = 2;
+    else if (rOop == j_rarg3)
+      oop_slot = 3;
+    else if (rOop == j_rarg4)
+      oop_slot = 4;
+    else if (rOop == j_rarg5)
+      oop_slot = 5;
+    else if (rOop == j_rarg6)
+      oop_slot = 6;
+    else {
+      assert(rOop == j_rarg7, "wrong register");
+      oop_slot = 7;
+    }
+
+    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
+    int offset = oop_slot*VMRegImpl::stack_slot_size;
+
+    map->set_oop(VMRegImpl::stack2reg(oop_slot));
+    // Store oop in handle area, may be NULL
+    __ str(rOop, Address(sp, offset));
+    if (is_receiver) {
+      *receiver_offset = offset;
+    }
+
+    __ cmp(rOop, zr);
+    __ lea(rHandle, Address(sp, offset));
+    // conditionally move a NULL
+    __ csel(rHandle, zr, rHandle, Assembler::EQ);
+  }
+
+  // If the arg is on the stack then place it; otherwise it is already in the correct reg.
+  if (dst.first()->is_stack()) {
+    __ str(rHandle, Address(sp, reg2offset_out(dst.first())));
+  }
+}
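+// Note on the handle convention above: a NULL oop is passed to the native
+// code as a NULL jobject (rHandle is conditionally zeroed), while a non-NULL
+// oop is passed as the address of the stack slot or oop-handle slot that
+// holds it, and that slot is recorded in the OopMap.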
+
+// A float arg may have to do a float reg to int reg conversion
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(src.first()->is_stack() && dst.first()->is_stack() ||
+         src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      __ ldrw(rscratch1, Address(rfp, reg2offset_in(src.first())));
+      __ strw(rscratch1, Address(sp, reg2offset_out(dst.first())));
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (src.first() != dst.first()) {
+    if (src.is_single_phys_reg() && dst.is_single_phys_reg())
+      __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+    else
+      ShouldNotReachHere();
+  }
+}
+
+// A long move
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
+      __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
+    } else {
+      // stack to reg
+      __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    // Do we really have to sign extend???
+    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
+    __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
+  } else {
+    if (dst.first() != src.first()) {
+      __ mov(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+
+// A double move
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(src.first()->is_stack() && dst.first()->is_stack() ||
+         src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
+      __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (src.first() != dst.first()) {
+    if (src.is_single_phys_reg() && dst.is_single_phys_reg())
+      __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+    else
+      ShouldNotReachHere();
+  }
+}
+
+
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // We always ignore the frame_slots arg and just use the space below the
+  // frame pointer, which by this time is free to use
+  switch (ret_type) {
+  case T_FLOAT:
+    __ strs(v0, Address(rfp, -wordSize));
+    break;
+  case T_DOUBLE:
+    __ strd(v0, Address(rfp, -wordSize));
+    break;
+  case T_VOID:  break;
+  default: {
+    __ str(r0, Address(rfp, -wordSize));
+    }
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // We always ignore the frame_slots arg and just use the space below the
+  // frame pointer, which by this time is free to use
+  switch (ret_type) {
+  case T_FLOAT:
+    __ ldrs(v0, Address(rfp, -wordSize));
+    break;
+  case T_DOUBLE:
+    __ ldrd(v0, Address(rfp, -wordSize));
+    break;
+  case T_VOID:  break;
+  default: {
+    __ ldr(r0, Address(rfp, -wordSize));
+    }
+  }
+}
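+// save_args/restore_args below spill the live outgoing C arguments around a
+// runtime call: general-purpose arg registers are pushed/popped together as a
+// RegSet, while floating-point arg registers are spilled one at a time into
+// 16-byte slots (hence the pre/post-indexed 2*wordSize adjustments).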
+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+  RegSet x;
+  for ( int i = first_arg ; i < arg_count ; i++ ) {
+    if (args[i].first()->is_Register()) {
+      x = x + args[i].first()->as_Register();
+    } else if (args[i].first()->is_FloatRegister()) {
+      __ strd(args[i].first()->as_FloatRegister(), Address(__ pre(sp, -2 * wordSize)));
+    }
+  }
+  __ push(x, sp);
+}
+
+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+  RegSet x;
+  for ( int i = first_arg ; i < arg_count ; i++ ) {
+    if (args[i].first()->is_Register()) {
+      x = x + args[i].first()->as_Register();
+    } else {
+      ;
+    }
+  }
+  __ pop(x, sp);
+  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
+    if (args[i].first()->is_Register()) {
+      ;
+    } else if (args[i].first()->is_FloatRegister()) {
+      __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
+    }
+  }
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) { Unimplemented(); }
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); }
+
+
+class ComputeMoveOrder: public StackObj {
+  class MoveOperation: public ResourceObj {
+    friend class ComputeMoveOrder;
+   private:
+    VMRegPair        _src;
+    VMRegPair        _dst;
+    int              _src_index;
+    int              _dst_index;
+    bool             _processed;
+    MoveOperation*  _next;
+    MoveOperation*  _prev;
+
+    static int get_id(VMRegPair r) { Unimplemented(); return 0; }
+
+   public:
+    MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst):
+      _src(src)
+    , _src_index(src_index)
+    , _dst(dst)
+    , _dst_index(dst_index)
+    , _next(NULL)
+    , _prev(NULL)
+    , _processed(false) { Unimplemented(); }
+
+    VMRegPair src() const              { Unimplemented(); return _src; }
+    int src_id() const                 { Unimplemented(); return 0; }
+    int src_index() const              { Unimplemented(); return 0; }
+    VMRegPair dst() const              { Unimplemented(); return _src; }
+    void set_dst(int i, VMRegPair dst) { Unimplemented(); }
+    int dst_index() const              { Unimplemented(); return 0; }
+    int dst_id() const                 { Unimplemented(); return 0; }
+    MoveOperation* next() const        { Unimplemented(); return 0; }
+    MoveOperation* prev() const        { Unimplemented(); return 0; }
+    void set_processed()               { Unimplemented(); }
+    bool is_processed() const          { Unimplemented(); return 0; }
+
+    // insert
+    void break_cycle(VMRegPair temp_register) { Unimplemented(); }
+
+    void link(GrowableArray<MoveOperation*>& killer) { Unimplemented(); }
+  };
+
+ private:
+  GrowableArray<MoveOperation*> edges;
+
+ public:
+  ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs,
+                    BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); }
+
+  // Collected all the move operations
+  void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); }
+
+  // Walk the edges breaking cycles between moves.  The result list
+  // can be walked in order to produce the proper set of loads
+  GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; }
+};
+
+
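+// rt_call: call a runtime routine.  If the destination is inside the code
+// cache we can reach it with an ordinary far_call; otherwise (the BUILTIN_SIM
+// path) the argument counts and return kind are packed for blrt as
+// (gpargs << 6) | (fpargs << 2) | type.  As a sketch: 2 GP args, 0 FP args
+// and an integral result would encode as (2 << 6) | (0 << 2) | 1 == 0x81.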
+static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) {
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  if (cb) {
+    __ far_call(RuntimeAddress(dest));
+  } else {
+    assert((unsigned)gpargs < 256, "eek!");
+    assert((unsigned)fpargs < 32, "eek!");
+    __ lea(rscratch1, RuntimeAddress(dest));
+    __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
+    __ blrt(rscratch1, rscratch2);
+    // __ blrt(rscratch1, gpargs, fpargs, type);
+    __ maybe_isb();
+  }
+}
+
+static void verify_oop_args(MacroAssembler* masm,
+                            methodHandle method,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  Register temp_reg = r19;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < method->size_of_parameters(); i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 methodHandle method,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, method, sig_bt, regs);
+  vmIntrinsics::ID iid = method->intrinsic_id();
+
+  // Now write the args into the outgoing interpreter space
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
+  if (ref_kind != 0) {
+    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
+    member_reg = r19;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (iid == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    fatal(err_msg_res("unexpected intrinsic id %d", iid));
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
+    VMReg r = regs[member_arg_pos].first();
+    if (r->is_stack()) {
+      __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(method->size_of_parameters() > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      fatal("receiver always in a register");
+      receiver_reg = r2;  // known to be free at this point
+      __ ldr(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, iid,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
+// ---------------------------------------------------------------------------
+// Generate a native wrapper for a given method.  The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear-down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for
+// them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
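+// As a rough sketch of what the critical entry point expects (names here are
+// illustrative, not taken from this file): a critical native for Foo.sum(int[] a)
+// would receive the array as an (int length, jint* body) pair, with no JNIEnv
+// and no jclass/jobject argument, mirroring the T_INT/T_ADDRESS pair pushed
+// into out_sig_bt for T_ARRAY below.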
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                methodHandle method,
+                                                int compile_id,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
+                                                BasicType ret_type) {
+#ifdef BUILTIN_SIM
+  if (NotifySimulator) {
+    // Names are up to 65536 chars long.  UTF8-coded strings are up to
+    // 3 bytes per character.  We concatenate three such strings.
+    // Yes, I know this is ridiculous, but it's debug code and glibc
+    // allocates large arrays very efficiently.
+    size_t len = (65536 * 3) * 3;
+    char *name = new char[len];
+
+    strncpy(name, method()->method_holder()->klass_part()->name()->as_utf8(), len);
+    strncat(name, ".", len);
+    strncat(name, method()->name()->as_utf8(), len);
+    strncat(name, method()->signature()->as_utf8(), len);
+    AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck)->notifyCompile(name, __ pc());
+    delete[] name;
+  }
+#endif
+
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    gen_special_dispatch(masm,
+                         method,
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
+  // An OopMap for lock (and class if static)
+  OopMapSet *oop_maps = new OopMapSet();
+  intptr_t start = (intptr_t)__ pc();
+
+  // We have received a description of where all the java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // the jni function will expect them. To figure out where they go
+  // we convert the java signature to a C signature by inserting
+  // the hidden arguments as arg[0] and possibly arg[1] (static method)
+
+  const int total_in_args = method->size_of_parameters();
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
+  }
+
+  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
+
+  int argc = 0;
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
+  }
+
+  // Now figure out where the args must be stored and how much stack space
+  // they require.
+  int out_arg_slots;
+  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+  // Compute framesize for the wrapper.  We need to handlize all oops in
+  // incoming registers
+
+  // Calculate the total number of stack slots we will need.
+
+  // First count the abi requirement plus all of the outgoing args
+  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+  // Now the space for the inbound oop handle area
+  int total_save_slots = 8 * VMRegImpl::slots_per_word;  // 8 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_ARRAY:  // specific to LP64 (7145024)
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
+
+  int oop_handle_offset = stack_slots;
+  stack_slots += total_save_slots;
+
+  // Now any space we need for handlizing a klass if static method
+
+  int klass_slot_offset = 0;
+  int klass_offset = -1;
+  int lock_slot_offset = 0;
+  bool is_static = false;
+
+  if (method->is_static()) {
+    klass_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
+    is_static = true;
+  }
+
+  // Plus a lock if needed
+
+  if (method->is_synchronized()) {
+    lock_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+  }
+
+  // Now a place (+2) to save return values or temp during shuffling
+  // + 4 for return address (which we own) and saved rfp
+  stack_slots += 6;
+
+  // Ok The space we have allocated will look like:
+  //
+  //
+  // FP-> |                     |
+  //      |---------------------|
+  //      | 2 slots for moves   |
+  //      |---------------------|
+  //      | lock box (if sync)  |
+  //      |---------------------| <- lock_slot_offset
+  //      | klass (if static)   |
+  //      |---------------------| <- klass_slot_offset
+  //      | oopHandle area      |
+  //      |---------------------| <- oop_handle_offset (8 java arg registers)
+  //      | outbound memory     |
+  //      | based arguments     |
+  //      |                     |
+  //      |---------------------|
+  //      |                     |
+  // SP-> | out_preserved_slots |
+  //
+  //
+
+
+  // Now compute actual number of stack words we need rounding to make
+  // stack properly aligned.
+  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
+
+  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
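+  // (Presumably StackAlignmentInSlots is 4 here, i.e. 16 bytes of 4-byte slots,
+  // since the AArch64 ABI requires SP to stay 16-byte aligned.)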
+
+  // First thing make an ic check to see if we should even be here
+
+  // We are free to use all registers as temps without saving them and
+  // restoring them except rfp. rfp is the only callee save register
+  // as far as the interpreter and the compiler(s) are concerned.
+
+
+  const Register ic_reg = rscratch2;
+  const Register receiver = j_rarg0;
+
+  Label hit;
+  Label exception_pending;
+
+  assert_different_registers(ic_reg, receiver, rscratch1);
+  __ verify_oop(receiver);
+  __ cmp_klass(receiver, ic_reg, rscratch1);
+  __ br(Assembler::EQ, hit);
+
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+  // Verified entry point must be aligned
+  __ align(8);
+
+  __ bind(hit);
+
+  int vep_offset = ((intptr_t)__ pc()) - start;
+
+  // Generate stack overflow check
+
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump.  For this action to be legal we
+  // must ensure that this first instruction is a B, BL, NOP, BKPT,
+  // SVC, HVC, or SMC.  Make it a NOP.
+  __ nop();
+
+  if (UseStackBanging) {
+    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
+  } else {
+    Unimplemented();
+  }
+
+  // Generate a new frame for the wrapper.
+  __ enter();
+  // -2 because return address is already present and so is saved rfp
+  __ sub(sp, sp, stack_size - 2*wordSize);
+
+  // Frame is now completed as far as size and linkage.
+  int frame_complete = ((intptr_t)__ pc()) - start;
+
+  // record entry into native wrapper code
+  if (NotifySimulator) {
+    __ notify(Assembler::method_entry);
+  }
+
+  // We use r20 as the oop handle for the receiver/klass
+  // It is callee save so it survives the call to native
+
+  const Register oop_handle_reg = r20;
+
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
+
+  //
+  // We immediately shuffle the arguments so that, for any VM call we have to
+  // make from here on out (sync slow path, jvmti, etc.), we will have
+  // captured the oops from our caller and have a valid oopMap for
+  // them.
+
+  // -----------------
+  // The Grand Shuffle
+
+  // The Java calling convention is either equal (linux) or denser (win64) than the
+  // c calling convention. However, because of the jni_env argument the c calling
+  // convention always has at least one more (and two for static) arguments than Java.
+  // Therefore if we move the args from java -> c backwards then we will never have
+  // a register->register conflict and we don't have to build a dependency graph
+  // and figure out how to break any cycles.
+  //
+
+  // Record esp-based slot for receiver on stack for non-static methods
+  int receiver_offset = -1;
+
+  // This is a trick. We double the stack slots so we can claim
+  // the oops in the caller's frame. Since we are sure to have
+  // more args than the caller, doubling is enough to make
+  // sure we can capture all the incoming oop args from the
+  // caller.
+  //
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  // Mark location of rfp (someday)
+  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp));
+
+
+  int float_args = 0;
+  int int_args = 0;
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is.  The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
+  GrowableArray<int> arg_order(2 * total_in_args);
+  VMRegPair tmp_vmreg;
+  tmp_vmreg.set1(r19->as_VMReg());
+
+  if (!is_critical_native) {
+    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+      arg_order.push(i);
+      arg_order.push(c_arg);
+    }
+  } else {
+    // Compute a valid move order, using tmp_vmreg to break any cycles
+    ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
+  }
+
+  int temploc = -1;
+  for (int ai = 0; ai < arg_order.length(); ai += 2) {
+    int i = arg_order.at(ai);
+    int c_arg = arg_order.at(ai + 1);
+    __ block_comment(err_msg("move %d -> %d", i, c_arg));
+    if (c_arg == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // This arg needs to be moved to a temporary
+      __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
+      in_regs[i] = tmp_vmreg;
+      temploc = i;
+      continue;
+    } else if (i == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // Read from the temporary location
+      assert(temploc != -1, "must be valid");
+      i = temploc;
+      temploc = -1;
+    }
+#ifdef ASSERT
+    if (in_regs[i].first()->is_Register()) {
+      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+    } else if (in_regs[i].first()->is_FloatRegister()) {
+      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+    }
+    if (out_regs[c_arg].first()->is_Register()) {
+      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+    } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+    }
+#endif /* ASSERT */
+    switch (in_sig_bt[i]) {
+      case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+          }
+#endif
+          int_args++;
+          break;
+        }
+      case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
+        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
+                    ((i == 0) && (!is_static)),
+                    &receiver_offset);
+        int_args++;
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        float_move(masm, in_regs[i], out_regs[c_arg]);
+        float_args++;
+        break;
+
+      case T_DOUBLE:
+        assert( i + 1 < total_in_args &&
+                in_sig_bt[i + 1] == T_VOID &&
+                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+        double_move(masm, in_regs[i], out_regs[c_arg]);
+        float_args++;
+        break;
+
+      case T_LONG :
+        long_move(masm, in_regs[i], out_regs[c_arg]);
+        int_args++;
+        break;
+
+      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
+
+      default:
+        move32_64(masm, in_regs[i], out_regs[c_arg]);
+        int_args++;
+    }
+  }
+
+  // point c_arg at the first arg that is already loaded in case we
+  // need to spill before we call out
+  int c_arg = total_c_args - total_in_args;
+
+  // Pre-load a static method's oop into r20.  Used both by locking code and
+  // the normal JNI call code.
+  if (method->is_static() && !is_critical_native) {
+
+    //  load oop into a register
+
+    // !!! FIXME AARCH64 -- bizarrely gcc accepts the following
+    // separate expressions but fails to compile when they
+    // are composed as arguments to the call
+    Klass * k =  Klass::cast(method->method_holder());
+    oop obj = k->java_mirror();
+    jobject jobj = JNIHandles::make_local(obj);
+    __ movoop(oop_handle_reg, jobj);
+    // so why does this call not get compiled?
+    //  __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
+
+    // Now handlize the static class mirror; it's known not-null.
+    __ str(oop_handle_reg, Address(sp, klass_offset));
+    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+
+    // Now get the handle
+    __ lea(oop_handle_reg, Address(sp, klass_offset));
+    // store the klass handle as second argument
+    __ mov(c_rarg1, oop_handle_reg);
+    // and protect the arg if we must spill
+    c_arg--;
+  }
+
+  // Change state to native (we save the return address in the thread, since it might not
+  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
+  // points into the right code segment. It does not have to be the correct return pc.
+  // We use the same pc/oopMap repeatedly when we call out
+
+  intptr_t the_pc = (intptr_t) __ pc();
+  oop_maps->add_gc_map(the_pc - start, map);
+
+  __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1);
+
+
+  // We have all of the arguments set up at this point. We must not touch any
+  // argument registers at this point (what if we save/restore them? there are no oop maps for that).
+
+  {
+    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+      rthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+  }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      rthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+  }
+
+  // Lock a synchronized method
+
+  // Register definitions used by locking and unlocking
+
+  const Register swap_reg = r0;
+  const Register obj_reg  = r19;  // Will contain the oop
+  const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
+  const Register old_hdr  = r13;  // value of old header at unlock time
+  const Register tmp = lr;
+
+  Label slow_path_lock;
+  Label lock_done;
+
+  if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
+
+
+    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
+
+    // Get the handle (the 2nd argument)
+    __ mov(oop_handle_reg, c_rarg1);
+
+    // Get address of the box
+
+    __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+
+    // Load the oop from the handle
+    __ ldr(obj_reg, Address(oop_handle_reg, 0));
+
+    if (UseBiasedLocking) {
+      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock);
+    }
+
+    // Load (object->mark() | 1) into swap_reg %r0
+    __ ldr(rscratch1, Address(obj_reg, 0));
+    __ orr(swap_reg, rscratch1, 1);
+
+    // Save (object->mark() | 1) into BasicLock's displaced header
+    __ str(swap_reg, Address(lock_reg, mark_word_offset));
+
+    // src -> dest iff dest == r0 else r0 <- dest
+    { Label here;
+      __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL);
+    }
+
+    // Hmm should this move to the slow path code area???
+
+    // Test if the oopMark is an obvious stack pointer, i.e.,
+    //  1) (mark & 3) == 0, and
+    //  2) sp <= mark < mark + os::pagesize()
+    // These 3 tests can be done by evaluating the following
+    // expression: ((mark - sp) & (3 - os::vm_page_size())),
+    // assuming both stack pointer and pagesize have their
+    // least significant 2 bits clear.
+    // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg
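+    // Sketch of the arithmetic, assuming a 4K page: 3 - 4096 == ...fffff003,
+    // so the ands() below clears bits 2..11 and the result is zero exactly
+    // when (mark - sp) is 4-byte aligned and lies in [0, 4096), i.e. the
+    // recursive stack-lock case.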
+
+    __ sub(swap_reg, sp, swap_reg);
+    __ neg(swap_reg, swap_reg);
+    __ ands(swap_reg, swap_reg, 3 - os::vm_page_size());
+
+    // Save the test result; for the recursive case, the result is zero
+    __ str(swap_reg, Address(lock_reg, mark_word_offset));
+    __ br(Assembler::NE, slow_path_lock);
+
+    // Slow path will re-enter here
+
+    __ bind(lock_done);
+  }
+
+
+  // Finally just about ready to make the JNI call
+
+
+  // get JNIEnv* which is first argument to native
+  if (!is_critical_native) {
+    __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
+  }
+
+  // Now set thread in native
+  __ mov(rscratch1, _thread_in_native);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+
+  {
+    int return_type = 0;
+    switch (ret_type) {
+    case T_VOID:
+      return_type = 0; break;
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+    case T_BOOLEAN:
+    case T_LONG:
+      return_type = 1; break;
+    case T_ARRAY:
+    case T_OBJECT:
+      return_type = 1; break;
+    case T_FLOAT:
+      return_type = 2; break;
+    case T_DOUBLE:
+      return_type = 3; break;
+    default:
+      ShouldNotReachHere();
+    }
+    rt_call(masm, native_func,
+            int_args + 2, // AArch64 passes up to 8 args in int registers
+            float_args,   // and up to 8 float args
+            return_type);
+  }
+
+  // Unpack native results.
+  switch (ret_type) {
+  case T_BOOLEAN: __ c2bool(r0);                     break;
+  case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
+  case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
+  case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
+  case T_INT    : __ sbfx(r0, r0, 0, 32);            break;
+  case T_DOUBLE :
+  case T_FLOAT  :
+    // Result is in v0 we'll save as needed
+    break;
+  case T_ARRAY:                 // Really a handle
+  case T_OBJECT:                // Really a handle
+      break; // can't de-handlize until after safepoint check
+  case T_VOID: break;
+  case T_LONG: break;
+  default       : ShouldNotReachHere();
+  }
+
+  // Switch thread to "native transition" state before reading the synchronization state.
+  // This additional state is necessary because reading and testing the synchronization
+  // state is not atomic w.r.t. GC, as this scenario demonstrates:
+  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+  //     VM thread changes sync state to synchronizing and suspends threads for GC.
+  //     Thread A is resumed to finish this native method, but doesn't block here since it
+  //     didn't see any synchronization in progress, and escapes.
+  __ mov(rscratch1, _thread_in_native_trans);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+
+  if(os::is_MP()) {
+    if (UseMembar) {
+      // Force this write out before the read below
+      __ dmb(Assembler::SY);
+    } else {
+      // Write serialization page so VM thread can do a pseudo remote membar.
+      // We use the current thread pointer to calculate a thread specific
+      // offset to write to within the page. This minimizes bus traffic
+      // due to cache line collision.
+      __ serialize_memory(rthread, r2);
+    }
+  }
+
+  Label after_transition;
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  {
+    Label Continue;
+
+    { unsigned long offset;
+      __ adrp(rscratch1,
+              ExternalAddress((address)SafepointSynchronize::address_of_state()),
+              offset);
+      __ ldrw(rscratch1, Address(rscratch1, offset));
+    }
+    __ cmpw(rscratch1, SafepointSynchronize::_not_synchronized);
+
+    Label L;
+    __ br(Assembler::NE, L);
+    __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
+    __ cbz(rscratch1, Continue);
+    __ bind(L);
+
+    // Don't use call_VM as it will see a possible pending exception and forward it
+    // and never return here preventing us from clearing _last_native_pc down below.
+    //
+    save_native_result(masm, ret_type, stack_slots);
+    __ mov(c_rarg0, rthread);
+#ifndef PRODUCT
+    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+    if (!is_critical_native) {
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
+    __ blrt(rscratch1, 1, 0, 1);
+    __ maybe_isb();
+    // Restore any method result value
+    restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ b(after_transition);
+    }
+
+    __ bind(Continue);
+  }
+
+  // change thread state
+  __ mov(rscratch1, _thread_in_Java);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+  __ bind(after_transition);
+
+  Label reguard;
+  Label reguard_done;
+  __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
+  __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled);
+  __ br(Assembler::EQ, reguard);
+  __ bind(reguard_done);
+
+  // native result if any is live
+
+  // Unlock
+  Label unlock_done;
+  Label slow_path_unlock;
+  if (method->is_synchronized()) {
+
+    // Get locked oop from the handle we passed to jni
+    __ ldr(obj_reg, Address(oop_handle_reg, 0));
+
+    Label done;
+
+    if (UseBiasedLocking) {
+      __ biased_locking_exit(obj_reg, old_hdr, done);
+    }
+
+    // Simple recursive lock?
+
+    __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ cbz(rscratch1, done);
+
+    // Must save r0 if it is live now because cmpxchg must use it
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+
+    // get address of the stack lock
+    __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    //  get old displaced header
+    __ ldr(old_hdr, Address(r0, 0));
+
+    // Atomic swap old header if oop still contains the stack lock
+    Label succeed;
+    __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock);
+    __ bind(succeed);
+
+    // slow path re-enters here
+    __ bind(unlock_done);
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ bind(done);
+
+  }
+  {
+    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
+    save_native_result(masm, ret_type, stack_slots);
+    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ call_VM_leaf(
+         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+         rthread, c_rarg1);
+    restore_native_result(masm, ret_type, stack_slots);
+  }
+
+  __ reset_last_Java_frame(false, true);
+
+  // Unpack oop result
+  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+      Label L;
+      __ cbz(r0, L);
+      __ ldr(r0, Address(r0, 0));
+      __ bind(L);
+      __ verify_oop(r0);
+  }
+
+  if (!is_critical_native) {
+    // reset handle block
+    __ ldr(r2, Address(rthread, JavaThread::active_handles_offset()));
+    __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes()));
+  }
+
+  __ leave();
+
+  if (!is_critical_native) {
+    // Any exception pending?
+    __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ cbnz(rscratch1, exception_pending);
+  }
+
+  // record exit from native wrapper code
+  if (NotifySimulator) {
+    __ notify(Assembler::method_reentry);
+  }
+
+  // We're done
+  __ ret(lr);
+
+  // Unexpected paths are out of line and go here
+
+  if (!is_critical_native) {
+    // forward the exception
+    __ bind(exception_pending);
+
+    // and forward the exception
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
+
+  // Slow path locking & unlocking
+  if (method->is_synchronized()) {
+
+    // BEGIN Slow path lock
+    __ bind(slow_path_lock);
+
+    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
+    // args are (oop obj, BasicLock* lock, JavaThread* thread)
+
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+
+    __ mov(c_rarg0, obj_reg);
+    __ mov(c_rarg1, lock_reg);
+    __ mov(c_rarg2, rthread);
+
+    // Not a leaf but we have last_Java_frame setup as we want
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+
+#ifdef ASSERT
+    { Label L;
+      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+      __ cbz(rscratch1, L);
+      __ stop("no pending exception allowed on exit from monitorenter");
+      __ bind(L);
+    }
+#endif
+    __ b(lock_done);
+
+    // END Slow path lock
+
+    // BEGIN Slow path unlock
+    __ bind(slow_path_unlock);
+
+    // If we haven't already saved the native result we must save it now as the
+    // result registers are still exposed.
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ mov(c_rarg0, obj_reg);
+
+    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
+    // NOTE that obj_reg == r19 currently
+    __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+
+    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 2, 0, 1);
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+      __ cbz(rscratch1, L);
+      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
+      __ bind(L);
+    }
+#endif /* ASSERT */
+
+    __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+    __ b(unlock_done);
+
+    // END Slow path unlock
+
+  } // synchronized
+
+  // SLOW PATH Reguard the stack if needed
+
+  __ bind(reguard);
+  save_native_result(masm, ret_type, stack_slots);
+  rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0);
+  restore_native_result(masm, ret_type, stack_slots);
+  // and continue
+  __ b(reguard_done);
+
+
+
+  __ flush();
+
+  nmethod *nm = nmethod::new_native_nmethod(method,
+                                            compile_id,
+                                            masm->code(),
+                                            vep_offset,
+                                            frame_complete,
+                                            stack_slots / VMRegImpl::slots_per_word,
+                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
+                                            oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
+  return nm;
+
+}
+
+
+#ifdef HAVE_DTRACE_H
+// ---------------------------------------------------------------------------
+// Generate a dtrace nmethod for a given signature.  The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// abi and then leaves nops at the position you would expect to call a native
+// function. When the probe is enabled the nops are replaced with a trap
+// instruction that dtrace inserts and the trace will cause a notification
+// to dtrace.
+//
+// The probes are only able to take primitive types and java/lang/String as
+// arguments.  No other java types are allowed. Strings are converted to utf8
+// strings so that from dtrace point of view java strings are converted to C
+// strings. There is an arbitrary fixed limit on the total space that a method
+// can use for converting the strings (256 chars per string in the signature),
+// so any java string larger than this is truncated.
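+//
+// n.b. in this port the generator below simply calls Unimplemented() and
+// returns 0; the description above documents the intended behaviour only.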
+
+static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
+static bool offsets_initialized = false;
+
+
+nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
+                                                methodHandle method) { Unimplemented(); return 0; }
+
+#endif // HAVE_DTRACE_H
+
+// this function returns the adjustment (in number of words) to a c2i adapter
+// activation for use during deoptimization
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+  assert(callee_locals >= callee_parameters,
+          "test and remove; got more parms than locals");
+  if (callee_locals < callee_parameters)
+    return 0;                   // No adjustment for negative locals
+  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+  // diff is counted in stack words
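+  // Illustrative example (assuming Interpreter::stackElementWords == 1,
+  // as on 64-bit ports): a callee with 3 parameters and 8 locals gives
+  // diff == 5, which round_to(diff, 2) rounds up to 6 so the adjustment
+  // stays an even number of words, i.e. a 16-byte multiple.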
+  return round_to(diff, 2);
+}
+
+
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("deopt_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  int frame_size_in_words;
+  OopMap* map = NULL;
+  OopMapSet *oop_maps = new OopMapSet();
+
+#ifdef BUILTIN_SIM
+  AArch64Simulator *simulator;
+  if (NotifySimulator) {
+    simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+    simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
+  }
+#endif
+
+  // -------------
+  // This code enters when returning to a de-optimized nmethod.  A return
+  // address has been pushed on the stack, and return values are in
+  // registers.
+  // If we are doing a normal deopt then we were called from the patched
+  // nmethod from the point we returned to the nmethod. So the return
+  // address on the stack is wrong by NativeCall::instruction_size
+  // We will adjust the value so it looks like we have the original return
+  // address on the stack (like when we eagerly deoptimized).
+  // In the case of an exception pending when deoptimizing, we enter
+  // with a return address on the stack that points after the call we patched
+  // into the exception handler. We have the following register state from,
+  // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
+  //    r0: exception oop
+  //    r19: exception handler
+  //    r3: throwing pc
+  // So in this case we simply jam r3 into the useless return address and
+  // the stack looks just like we want.
+  //
+  // At this point we need to de-opt.  We save the argument return
+  // registers.  We call the first C routine, fetch_unroll_info().  This
+  // routine captures the return values and returns a structure which
+  // describes the current frame size and the sizes of all replacement frames.
+  // The current frame is compiled code and may contain many inlined
+  // functions, each with their own JVM state.  We pop the current frame, then
+  // push all the new frames.  Then we call the C routine unpack_frames() to
+  // populate these frames.  Finally unpack_frames() returns us the new target
+  // address.  Notice that callee-save registers are BLOWN here; they have
+  // already been captured in the vframeArray at the time the return PC was
+  // patched.
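+  //
+  // In outline: the blob has a normal entry (Unpack_deopt), a reexecute
+  // entry (Unpack_reexecute) and an exception entry which stores the
+  // exception oop/pc into the JavaThread and falls through to the
+  // unpack_with_exception_in_tls entry (Unpack_exception).  All paths
+  // meet at cont with the chosen mode in rcpool (callee-saved), which is
+  // later passed to unpack_frames() as exec_mode.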
+  address start = __ pc();
+  Label cont;
+
+  // Prolog for the non-exception case
+
+  // Save everything in sight.
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Normal deoptimization.  Save exec mode for unpack_frames.
+  __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
+  __ b(cont);
+
+  int reexecute_offset = __ pc() - start;
+
+  // Reexecute case
+  // the return address is the pc that describes which bci to re-execute at
+
+  // No need to update map as each call to save_live_registers will produce identical oopmap
+  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+  __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
+  __ b(cont);
+
+  int exception_offset = __ pc() - start;
+
+  // Prolog for exception case
+
+  // all registers are dead at this entry point, except for r0, and
+  // r3 which contain the exception oop and exception pc
+  // respectively.  Set them in TLS and fall thru to the
+  // unpack_with_exception_in_tls entry point.
+
+  __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
+  __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
+
+  int exception_in_tls_offset = __ pc() - start;
+
+  // new implementation because exception oop is now passed in JavaThread
+
+  // Prolog for exception case
+  // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+  // tos: stack at point of call to method that threw the exception (i.e. only
+  // args are on the stack, no return address)
+
+  // The return address pushed by save_live_registers will be patched
+  // later with the throwing pc. The correct value is not available
+  // now because loading it from memory would destroy registers.
+
+  // NB: The SP at this point must be the SP of the method that is
+  // being deoptimized.  Deoptimization assumes that the frame created
+  // here by save_live_registers is immediately below the method's SP.
+  // This is a somewhat fragile mechanism.
+
+  // Save everything in sight.
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Now it is safe to overwrite any register
+
+  // Deopt during an exception.  Save exec mode for unpack_frames.
+  __ mov(rcpool, Deoptimization::Unpack_exception); // callee-saved
+
+  // load throwing pc from JavaThread and patch it as the return address
+  // of the current frame. Then clear the field in JavaThread
+
+  __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
+  __ str(r3, Address(rfp, wordSize));
+  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+  // verify that there is really an exception oop in JavaThread
+  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  __ verify_oop(r0);
+
+  // verify that there is no pending exception
+  Label no_pending_exception;
+  __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+  __ cbz(rscratch1, no_pending_exception);
+  __ stop("must not have pending exception here");
+  __ bind(no_pending_exception);
+#endif
+
+  __ bind(cont);
+
+  // Call C code.  Need thread and this frame, but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.
+  //
+  // UnrollBlock* fetch_unroll_info(JavaThread* thread)
+
+  // fetch_unroll_info needs to call last_java_frame().
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
+#ifdef ASSERT0
+  { Label L;
+    __ ldr(rscratch1, Address(rthread,
+                              JavaThread::last_Java_fp_offset()));
+    __ cbz(rscratch1, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif // ASSERT0
+  __ mov(c_rarg0, rthread);
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ bind(retaddr);
+
+  // Need to have an oopmap that tells fetch_unroll_info where to
+  // find any register it might need.
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false, true);
+
+  // Load UnrollBlock* into r5
+  __ mov(r5, r0);
+
+  Label noException;
+  __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
+  __ br(Assembler::NE, noException);
+  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  // QQQ this is useless it was NULL above
+  __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
+
+  __ verify_oop(r0);
+
+  // Overwrite the result registers with the exception results.
+  __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
+  // I think this is useless
+  // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
+
+  __ bind(noException);
+
+  // Only register save data is on the stack.
+  // Now restore the result registers.  Everything else is either dead
+  // or captured in the vframeArray.
+  RegisterSaver::restore_result_registers(masm);
+
+  // All of the register save area has been popped off the stack. Only the
+  // return address remains.
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+  //
+  // Note: by leaving the return address of self-frame on the stack
+  // and using the size of frame 2 to adjust the stack
+  // when we are done the return to frame 3 will still be on the stack.
+
+  // Pop deoptimized frame
+  __ ldrw(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(r2, r2, 2 * wordSize);
+  __ add(sp, sp, r2);
+  // !!! FIXME AARCH64 -- think we should restore rfp here before stack bang???
+  // jdk7 x86 does it later but that may well be a bug
+
+  __ ldp(rfp, lr, __ post(sp, 2 * wordSize));
+  // LR should now be the return address to the caller (3)
+
+  // Stack bang to make sure there's enough room for these interpreter frames.
+  if (UseStackBanging) {
+    __ ldrw(r19, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+    __ bang_stack_size(r19, r2);
+  }
+
+  // Load address of array of frame pcs into r2
+  __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Trash the old pc
+  // __ addptr(sp, wordSize);  FIXME ????
+
+  // Load address of array of frame sizes into r4
+  __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+  // Load counter into r3
+  __ ldrw(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+
+  // !!! FIXME AARCH64 -- this is where jdk7 x86 restores rbp while
+  // jdk8 x86 does it where we load rfp up above -- check this.
+  // here is the jdk7 x86 code
+  //
+  // Pick up the initial fp we should save
+  // movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+  //
+
+  // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+
+  const Register sender_sp = r6;
+
+  __ mov(sender_sp, sp);
+  __ ldrw(r19, Address(r5,
+                       Deoptimization::UnrollBlock::
+                       caller_adjustment_offset_in_bytes()));
+  __ sub(sp, sp, r19);
+
+  // Push interpreter frames in a loop
+  __ mov(rscratch1, (address)badHeapOopVal);        // Make a recognizable pattern
+  __ mov(rscratch2, rscratch1);
+  Label loop;
+  __ bind(loop);
+  __ ldr(r19, Address(__ post(r4, wordSize)));          // Load frame size
+  __ sub(r19, r19, 2*wordSize);           // We'll push pc and fp by hand
+  __ ldr(lr, Address(__ post(r2, wordSize)));  // Load pc
+  __ enter();                           // Save old & set new fp
+  __ sub(sp, sp, r19);                  // Prolog
+  // This value is corrected by layout_activation_impl
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  __ mov(sender_sp, sp);               // Pass sender_sp to next frame
+  __ sub(r3, r3, 1);                   // Decrement counter
+  __ cbnz(r3, loop);
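+  // Each iteration of the loop above consumes one frame size and one
+  // frame pc from the UnrollBlock arrays and leaves a skeletal
+  // interpreter frame behind; unpack_frames(), called below, fills in
+  // the interpreter state for each of them.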
+
+  // Re-push self-frame
+  __ ldr(lr, Address(r2));
+  __ enter();
+
+  // Allocate a full sized register save area.  We subtract 2 because
+  // enter() just pushed 2 words
+  __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
+
+  // Restore frame locals after moving the frame
+  __ strd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
+  __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // restore return values to their stack-slots with the new SP.
+  //
+  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
+
+  // Use rfp because the frames look interpreted now
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
+
+  __ mov(c_rarg0, rthread);
+  __ movw(c_rarg1, rcpool); // second arg: exec_mode
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+  __ blrt(rscratch1, 2, 0, 0);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start,
+                       new OopMap( frame_size_in_words, 0 ));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true, true);
+
+  // Collect return values
+  __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
+  __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
+  // I think this is useless (throwing pc?)
+  // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
+
+  // Pop self-frame.
+  __ leave();                           // Epilog
+
+  // Jump to interpreter
+  __ ret(lr);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+
+#ifdef BUILTIN_SIM
+  if (NotifySimulator) {
+    unsigned char *base = _deopt_blob->code_begin();
+    simulator->notifyRelocate(start, base - start);
+  }
+#endif
+}
+
+uint SharedRuntime::out_preserve_stack_slots() {
+  return 0;
+}
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+void SharedRuntime::generate_uncommon_trap_blob() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+#ifdef BUILTIN_SIM
+  AArch64Simulator *simulator;
+  if (NotifySimulator) {
+    simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+    simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
+  }
+#endif
+
+  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+  address start = __ pc();
+
+  // Push self-frame.  We get here with a return address in LR
+  // and sp should be 16 byte aligned
+  // push rfp and retaddr by hand
+  __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
+  // we don't expect an arg reg save area
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // the compiler left unloaded_class_index in j_rarg0; move it to where the
+  // runtime expects it.
+  if (c_rarg1 != j_rarg0) {
+    __ movw(c_rarg1, j_rarg0);
+  }
+
+  // we need to set the last Java SP to the stack pointer of the stub frame
+  // and the pc to the address where this runtime call will return
+  // (although actually any pc in this code blob will do).
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // capture callee-saved registers as well as return values.
+  // Thread is passed in c_rarg0.
+  //
+  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
+  //
+  // n.b. 2 gp args, 0 fp args, integral return type
+
+  __ mov(c_rarg0, rthread);
+  __ lea(rscratch1,
+         RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                         Deoptimization::uncommon_trap)));
+  __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
+
+  // location of rfp is known implicitly by the frame sender code
+
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false, true);
+
+  // move UnrollBlock* into r4
+  __ mov(r4, r0);
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+
+  // Pop self-frame.  We have no frame, and must rely only on r0 and sp.
+  __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
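+  // (SimpleRuntimeFrame::framesize is expressed in 32-bit stack slots,
+  // hence the shift by LogBytesPerInt to convert slots to bytes.)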
+
+  // Pop deoptimized frame (int)
+  __ ldrw(r2, Address(r4,
+                      Deoptimization::UnrollBlock::
+                      size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(r2, r2, 2 * wordSize);
+  __ add(sp, sp, r2);
+  // !!! FIXME AARCH64 -- once again jdk7 restores rbp later but I think
+  // we need to follow jdk8 and do it here
+  __ ldp(rfp, lr, __ post(sp, 2 * wordSize));
+  // LR should now be the return address to the caller (3) frame
+
+  // Stack bang to make sure there's enough room for these interpreter frames.
+  if (UseStackBanging) {
+    __ ldrw(r1, Address(r4,
+                        Deoptimization::UnrollBlock::
+                        total_frame_sizes_offset_in_bytes()));
+    __ bang_stack_size(r1, r2);
+  }
+
+  // Load address of array of frame pcs into r2 (address*)
+  __ ldr(r2, Address(r4,
+                     Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Load address of array of frame sizes into r5 (intptr_t*)
+  __ ldr(r5, Address(r4,
+                     Deoptimization::UnrollBlock::
+                     frame_sizes_offset_in_bytes()));
+
+  // Counter
+  __ ldrw(r3, Address(r4,
+                      Deoptimization::UnrollBlock::
+                      number_of_frames_offset_in_bytes())); // (int)
+
+  // !!! FIXME AARCH64 -- this is where jdk7 reloads rbp but we loaded it earlier
+  // original jdk7 x86 code is
+  //
+  // Pick up the initial fp we should save
+  // __ movptr(rbp,
+  //          Address(rdi,
+  //                  Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+
+  // Now adjust the caller's stack to make up for the extra locals but
+  // record the original sp so that we can save it in the skeletal
+  // interpreter frame and the stack walking of interpreter_sender
+  // will get the unextended sp value and not the "real" sp value.
+
+  const Register sender_sp = r8;
+
+  __ mov(sender_sp, sp);
+  __ ldrw(r1, Address(r4,
+                      Deoptimization::UnrollBlock::
+                      caller_adjustment_offset_in_bytes())); // (int)
+  __ sub(sp, sp, r1);
+
+  // Push interpreter frames in a loop
+  Label loop;
+  __ bind(loop);
+  __ ldr(r1, Address(r5, 0));       // Load frame size
+  __ sub(r1, r1, 2 * wordSize);     // We'll push pc and rfp by hand
+  __ ldr(lr, Address(r2, 0));       // Load return address
+  __ enter();                       // and old rfp & set new rfp
+  __ sub(sp, sp, r1);               // Prolog
+  __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  // This value is corrected by layout_activation_impl
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ mov(sender_sp, sp);          // Pass sender_sp to next frame
+  __ add(r5, r5, wordSize);       // Bump array pointer (sizes)
+  __ add(r2, r2, wordSize);       // Bump array pointer (pcs)
+  __ subsw(r3, r3, 1);            // Decrement counter
+  __ br(Assembler::GT, loop);
+  __ ldr(lr, Address(r2, 0));     // load final return address
+  // Re-push self-frame
+  __ enter();                     // & old rfp & set new rfp
+
+  // Use rfp because the frames look interpreted now
+  // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // restore return values to their stack-slots with the new SP.
+  // Thread is passed in c_rarg0.
+  //
+  // BasicType unpack_frames(JavaThread* thread, int exec_mode);
+  //
+  // n.b. 2 gp args, 0 fp args, integral return type
+
+  // sp should already be aligned
+  __ mov(c_rarg0, rthread);
+  __ movw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+  __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true, true);
+
+  // Pop self-frame.
+  __ leave();                 // Epilog
+
+  // Jump to interpreter
+  __ ret(lr);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
+                                                 SimpleRuntimeFrame::framesize >> 1);
+
+#ifdef BUILTIN_SIM
+  if (NotifySimulator) {
+    unsigned char *base = _uncommon_trap_blob->code_begin();
+    simulator->notifyRelocate(start, base - start);
+  }
+#endif
+}
+#endif // COMPILER2
+
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// and setup oopmap.
+//
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+  ResourceMark rm;
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map;
+
+  // Allocate space for the code.  Setup code generation tools.
+  CodeBuffer buffer("handler_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  address start   = __ pc();
+  address call_pc = NULL;
+  int frame_size_in_words;
+  bool cause_return = (poll_type == POLL_AT_RETURN);
+  bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
+
+  // Save Integer and Float registers.
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+  // The following is basically a call_VM.  However, we need the precise
+  // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
+
+  // The return address must always be correct so that the frame constructor
+  // never sees an invalid pc.
+
+  if (!cause_return) {
+    // overwrite the return address pushed by save_live_registers
+    __ ldr(c_rarg0, Address(rthread, JavaThread::saved_exception_pc_offset()));
+    __ str(c_rarg0, Address(rfp, wordSize));
+  }
+
+  // Do the call
+  __ mov(c_rarg0, rthread);
+  __ lea(rscratch1, RuntimeAddress(call_ptr));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  oop_maps->add_gc_map( __ pc() - start, map);
+
+  Label noException;
+
+  __ reset_last_Java_frame(false, true);
+
+  __ maybe_isb();
+
+  __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+  __ cbz(rscratch1, noException);
+
+  // Exception pending
+
+  RegisterSaver::restore_live_registers(masm);
+
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // No exception case
+  __ bind(noException);
+
+  // Normal exit, restore registers and exit.
+  RegisterSaver::restore_live_registers(masm);
+
+  __ ret(lr);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Fill-out other meta info
+  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
+}
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+//
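+// (The generic SharedRuntime code creates one such blob for each resolver:
+// static, virtual, opt-virtual and ic-miss; the destination argument
+// selects the runtime entry point that is called.)
+//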
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+  // allocate space for the code
+  ResourceMark rm;
+
+  CodeBuffer buffer(name, 1000, 512);
+  MacroAssembler* masm                = new MacroAssembler(&buffer);
+
+  int frame_size_in_words;
+
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map = NULL;
+
+  int start = __ offset();
+
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+  int frame_complete = __ offset();
+
+  {
+    Label retaddr;
+    __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
+
+    __ mov(c_rarg0, rthread);
+    __ lea(rscratch1, RuntimeAddress(destination));
+
+    __ blrt(rscratch1, 1, 0, 1);
+    __ bind(retaddr);
+  }
+
+  // Set an oopmap for the call site.
+  // We need this not only for callee-saved registers, but also for volatile
+  // registers that the compiler might be keeping live across a safepoint.
+
+  oop_maps->add_gc_map( __ offset() - start, map);
+
+  __ maybe_isb();
+
+  // r0 contains the address we are going to jump to assuming no exception got installed
+
+  // clear last_Java_sp
+  __ reset_last_Java_frame(false, true);
+  // check for pending exceptions
+  Label pending;
+  __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+  __ cbnz(rscratch1, pending);
+
+  // get the returned methodOop
+  __ get_vm_result(rmethod, rthread);
+  __ str(rmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(rmethod)));
+
+  // r0 is where we want to jump, overwrite rscratch1 which is saved and scratch
+  __ str(r0, Address(sp, RegisterSaver::rscratch1_offset_in_bytes()));
+  RegisterSaver::restore_live_registers(masm);
+
+  // We are back to the original state on entry and ready to go.
+
+  __ br(rscratch1);
+
+  // Pending exception after the safepoint
+
+  __ bind(pending);
+
+  RegisterSaver::restore_live_registers(masm);
+
+  // exception pending => remove activation and forward to exception handler
+
+  __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
+
+  __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // return the blob
+  // frame_size_words or bytes??
+  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
+}
+
+
+#ifdef COMPILER2
+// This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
+//
+//------------------------------generate_exception_blob---------------------------
+// creates the exception blob at the end
+// Using the exception blob, this code is jumped to from a compiled method.
+// (see emit_exception_handler in the aarch64.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers) unwind the frame and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a jmp.
+//
+// Arguments:
+//   r0: exception oop
+//   r3: exception pc
+//
+// Results:
+//   r0: exception oop
+//   r3: exception pc in caller or ???
+//   destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+//       Registers r0, r3, r2, r4, r5, r8-r11 are not callee saved.
+//
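+// In outline: the exception oop and pc are stashed in the JavaThread,
+// handle_exception_C() computes the handler address (returned in r0),
+// the oop and pc are reloaded into r0/r4 and cleared from the thread,
+// and control transfers to the handler via an indirect branch.
+//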
+
+void OptoRuntime::generate_exception_blob() {
+  assert(!OptoRuntime::is_callee_saved_register(R3_num), "");
+  assert(!OptoRuntime::is_callee_saved_register(R0_num), "");
+  assert(!OptoRuntime::is_callee_saved_register(R2_num), "");
+
+  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("exception_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  // TODO check various assumptions made here
+  //
+  // make sure we do so before running this
+
+  address start = __ pc();
+
+  // push rfp and retaddr by hand
+  // Exception pc is 'return address' for stack walker
+  __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
+  // there are no callee save registers and we don't expect an
+  // arg reg save area
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // Store exception in Thread object. We cannot pass any arguments to the
+  // handle_exception call, since we do not want to make any assumption
+  // about the size of the frame in which the exception happened.
+  __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
+
+  // This call does all the hard work.  It checks if an exception handler
+  // exists in the method.
+  // If so, it returns the handler address.
+  // If not, it prepares for stack-unwinding, restoring the callee-save
+  // registers of the frame being removed.
+  //
+  // address OptoRuntime::handle_exception_C(JavaThread* thread)
+  //
+  // n.b. 1 gp arg, 0 fp args, integral return type
+
+  // the stack should always be aligned
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, noreg, the_pc, rscratch1);
+  __ mov(c_rarg0, rthread);
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
+  __ blrt(rscratch1, 1, 0, MacroAssembler::ret_type_integral);
+  __ maybe_isb();
+
+  // Set an oopmap for the call site.  This oopmap will only be used if we
+  // are unwinding the stack.  Hence, all locations will be dead.
+  // Callee-saved registers will be the same as the frame above (i.e.,
+  // handle_exception_stub), since they were restored when we got the
+  // exception.
+
+  OopMapSet* oop_maps = new OopMapSet();
+
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  __ reset_last_Java_frame(false, true);
+
+  // Restore callee-saved registers
+
+  // rfp is an implicitly saved callee-saved register (i.e. the calling
+  // convention will save/restore it in the prolog/epilog).  Other than that
+  // there are no callee-save registers now that adapter frames are gone,
+  // and we don't expect an arg reg save area.
+  __ ldp(rfp, r3, Address(__ post(sp, 2 * wordSize)));
+
+  // r0: exception handler
+
+  // Restore SP from FP if the exception PC is a MethodHandle call site.
+  __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
+  // n.b. Intel uses special register rbp_mh_SP_save here but we will
+  // just hard wire rfp
+  __ cmpw(rscratch1, zr);
+  // the obvious way to conditionally copy rfp to sp if NE
+  // Label skip;
+  // __ br(Assembler::EQ, skip);
+  // __ mov(sp, rfp);
+  // __ bind(skip);
+  // same but branchless
+  __ mov(rscratch1, sp);
+  __ csel(rscratch1, rfp, rscratch1, Assembler::NE);
+  __ mov(sp, rscratch1);
+
+  // We have a handler in r0 (could be deopt blob).
+  __ mov(r8, r0);
+
+  // Get the exception oop
+  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  // Get the exception pc in case we are deoptimized
+  __ ldr(r4, Address(rthread, JavaThread::exception_pc_offset()));
+#ifdef ASSERT
+  __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
+#endif
+  // Clear the exception oop so GC no longer processes it as a root.
+  __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
+
+  // r0: exception oop
+  // r8:  exception handler
+  // r4: exception pc
+  // Jump to handler
+
+  __ br(r8);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Set exception blob
+  _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,2590 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/top.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "thread_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "thread_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "thread_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "thread_bsd.inline.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp
+
+#undef __
+#define __ _masm->
+#define TIMES_OOP Address::sxtw(exact_log2(UseCompressedOops ? 4 : 8))
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Stub Code definitions
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+
+#ifdef PRODUCT
+#define inc_counter_np(counter) (0)
+#else
+  void inc_counter_np_(int& counter) {
+    __ lea(rscratch2, ExternalAddress((address)&counter));
+    __ ldrw(rscratch1, Address(rscratch2));
+    __ addw(rscratch1, rscratch1, 1);
+    __ strw(rscratch1, Address(rscratch2));
+  }
+#define inc_counter_np(counter) \
+  BLOCK_COMMENT("inc_counter " #counter); \
+  inc_counter_np_(counter);
+#endif
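+
+  // n.b. the load/add/store sequence above is not atomic; these counters
+  // are statistics only and are compiled out of PRODUCT builds, so
+  // occasional lost updates do not matter.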
+
+  // Call stubs are used to call Java from C
+  //
+  // Arguments:
+  //    c_rarg0:   call wrapper address                   address
+  //    c_rarg1:   result                                 address
+  //    c_rarg2:   result type                            BasicType
+  //    c_rarg3:   method                                 methodOop
+  //    c_rarg4:   (interpreter) entry point              address
+  //    c_rarg5:   parameters                             intptr_t*
+  //    c_rarg6:   parameter size (in words)              int
+  //    c_rarg7:   thread                                 Thread*
+  //
+  // There is no return from the stub itself as any Java result
+  // is written to result
+  //
+  // we save r30 (lr) as the return PC at the base of the frame and
+  // link r29 (fp) below it as the frame pointer installing sp (r31)
+  // into fp.
+  //
+  // we save r0-r7, which accounts for all the c arguments.
+  //
+  // TODO: strictly do we need to save them all? they are treated as
+  // volatile by C so could we omit saving the ones we are going to
+  // place in global registers (thread? method?) or those we only use
+  // during setup of the Java call?
+  //
+  // we don't need to save r8, which C uses as the indirect result
+  // location register.
+  //
+  // we don't need to save r9-r15 which both C and Java treat as
+  // volatile
+  //
+  // we don't need to save r16-r18 because Java does not use them
+  //
+  // we save r19-r28 which Java uses as scratch registers and C
+  // expects to be callee-save
+  //
+  // we don't save any FP registers since only v8-v15 are callee-save
+  // (strictly only the f and d components) and Java uses them as
+  // callee-save. v0-v7 are arg registers and C treats v16-v31 as
+  // volatile (as does Java?)
+  //
+  // so the stub frame looks like this when we enter Java code
+  //
+  //     [ return_from_Java     ] <--- sp
+  //     [ argument word n      ]
+  //      ...
+  // -27 [ argument word 1      ]
+  // -26 [ saved d15            ] <--- sp_after_call
+  // -25 [ saved d14            ]
+  // -24 [ saved d13            ]
+  // -23 [ saved d12            ]
+  // -22 [ saved d11            ]
+  // -21 [ saved d10            ]
+  // -20 [ saved d9             ]
+  // -19 [ saved d8             ]
+  // -18 [ saved r28            ]
+  // -17 [ saved r27            ]
+  // -16 [ saved r26            ]
+  // -15 [ saved r25            ]
+  // -14 [ saved r24            ]
+  // -13 [ saved r23            ]
+  // -12 [ saved r22            ]
+  // -11 [ saved r21            ]
+  // -10 [ saved r20            ]
+  //  -9 [ saved r19            ]
+  //  -8 [ call wrapper    (r0) ]
+  //  -7 [ result          (r1) ]
+  //  -6 [ result type     (r2) ]
+  //  -5 [ method          (r3) ]
+  //  -4 [ entry point     (r4) ]
+  //  -3 [ parameters      (r5) ]
+  //  -2 [ parameter size  (r6) ]
+  //  -1 [ thread (r7)          ]
+  //   0 [ saved fp       (r29) ] <--- fp == saved sp (r31)
+  //   1 [ saved lr       (r30) ]
+
+  // Call stub stack layout word offsets from fp
+  enum call_stub_layout {
+    sp_after_call_off = -26,
+
+    d15_off            = -26,
+    d14_off            = -25,
+    d13_off            = -24,
+    d12_off            = -23,
+    d11_off            = -22,
+    d10_off            = -21,
+    d9_off             = -20,
+    d8_off             = -19,
+
+    r28_off            = -18,
+    r27_off            = -17,
+    r26_off            = -16,
+    r25_off            = -15,
+    r24_off            = -14,
+    r23_off            = -13,
+    r22_off            = -12,
+    r21_off            = -11,
+    r20_off            = -10,
+    r19_off            =  -9,
+    call_wrapper_off   =  -8,
+    result_off         =  -7,
+    result_type_off    =  -6,
+    method_off         =  -5,
+    entry_point_off    =  -4,
+    parameters_off     =  -3,
+    parameter_size_off =  -2,
+    thread_off         =  -1,
+    fp_f               =   0,
+    retaddr_off        =   1,
+  };
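+
+  // n.b. sp_after_call_off aliases d15_off: the d15 save slot is the
+  // lowest word of the save area (see the frame picture above), so
+  // rfp + sp_after_call_off * wordSize marks the end of the save area
+  // that sp (and later esp) are positioned at.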
+
+  address generate_call_stub(address& return_address) {
+    assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
+           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
+           "adjust this code");
+
+    StubCodeMark mark(this, "StubRoutines", "call_stub");
+    address start = __ pc();
+
+    const Address sp_after_call(rfp, sp_after_call_off * wordSize);
+
+    const Address call_wrapper  (rfp, call_wrapper_off   * wordSize);
+    const Address result        (rfp, result_off         * wordSize);
+    const Address result_type   (rfp, result_type_off    * wordSize);
+    const Address method        (rfp, method_off         * wordSize);
+    const Address entry_point   (rfp, entry_point_off    * wordSize);
+    const Address parameters    (rfp, parameters_off     * wordSize);
+    const Address parameter_size(rfp, parameter_size_off * wordSize);
+
+    const Address thread        (rfp, thread_off         * wordSize);
+
+    const Address d15_save      (rfp, d15_off * wordSize);
+    const Address d14_save      (rfp, d14_off * wordSize);
+    const Address d13_save      (rfp, d13_off * wordSize);
+    const Address d12_save      (rfp, d12_off * wordSize);
+    const Address d11_save      (rfp, d11_off * wordSize);
+    const Address d10_save      (rfp, d10_off * wordSize);
+    const Address d9_save       (rfp, d9_off * wordSize);
+    const Address d8_save       (rfp, d8_off * wordSize);
+
+    const Address r28_save      (rfp, r28_off * wordSize);
+    const Address r27_save      (rfp, r27_off * wordSize);
+    const Address r26_save      (rfp, r26_off * wordSize);
+    const Address r25_save      (rfp, r25_off * wordSize);
+    const Address r24_save      (rfp, r24_off * wordSize);
+    const Address r23_save      (rfp, r23_off * wordSize);
+    const Address r22_save      (rfp, r22_off * wordSize);
+    const Address r21_save      (rfp, r21_off * wordSize);
+    const Address r20_save      (rfp, r20_off * wordSize);
+    const Address r19_save      (rfp, r19_off * wordSize);
+
+    // stub code
+
+    // we need a C prolog to bootstrap the x86 caller into the sim
+    __ c_stub_prolog(8, 0, MacroAssembler::ret_type_void);
+
+    address aarch64_entry = __ pc();
+
+#ifdef BUILTIN_SIM
+    // Save sender's SP for stack traces.
+    __ mov(rscratch1, sp);
+    __ str(rscratch1, Address(__ pre(sp, -2 * wordSize)));
+#endif
+    // set up frame and move sp to end of save area
+    __ enter();
+    __ sub(sp, rfp, -sp_after_call_off * wordSize);
+
+    // save register parameters and Java scratch/global registers
+    // n.b. we save thread even though it gets installed in
+    // rthread because we want to sanity check rthread later
+    __ str(c_rarg7,  thread);
+    __ strw(c_rarg6, parameter_size);
+    __ str(c_rarg5,  parameters);
+    __ str(c_rarg4,  entry_point);
+    __ str(c_rarg3,  method);
+    __ str(c_rarg2,  result_type);
+    __ str(c_rarg1,  result);
+    __ str(c_rarg0,  call_wrapper);
+    __ str(r19,      r19_save);
+    __ str(r20,      r20_save);
+    __ str(r21,      r21_save);
+    __ str(r22,      r22_save);
+    __ str(r23,      r23_save);
+    __ str(r24,      r24_save);
+    __ str(r25,      r25_save);
+    __ str(r26,      r26_save);
+    __ str(r27,      r27_save);
+    __ str(r28,      r28_save);
+
+    __ strd(v8,      d8_save);
+    __ strd(v9,      d9_save);
+    __ strd(v10,     d10_save);
+    __ strd(v11,     d11_save);
+    __ strd(v12,     d12_save);
+    __ strd(v13,     d13_save);
+    __ strd(v14,     d14_save);
+    __ strd(v15,     d15_save);
+
+    // install Java thread in global register now we have saved
+    // whatever value it held
+    __ mov(rthread, c_rarg7);
+    // And method
+    __ mov(rmethod, c_rarg3);
+
+    // set up the heapbase register
+    __ reinit_heapbase();
+
+#ifdef ASSERT
+    // make sure we have no pending exceptions
+    {
+      Label L;
+      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+      __ cmp(rscratch1, (unsigned)NULL_WORD);
+      __ br(Assembler::EQ, L);
+      __ stop("StubRoutines::call_stub: entered with pending exception");
+      __ BIND(L);
+    }
+#endif
+    // pass parameters if any
+    __ mov(esp, sp);
+    __ sub(rscratch1, sp, c_rarg6, ext::uxtw, LogBytesPerWord); // Move SP out of the way
+    __ andr(sp, rscratch1, -2 * wordSize);
+
+    BLOCK_COMMENT("pass parameters if any");
+    Label parameters_done;
+    // parameter count is still in c_rarg6
+    // and parameter pointer identifying param 1 is in c_rarg5
+    __ cbzw(c_rarg6, parameters_done);
+
+    address loop = __ pc();
+    __ ldr(rscratch1, Address(__ post(c_rarg5, wordSize)));
+    __ subsw(c_rarg6, c_rarg6, 1);
+    __ push(rscratch1);
+    __ br(Assembler::GT, loop);
+
+    __ BIND(parameters_done);
+
+    // call Java entry -- passing methodOop and current sp
+    //      rmethod: methodOop
+    //      r13: sender sp
+    BLOCK_COMMENT("call Java function");
+    __ mov(r13, sp);
+    __ blr(c_rarg4);
+
+    // tell the simulator we have returned to the stub
+
+    // we do this here because the notify will already have been done
+    // if we get to the next instruction via an exception
+    //
+    // n.b. adding this instruction here affects the calculation of
+    // whether or not a routine returns to the call stub (used when
+    // doing stack walks) since the normal test is to check the return
+    // pc against the address saved below. so we may need to allow for
+    // this extra instruction in the check.
+
+    if (NotifySimulator) {
+      __ notify(Assembler::method_reentry);
+    }
+    // save current address for use by exception handling code
+
+    return_address = __ pc();
+
+    // store result depending on type (everything that is not
+    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+    // n.b. this assumes Java returns an integral result in r0
+    // and a floating result in j_farg0
+    __ ldr(j_rarg2, result);
+    Label is_long, is_float, is_double, exit;
+    __ ldr(j_rarg1, result_type);
+    __ cmp(j_rarg1, T_OBJECT);
+    __ br(Assembler::EQ, is_long);
+    __ cmp(j_rarg1, T_LONG);
+    __ br(Assembler::EQ, is_long);
+    __ cmp(j_rarg1, T_FLOAT);
+    __ br(Assembler::EQ, is_float);
+    __ cmp(j_rarg1, T_DOUBLE);
+    __ br(Assembler::EQ, is_double);
+
+    // handle T_INT case
+    __ strw(r0, Address(j_rarg2));
+
+    __ BIND(exit);
+
+    // pop parameters
+    __ sub(esp, rfp, -sp_after_call_off * wordSize);
+
+#ifdef ASSERT
+    // verify that threads correspond
+    {
+      Label L, S;
+      __ ldr(rscratch1, thread);
+      __ cmp(rthread, rscratch1);
+      __ br(Assembler::NE, S);
+      __ get_thread(rscratch1);
+      __ cmp(rthread, rscratch1);
+      __ br(Assembler::EQ, L);
+      __ BIND(S);
+      __ stop("StubRoutines::call_stub: threads must correspond");
+      __ BIND(L);
+    }
+#endif
+
+    // restore callee-save registers
+    __ ldrd(v15,      d15_save);
+    __ ldrd(v14,      d14_save);
+    __ ldrd(v13,      d13_save);
+    __ ldrd(v12,      d12_save);
+    __ ldrd(v11,      d11_save);
+    __ ldrd(v10,      d10_save);
+    __ ldrd(v9,       d9_save);
+    __ ldrd(v8,       d8_save);
+
+    __ ldr(r28,      r28_save);
+    __ ldr(r27,      r27_save);
+    __ ldr(r26,      r26_save);
+    __ ldr(r25,      r25_save);
+    __ ldr(r24,      r24_save);
+    __ ldr(r23,      r23_save);
+    __ ldr(r22,      r22_save);
+    __ ldr(r21,      r21_save);
+    __ ldr(r20,      r20_save);
+    __ ldr(r19,      r19_save);
+    __ ldr(c_rarg0,  call_wrapper);
+    __ ldr(c_rarg1,  result);
+    __ ldrw(c_rarg2, result_type);
+    __ ldr(c_rarg3,  method);
+    __ ldr(c_rarg4,  entry_point);
+    __ ldr(c_rarg5,  parameters);
+    __ ldr(c_rarg6,  parameter_size);
+    __ ldr(c_rarg7,  thread);
+
+#ifndef PRODUCT
+    // tell the simulator we are about to end Java execution
+    if (NotifySimulator) {
+      __ notify(Assembler::method_exit);
+    }
+#endif
+    // leave frame and return to caller
+    __ leave();
+    __ ret(lr);
+
+    // handle return types different from T_INT
+
+    __ BIND(is_long);
+    __ str(r0, Address(j_rarg2, 0));
+    __ br(Assembler::AL, exit);
+
+    __ BIND(is_float);
+    __ strs(j_farg0, Address(j_rarg2, 0));
+    __ br(Assembler::AL, exit);
+
+    __ BIND(is_double);
+    __ strd(j_farg0, Address(j_rarg2, 0));
+    __ br(Assembler::AL, exit);
+
+    return start;
+  }
+
+  // Return point for a Java call if there's an exception thrown in
+  // Java code.  The exception is caught and transformed into a
+  // pending exception stored in JavaThread that can be tested from
+  // within the VM.
+  //
+  // Note: Usually the parameters are removed by the callee. In case
+  // of an exception crossing an activation frame boundary, that is
+  // not the case if the callee is compiled code => need to set up the
+  // sp.
+  //
+  // r0: exception oop
+
+  // NOTE: this is used as a target from the signal handler so it
+  // needs an x86 prolog which returns into the current simulator
+  // executing the generated catch_exception code. so the prolog
+  // needs to install rax in a sim register and adjust the sim's
+  // restart pc to enter the generated code at the start position
+  // then return from native to simulated execution.
+
+  address generate_catch_exception() {
+    StubCodeMark mark(this, "StubRoutines", "catch_exception");
+    address start = __ pc();
+
+    // same as in generate_call_stub():
+    const Address sp_after_call(rfp, sp_after_call_off * wordSize);
+    const Address thread        (rfp, thread_off         * wordSize);
+
+#ifdef ASSERT
+    // verify that threads correspond
+    {
+      Label L, S;
+      __ ldr(rscratch1, thread);
+      __ cmp(rthread, rscratch1);
+      __ br(Assembler::NE, S);
+      __ get_thread(rscratch1);
+      __ cmp(rthread, rscratch1);
+      __ br(Assembler::EQ, L);
+      __ bind(S);
+      __ stop("StubRoutines::catch_exception: threads must correspond");
+      __ bind(L);
+    }
+#endif
+
+    // set pending exception
+    __ verify_oop(r0);
+
+    __ str(r0, Address(rthread, Thread::pending_exception_offset()));
+    __ mov(rscratch1, (address)__FILE__);
+    __ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
+    __ movw(rscratch1, (int)__LINE__);
+    __ strw(rscratch1, Address(rthread, Thread::exception_line_offset()));
+
+    // complete return to VM
+    assert(StubRoutines::_call_stub_return_address != NULL,
+           "_call_stub_return_address must have been generated before");
+    __ b(StubRoutines::_call_stub_return_address);
+
+    return start;
+  }
+
+  // Continuation point for runtime calls returning with a pending
+  // exception.  The pending exception check happened in the runtime
+  // or native call stub.  The pending exception in Thread is
+  // converted into a Java-level exception.
+  //
+  // Contract with Java-level exception handlers:
+  // r0: exception
+  // r3: throwing pc
+  //
+  // NOTE: At entry of this stub, exception-pc must be in LR !!
+
+  // NOTE: this is always used as a jump target within generated code
+  // so it just needs to be generated code with no x86 prolog
+
+  address generate_forward_exception() {
+    StubCodeMark mark(this, "StubRoutines", "forward exception");
+    address start = __ pc();
+
+    // Upon entry, LR points to the return address returning into
+    // Java (interpreted or compiled) code; i.e., the return address
+    // becomes the throwing pc.
+    //
+    // Arguments pushed before the runtime call are still on the stack
+    // but the exception handler will reset the stack pointer ->
+    // ignore them.  A potential result in registers can be ignored as
+    // well.
+
+#ifdef ASSERT
+    // make sure this code is only executed if there is a pending exception
+    {
+      Label L;
+      __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+      __ cbnz(rscratch1, L);
+      __ stop("StubRoutines::forward exception: no pending exception (1)");
+      __ bind(L);
+    }
+#endif
+
+    // compute exception handler into r19
+
+    // call the VM to find the handler address associated with the
+    // caller address. pass thread in r0 and caller pc (ret address)
+    // in r1. n.b. the caller pc is in lr, unlike x86 where it is on
+    // the stack.
+    __ mov(c_rarg1, lr);
+    // lr will be trashed by the VM call so we move it to R19
+    // (callee-saved) because we also need to pass it to the handler
+    // returned by this call.
+    __ mov(r19, lr);
+    BLOCK_COMMENT("call exception_handler_for_return_address");
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+                         SharedRuntime::exception_handler_for_return_address),
+                    rthread, c_rarg1);
+    // we should not really care that lr is no longer the callee
+    // address. we saved the value the handler needs in r19 so we can
+    // just copy it to r3. However, the C2 handler pushes its own
+    // frame and then calls into the VM, and the VM code asserts that
+    // the PC for the frame above the handler belongs to a compiled
+    // Java method. So, we restore lr here to satisfy that assert.
+    __ mov(lr, r19);
+    // setup r0 & r3 & clear pending exception
+    __ mov(r3, r19);
+    __ mov(r19, r0);
+    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
+    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
+
+#ifdef ASSERT
+    // make sure exception is set
+    {
+      Label L;
+      __ cbnz(r0, L);
+      __ stop("StubRoutines::forward exception: no pending exception (2)");
+      __ bind(L);
+    }
+#endif
+
+    // continue at exception handler
+    // r0: exception
+    // r3: throwing pc
+    // r19: exception handler
+    __ verify_oop(r0);
+    __ br(r19);
+
+    return start;
+  }
+
+  // Non-destructive plausibility checks for oops
+  //
+  // Arguments:
+  //    r0: oop to verify
+  //    rscratch1: error message
+  //
+  // Stack after saving c_rarg3:
+  //    [tos + 0]: saved c_rarg3
+  //    [tos + 1]: saved c_rarg2
+  //    [tos + 2]: saved lr
+  //    [tos + 3]: saved rscratch2
+  //    [tos + 4]: saved r0
+  //    [tos + 5]: saved rscratch1
+  address generate_verify_oop() {
+
+    StubCodeMark mark(this, "StubRoutines", "verify_oop");
+    address start = __ pc();
+
+    Label exit, error;
+
+    // save c_rarg2 and c_rarg3
+    __ stp(c_rarg3, c_rarg2, Address(__ pre(sp, -16)));
+
+    // __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
+    __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
+    __ ldr(c_rarg3, Address(c_rarg2));
+    __ add(c_rarg3, c_rarg3, 1);
+    __ str(c_rarg3, Address(c_rarg2));
+
+    // object is in r0
+    // make sure object is 'reasonable'
+    __ cbz(r0, exit); // if obj is NULL it is OK
+
+    // Check if the oop is in the right area of memory
+    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask());
+    __ andr(c_rarg2, r0, c_rarg3);
+    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits());
+
+    // Compare c_rarg2 and c_rarg3.  We don't use a compare
+    // instruction here because the flags register is live.
+    __ eor(c_rarg2, c_rarg2, c_rarg3);
+    __ cbnz(c_rarg2, error);
+
+    // make sure klass is 'reasonable', i.e. not NULL.
+    __ load_klass(r0, r0);  // get klass
+    __ cbz(r0, error);      // if klass is NULL it is broken
+    // Check if the klass is in the right area of memory
+    __ mov(c_rarg3, (intptr_t) Universe::verify_klass_mask());
+    __ andr(c_rarg2, r0, c_rarg3);
+    __ mov(c_rarg3, (intptr_t) Universe::verify_klass_bits());
+    // Compare c_rarg2 and c_rarg3.  We don't use a compare
+    // instruction here because the flags register is live.
+    __ eor(c_rarg2, c_rarg2, c_rarg3);
+    __ cbnz(c_rarg2, error);
+    // return if everything seems ok
+    __ bind(exit);
+
+    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));
+    __ ret(lr);
+
+    // handle errors
+    __ bind(error);
+    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));
+
+    __ push(RegSet::range(r0, r29), sp);
+    // debug(char* msg, int64_t pc, int64_t regs[])
+    __ mov(c_rarg0, rscratch1);      // pass address of error message
+    __ mov(c_rarg1, lr);             // pass return address
+    __ mov(c_rarg2, sp);             // pass address of regs on stack
+#ifndef PRODUCT
+    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+    BLOCK_COMMENT("call MacroAssembler::debug");
+    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+    __ blrt(rscratch1, 3, 0, 1);
+
+    return start;
+  }
+
+  void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
+
+  // Generate code for an array write pre barrier
+  //
+  //     addr    -  starting address
+  //     count   -  element count
+  //     tmp     - scratch register
+  //
+  //     Destroys no registers except rscratch1 and rscratch2
+  //
+  void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
+    BarrierSet* bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      // With G1, don't generate the call if we statically know that the target is uninitialized
+      if (!dest_uninitialized) {
+        __ push_call_clobbered_registers();
+        if (count == c_rarg0) {
+          if (addr == c_rarg1) {
+            // exactly backwards!!
+            __ mov(rscratch1, c_rarg0);
+            __ mov(c_rarg0, c_rarg1);
+            __ mov(c_rarg1, rscratch1);
+          } else {
+            __ mov(c_rarg1, count);
+            __ mov(c_rarg0, addr);
+          }
+        } else {
+          __ mov(c_rarg0, addr);
+          __ mov(c_rarg1, count);
+        }
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
+        __ pop_call_clobbered_registers();
+      }
+      break;
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+    case BarrierSet::ModRef:
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  //
+  // Generate code for an array write post barrier
+  //
+  //  Input:
+  //     start    - register containing starting address of destination array
+  //     end      - register containing ending address of destination array
+  //     scratch  - scratch register
+  //
+  //  The input registers are overwritten.
+  //  The ending address is inclusive.
+  void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
+    assert_different_registers(start, end, scratch);
+    BarrierSet* bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCT:
+      case BarrierSet::G1SATBCTLogging:
+
+        {
+          __ push_call_clobbered_registers();
+          // must compute element count unless barrier set interface is changed (other platforms supply count)
+          assert_different_registers(start, end, scratch);
+          __ lea(scratch, Address(end, BytesPerHeapOop));
+          __ sub(scratch, scratch, start);               // subtract start to get #bytes
+          __ lsr(scratch, scratch, LogBytesPerHeapOop);  // convert to element count
+          __ mov(c_rarg0, start);
+          __ mov(c_rarg1, scratch);
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
+          __ pop_call_clobbered_registers();
+        }
+        break;
+      case BarrierSet::CardTableModRef:
+      case BarrierSet::CardTableExtension:
+        {
+          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+          Label L_loop;
+
+           __ lsr(start, start, CardTableModRefBS::card_shift);
+           __ lsr(end, end, CardTableModRefBS::card_shift);
+           __ sub(end, end, start); // number of card table bytes to dirty, minus one
+
+          const Register count = end; // 'end' now holds the card byte count (minus one)
+          __ load_byte_map_base(scratch);
+          __ add(start, start, scratch);
+          if (UseConcMarkSweepGC) {
+            __ membar(__ StoreStore);
+          }
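+          // Dirty each card by storing zero (the dirty-card value) into the
+          // byte map; 'count' runs from the card span down to zero, so the
+          // inclusive range [start, start + count] is covered.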
+          __ BIND(L_loop);
+          __ strb(zr, Address(start, count));
+          __ subs(count, count, 1);
+          __ br(Assembler::HS, L_loop);
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+
+    }
+  }
+
+  address generate_zero_longs(Register base, Register cnt) {
+    Register tmp = rscratch1;
+    Register tmp2 = rscratch2;
+    int zva_length = VM_Version::zva_length();
+    Label initial_table_end, loop_zva;
+    Label fini;
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "zero_longs");
+    address start = __ pc();
+
+    // Base must be 16-byte aligned. If not, just return and let the caller handle it.
+    __ tst(base, 0x0f);
+    __ br(Assembler::NE, fini);
+    // Align base with ZVA length.
+    __ neg(tmp, base);
+    __ andr(tmp, tmp, zva_length - 1);
+
+    // tmp: the number of bytes to be filled to align the base with ZVA length.
+    __ add(base, base, tmp);
+    __ sub(cnt, cnt, tmp, Assembler::ASR, 3);
+    __ adr(tmp2, initial_table_end);
+    __ sub(tmp2, tmp2, tmp, Assembler::LSR, 2);
+    __ br(tmp2);
+
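+    // Each 'stp zr, zr' in the table below zeroes 16 bytes and occupies 4
+    // bytes of code, so the branch above skips back (tmp / 16) * 4 == tmp >> 2
+    // bytes from initial_table_end, executing exactly enough stores to clear
+    // the 'tmp' alignment bytes.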
+    for (int i = -zva_length + 16; i < 0; i += 16)
+      __ stp(zr, zr, Address(base, i));
+    __ bind(initial_table_end);
+
+    __ sub(cnt, cnt, zva_length >> 3);
+    __ bind(loop_zva);
+    __ dc(Assembler::ZVA, base);
+    __ subs(cnt, cnt, zva_length >> 3);
+    __ add(base, base, zva_length);
+    __ br(Assembler::GE, loop_zva);
+    __ add(cnt, cnt, zva_length >> 3); // count not zeroed by DC ZVA
+    __ bind(fini);
+    __ ret(lr);
+
+    return start;
+  }
+
+  typedef enum {
+    copy_forwards = 1,
+    copy_backwards = -1
+  } copy_direction;
+
+  // Bulk copy of blocks of 8 words.
+  //
+  // count is a count of words.
+  //
+  // Precondition: count >= 2
+  //
+  // Postconditions:
+  //
+  // The least significant bit of count contains the remaining count
+  // of words to copy.  The rest of count is trash.
+  //
+  // s and d are adjusted to point to the remaining words to copy
+  //
+  void generate_copy_longs(Label &start, Register s, Register d, Register count,
+                           copy_direction direction) {
+    int unit = wordSize * direction;
+
+    int offset;
+    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
+      t4 = r7, t5 = r10, t6 = r11, t7 = r12;
+    const Register stride = r13;
+
+    assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
+    assert_different_registers(s, d, count, rscratch1);
+
+    Label again, large, small;
+    __ align(CodeEntryAlignment);
+    __ bind(start);
+    __ cmp(count, 8);
+    __ br(Assembler::LO, small);
+    if (direction == copy_forwards) {
+      __ sub(s, s, 2 * wordSize);
+      __ sub(d, d, 2 * wordSize);
+    }
+    __ subs(count, count, 16);
+    __ br(Assembler::GE, large);
+
+    // 8 <= count < 16 words.  Copy 8.
+    __ ldp(t0, t1, Address(s, 2 * unit));
+    __ ldp(t2, t3, Address(s, 4 * unit));
+    __ ldp(t4, t5, Address(s, 6 * unit));
+    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+
+    __ stp(t0, t1, Address(d, 2 * unit));
+    __ stp(t2, t3, Address(d, 4 * unit));
+    __ stp(t4, t5, Address(d, 6 * unit));
+    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+
+    if (direction == copy_forwards) {
+      __ add(s, s, 2 * wordSize);
+      __ add(d, d, 2 * wordSize);
+    }
+
+    {
+      Label L1, L2;
+      __ bind(small);
+      __ tbz(count, exact_log2(4), L1);
+      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ bind(L1);
+
+      __ tbz(count, 1, L2);
+      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ bind(L2);
+    }
+
+    __ ret(lr);
+
+    __ align(CodeEntryAlignment);
+    __ bind(large);
+
+    // Fill 8 registers
+    __ ldp(t0, t1, Address(s, 2 * unit));
+    __ ldp(t2, t3, Address(s, 4 * unit));
+    __ ldp(t4, t5, Address(s, 6 * unit));
+    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+
+    int prefetch = PrefetchCopyIntervalInBytes;
+    bool use_stride = false;
+    if (direction == copy_backwards) {
+       use_stride = prefetch > 256;
+       prefetch = -prefetch;
+       if (use_stride) __ mov(stride, prefetch);
+    }
+
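+    // Software-pipelined main loop: each iteration stores the eight words
+    // loaded on the previous iteration while loading the next eight; the
+    // final loads are written out by the drain sequence after the loop.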
+    __ bind(again);
+
+    if (PrefetchCopyIntervalInBytes > 0)
+      __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
+
+    __ stp(t0, t1, Address(d, 2 * unit));
+    __ ldp(t0, t1, Address(s, 2 * unit));
+    __ stp(t2, t3, Address(d, 4 * unit));
+    __ ldp(t2, t3, Address(s, 4 * unit));
+    __ stp(t4, t5, Address(d, 6 * unit));
+    __ ldp(t4, t5, Address(s, 6 * unit));
+    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+
+    __ subs(count, count, 8);
+    __ br(Assembler::HS, again);
+
+    // Drain
+    __ stp(t0, t1, Address(d, 2 * unit));
+    __ stp(t2, t3, Address(d, 4 * unit));
+    __ stp(t4, t5, Address(d, 6 * unit));
+    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+
+    if (direction == copy_forwards) {
+      __ add(s, s, 2 * wordSize);
+      __ add(d, d, 2 * wordSize);
+    }
+
+    {
+      Label L1, L2;
+      __ tbz(count, exact_log2(4), L1);
+      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ bind(L1);
+
+      __ tbz(count, 1, L2);
+      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      __ bind(L2);
+    }
+
+    __ ret(lr);
+  }
+
+  // Small copy: less than 16 bytes.
+  //
+  // NB: Ignores all of the bits of count which represent more than 15
+  // bytes, so a caller doesn't have to mask them.
+
+  void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) {
+    bool is_backwards = step < 0;
+    size_t granularity = uabs(step);
+    int direction = is_backwards ? -1 : 1;
+    int unit = wordSize * direction;
+
+    Label Lpair, Lword, Lint, Lshort, Lbyte;
+
+    assert(granularity
+           && granularity <= sizeof (jlong), "Impossible granularity in copy_memory_small");
+
+    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6;
+
+    // ??? I don't know if this bit-test-and-branch is the right thing
+    // to do.  It does a lot of jumping, resulting in several
+    // mispredicted branches.  It might make more sense to do this
+    // with something like Duff's device with a single computed branch.
+
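+    // Each tbz below tests one bit of the remaining element count: the bit
+    // at (3 - log2(granularity)) selects an 8-byte chunk, the next lower bit
+    // a 4-byte chunk, and so on down to a single element.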
+    __ tbz(count, 3 - exact_log2(granularity), Lword);
+    __ ldr(tmp, Address(__ adjust(s, unit, is_backwards)));
+    __ str(tmp, Address(__ adjust(d, unit, is_backwards)));
+    __ bind(Lword);
+
+    if (granularity <= sizeof (jint)) {
+      __ tbz(count, 2 - exact_log2(granularity), Lint);
+      __ ldrw(tmp, Address(__ adjust(s, sizeof (jint) * direction, is_backwards)));
+      __ strw(tmp, Address(__ adjust(d, sizeof (jint) * direction, is_backwards)));
+      __ bind(Lint);
+    }
+
+    if (granularity <= sizeof (jshort)) {
+      __ tbz(count, 1 - exact_log2(granularity), Lshort);
+      __ ldrh(tmp, Address(__ adjust(s, sizeof (jshort) * direction, is_backwards)));
+      __ strh(tmp, Address(__ adjust(d, sizeof (jshort) * direction, is_backwards)));
+      __ bind(Lshort);
+    }
+
+    if (granularity <= sizeof (jbyte)) {
+      __ tbz(count, 0, Lbyte);
+      __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
+      __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
+      __ bind(Lbyte);
+    }
+  }
+
+  Label copy_f, copy_b;
+
+  // All-singing all-dancing memory copy.
+  //
+  // Copy count units of memory from s to d.  The size of a unit is
+  // step, which can be positive or negative depending on the direction
+  // of copy.  If is_aligned is false, we align the source address.
+  //
+
+  void copy_memory(bool is_aligned, Register s, Register d,
+                   Register count, Register tmp, int step) {
+    copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
+    bool is_backwards = step < 0;
+    int granularity = uabs(step);
+    const Register t0 = r3, t1 = r4;
+
+    if (is_backwards) {
+      __ lea(s, Address(s, count, Address::uxtw(exact_log2(-step))));
+      __ lea(d, Address(d, count, Address::uxtw(exact_log2(-step))));
+    }
+
+    Label done, tail;
+
+    __ cmp(count, 16/granularity);
+    __ br(Assembler::LO, tail);
+
+    // Now we've got the small case out of the way we can align the
+    // source address on a 2-word boundary.
+
+    Label aligned;
+
+    if (is_aligned) {
+      // We may have to adjust by 1 word to get s 2-word-aligned.
+      __ tbz(s, exact_log2(wordSize), aligned);
+      __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
+      __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
+      __ sub(count, count, wordSize/granularity);
+    } else {
+      if (is_backwards) {
+        __ andr(rscratch2, s, 2 * wordSize - 1);
+      } else {
+        __ neg(rscratch2, s);
+        __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
+      }
+      // rscratch2 is the byte adjustment needed to align s.
+      __ cbz(rscratch2, aligned);
+      int shift = exact_log2(granularity);
+      if (shift)  __ lsr(rscratch2, rscratch2, shift);
+      __ sub(count, count, rscratch2);
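+      // rscratch2 now holds the element count needed to bring s to a 2-word
+      // boundary; copy those elements individually before the bulk loop.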
+
+#if 0
+      // ?? This code is only correct for a disjoint copy.  It may or
+      // may not make sense to use it in that case.
+
+      // Copy the first pair; s and d may not be aligned.
+      __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0));
+      __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0));
+
+      // Align s and d, adjust count
+      if (is_backwards) {
+        __ sub(s, s, rscratch2);
+        __ sub(d, d, rscratch2);
+      } else {
+        __ add(s, s, rscratch2);
+        __ add(d, d, rscratch2);
+      }
+#else
+      copy_memory_small(s, d, rscratch2, rscratch1, step);
+#endif
+    }
+
+    __ cmp(count, 16/granularity);
+    __ br(Assembler::LT, tail);
+    __ bind(aligned);
+
+    // s is now 2-word-aligned.
+
+    // We have a count of units and some trailing bytes.  Adjust the
+    // count and do a bulk copy of words.
+    __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
+    if (direction == copy_forwards)
+      __ bl(copy_f);
+    else
+      __ bl(copy_b);
+
+    // And the tail.
+
+    __ bind(tail);
+    copy_memory_small(s, d, count, tmp, step);
+  }
+
+
+  void clobber_registers() {
+#ifdef ASSERT
+    __ mov(rscratch1, (uint64_t)0xdeadbeef);
+    __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
+    for (Register r = r3; r <= r18; r++)
+      if (r != rscratch1) __ mov(r, rscratch1);
+#endif
+  }
+
+  // Scan over array at a for count oops, verifying each one.
+  // Preserves a and count, clobbers rscratch1 and rscratch2.
+  void verify_oop_array (size_t size, Register a, Register count, Register temp) {
+    Label loop, end;
+    __ mov(rscratch1, a);
+    __ mov(rscratch2, zr);
+    __ bind(loop);
+    __ cmp(rscratch2, count);
+    __ br(Assembler::HS, end);
+    if (size == (size_t)wordSize) {
+      __ ldr(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
+      __ verify_oop(temp);
+    } else {
+      __ ldrw(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
+      __ decode_heap_oop(temp); // calls verify_oop
+    }
+    __ add(rscratch2, rscratch2, size);
+    __ b(loop);
+    __ bind(end);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
+  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
+                                  const char *name, bool dest_uninitialized = false) {
+    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
+    __ enter();
+    if (is_oop) {
+      __ push(RegSet::of(d, count), sp);
+      // no registers are destroyed by this call
+      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
+    }
+    copy_memory(aligned, s, d, count, rscratch1, size);
+    if (is_oop) {
+      __ pop(RegSet::of(d, count), sp);
+      if (VerifyOops)
+        verify_oop_array(size, d, count, r16);
+      __ sub(count, count, 1); // make an inclusive end pointer
+      __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
+      gen_write_ref_array_post_barrier(d, count, rscratch1);
+    }
+    __ leave();
+    __ ret(lr);
+#ifdef BUILTIN_SIM
+    {
+      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+      sim->notifyCompile(const_cast<char*>(name), start);
+    }
+#endif
+    return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+                                 address *entry, const char *name,
+                                 bool dest_uninitialized = false) {
+    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ cmp(d, s);
+    __ br(Assembler::LS, nooverlap_target);
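+    // If the destination starts at or below the source, a forward copy is
+    // safe even when the ranges overlap, so we can use the disjoint stub.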
+
+    __ enter();
+    if (is_oop) {
+      __ push(RegSet::of(d, count), sp);
+      // no registers are destroyed by this call
+      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
+    }
+    copy_memory(aligned, s, d, count, rscratch1, -size);
+    if (is_oop) {
+      __ pop(RegSet::of(d, count), sp);
+      if (VerifyOops)
+        verify_oop_array(size, d, count, r16);
+      __ sub(count, count, 1); // make an inclusive end pointer
+      __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
+      gen_write_ref_array_post_barrier(d, count, rscratch1);
+    }
+    __ leave();
+    __ ret(lr);
+#ifdef BUILTIN_SIM
+    {
+      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+      sim->notifyCompile(const_cast<char*>(name), start);
+    }
+#endif
+    return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_byte_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_byte_copy().
+  //
+  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
+    const bool not_oop = false;
+    return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+                                      address* entry, const char *name) {
+    const bool not_oop = false;
+    return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_short_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_short_copy().
+  //
+  address generate_disjoint_short_copy(bool aligned,
+                                       address* entry, const char *name) {
+    const bool not_oop = false;
+    return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+                                       address *entry, const char *name) {
+    const bool not_oop = false;
+    return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
+  address generate_disjoint_int_copy(bool aligned, address *entry,
+                                         const char *name, bool dest_uninitialized = false) {
+    const bool not_oop = false;
+    return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+                                     address *entry, const char *name,
+                                     bool dest_uninitialized = false) {
+    const bool not_oop = false;
+    return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
+  }
+
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_long_copy(bool aligned, address *entry,
+                                          const char *name, bool dest_uninitialized = false) {
+    const bool not_oop = false;
+    return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_long_copy(bool aligned,
+                                      address nooverlap_target, address *entry,
+                                      const char *name, bool dest_uninitialized = false) {
+    const bool not_oop = false;
+    return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_oop_copy(bool aligned, address *entry,
+                                     const char *name, bool dest_uninitialized) {
+    const bool is_oop = true;
+    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+    return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_oop_copy(bool aligned,
+                                     address nooverlap_target, address *entry,
+                                     const char *name, bool dest_uninitialized) {
+    const bool is_oop = true;
+    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
+                                  name, dest_uninitialized);
+  }
+
+
+  // Helper for generating a dynamic type check.
+  // Smashes rscratch1.
+  void generate_type_check(Register sub_klass,
+                           Register super_check_offset,
+                           Register super_klass,
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    Label L_miss;
+
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
+                                     super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
+
+    // Fall through on failure!
+    __ BIND(L_miss);
+  }
+
+  //
+  //  Generate checkcasting array copy stub
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - element count, treated as ssize_t, can be zero
+  //    c_rarg3   - size_t ckoff (super_check_offset)
+  //    c_rarg4   - oop ckval (super_klass)
+  //
+  //  Output:
+  //    r0 ==  0  -  success
+  //    r0 == -1^K - failure, where K is partial transfer count
+  //
+  address generate_checkcast_copy(const char *name, address *entry,
+                                  bool dest_uninitialized = false) {
+
+    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
+
+    // Input registers (after setup_arg_regs)
+    const Register from        = c_rarg0;   // source array address
+    const Register to          = c_rarg1;   // destination array address
+    const Register count       = c_rarg2;   // elements count
+    const Register ckoff       = c_rarg3;   // super_check_offset
+    const Register ckval       = c_rarg4;   // super_klass
+
+    // Registers used as temps (r18, r19, r20 are save-on-entry)
+    const Register count_save  = r21;       // orig elements count
+    const Register start_to    = r20;       // destination array start address
+    const Register copied_oop  = r18;       // actual oop copied
+    const Register r19_klass   = r19;       // oop._klass
+
+    //---------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the two arrays are subtypes of Object[] but the
+    // destination array type is not equal to or a supertype
+    // of the source type.  Each element must be separately
+    // checked.
+
+    assert_different_registers(from, to, count, ckoff, ckval, start_to,
+                               copied_oop, r19_klass, count_save);
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef ASSERT
+    // caller guarantees that the arrays really are different
+    // otherwise, we would have to make conjoint checks
+    { Label L;
+      array_overlap_test(L, TIMES_OOP);
+      __ stop("checkcast_copy within a single array");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    // Caller of this entry point must set up the argument registers.
+    if (entry != NULL) {
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+     // Empty array:  Nothing to do.
+    __ cbz(count, L_done);
+
+    __ push(RegSet::of(r18, r19, r20, r21), sp);
+
+#ifdef ASSERT
+    BLOCK_COMMENT("assert consistent ckoff/ckval");
+    // The ckoff and ckval must be mutually consistent,
+    // even though caller generates both.
+    { Label L;
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ ldrw(start_to, Address(ckval, sco_offset));
+      __ cmpw(ckoff, start_to);
+      __ br(Assembler::EQ, L);
+      __ stop("super_check_offset inconsistent");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
+
+    // save the original count
+    __ mov(count_save, count);
+
+    // Copy from low to high addresses
+    __ mov(start_to, to);              // Save destination array start address
+    __ b(L_load_element);
+
+    // ======== begin loop ========
+    // (Loop is rotated; its entry is L_load_element.)
+    // Loop control:
+    //   for (; count != 0; count--) {
+    //     copied_oop = load_heap_oop(from++);
+    //     ... generate_type_check ...;
+    //     store_heap_oop(to++, copied_oop);
+    //   }
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_store_element);
+    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop);  // store the oop
+    __ sub(count, count, 1);
+    __ cbz(count, L_do_card_marks);
+
+    // ======== loop entry is here ========
+    __ BIND(L_load_element);
+    __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8)); // load the oop
+    __ cbz(copied_oop, L_store_element);
+
+    __ load_klass(r19_klass, copied_oop);// query the object klass
+    generate_type_check(r19_klass, ckoff, ckval, L_store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register count = remaining oops, count_orig = total oops.
+    // Emit GC store barriers for the oops we have copied and report
+    // their number to the caller.
+
+    __ subs(count, count_save, count);     // K = partially copied oop count
+    __ eon(count, count, zr);                   // report (-1^K) to caller
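+    // eon with zr is a bitwise NOT, so count now holds ~K == -1 ^ K.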
+    __ br(Assembler::EQ, L_done_pop);
+
+    __ BIND(L_do_card_marks);
+    __ add(to, to, -heapOopSize);         // make an inclusive end pointer
+    gen_write_ref_array_post_barrier(start_to, to, rscratch1);
+
+    __ bind(L_done_pop);
+    __ pop(RegSet::of(r18, r19, r20, r21), sp);
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+
+    __ bind(L_done);
+    __ mov(r0, count);
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Perform range checks on the proposed arraycopy.
+  // Kills temp, but nothing else.
+  // Also, clean the sign bits of src_pos and dst_pos.
+  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
+                              Register src_pos, // source position (c_rarg1)
+                              Register dst,     // destination array oop (c_rarg2)
+                              Register dst_pos, // destination position (c_rarg3)
+                              Register length,
+                              Register temp,
+                              Label& L_failed) { Unimplemented(); }
+
+  // These stubs get called from some dumb test routine.
+  // I'll write them properly when they're called from
+  // something that's actually doing something.
+  static void fake_arraycopy_stub(address src, address dst, int count) {
+    assert(count == 0, "huh?");
+  }
+
+
+  //
+  // Generate stub for array fill. If "aligned" is true, the
+  // "to" address is assumed to be heapword aligned.
+  //
+  // Arguments for generated stub:
+  //   to:    c_rarg0
+  //   value: c_rarg1
+  //   count: c_rarg2 treated as signed
+  //
+  address generate_fill(BasicType t, bool aligned, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    BLOCK_COMMENT("Entry:");
+
+    const Register to        = c_rarg0;  // destination array address
+    const Register value     = c_rarg1;  // value
+    const Register count     = c_rarg2;  // elements count
+
+    const Register bz_base = r10;        // base for block_zero routine
+    const Register cnt_words = r11;      // temp register
+
+    __ enter();
+
+    Label L_fill_elements, L_exit1;
+
+    int shift = -1;
+    switch (t) {
+      case T_BYTE:
+        shift = 0;
+        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
+        __ bfi(value, value, 8, 8);   // 8 bit -> 16 bit
+        __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
+        __ br(Assembler::LO, L_fill_elements);
+        break;
+      case T_SHORT:
+        shift = 1;
+        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
+        __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
+        __ br(Assembler::LO, L_fill_elements);
+        break;
+      case T_INT:
+        shift = 2;
+        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
+        __ br(Assembler::LO, L_fill_elements);
+        break;
+      default: ShouldNotReachHere();
+    }
+
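+    // At this point 'value' holds the fill pattern replicated across 32 bits,
+    // and arrays shorter than 8 bytes have already branched to L_fill_elements.
+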
+    // Align the destination address to an 8-byte boundary.
+    Label L_skip_align1, L_skip_align2, L_skip_align4;
+    if (!aligned) {
+      switch (t) {
+        case T_BYTE:
+          // One byte misalignment happens only for byte arrays.
+          __ tbz(to, 0, L_skip_align1);
+          __ strb(value, Address(__ post(to, 1)));
+          __ subw(count, count, 1);
+          __ bind(L_skip_align1);
+          // Fallthrough
+        case T_SHORT:
+          // Two bytes misalignment happens only for byte and short (char) arrays.
+          __ tbz(to, 1, L_skip_align2);
+          __ strh(value, Address(__ post(to, 2)));
+          __ subw(count, count, 2 >> shift);
+          __ bind(L_skip_align2);
+          // Fallthrough
+        case T_INT:
+          // Align to 8 bytes, we know we are 4 byte aligned to start.
+          __ tbz(to, 2, L_skip_align4);
+          __ strw(value, Address(__ post(to, 4)));
+          __ subw(count, count, 4 >> shift);
+          __ bind(L_skip_align4);
+          break;
+        default: ShouldNotReachHere();
+      }
+    }
+
+    //
+    //  Fill large chunks
+    //
+    __ lsrw(cnt_words, count, 3 - shift); // number of words
+    __ bfi(value, value, 32, 32);         // 32 bit -> 64 bit
+    __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
+    if (UseBlockZeroing) {
+      Label non_block_zeroing, rest;
+      // count >= BlockZeroingLowLimit && value == 0
+      __ cmp(cnt_words, BlockZeroingLowLimit >> 3);
+      __ ccmp(value, 0 /* comparing value */, 0 /* NZCV */, Assembler::GE);
+      __ br(Assembler::NE, non_block_zeroing);
+      __ mov(bz_base, to);
+      __ block_zero(bz_base, cnt_words, true);
+      __ mov(to, bz_base);
+      __ b(rest);
+      __ bind(non_block_zeroing);
+      __ fill_words(to, cnt_words, value);
+      __ bind(rest);
+    }
+    else {
+      __ fill_words(to, cnt_words, value);
+    }
+
+    // Remaining count is less than 8 bytes. Fill it by a single store.
+    // Note that the total length is no less than 8 bytes.
+    if (t == T_BYTE || t == T_SHORT) {
+      Label L_exit1;
+      __ cbzw(count, L_exit1);
+      __ add(to, to, count, Assembler::LSL, shift); // points to the end
+      __ str(value, Address(to, -8));    // overwrite some elements
+      __ bind(L_exit1);
+      __ leave();
+      __ ret(lr);
+    }
+
+    // Handle copies less than 8 bytes.
+    Label L_fill_2, L_fill_4, L_exit2;
+    __ bind(L_fill_elements);
+    switch (t) {
+      case T_BYTE:
+        __ tbz(count, 0, L_fill_2);
+        __ strb(value, Address(__ post(to, 1)));
+        __ bind(L_fill_2);
+        __ tbz(count, 1, L_fill_4);
+        __ strh(value, Address(__ post(to, 2)));
+        __ bind(L_fill_4);
+        __ tbz(count, 2, L_exit2);
+        __ strw(value, Address(to));
+        break;
+      case T_SHORT:
+        __ tbz(count, 0, L_fill_4);
+        __ strh(value, Address(__ post(to, 2)));
+        __ bind(L_fill_4);
+        __ tbz(count, 1, L_exit2);
+        __ strw(value, Address(to));
+        break;
+      case T_INT:
+        __ cbzw(count, L_exit2);
+        __ strw(value, Address(to));
+        break;
+      default: ShouldNotReachHere();
+    }
+    __ bind(L_exit2);
+    __ leave();
+    __ ret(lr);
+    return start;
+  }
+
+  void generate_arraycopy_stubs() {
+    address entry;
+    address entry_jbyte_arraycopy;
+    address entry_jshort_arraycopy;
+    address entry_jint_arraycopy;
+    address entry_oop_arraycopy;
+    address entry_jlong_arraycopy;
+    address entry_checkcast_arraycopy;
+
+    generate_copy_longs(copy_f, r0, r1, rscratch2, copy_forwards);
+    generate_copy_longs(copy_b, r0, r1, rscratch2, copy_backwards);
+
+    StubRoutines::aarch64::_zero_longs = generate_zero_longs(r10, r11);
+
+    //*** jbyte
+    // Always need aligned and unaligned versions
+    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
+                                                                                  "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
+                                                                                  &entry_jbyte_arraycopy,
+                                                                                  "jbyte_arraycopy");
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
+                                                                                  "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
+                                                                                  "arrayof_jbyte_arraycopy");
+
+    //*** jshort
+    // Always need aligned and unaligned versions
+    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
+                                                                                    "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
+                                                                                    &entry_jshort_arraycopy,
+                                                                                    "jshort_arraycopy");
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
+                                                                                    "arrayof_jshort_disjoint_arraycopy");
+    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
+                                                                                    "arrayof_jshort_arraycopy");
+
+    //*** jint
+    // Aligned versions
+    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
+                                                                                "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+                                                                                "arrayof_jint_arraycopy");
+    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+    // entry_jint_arraycopy always points to the unaligned version
+    StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
+                                                                                "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
+                                                                                &entry_jint_arraycopy,
+                                                                                "jint_arraycopy");
+
+    //*** jlong
+    // It is always aligned
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
+                                                                                  "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+                                                                                  "arrayof_jlong_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+    StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
+
+    //*** oops
+    {
+      // With compressed oops we need unaligned versions; notice that
+      // we overwrite entry_oop_arraycopy.
+      bool aligned = !UseCompressedOops;
+
+      StubRoutines::_arrayof_oop_disjoint_arraycopy
+        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
+                                     /*dest_uninitialized*/false);
+      StubRoutines::_arrayof_oop_arraycopy
+        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
+                                     /*dest_uninitialized*/false);
+      // Aligned versions without pre-barriers
+      StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
+        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
+                                     /*dest_uninitialized*/true);
+      StubRoutines::_arrayof_oop_arraycopy_uninit
+        = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
+                                     /*dest_uninitialized*/true);
+    }
+
+    StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+    StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
+    StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
+
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+
+    Label L_doLast;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter();
+
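+    // keylen is the expanded key length in ints: 44, 52 or 60 for AES-128,
+    // AES-192 and AES-256 respectively.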
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v0, __ T16B, from); // get 16 bytes of input
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+    __ aese(v0, v3);
+    __ aesmc(v0, v0);
+    __ aese(v0, v4);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+    __ aese(v0, v3);
+    __ aesmc(v0, v0);
+    __ aese(v0, v4);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 44);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ BIND(L_doLast);
+
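+    // Last two rounds: the final AESE is not followed by AESMC (no
+    // MixColumns), and the last round key is applied with the EOR below.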
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+
+    __ ld1(v1, __ T16B, key);
+    __ rev32(v1, __ T16B, v1);
+    __ eor(v0, __ T16B, v0, v1);
+
+    __ st1(v0, __ T16B, to);
+
+    __ mov(r0, 0);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v0, __ T16B, from); // get 16 bytes of input
+
+    __ ld1(v5, __ T16B, __ post(key, 16));
+    __ rev32(v5, __ T16B, v5);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v3);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v4);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v3);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v4);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 44);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ BIND(L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+
+    __ eor(v0, __ T16B, v0, v5);
+
+    __ st1(v0, __ T16B, to);
+
+    __ mov(r0, 0);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   x0        - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+
+    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+      __ enter();
+
+      __ mov(rscratch2, len_reg);
+      __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+      __ ld1(v0, __ T16B, rvec);
+
+      __ cmpw(keylen, 52);
+      __ br(Assembler::CC, L_loadkeys_44);
+      __ br(Assembler::EQ, L_loadkeys_52);
+
+      __ ld1(v17, v18, __ T16B, __ post(key, 32));
+      __ rev32(v17, __ T16B, v17);
+      __ rev32(v18, __ T16B, v18);
+    __ BIND(L_loadkeys_52);
+      __ ld1(v19, v20, __ T16B, __ post(key, 32));
+      __ rev32(v19, __ T16B, v19);
+      __ rev32(v20, __ T16B, v20);
+    __ BIND(L_loadkeys_44);
+      __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
+      __ rev32(v21, __ T16B, v21);
+      __ rev32(v22, __ T16B, v22);
+      __ rev32(v23, __ T16B, v23);
+      __ rev32(v24, __ T16B, v24);
+      __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
+      __ rev32(v25, __ T16B, v25);
+      __ rev32(v26, __ T16B, v26);
+      __ rev32(v27, __ T16B, v27);
+      __ rev32(v28, __ T16B, v28);
+      __ ld1(v29, v30, v31, __ T16B, key);
+      __ rev32(v29, __ T16B, v29);
+      __ rev32(v30, __ T16B, v30);
+      __ rev32(v31, __ T16B, v31);
+
+    __ BIND(L_aes_loop);
+      __ ld1(v1, __ T16B, __ post(from, 16));
+      __ eor(v0, __ T16B, v0, v1);
+
+      __ br(Assembler::CC, L_rounds_44);
+      __ br(Assembler::EQ, L_rounds_52);
+
+      __ aese(v0, v17); __ aesmc(v0, v0);
+      __ aese(v0, v18); __ aesmc(v0, v0);
+    __ BIND(L_rounds_52);
+      __ aese(v0, v19); __ aesmc(v0, v0);
+      __ aese(v0, v20); __ aesmc(v0, v0);
+    __ BIND(L_rounds_44);
+      __ aese(v0, v21); __ aesmc(v0, v0);
+      __ aese(v0, v22); __ aesmc(v0, v0);
+      __ aese(v0, v23); __ aesmc(v0, v0);
+      __ aese(v0, v24); __ aesmc(v0, v0);
+      __ aese(v0, v25); __ aesmc(v0, v0);
+      __ aese(v0, v26); __ aesmc(v0, v0);
+      __ aese(v0, v27); __ aesmc(v0, v0);
+      __ aese(v0, v28); __ aesmc(v0, v0);
+      __ aese(v0, v29); __ aesmc(v0, v0);
+      __ aese(v0, v30);
+      __ eor(v0, __ T16B, v0, v31);
+
+      __ st1(v0, __ T16B, __ post(to, 16));
+      __ sub(len_reg, len_reg, 16);
+      __ cbnz(len_reg, L_aes_loop);
+
+      __ st1(v0, __ T16B, rvec);
+
+      __ mov(r0, rscratch2);
+
+      __ leave();
+      __ ret(lr);
+
+      return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   r0        - input length
+  //
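+  // Illustrative sketch (editorial note, not part of this changeset): the
+  // stub below is plain CBC decryption, roughly
+  //
+  //   prev = rvec;                                   // 16-byte IV
+  //   for (i = 0; i < len; i += 16) {
+  //     c      = src[i];                             // one 16-byte block
+  //     dst[i] = AES_decrypt(c, key) ^ prev;
+  //     prev   = c;                                  // chain on the ciphertext
+  //   }
+  //   rvec = prev;
+  //
+  // which is why a copy of each input block (v1/v2) is kept across the rounds.
+  //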
+  address generate_cipherBlockChaining_decryptAESCrypt() {
+    assert(UseAES, "need AES instructions");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+
+    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the last ciphertext (input) block for chaining
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+      __ enter();
+
+      __ mov(rscratch2, len_reg);
+      __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+      __ ld1(v2, __ T16B, rvec);
+
+      __ ld1(v31, __ T16B, __ post(key, 16));
+      __ rev32(v31, __ T16B, v31);
+
+      __ cmpw(keylen, 52);
+      __ br(Assembler::CC, L_loadkeys_44);
+      __ br(Assembler::EQ, L_loadkeys_52);
+
+      __ ld1(v17, v18, __ T16B, __ post(key, 32));
+      __ rev32(v17, __ T16B, v17);
+      __ rev32(v18, __ T16B, v18);
+    __ BIND(L_loadkeys_52);
+      __ ld1(v19, v20, __ T16B, __ post(key, 32));
+      __ rev32(v19, __ T16B, v19);
+      __ rev32(v20, __ T16B, v20);
+    __ BIND(L_loadkeys_44);
+      __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
+      __ rev32(v21, __ T16B, v21);
+      __ rev32(v22, __ T16B, v22);
+      __ rev32(v23, __ T16B, v23);
+      __ rev32(v24, __ T16B, v24);
+      __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
+      __ rev32(v25, __ T16B, v25);
+      __ rev32(v26, __ T16B, v26);
+      __ rev32(v27, __ T16B, v27);
+      __ rev32(v28, __ T16B, v28);
+      __ ld1(v29, v30, __ T16B, key);
+      __ rev32(v29, __ T16B, v29);
+      __ rev32(v30, __ T16B, v30);
+
+    __ BIND(L_aes_loop);
+      __ ld1(v0, __ T16B, __ post(from, 16));
+      __ orr(v1, __ T16B, v0, v0);
+
+      __ br(Assembler::CC, L_rounds_44);
+      __ br(Assembler::EQ, L_rounds_52);
+
+      __ aesd(v0, v17); __ aesimc(v0, v0);
+      __ aesd(v0, v18); __ aesimc(v0, v0);
+    __ BIND(L_rounds_52);
+      __ aesd(v0, v19); __ aesimc(v0, v0);
+      __ aesd(v0, v20); __ aesimc(v0, v0);
+    __ BIND(L_rounds_44);
+      __ aesd(v0, v21); __ aesimc(v0, v0);
+      __ aesd(v0, v22); __ aesimc(v0, v0);
+      __ aesd(v0, v23); __ aesimc(v0, v0);
+      __ aesd(v0, v24); __ aesimc(v0, v0);
+      __ aesd(v0, v25); __ aesimc(v0, v0);
+      __ aesd(v0, v26); __ aesimc(v0, v0);
+      __ aesd(v0, v27); __ aesimc(v0, v0);
+      __ aesd(v0, v28); __ aesimc(v0, v0);
+      __ aesd(v0, v29); __ aesimc(v0, v0);
+      __ aesd(v0, v30);
+      __ eor(v0, __ T16B, v0, v31);
+      __ eor(v0, __ T16B, v0, v2);
+
+      __ st1(v0, __ T16B, __ post(to, 16));
+      __ orr(v2, __ T16B, v1, v1);
+
+      __ sub(len_reg, len_reg, 16);
+      __ cbnz(len_reg, L_aes_loop);
+
+      __ st1(v2, __ T16B, rvec);
+
+      __ mov(r0, rscratch2);
+
+      __ leave();
+      __ ret(lr);
+
+    return start;
+  }
+
+  // AArch64 uses SafeFetch stubs unless we are building for the simulator,
+  // in which case the x86 asm code in linux_aarch64.S is used instead.
+
+#ifndef BUILTIN_SIM
+  // Safefetch stubs.
+  void generate_safefetch(const char* name, int size, address* entry,
+                          address* fault_pc, address* continuation_pc) {
+    // safefetch signatures:
+    //   int      SafeFetch32(int*      adr, int      errValue);
+    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+    //
+    // arguments:
+    //   c_rarg0 = adr
+    //   c_rarg1 = errValue
+    //
+    // result:
+    //   r0       = *adr or errValue
+
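+    // Illustrative sketch (editorial note, not part of this changeset) of
+    // the caller-visible behaviour:
+    //
+    //   int v = SafeFetch32(adr, -1);
+    //   // If adr is readable, v == *adr.  If the load faults, the VM's
+    //   // signal handler resumes execution at continuation_pc, so the
+    //   // caller just sees v == -1 instead of a crash.
+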
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // Entry point (pc).
+    *entry = __ pc();
+
+    // Load *adr into c_rarg1, may fault.
+    *fault_pc = __ pc();
+    switch (size) {
+      case 4:
+        // int32_t
+        __ ldrw(c_rarg1, Address(c_rarg0, 0));
+        break;
+      case 8:
+        // int64_t
+        __ ldr(c_rarg1, Address(c_rarg0, 0));
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // return errValue or *adr
+    *continuation_pc = __ pc();
+    __ mov(r0, c_rarg1);
+    __ ret(lr);
+  }
+#endif
+
+  // !!! FIXME AARCH64 -- not in jdk7 but left in as CRC32 is needed
+  /**
+   *  Arguments:
+   *
+   * Inputs:
+   *   c_rarg0   - int crc
+   *   c_rarg1   - byte* buf
+   *   c_rarg2   - int length
+   *
+   * Output:
+   *       r0   - int crc result
+   *
+   * Preserves:
+   *       r13
+   *
+   */
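+  // Illustrative sketch (editorial note, not part of this changeset): the
+  // stub computes the same value as a plain table-driven CRC-32 over the
+  // buffer, i.e. roughly
+  //
+  //   crc = ~crc;
+  //   while (length-- > 0)
+  //     crc = crc_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
+  //   return ~crc;
+  //
+  // kernel_crc32 just folds several bytes per step using the four tables
+  // defined in stubRoutines_aarch64.cpp.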
+  address generate_updateBytesCRC32() {
+    assert(UseCRC32Intrinsics, "what are we doing here?");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+
+    address start = __ pc();
+
+    const Register crc   = c_rarg0;  // crc
+    const Register buf   = c_rarg1;  // source java byte array address
+    const Register len   = c_rarg2;  // length
+    const Register table0 = c_rarg3; // crc_table address
+    const Register table1 = c_rarg4;
+    const Register table2 = c_rarg5;
+    const Register table3 = c_rarg6;
+    const Register tmp3 = c_rarg7;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ kernel_crc32(crc, buf, len,
+              table0, table1, table2, table3, rscratch1, rscratch2, tmp3);
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
+
+#undef __
+#define __ masm->
+
+  // Continuation point for throwing of implicit exceptions that are
+  // not handled in the current activation. Fabricates an exception
+  // oop and initiates normal exception dispatching in this
+  // frame. Since we need to preserve callee-saved values (currently
+  // only for C2, but done for C1 as well) we need a callee-saved oop
+  // map and therefore have to make these stubs into RuntimeStubs
+  // rather than BufferBlobs.  If the compiler needs all registers to
+  // be preserved between the fault point and the exception handler
+  // then it must assume responsibility for that in
+  // AbstractCompiler::continuation_for_implicit_null_exception or
+  // continuation_for_implicit_division_by_zero_exception. All other
+  // implicit exceptions (e.g., NullPointerException or
+  // AbstractMethodError on entry) are either at call sites or
+  // otherwise assume that stack unwinding will be initiated, so
+  // caller saved registers were assumed volatile in the compiler.
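+  //
+  // Illustrative sketch (editorial note, not part of this changeset): each
+  // stub produced here behaves roughly like
+  //
+  //   enter();                          // frame with an oop map
+  //   runtime_entry(current_thread);    // e.g. SharedRuntime::throw_StackOverflowError
+  //   leave();
+  //   jump(StubRoutines::forward_exception_entry());
+  //
+  // so the exception oop is created in the runtime and then dispatched
+  // through the regular forwarding stub.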
+
+  address generate_throw_exception(const char* name,
+                                   address runtime_entry,
+                                   Register arg1 = noreg,
+                                   Register arg2 = noreg) {
+    // Information about frame layout at time of blocking runtime call.
+    // Note that we only have to preserve callee-saved registers since
+    // the compilers are responsible for supplying a continuation point
+    // if they expect all registers to be preserved.
+    // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
+    enum layout {
+      rfp_off = 0,
+      rfp_off2,
+      return_off,
+      return_off2,
+      framesize // inclusive of return address
+    };
+
+    int insts_size = 512;
+    int locs_size  = 64;
+
+    CodeBuffer code(name, insts_size, locs_size);
+    OopMapSet* oop_maps  = new OopMapSet();
+    MacroAssembler* masm = new MacroAssembler(&code);
+
+    address start = __ pc();
+
+    // This is an inlined and slightly modified version of call_VM
+    // which has the ability to fetch the return PC out of
+    // thread-local storage and also sets up last_Java_sp slightly
+    // differently than the real call_VM
+
+    __ enter(); // Save FP and LR before call
+
+    assert(is_even(framesize/2), "sp not 16-byte aligned");
+
+    // lr and fp are already in place
+    __ sub(sp, rfp, ((unsigned)framesize-4) << LogBytesPerInt); // prolog
+
+    int frame_complete = __ pc() - start;
+
+    // Set up last_Java_sp and last_Java_fp
+    address the_pc = __ pc();
+    __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);
+
+    // Call runtime
+    if (arg1 != noreg) {
+      assert(arg2 != c_rarg1, "clobbered");
+      __ mov(c_rarg1, arg1);
+    }
+    if (arg2 != noreg) {
+      __ mov(c_rarg2, arg2);
+    }
+    __ mov(c_rarg0, rthread);
+    BLOCK_COMMENT("call runtime_entry");
+    __ mov(rscratch1, runtime_entry);
+    __ blrt(rscratch1, 3 /* number_of_arguments */, 0, 1);
+
+    // Generate oop map
+    OopMap* map = new OopMap(framesize, 0);
+
+    oop_maps->add_gc_map(the_pc - start, map);
+
+    __ reset_last_Java_frame(true, true);
+    __ maybe_isb();
+
+    __ leave();
+
+    // check for pending exceptions
+#ifdef ASSERT
+    Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbnz(rscratch1, L);
+    __ should_not_reach_here();
+    __ bind(L);
+#endif // ASSERT
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+
+    // codeBlob framesize is in words (not VMRegImpl::slot_size)
+    RuntimeStub* stub =
+      RuntimeStub::new_runtime_stub(name,
+                                    &code,
+                                    frame_complete,
+                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
+                                    oop_maps, false);
+    return stub->entry_point();
+  }
+
+  // Initialization
+  void generate_initial() {
+    // Generates the initial stubs and initializes the entry points
+
+    // Entry points that exist on all platforms. Note: this is code
+    // that could be shared among different platforms - however, the
+    // benefit seems to be smaller than the disadvantage of having a
+    // much more complicated generator structure. See also the comment
+    // in stubRoutines.hpp.
+
+    StubRoutines::_forward_exception_entry = generate_forward_exception();
+
+    StubRoutines::_call_stub_entry =
+      generate_call_stub(StubRoutines::_call_stub_return_address);
+
+    // is referenced by megamorphic call
+    StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_StackOverflowError));
+    // !!! FIXME AARCH64 -- not in jdk7 but left in as we need CRC32
+    if (UseCRC32Intrinsics) {
+      // set the table address before generating the stubs that use it
+      StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+    }
+  }
+
+  void generate_all() {
+    // support for verify_oop (must happen after universe_init)
+    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
+    StubRoutines::_throw_AbstractMethodError_entry =
+      generate_throw_exception("AbstractMethodError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_AbstractMethodError));
+
+    StubRoutines::_throw_IncompatibleClassChangeError_entry =
+      generate_throw_exception("IncompatibleClassChangeError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_IncompatibleClassChangeError));
+
+    StubRoutines::_throw_NullPointerException_at_call_entry =
+      generate_throw_exception("NullPointerException at call throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_NullPointerException_at_call));
+
+    // arraycopy stubs used by compilers
+    generate_arraycopy_stubs();
+
+#ifndef BUILTIN_SIM
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    }
+
+    // Safefetch stubs.
+    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
+                                                       &StubRoutines::_safefetch32_fault_pc,
+                                                       &StubRoutines::_safefetch32_continuation_pc);
+    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
+                                                       &StubRoutines::_safefetchN_fault_pc,
+                                                       &StubRoutines::_safefetchN_continuation_pc);
+#endif
+  }
+
+ public:
+  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+    if (all) {
+      generate_all();
+    } else {
+      generate_initial();
+    }
+  }
+}; // end class declaration
+
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+  StubGenerator g(code, all);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/stubRoutines_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "thread_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "thread_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "thread_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "thread_bsd.inline.hpp"
+#endif
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::aarch64::_get_previous_fp_entry = NULL;
+address StubRoutines::aarch64::_get_previous_sp_entry = NULL;
+
+address StubRoutines::aarch64::_f2i_fixup = NULL;
+address StubRoutines::aarch64::_f2l_fixup = NULL;
+address StubRoutines::aarch64::_d2i_fixup = NULL;
+address StubRoutines::aarch64::_d2l_fixup = NULL;
+address StubRoutines::aarch64::_float_sign_mask = NULL;
+address StubRoutines::aarch64::_float_sign_flip = NULL;
+address StubRoutines::aarch64::_double_sign_mask = NULL;
+address StubRoutines::aarch64::_double_sign_flip = NULL;
+address StubRoutines::aarch64::_zero_longs = NULL;
+
+/**
+ *  crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h
+ */
+juint StubRoutines::aarch64::_crc_table[]
+                   __attribute__ ((aligned(4096))) =
+{
+    // Table 0
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL,
+
+    // Table 1
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL,
+
+    // Table 2
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL,
+
+    // Table 3
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL,
+    // Constants for the Neon CRC32 implementation
+    // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed
+    // k4 = 0xED627DAE = x^256 mod poly - bit reversed
+    0x78ED02D5UL, 0xED627DAEUL,         // k4:k3
+    0xED78D502UL, 0x62EDAE7DUL,         // byte swap
+    0x02D578EDUL, 0x7DAEED62UL,         // word swap
+    0xD502ED78UL, 0xAE7D62EDUL,         // byte swap of word swap
+};
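+
+// Illustrative sketch (editorial note, not part of this changeset): with the
+// four tables above, the CRC can be advanced one 32-bit word at a time
+// (zlib's "slicing-by-four"), roughly
+//
+//   crc ^= load_le32(buf);  buf += 4;
+//   crc  = t3[crc & 0xff] ^ t2[(crc >> 8) & 0xff]
+//        ^ t1[(crc >> 16) & 0xff] ^ t0[crc >> 24];
+//
+// where t0..t3 are Table 0..Table 3; the trailing k3/k4 constants are used
+// only by the Neon folding variant.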
+
+address StubRoutines::_crc_table_adr = NULL;
+address StubRoutines::_updateBytesCRC32 = NULL;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP
+#define CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+// n.b. if we are notifying entry/exit to the simulator then the call
+// stub does a notify at normal return, placing
+// call_stub_return_address one instruction beyond the notify. The
+// latter address is used by the stack unwind code when doing an
+// exception return.
+static bool    returns_to_call_stub(address return_pc)   {
+  return return_pc == _call_stub_return_address + (NotifySimulator ? -4 : 0);
+}
+
+enum platform_dependent_constants {
+  code_size1 = 19000,          // simply increase if too small (assembler will crash if too small)
+  code_size2 = 22000           // simply increase if too small (assembler will crash if too small)
+};
+
+class aarch64 {
+ friend class StubGenerator;
+
+ private:
+  static address _get_previous_fp_entry;
+  static address _get_previous_sp_entry;
+
+  static address _f2i_fixup;
+  static address _f2l_fixup;
+  static address _d2i_fixup;
+  static address _d2l_fixup;
+
+  static address _float_sign_mask;
+  static address _float_sign_flip;
+  static address _double_sign_mask;
+  static address _double_sign_flip;
+
+  static address _zero_longs;
+
+ public:
+
+  static address get_previous_fp_entry()
+  {
+    return _get_previous_fp_entry;
+  }
+
+  static address get_previous_sp_entry()
+  {
+    return _get_previous_sp_entry;
+  }
+
+  static address f2i_fixup()
+  {
+    return _f2i_fixup;
+  }
+
+  static address f2l_fixup()
+  {
+    return _f2l_fixup;
+  }
+
+  static address d2i_fixup()
+  {
+    return _d2i_fixup;
+  }
+
+  static address d2l_fixup()
+  {
+    return _d2l_fixup;
+  }
+
+  static address float_sign_mask()
+  {
+    return _float_sign_mask;
+  }
+
+  static address float_sign_flip()
+  {
+    return _float_sign_flip;
+  }
+
+  static address double_sign_mask()
+  {
+    return _double_sign_mask;
+  }
+
+  static address double_sign_flip()
+  {
+    return _double_sign_flip;
+  }
+
+  static address get_zero_longs()
+  {
+    return _zero_longs;
+  }
+
+private:
+  static juint    _crc_table[];
+};
+
+public:
+  static address  _crc_table_adr;
+  static address _updateBytesCRC32;
+
+  static address crc_table_addr()      { return _crc_table_adr; }
+  static address updateBytesCRC32()    { return _updateBytesCRC32; }
+
+#endif // CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP
+#define CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP
+
+ protected:
+
+void generate_fixed_frame(bool native_call);
+
+ // address generate_asm_interpreter_entry(bool synchronized);
+
+#endif // CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,2199 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "interpreter/bytecodeTracer.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include <sys/types.h>
+
+#ifndef PRODUCT
+#include "oops/methodOop.hpp"
+#endif // !PRODUCT
+
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
+
+#define __ _masm->
+
+#ifndef CC_INTERP
+
+//-----------------------------------------------------------------------------
+
+extern "C" void entry(CodeBuffer*);
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+  address entry = __ pc();
+
+#ifdef ASSERT
+  {
+    Label L;
+    __ ldr(rscratch1, Address(rfp,
+                       frame::interpreter_frame_monitor_block_top_offset *
+                       wordSize));
+    __ mov(rscratch2, sp);
+    __ cmp(rscratch1, rscratch2); // maximal sp for current rfp (stack
+                           // grows downwards)
+    __ br(Assembler::HS, L); // check if frame is complete
+    __ stop ("interpreter frame not set up");
+    __ bind(L);
+  }
+#endif // ASSERT
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted
+  __ restore_bcp();
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // throw exception
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(
+        const char* name) {
+  address entry = __ pc();
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+  // ??? convention: expect aberrant index in register r1
+  __ movw(c_rarg2, r1);
+  __ mov(c_rarg1, (address)name);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::
+                              throw_ArrayIndexOutOfBoundsException),
+             c_rarg1, c_rarg2);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
+  address entry = __ pc();
+
+  // object is at TOS
+  __ pop(c_rarg1);
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::
+                              throw_ClassCastException),
+             c_rarg1);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(
+        const char* name, const char* message, bool pass_oop) {
+  assert(!pass_oop || message == NULL, "either oop or message but not both");
+  address entry = __ pc();
+  if (pass_oop) {
+    // object is at TOS
+    __ pop(c_rarg2);
+  }
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+  __ lea(c_rarg1, Address((address)name));
+  if (pass_oop) {
+    __ call_VM(r0, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::
+                                    create_klass_exception),
+               c_rarg1, c_rarg2);
+  } else {
+    // Kind of lame: ExternalAddress can't take NULL because
+    // external_word_Relocation will assert.
+    if (message != NULL) {
+      __ lea(c_rarg2, Address((address)message));
+    } else {
+      __ mov(c_rarg2, NULL_WORD);
+    }
+    __ call_VM(r0,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
+               c_rarg1, c_rarg2);
+  }
+  // throw exception
+  __ b(address(Interpreter::throw_exception_entry()));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
+  address entry = __ pc();
+  // NULL last_sp until next java call
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ dispatch_next(state);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step) {
+  address entry = __ pc();
+
+  // Restore stack bottom in case i2c adjusted stack
+  __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // and NULL it as marker that esp is now tos until next java call
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(rmethod);
+
+  Label L_got_cache, L_giant_index;
+  if (EnableInvokeDynamic) {
+    __ ldrb(rscratch1, Address(rbcp, 0));
+    __ cmp(rscratch1, Bytecodes::_invokedynamic);
+    __ br(Assembler::EQ, L_giant_index);
+  }
+  // Pop N words from the stack
+  __ get_cache_and_index_at_bcp(r1, r2, 1, sizeof(u2));
+  __ bind(L_got_cache);
+  __ ldr(r1, Address(r1, in_bytes(constantPoolCacheOopDesc::base_offset()) + 3 * wordSize));
+  __ andr(r1, r1, 0xFF);
+
+  __ add(esp, esp, r1, Assembler::LSL, 3);
+
+  // Restore machine SP
+  __ ldrh(rscratch1, Address(rmethod, methodOopDesc::max_stack_offset()));
+  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size()
+         + (EnableInvokeDynamic ? 2 : 0));
+  __ ldr(rscratch2,
+         Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3);
+  __ andr(sp, rscratch1, -16);
+
+#ifndef PRODUCT
+  // tell the simulator that the method has been reentered
+  if (NotifySimulator) {
+    __ notify(Assembler::method_reentry);
+  }
+#endif
+  __ get_dispatch();
+  __ dispatch_next(state, step);
+
+  // out of the main line of code...
+  if (EnableInvokeDynamic) {
+    __ bind(L_giant_index);
+    __ get_cache_and_index_at_bcp(r1, r2, 1, sizeof(u4));
+    __ b(L_got_cache);
+  }
+
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
+                                                               int step) {
+  address entry = __ pc();
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(rmethod);
+
+  // handle exceptions
+  {
+    Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  __ get_dispatch();
+
+  // Calculate stack limit
+  __ ldrh(rscratch1, Address(rmethod, methodOopDesc::max_stack_offset()));
+  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size()
+         + (EnableInvokeDynamic ? 2 : 0));
+  __ ldr(rscratch2,
+         Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3);
+  __ andr(sp, rscratch1, -16);
+
+  // Restore expression stack pointer
+  __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // NULL last_sp until next java call
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+  __ dispatch_next(state, step);
+  return entry;
+}
+
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : i = 9; break;
+    case T_ARRAY  : i = 9; break;
+    default       : ShouldNotReachHere();
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
+         "index out of bounds");
+  return i;
+}
+
+
+address TemplateInterpreterGenerator::generate_result_handler_for(
+        BasicType type) {
+    address entry = __ pc();
+  switch (type) {
+  case T_BOOLEAN: __ c2bool(r0);          break;
+  case T_CHAR   : __ uxth(r0, r0);        break;
+  case T_BYTE   : __ sxtb(r0, r0);        break;
+  case T_SHORT  : __ sxth(r0, r0);        break;
+  case T_INT    : __ uxtw(r0, r0);        break;  // FIXME: We almost certainly don't need this
+  case T_LONG   : /* nothing to do */        break;
+  case T_VOID   : /* nothing to do */        break;
+  case T_FLOAT  : /* nothing to do */        break;
+  case T_DOUBLE : /* nothing to do */        break;
+  case T_OBJECT :
+    // retrieve result from frame
+    __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize));
+    // and verify it
+    __ verify_oop(r0);
+    break;
+  default       : ShouldNotReachHere();
+  }
+  __ ret(lr);                                  // return from result handler
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for(
+        TosState state,
+        address runtime_entry) {
+  address entry = __ pc();
+  __ push(state);
+  __ call_VM(noreg, runtime_entry);
+  __ membar(Assembler::AnyAny);
+  __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
+  return entry;
+}
+
+// Helpers for commoning out cases in the various types of method entries.
+//
+
+
+// increment invocation count & check for overflow
+//
+// Note: checking for negative value instead of overflow
+//       so we have a 'sticky' overflow test
+//
+// rmethod: method
+// r2: invocation counter
+//
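+// Illustrative sketch (editorial note, not part of this changeset) of the
+// non-tiered path below, in plain C terms:
+//
+//   invocation_counter += count_increment;             // stored back
+//   total = invocation_counter
+//         + (backedge_counter & count_mask_value);     // status bits masked off
+//   if (total >= InterpreterInvocationLimit) goto overflow;
+//
+// Since the counters only grow, the test keeps firing once the limit has
+// been reached.
+//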
+void InterpreterGenerator::generate_counter_incr(
+        Label* overflow,
+        Label* profile_method,
+        Label* profile_method_continue) {
+  const Address invocation_counter(rmethod,
+                                   in_bytes(methodOopDesc::invocation_counter_offset()) +
+                                   in_bytes(InvocationCounter::counter_offset()));
+  // Note: In tiered compilation we increment counters either in the methodOop or in the MDO, depending on whether we're profiling.
+  if (TieredCompilation) {
+    int increment = InvocationCounter::count_increment;
+    int mask = ((1 << Tier0InvokeNotifyFreqLog)  - 1) << InvocationCounter::count_shift;
+    Label no_mdo, done;
+    if (ProfileInterpreter) {
+      // Are we profiling?
+      __ ldr(r0, Address(rmethod, methodOopDesc::method_data_offset()));
+      __ cbz(r0, no_mdo);
+      // Increment counter in the MDO
+      const Address mdo_invocation_counter(r0, in_bytes(methodDataOopDesc::invocation_counter_offset()) +
+                                               in_bytes(InvocationCounter::counter_offset()));
+      __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, r2, false, Assembler::EQ, overflow);
+      __ b(done);
+    }
+    __ bind(no_mdo);
+    // Increment counter in methodOop
+    __ increment_mask_and_jump(invocation_counter, increment, mask, r2, false, Assembler::EQ, overflow);
+    __ bind(done);
+  } else {
+    const Address backedge_counter(rmethod,
+                  methodOopDesc::backedge_counter_offset() +
+                  InvocationCounter::counter_offset());
+
+    if (ProfileInterpreter) { // %%% Merge this into methodDataOop
+      __ ldrw(r1, Address(rmethod, methodOopDesc ::interpreter_invocation_counter_offset()));
+      __ addw(r1, r1, 1);
+      __ strw(r1, Address(rmethod, methodOopDesc::interpreter_invocation_counter_offset()));
+    }
+    // Update standard invocation counters
+    __ ldrw(r0, backedge_counter);
+
+    __ addw(r2, r2, InvocationCounter::count_increment);
+    __ andw(r0, r0, InvocationCounter::count_mask_value);
+
+    __ strw(r2, invocation_counter);    // save invocation count
+    __ addw(r2, r0, r2);                // add both counters
+
+    // profile_method is non-null only for interpreted methods, so
+    // profile_method != NULL implies !native_call
+
+    if (ProfileInterpreter && profile_method != NULL) {
+      // Test to see if we should create a method data oop
+      unsigned long offset;
+      __ adrp(rscratch2, ExternalAddress((address)&InvocationCounter::InterpreterProfileLimit),
+              offset);
+      __ ldrw(rscratch2, Address(rscratch2, offset));
+      __ cmpw(r2, rscratch2);
+      __ br(Assembler::LT, *profile_method_continue);
+
+      // if no method data exists, go to profile_method
+      __ test_method_data_pointer(rscratch2, *profile_method);
+    }
+
+    {
+      unsigned long offset;
+      __ adrp(rscratch2,
+              ExternalAddress((address)&InvocationCounter::InterpreterInvocationLimit),
+              offset);
+      __ ldrw(rscratch2, Address(rscratch2, offset));
+      __ cmpw(r2, rscratch2);
+      __ br(Assembler::HS, *overflow);
+    }
+  }
+}
+
+void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
+
+  // Asm interpreter state on entry is preserved: on return (i.e. the jump
+  // to entry_point, back to the invocation of the interpreter) everything
+  // is as it was on entry.
+
+  // InterpreterRuntime::frequency_counter_overflow takes two
+  // arguments, the first (thread) is passed by call_VM, the second
+  // indicates if the counter overflow occurs at a backwards branch
+  // (NULL bcp).  We pass zero for it.  The call returns the address
+  // of the verified entry point for the method or NULL if the
+  // compilation did not complete (either went background or bailed
+  // out).
+  __ mov(c_rarg1, 0);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::frequency_counter_overflow),
+             c_rarg1);
+
+  __ b(*do_continue);
+}
+
+// See if we've got enough room on the stack for locals plus overhead.
+// The expression stack grows down incrementally, so the normal guard
+// page mechanism will work for that.
+//
+// NOTE: Since the additional locals are also always pushed (this was
+// not obvious in generate_method_entry), the guard should work for
+// them too.
+//
+// Args:
+//      r3: number of additional locals this frame needs (what we must check)
+//      rmethod: methodOop
+//
+// Kills:
+//      r0
+void InterpreterGenerator::generate_stack_overflow_check(void) {
+
+  // monitor entry size: see picture of stack set
+  // (generate_method_entry) and frame_aarch64.hpp
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+  // total overhead size: entry_size + (saved rfp through expr stack
+  // bottom).  Be sure to change this if you add/subtract anything
+  // to/from the overhead area.
+  const int overhead_size =
+    -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size;
+
+  const int page_size = os::vm_page_size();
+
+  Label after_frame_check;
+
+  // see if the frame is greater than one page in size. If so,
+  // then we need to verify there is enough stack space remaining
+  // for the additional locals.
+  //
+  // Note that we use SUBS rather than CMP here because the immediate
+  // field of this instruction may overflow.  SUBS can cope with this
+  // because it is a macro that will expand to some number of MOV
+  // instructions and a register operation.
+  __ subs(rscratch1, r3, (page_size - overhead_size) / Interpreter::stackElementSize);
+  __ br(Assembler::LS, after_frame_check);
+
+  // compute sp as if this were going to be the last frame on
+  // the stack before the red zone
+
+  const Address stack_base(rthread, Thread::stack_base_offset());
+  const Address stack_size(rthread, Thread::stack_size_offset());
+
+  // locals + overhead, in bytes
+  __ mov(r0, overhead_size);
+  __ add(r0, r0, r3, Assembler::LSL, Interpreter::logStackElementSize);  // 2 slots per parameter.
+
+  __ ldr(rscratch1, stack_base);
+  __ ldr(rscratch2, stack_size);
+
+#ifdef ASSERT
+  Label stack_base_okay, stack_size_okay;
+  // verify that thread stack base is non-zero
+  __ cbnz(rscratch1, stack_base_okay);
+  __ stop("stack base is zero");
+  __ bind(stack_base_okay);
+  // verify that thread stack size is non-zero
+  __ cbnz(rscratch2, stack_size_okay);
+  __ stop("stack size is zero");
+  __ bind(stack_size_okay);
+#endif
+
+  // Add stack base to locals and subtract stack size
+  __ sub(rscratch1, rscratch1, rscratch2); // Stack limit
+  __ add(r0, r0, rscratch1);
+
+  // Use the maximum number of pages we might bang.
+  const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages :
+                                                                              (StackRedPages+StackYellowPages);
+
+  // add in the red and yellow zone sizes
+  __ add(r0, r0, max_pages * page_size * 2);
+
+  // check against the current stack bottom
+  __ cmp(sp, r0);
+  __ br(Assembler::HI, after_frame_check);
+
+  // Remove the incoming args, peeling the machine SP back to where it
+  // was in the caller.  This is not strictly necessary, but unless we
+  // do so the stack frame may have a garbage FP; this ensures a
+  // correct call stack that we can always unwind.  The ANDR should be
+  // unnecessary because the sender SP in r13 is always aligned, but
+  // it doesn't hurt.
+  __ andr(sp, r13, -16);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
+
+  // all done with frame size check
+  __ bind(after_frame_check);
+}
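+
+// Rough C-level sketch of the check generated above (illustrative only;
+// the variable names are assumptions, the constants are the ones the code
+// uses):
+//
+//   size_t frame_bytes = overhead_size + extra_locals * Interpreter::stackElementSize;
+//   if (frame_bytes <= page_size) return;              // the guard page catches it
+//   address limit = stack_base - stack_size            // lowest usable address
+//                   + max_pages * page_size * 2;       // keep guard/shadow zones free
+//   if (sp <= limit + frame_bytes)
+//     jump to StubRoutines::throw_StackOverflowError_entry();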
+
+// Allocate monitor and lock method (asm interpreter)
+//
+// Args:
+//      rmethod: methodOop
+//      rlocals: locals
+//
+// Kills:
+//      r0
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterGenerator::lock_method(void) {
+  // synchronize method
+  const Address access_flags(rmethod, methodOopDesc::access_flags_offset());
+  const Address monitor_block_top(
+        rfp,
+        frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+#ifdef ASSERT
+  {
+    Label L;
+    __ ldrw(r0, access_flags);
+    __ tst(r0, JVM_ACC_SYNCHRONIZED);
+    __ br(Assembler::NE, L);
+    __ stop("method doesn't need synchronization");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+  // get synchronization object
+  {
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+    Label done;
+    __ ldrw(r0, access_flags);
+    __ tst(r0, JVM_ACC_STATIC);
+    // get receiver (assume this is frequent case)
+    __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
+    __ br(Assembler::EQ, done);
+    __ ldr(r0, Address(rmethod, methodOopDesc::const_offset()));
+    __ ldr(r0, Address(r0, constMethodOopDesc::constants_offset()));
+    __ ldr(r0, Address(r0, constantPoolOopDesc::pool_holder_offset_in_bytes()));
+    __ ldr(r0, Address(r0, mirror_offset));
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ cbnz(r0, L);
+      __ stop("synchronization object is NULL");
+      __ bind(L);
+    }
+#endif // ASSERT
+
+    __ bind(done);
+  }
+
+  // add space for monitor & lock
+  __ sub(sp, sp, entry_size); // add space for a monitor entry
+  __ sub(esp, esp, entry_size);
+  __ mov(rscratch1, esp);
+  __ str(rscratch1, monitor_block_top);  // set new monitor block top
+  // store object
+  __ str(r0, Address(esp, BasicObjectLock::obj_offset_in_bytes()));
+  __ mov(c_rarg1, esp); // object address
+  __ lock_object(c_rarg1);
+}
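+
+// Java-level sketch of what lock_method() selects above (hedged,
+// illustrative only -- this is not generated code):
+//
+//   Object lock = is_static ? method.holder().mirror()   // the Class object
+//                           : local_0;                   // the receiver, 'this'
+//
+// A BasicObjectLock slot for that object is then pushed below the
+// expression stack and lock_object() acquires it.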
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+//
+// Args:
+//      lr: return address
+//      rmethod: methodOop
+//      rlocals: pointer to locals
+//      rcpool: cp cache
+//      stack_pointer: previous sp
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+  // initialize fixed part of activation frame
+  if (native_call) {
+    __ sub(esp, sp, 12 *  wordSize);
+    __ mov(rbcp, zr);
+    __ stp(esp, zr, Address(__ pre(sp, -12 * wordSize)));
+    // add 2 zero-initialized slots for native calls
+    __ stp(zr, zr, Address(sp, 10 * wordSize));
+  } else {
+    __ sub(esp, sp, 10 *  wordSize);
+    __ ldr(rscratch1, Address(rmethod, methodOopDesc::const_offset()));      // get ConstMethod
+    __ add(rbcp, rscratch1, in_bytes(constMethodOopDesc::codes_offset())); // get codebase
+    __ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize)));
+  }
+
+  if (ProfileInterpreter) {
+    Label method_data_continue;
+    __ ldr(rscratch1, Address(rmethod, methodOopDesc::method_data_offset()));
+    __ cbz(rscratch1, method_data_continue);
+    __ lea(rscratch1, Address(rscratch1, in_bytes(methodDataOopDesc::data_offset())));
+    __ bind(method_data_continue);
+    __ stp(rscratch1, rmethod, Address(sp, 4 * wordSize));  // save methodOop and mdp (method data pointer)
+  } else {
+    __ stp(zr, rmethod, Address(sp, 4 * wordSize));        // save methodOop (no mdp)
+  }
+
+  __ ldr(rcpool, Address(rmethod, methodOopDesc::const_offset()));
+  __ ldr(rcpool, Address(rcpool, constMethodOopDesc::constants_offset()));
+  __ ldr(rcpool, Address(rcpool, constantPoolOopDesc::cache_offset_in_bytes()));
+  __ stp(rlocals, rcpool, Address(sp, 2 * wordSize));
+
+  __ stp(rfp, lr, Address(sp, 8 * wordSize));
+  __ lea(rfp, Address(sp, 8 * wordSize));
+
+  // set sender sp
+  // leave last_sp as null
+  __ stp(zr, r13, Address(sp, 6 * wordSize));
+
+  // Move SP out of the way
+  if (! native_call) {
+    __ ldrh(rscratch1, Address(rmethod, methodOopDesc::max_stack_offset()));
+    __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size()
+           + (EnableInvokeDynamic ? 2 : 0));
+    __ sub(rscratch1, sp, rscratch1, ext::uxtw, 3);
+    __ andr(sp, rscratch1, -16);
+  }
+}
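+
+// For reference, the frame laid down above, in words above the new sp
+// (a reading of the stp instructions, not an authoritative frame spec;
+// the native variant adds slots 10 and 11 and uses a null bcp):
+//
+//    0: expr. stack bottom (esp)      1: bcp (codebase, or 0 for native)
+//    2: rlocals                       3: constant pool cache
+//    4: mdp (or 0)                    5: methodOop
+//    6: last_sp (null)                7: sender sp (r13)
+//    8: saved rfp   <--- new rfp      9: return address (lr)
+//   10: 0 (native only)              11: 0 (native only)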
+
+// End of helpers
+
+// Various method entries
+//------------------------------------------------------------------------------------------------------------------------
+//
+//
+
+// Call an accessor method (assuming it is resolved, otherwise drop
+// into vanilla (slow path) entry)
+address InterpreterGenerator::generate_accessor_entry(void) {
+  return NULL;
+}
+
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#if INCLUDE_ALL_GCS
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+  //
+  // rmethod: methodOop
+  // r13: senderSP must preserve for slow path, set SP to it on fast path
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+    Label slow_path;
+    const Register local_0 = c_rarg0;
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ ldr(local_0, Address(esp, 0));
+    __ cbz(local_0, slow_path);
+
+
+    // Load the value of the referent field.
+    const Address field_address(local_0, referent_offset);
+    __ load_heap_oop(local_0, field_address);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    __ enter(); // g1_write may call runtime
+    __ g1_write_barrier_pre(noreg /* obj */,
+                            local_0 /* pre_val */,
+                            rthread /* thread */,
+                            rscratch2 /* tmp */,
+                            true /* tosca_live */,
+                            true /* expand_call */);
+    __ leave();
+    // areturn
+    __ andr(sp, r13, -16);  // done with stack
+    __ ret(lr);
+
+    // generate a vanilla interpreter entry as the slow path
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+
+    return entry;
+  }
+#endif // INCLUDE_ALL_GCS
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
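+
+// Minimal pseudo-code sketch of the fast path above (the SATB helper name
+// is an assumption for illustration, not the exact runtime entry point):
+//
+//   if (this == null) goto slow_path;        // vanilla entry raises the NPE
+//   oop referent = this.referent;            // load_heap_oop
+//   g1_pre_barrier_log(referent);            // g1_write_barrier_pre enqueues the value
+//   return referent;                         // areturn; caller SP restored from r13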
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rmethod: methodOop
+    // r13: senderSP must be preserved for slow path
+    // esp: args
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    unsigned long offset;
+    __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
+    __ ldrw(rscratch1, Address(rscratch1, offset));
+    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
+    __ cbnz(rscratch1, slow_path);
+
+    // We don't generate a local frame and don't align the stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = c_rarg0;  // crc
+    const Register val = c_rarg1;  // source java byte value
+    const Register tbl = c_rarg2;  // scratch
+
+    // Arguments are reversed on java expression stack
+    __ ldrw(val, Address(esp, 0));              // byte value
+    __ ldrw(crc, Address(esp, wordSize));       // Initial CRC
+
+    __ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset);
+    __ add(tbl, tbl, offset);
+
+    __ ornw(crc, zr, crc); // ~crc
+    __ update_byte_crc32(crc, val, tbl);
+    __ ornw(crc, zr, crc); // ~crc
+
+    // result in c_rarg0
+
+    __ andr(sp, r13, -16);
+    __ ret(lr);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
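+
+// For reference, the classic table-driven CRC-32 byte step which the stub
+// is assumed to implement (a sketch with crc as an unsigned 32-bit value;
+// the pre/post inversion is what the two ornw instructions above do):
+//
+//   crc = ~crc;                                         // ornw(crc, zr, crc)
+//   crc = crc_table[(crc ^ b) & 0xff] ^ (crc >> 8);     // update_byte_crc32
+//   crc = ~crc;                                         // ornw(crc, zr, crc)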
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rmethod: methodOop
+    // r13: senderSP must be preserved for slow path
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    unsigned long offset;
+    __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
+    __ ldrw(rscratch1, Address(rscratch1, offset));
+    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
+    __ cbnz(rscratch1, slow_path);
+
+    // We don't generate a local frame and don't align the stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = c_rarg0;  // crc
+    const Register buf = c_rarg1;  // source java byte array address
+    const Register len = c_rarg2;  // length
+    const Register off = len;      // offset (never overlaps with 'len')
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ ldr(buf, Address(esp, 2*wordSize)); // long buf
+      __ ldrw(off, Address(esp, wordSize)); // offset
+      __ add(buf, buf, off); // + offset
+      __ ldrw(crc,   Address(esp, 4*wordSize)); // Initial CRC
+    } else {
+      __ ldr(buf, Address(esp, 2*wordSize)); // byte[] array
+      __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+      __ ldrw(off, Address(esp, wordSize)); // offset
+      __ add(buf, buf, off); // + offset
+      __ ldrw(crc,   Address(esp, 3*wordSize)); // Initial CRC
+    }
+    // Can now load 'len' since we're finished with 'off'
+    __ ldrw(len, Address(esp, 0x0)); // Length
+
+    __ andr(sp, r13, -16); // Restore the caller's SP
+
+    // We are frameless so we can just jump to the stub.
+    __ b(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()));
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
+// !!! FIXME AARCH64 - this is not in jdk8 and I think we need it for jdk7
+void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+  // Bang each page in the shadow zone. We can't assume it's been done for
+  // an interpreter frame with greater than a page of locals, so each page
+  // needs to be checked.  Only true for non-native.
+  if (UseStackBanging) {
+    const int start_page = native_call ? StackShadowPages : 1;
+    const int page_size = os::vm_page_size();
+    for (int pages = start_page; pages <= StackShadowPages ; pages++) {
+      __ sub(rscratch2, sp, pages*page_size);
+      __ str(zr, Address(rscratch2));
+    }
+  }
+}
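+
+// Roughly equivalent C sketch of the banging loop above (illustrative):
+//
+//   for (int page = start_page; page <= StackShadowPages; page++)
+//     *(intptr_t*)(sp - page * page_size) = 0;    // touch each shadow page
+//
+// so that any stack overflow is taken here, on a guard page, rather than
+// at some arbitrary point deeper in the method.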
+
+
+// Interpreter stub for calling a native method. (asm interpreter)
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address InterpreterGenerator::generate_native_entry(bool synchronized) {
+  // determine code generation flags
+  bool inc_counter  = UseCompiler || CountCompiledCalls;
+
+  // rmethod: methodOop
+  // rscratch1: sender sp
+
+  address entry_point = __ pc();
+
+  const Address size_of_parameters(rmethod, methodOopDesc::
+                                            size_of_parameters_offset());
+
+  const Address invocation_counter(rmethod, methodOopDesc::
+                                            invocation_counter_offset() +
+                                            InvocationCounter::counter_offset());
+  const Address access_flags      (rmethod, methodOopDesc::access_flags_offset());
+  // get parameter size (always needed)
+  __ load_unsigned_short(r2, size_of_parameters);
+
+  // native calls don't need the stack size check since they have no
+  // expression stack and the arguments are already on the stack and
+  // we only add a handful of words to the stack
+
+  // rmethod: methodOop
+  // r2: size of parameters
+  // rscratch1: sender sp
+
+  // for natives the size of locals is zero
+
+  // compute beginning of parameters (rlocals)
+  __ add(rlocals, esp, r2, ext::uxtx, 3);
+  __ add(rlocals, rlocals, -wordSize);
+
+  // Pull SP back to minimum size: this avoids holes in the stack
+  __ andr(sp, esp, -16);
+
+  if (inc_counter) {
+    __ ldr(r2, invocation_counter);  // (pre-)fetch invocation count
+  }
+
+  // initialize fixed part of activation frame
+  generate_fixed_frame(true);
+#ifndef PRODUCT
+  // tell the simulator that a method has been entered
+  if (NotifySimulator) {
+    __ notify(Assembler::method_entry);
+  }
+#endif
+
+  // make sure method is native & not abstract
+#ifdef ASSERT
+  __ ldrw(r0, access_flags);
+  {
+    Label L;
+    __ tst(r0, JVM_ACC_NATIVE);
+    __ br(Assembler::NE, L);
+    __ stop("tried to execute non-native method as native");
+    __ bind(L);
+  }
+  {
+    Label L;
+    __ tst(r0, JVM_ACC_ABSTRACT);
+    __ br(Assembler::EQ, L);
+    __ stop("tried to execute abstract method in interpreter");
+    __ bind(L);
+  }
+#endif
+
+  // Since at this point in the method invocation the exception
+  // handler would try to exit the monitor of a synchronized method
+  // which has not been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. The remove_activation
+  // will check this flag.
+
+  const Address do_not_unlock_if_synchronized(rthread,
+        in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  __ mov(rscratch2, true);
+  __ strb(rscratch2, do_not_unlock_if_synchronized);
+
+  // increment invocation count & check for overflow
+  Label invocation_counter_overflow;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
+  }
+
+  Label continue_after_compile;
+  __ bind(continue_after_compile);
+
+  bang_stack_shadow_pages(true);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ strb(zr, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER the invocation_counter check and stack overflow check,
+  // so the method is not locked if the counter overflows.
+  if (synchronized) {
+    lock_method();
+  } else {
+    // no synchronization necessary
+#ifdef ASSERT
+    {
+      Label L;
+      __ ldrw(r0, access_flags);
+      __ tst(r0, JVM_ACC_SYNCHRONIZED);
+      __ br(Assembler::EQ, L);
+      __ stop("method needs synchronization");
+      __ bind(L);
+    }
+#endif
+  }
+
+  // start execution
+#ifdef ASSERT
+  {
+    Label L;
+    const Address monitor_block_top(rfp,
+                 frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    __ ldr(rscratch1, monitor_block_top);
+    __ cmp(esp, rscratch1);
+    __ br(Assembler::EQ, L);
+    __ stop("broken stack frame setup in interpreter");
+    __ bind(L);
+  }
+#endif
+
+  // jvmti support
+  __ notify_method_entry();
+
+  // work registers
+  const Register t = r17;
+  const Register result_handler = r19;
+
+  // allocate space for parameters
+  __ load_unsigned_short(t, Address(rmethod, methodOopDesc::size_of_parameters_offset()));
+
+  __ sub(rscratch1, esp, t, ext::uxtx, Interpreter::logStackElementSize);
+  __ andr(sp, rscratch1, -16);
+  __ mov(esp, rscratch1);
+
+  // get signature handler
+  {
+    Label L;
+    __ ldr(t, Address(rmethod, methodOopDesc::signature_handler_offset()));
+    __ cbnz(t, L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::prepare_native_call),
+               rmethod);
+    __ ldr(t, Address(rmethod, methodOopDesc::signature_handler_offset()));
+    __ bind(L);
+  }
+
+  // call signature handler
+  assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals,
+         "adjust this code");
+  assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp,
+         "adjust this code");
+  assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1,
+          "adjust this code");
+
+  // The generated handlers do not touch rmethod (the method).
+  // However, large signatures cannot be cached and are generated
+  // each time here.  The slow-path generator can do a GC on return,
+  // so we must reload it after the call.
+  __ blr(t);
+  __ get_method(rmethod);        // slow path can do a GC, reload rmethod
+
+
+  // result handler is in r0
+  // set result handler
+  __ mov(result_handler, r0);
+  // pass mirror handle if static call
+  {
+    Label L;
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+    __ ldrw(t, Address(rmethod, methodOopDesc::access_flags_offset()));
+    __ tst(t, JVM_ACC_STATIC);
+    __ br(Assembler::EQ, L);
+    // get mirror
+    __ ldr(t, Address(rmethod, methodOopDesc::const_offset()));
+    __ ldr(t, Address(t, constMethodOopDesc::constants_offset()));
+    __ ldr(t, Address(t, constantPoolOopDesc::pool_holder_offset_in_bytes()));
+    __ ldr(t, Address(t, mirror_offset));
+    // copy mirror into activation frame
+    __ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize));
+    // pass handle to mirror
+    __ add(c_rarg1, rfp, frame::interpreter_frame_oop_temp_offset * wordSize);
+    __ bind(L);
+  }
+
+  // get native function entry point in r10
+  {
+    Label L;
+    __ ldr(r10, Address(rmethod, methodOopDesc::native_function_offset()));
+    address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
+    __ mov(rscratch2, unsatisfied);
+    __ ldr(rscratch2, rscratch2);
+    __ cmp(r10, rscratch2);
+    __ br(Assembler::NE, L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::prepare_native_call),
+               rmethod);
+    __ get_method(rmethod);
+    __ verify_oop(rmethod);
+    __ ldr(r10, Address(rmethod, methodOopDesc::native_function_offset()));
+    __ bind(L);
+  }
+
+  // pass JNIEnv
+  __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset()));
+
+  // It is enough that the pc() points into the right code
+  // segment. It does not have to be the correct return pc.
+  __ set_last_Java_frame(esp, rfp, (address)NULL, rscratch1);
+
+  // change thread state
+#ifdef ASSERT
+  {
+    Label L;
+    __ ldrw(t, Address(rthread, JavaThread::thread_state_offset()));
+    __ cmp(t, _thread_in_Java);
+    __ br(Assembler::EQ, L);
+    __ stop("Wrong thread state in native stub");
+    __ bind(L);
+  }
+#endif
+
+  // Change state to native
+  __ mov(rscratch1, _thread_in_native);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+
+  // load call format
+  __ ldrw(rscratch1, Address(rmethod, methodOopDesc::call_format_offset()));
+
+  // Call the native method.
+  __ blrt(r10, rscratch1);
+  __ maybe_isb();
+  __ get_method(rmethod);
+  // result potentially in r0 or v0
+
+  // make room for the pushes we're about to do
+  __ sub(rscratch1, esp, 4 * wordSize);
+  __ andr(sp, rscratch1, -16);
+
+  // NOTE: The order of these pushes is known to frame::interpreter_frame_result
+  // in order to extract the result of a method call. If the order of these
+  // pushes changes or anything else is added to the stack then the code in
+  // interpreter_frame_result must also change.
+  __ push(dtos);
+  __ push(ltos);
+
+  // change thread state
+  __ mov(rscratch1, _thread_in_native_trans);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+
+  if (os::is_MP()) {
+    if (UseMembar) {
+      // Force this write out before the read below
+      __ dsb(Assembler::SY);
+    } else {
+      // Write serialization page so VM thread can do a pseudo remote membar.
+      // We use the current thread pointer to calculate a thread specific
+      // offset to write to within the page. This minimizes bus traffic
+      // due to cache line collision.
+      __ serialize_memory(rthread, rscratch2);
+    }
+  }
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  {
+    Label Continue;
+    {
+      unsigned long offset;
+      __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
+      __ ldrw(rscratch2, Address(rscratch2, offset));
+    }
+    assert(SafepointSynchronize::_not_synchronized == 0,
+           "SafepointSynchronize::_not_synchronized");
+    Label L;
+    __ cbnz(rscratch2, L);
+    __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
+    __ cbz(rscratch2, Continue);
+    __ bind(L);
+
+    // Don't use call_VM as it will see a possible pending exception
+    // and forward it, and never return here, preventing us from
+    // clearing _last_native_pc down below.  Nor can we use
+    // call_VM_leaf, as it will check whether r13 & r14 are
+    // preserved and correspond to the bcp/locals pointers. So we do a
+    // runtime call by hand.
+    //
+    __ mov(c_rarg0, rthread);
+    __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+    __ blrt(rscratch2, 1, 0, 0);
+    __ maybe_isb();
+    __ get_method(rmethod);
+    __ reinit_heapbase();
+    __ bind(Continue);
+  }
+
+  // change thread state
+  __ mov(rscratch1, _thread_in_Java);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+
+  // reset_last_Java_frame
+  __ reset_last_Java_frame(true, true);
+
+  // reset handle block
+  __ ldr(t, Address(rthread, JavaThread::active_handles_offset()));
+  __ str(zr, Address(t, JNIHandleBlock::top_offset_in_bytes()));
+
+  // If result is an oop unbox and store it in frame where gc will see it
+  // and result handler will pick it up
+
+  {
+    Label no_oop, store_result;
+    __ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
+    __ cmp(t, result_handler);
+    __ br(Assembler::NE, no_oop);
+    // retrieve result
+    __ pop(ltos);
+    __ cbz(r0, store_result);
+    __ ldr(r0, Address(r0, 0));
+    __ bind(store_result);
+    __ str(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize));
+    // keep stack depth as expected by pushing the oop, which will eventually be discarded
+    __ push(ltos);
+    __ bind(no_oop);
+  }
+
+  {
+    Label no_reguard;
+    __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset())));
+    __ ldrb(rscratch1, Address(rscratch1));
+    __ cmp(rscratch1, JavaThread::stack_guard_yellow_disabled);
+    __ br(Assembler::NE, no_reguard);
+
+    __ pusha(); // XXX only save smashed registers
+    __ mov(c_rarg0, rthread);
+    __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+    __ blrt(rscratch2, 0, 0, 0);
+    __ popa(); // XXX only restore smashed registers
+    __ bind(no_reguard);
+  }
+
+  // The method register is junk from after the thread_in_native transition
+  // until here.  Also we can't call_VM until the bcp has been
+  // restored.  We need the bcp for throwing the exception below, so get it now.
+  __ get_method(rmethod);
+  __ verify_oop(rmethod);
+
+  // restore bcp to have legal interpreter frame, i.e., bci == 0 <=>
+  // rbcp == code_base()
+  __ ldr(rbcp, Address(rmethod, methodOopDesc::const_offset()));     // get constMethodOop
+  __ add(rbcp, rbcp, in_bytes(constMethodOopDesc::codes_offset()));  // get codebase
+  // handle exceptions (exception handling will handle unlocking!)
+  {
+    Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, L);
+    // Note: At some point we may want to unify this with the code
+    // used in call_VM_base(); i.e., we should use the
+    // StubRoutines::forward_exception code. For now this doesn't work
+    // here because the rsp is not correctly set at this point.
+    __ MacroAssembler::call_VM(noreg,
+                               CAST_FROM_FN_PTR(address,
+                               InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  // do unlocking if necessary
+  {
+    Label L;
+    __ ldrw(t, Address(rmethod, methodOopDesc::access_flags_offset()));
+    __ tst(t, JVM_ACC_SYNCHRONIZED);
+    __ br(Assembler::EQ, L);
+    // the code below should be shared with interpreter macro
+    // assembler implementation
+    {
+      Label unlock;
+      // BasicObjectLock will be first in list, since this is a
+      // synchronized method. However, need to check that the object
+      // has not been unlocked by an explicit monitorexit bytecode.
+
+      // the monitor is expected in c_rarg1 for the slow unlock path
+      __ lea (c_rarg1, Address(rfp,   // address of first monitor
+                               (intptr_t)(frame::interpreter_frame_initial_sp_offset *
+                                          wordSize - sizeof(BasicObjectLock))));
+
+      __ ldr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+      __ cbnz(t, unlock);
+
+      // Entry already unlocked, need to throw exception
+      __ MacroAssembler::call_VM(noreg,
+                                 CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+      __ should_not_reach_here();
+
+      __ bind(unlock);
+      __ unlock_object(c_rarg1);
+    }
+    __ bind(L);
+  }
+
+  // jvmti support
+  // Note: This must happen _after_ handling/throwing any exceptions since
+  //       the exception handler code notifies the runtime of method exits
+  //       too. If this happens before, method entry/exit notifications are
+  //       not properly paired (was bug - gri 11/22/99).
+  __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
+
+  // restore potential result in r0/d0, then call the result handler to
+  // handle the result
+
+  __ pop(ltos);
+  __ pop(dtos);
+
+  __ blr(result_handler);
+
+  // remove activation
+  __ ldr(esp, Address(rfp,
+                    frame::interpreter_frame_sender_sp_offset *
+                    wordSize)); // get sender sp
+  // remove frame anchor
+  __ leave();
+
+  // restore sender sp
+  __ mov(sp, esp);
+
+  __ ret(lr);
+
+  if (inc_counter) {
+    // Handle overflow of counter and compile method
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(&continue_after_compile);
+  }
+
+  return entry_point;
+}
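+
+// Sketch of the thread-state transitions in the entry above (a reading of
+// the code, not a spec; set_state is illustrative shorthand for the stlrw
+// stores):
+//
+//   thread->set_state(_thread_in_native);          // before the call
+//   result = (*native_function)(env, receiver_or_mirror, args...);
+//   thread->set_state(_thread_in_native_trans);    // after the call
+//   membar or serialization-page write;            // so the VM thread sees it
+//   if (safepoint pending || suspend requested)
+//     JavaThread::check_special_condition_for_native_trans(thread);
+//   thread->set_state(_thread_in_Java);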
+
+//
+// Generic interpreted method entry to (asm) interpreter
+//
+address InterpreterGenerator::generate_normal_entry(bool synchronized) {
+  // determine code generation flags
+  bool inc_counter  = UseCompiler || CountCompiledCalls;
+
+  // rscratch1: sender sp
+  address entry_point = __ pc();
+
+  const Address size_of_parameters(rmethod,
+                                   methodOopDesc::size_of_parameters_offset());
+  const Address size_of_locals(rmethod, methodOopDesc::size_of_locals_offset());
+  const Address invocation_counter(rmethod,
+                                   methodOopDesc::invocation_counter_offset() +
+                                   InvocationCounter::counter_offset());
+  const Address access_flags(rmethod, methodOopDesc::access_flags_offset());
+
+  // get parameter size (always needed)
+  __ load_unsigned_short(r2, size_of_parameters);
+
+  // r2: size of parameters
+
+  __ load_unsigned_short(r3, size_of_locals); // get size of locals in words
+  __ sub(r3, r3, r2); // r3 = no. of additional locals
+
+  // see if we've got enough room on the stack for locals plus overhead.
+  generate_stack_overflow_check();
+
+  // compute beginning of parameters (rlocals)
+  __ add(rlocals, esp, r2, ext::uxtx, 3);
+  __ sub(rlocals, rlocals, wordSize);
+
+  // Make room for locals
+  __ sub(rscratch1, esp, r3, ext::uxtx, 3);
+  __ andr(sp, rscratch1, -16);
+
+  // r3 - # of additional locals
+  // allocate space for locals
+  // explicitly initialize locals
+  {
+    Label exit, loop;
+    __ ands(zr, r3, r3);
+    __ br(Assembler::LE, exit); // do nothing if r3 <= 0
+    __ bind(loop);
+    __ str(zr, Address(__ post(rscratch1, wordSize)));
+    __ sub(r3, r3, 1); // until everything initialized
+    __ cbnz(r3, loop);
+    __ bind(exit);
+  }
+
+  // And the base dispatch table
+  __ get_dispatch();
+
+  // (pre-)fetch invocation count
+  if (inc_counter) {
+    __ ldrw(r2, invocation_counter);
+  }
+  // initialize fixed part of activation frame
+  generate_fixed_frame(false);
+#ifndef PRODUCT
+  // tell the simulator that a method has been entered
+  if (NotifySimulator) {
+    __ notify(Assembler::method_entry);
+  }
+#endif
+  // make sure method is not native & not abstract
+#ifdef ASSERT
+  __ ldrw(r0, access_flags);
+  {
+    Label L;
+    __ tst(r0, JVM_ACC_NATIVE);
+    __ br(Assembler::EQ, L);
+    __ stop("tried to execute native method as non-native");
+    __ bind(L);
+  }
+  {
+    Label L;
+    __ tst(r0, JVM_ACC_ABSTRACT);
+    __ br(Assembler::EQ, L);
+    __ stop("tried to execute abstract method in interpreter");
+    __ bind(L);
+  }
+#endif
+
+  // Since at this point in the method invocation the exception
+  // handler would try to exit the monitor of a synchronized method
+  // which has not been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. The remove_activation
+  // will check this flag.
+
+  const Address do_not_unlock_if_synchronized(rthread,
+        in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  __ mov(rscratch2, true);
+  __ strb(rscratch2, do_not_unlock_if_synchronized);
+
+  // increment invocation count & check for overflow
+  Label invocation_counter_overflow;
+  Label profile_method;
+  Label profile_method_continue;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow,
+                          &profile_method,
+                          &profile_method_continue);
+    if (ProfileInterpreter) {
+      __ bind(profile_method_continue);
+    }
+  }
+
+  Label continue_after_compile;
+  __ bind(continue_after_compile);
+
+  bang_stack_shadow_pages(false);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ strb(zr, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER the invocation_counter check and stack overflow check,
+  // so the method is not locked if the counter overflows.
+  if (synchronized) {
+    // Allocate monitor and lock method
+    lock_method();
+  } else {
+    // no synchronization necessary
+#ifdef ASSERT
+    {
+      Label L;
+      __ ldrw(r0, access_flags);
+      __ tst(r0, JVM_ACC_SYNCHRONIZED);
+      __ br(Assembler::EQ, L);
+      __ stop("method needs synchronization");
+      __ bind(L);
+    }
+#endif
+  }
+
+  // start execution
+#ifdef ASSERT
+  {
+    Label L;
+    const Address monitor_block_top(rfp,
+                 frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    __ ldr(rscratch1, monitor_block_top);
+    __ cmp(esp, rscratch1);
+    __ br(Assembler::EQ, L);
+    __ stop("broken stack frame setup in interpreter");
+    __ bind(L);
+  }
+#endif
+
+  // jvmti support
+  __ notify_method_entry();
+
+  __ dispatch_next(vtos);
+
+  // invocation counter overflow
+  if (inc_counter) {
+    if (ProfileInterpreter) {
+      // We have decided to profile this method in the interpreter
+      __ bind(profile_method);
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ set_method_data_pointer_for_bcp();
+      // don't think we need this
+      __ get_method(r1);
+      __ b(profile_method_continue);
+    }
+    // Handle overflow of counter and compile method
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(&continue_after_compile);
+  }
+
+  return entry_point;
+}
+
+// Entry points
+//
+// Here we generate the various kinds of entries into the interpreter.
+// The two main entry types are generic bytecode methods and native
+// call methods.  These both come in synchronized and non-synchronized
+// versions but the frame layout they create is very similar. The
+// other method entry types are special purpose entries that combine
+// entry and interpretation in one. These are for trivial methods like
+// accessor, empty, or special math methods.
+//
+// When control flow reaches any of the entry types for the interpreter
+// the following holds ->
+//
+// Arguments:
+//
+// rmethod: methodOop
+//
+// Stack layout immediately at entry
+//
+// [ return address     ] <--- rsp
+// [ parameter n        ]
+//   ...
+// [ parameter 1        ]
+// [ expression stack   ] (caller's java expression stack)
+
+// Assuming that we don't go to one of the trivial specialized entries
+// the stack will look like below when we are ready to execute the
+// first bytecode (or call the native routine). The register usage
+// will be as the template based interpreter expects (see
+// interpreter_aarch64.hpp).
+//
+// local variables follow incoming parameters immediately; i.e.,
+// the return address is moved to the end of the locals.
+//
+// [ monitor entry      ] <--- esp
+//   ...
+// [ monitor entry      ]
+// [ expr. stack bottom ]
+// [ saved rbcp         ]
+// [ current rlocals    ]
+// [ Method*            ]
+// [ saved rfp          ] <--- rfp
+// [ return address     ]
+// [ local variable m   ]
+//   ...
+// [ local variable 1   ]
+// [ parameter n        ]
+//   ...
+// [ parameter 1        ] <--- rlocals
+
+address AbstractInterpreterGenerator::generate_method_entry(
+                                        AbstractInterpreter::MethodKind kind) {
+  // determine code generation flags
+  bool synchronized = false;
+  address entry_point = NULL;
+
+  switch (kind) {
+  case Interpreter::zerolocals             :                                                                             break;
+  case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
+  case Interpreter::native                 : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break;
+  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true);  break;
+  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry();       break;
+  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry();    break;
+  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();    break;
+
+  case Interpreter::java_lang_math_sin     : // fall thru
+  case Interpreter::java_lang_math_cos     : // fall thru
+  case Interpreter::java_lang_math_tan     : // fall thru
+  case Interpreter::java_lang_math_abs     : // fall thru
+  case Interpreter::java_lang_math_log     : // fall thru
+  case Interpreter::java_lang_math_log10   : // fall thru
+  case Interpreter::java_lang_math_sqrt    : // fall thru
+  case Interpreter::java_lang_math_pow     : // fall thru
+  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
+  case Interpreter::java_lang_ref_reference_get
+                                           : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+  case Interpreter::java_util_zip_CRC32_update
+                                           : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry();  break;
+  case Interpreter::java_util_zip_CRC32_updateBytes
+                                           : // fall thru
+  case Interpreter::java_util_zip_CRC32_updateByteBuffer
+                                           : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break;
+  default                                  : ShouldNotReachHere();                                                       break;
+  }
+
+  if (entry_point) {
+    return entry_point;
+  }
+
+  return ((InterpreterGenerator*) this)->
+                                generate_normal_entry(synchronized);
+}
+
+
+// These should never be compiled since the interpreter will prefer
+// the compiled version to the intrinsic version.
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  switch (method_kind(m)) {
+    case Interpreter::java_lang_math_sin     : // fall thru
+    case Interpreter::java_lang_math_cos     : // fall thru
+    case Interpreter::java_lang_math_tan     : // fall thru
+    case Interpreter::java_lang_math_abs     : // fall thru
+    case Interpreter::java_lang_math_log     : // fall thru
+    case Interpreter::java_lang_math_log10   : // fall thru
+    case Interpreter::java_lang_math_sqrt    : // fall thru
+    case Interpreter::java_lang_math_pow     : // fall thru
+    case Interpreter::java_lang_math_exp     :
+      return false;
+    default:
+      return true;
+  }
+}
+
+// How much stack a method activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(methodOop method) {
+  const int entry_size = frame::interpreter_frame_monitor_size();
+
+  // total overhead size: entry_size + (saved rfp thru expr stack
+  // bottom).  be sure to change this if you add/subtract anything
+  // to/from the overhead area
+  const int overhead_size =
+    -(frame::interpreter_frame_initial_sp_offset) + entry_size;
+
+  const int stub_code = frame::entry_frame_after_call_words;
+  const int extra_stack = methodOopDesc::extra_stack_entries();
+  const int method_stack = (method->max_locals() + method->max_stack() + extra_stack) *
+                           Interpreter::stackElementWords;
+  return (overhead_size + method_stack + stub_code);
+}
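+
+// Worked example (the numbers are assumptions for illustration): with
+// max_locals = 4, max_stack = 6, extra_stack = 2 and
+// Interpreter::stackElementWords == 1, method_stack is
+// (4 + 6 + 2) * 1 = 12 words, to which overhead_size and the
+// entry_frame_after_call_words stub area are added.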
+
+int AbstractInterpreter::layout_activation(methodOop method,
+                                           int tempcount,
+                                           int popframe_extra_args,
+                                           int moncount,
+                                           int caller_actual_parameters,
+                                           int callee_param_count,
+                                           int callee_locals,
+                                           frame* caller,
+                                           frame* interpreter_frame,
+                                           bool is_top_frame,
+                                           bool is_bottom_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+  // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+  // The frame interpreter_frame, if not NULL, is guaranteed to be the
+  // right size, as determined by a previous call to this method.
+  // It is also guaranteed to be walkable even though it is in a skeletal state
+
+  // fixed size of an interpreter frame:
+  int max_locals = method->max_locals() * Interpreter::stackElementWords;
+  int extra_locals = (method->max_locals() - method->size_of_parameters()) *
+                     Interpreter::stackElementWords;
+
+  int overhead = frame::sender_sp_offset -
+                 frame::interpreter_frame_initial_sp_offset;
+  // Our locals were accounted for by the caller (or last_frame_adjust
+  // on the transition).  Since the callee parameters already account
+  // for the callee's params, we only need to account for the extra
+  // locals.
+  int size = overhead +
+         (callee_locals - callee_param_count)*Interpreter::stackElementWords +
+         moncount * frame::interpreter_frame_monitor_size() +
+         tempcount* Interpreter::stackElementWords + popframe_extra_args;
+
+  // On AArch64 we always keep the stack pointer 16-aligned, so we
+  // must round up here.
+  size = round_to(size, 2);
+
+  if (interpreter_frame != NULL) {
+#ifdef ASSERT
+    if (!EnableInvokeDynamic)
+      // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
+      // Probably, since deoptimization doesn't work yet.
+      assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+    assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
+#endif
+
+    interpreter_frame->interpreter_frame_set_method(method);
+    // NOTE the difference in using sender_sp and
+    // interpreter_frame_sender_sp: interpreter_frame_sender_sp is
+    // the original sp of the caller (the unextended_sp) and
+    // sender_sp is fp+16 XXX
+    intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
+
+#ifdef ASSERT
+    if (caller->is_interpreted_frame()) {
+      assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+    }
+#endif
+
+    interpreter_frame->interpreter_frame_set_locals(locals);
+    BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+    BasicObjectLock* monbot = montop - moncount;
+    interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+
+    // Set last_sp
+    intptr_t*  esp = (intptr_t*) monbot -
+                     tempcount*Interpreter::stackElementWords -
+                     popframe_extra_args;
+    interpreter_frame->interpreter_frame_set_last_sp(esp);
+
+    // All frames but the initial (oldest) interpreter frame we fill in have
+    // a value for sender_sp that allows walking the stack but isn't
+    // truly correct. Correct the value here.
+    if (extra_locals != 0 &&
+        interpreter_frame->sender_sp() ==
+        interpreter_frame->interpreter_frame_sender_sp()) {
+      interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() +
+                                                         extra_locals);
+    }
+    *interpreter_frame->interpreter_frame_cache_addr() =
+      method->constants()->cache();
+
+    // interpreter_frame->obj_at_put(frame::sender_sp_offset,
+    //                            (oop)interpreter_frame->addr_at(frame::sender_sp_offset));
+  }
+  return size;
+}
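+
+// Example of the rounding above (illustrative numbers, assuming
+// frame::interpreter_frame_monitor_size() == 2 words): with an overhead
+// of, say, 12 words, 3 monitors and 1 temp, size is 12 + 6 + 1 = 19
+// words, which round_to(size, 2) bumps to 20 so the frame remains a
+// multiple of 16 bytes with 8-byte words.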
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+  // Entry point in previous activation (i.e., if the caller was
+  // interpreted)
+  Interpreter::_rethrow_exception_entry = __ pc();
+  // Restore sp to interpreter_frame_last_sp even though we are going
+  // to empty the expression stack for the exception processing.
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // r0: exception
+  // r3: return address/pc that threw exception
+  __ restore_bcp();    // rbcp points to call/send
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ reinit_heapbase();  // restore rheapbase as heapbase.
+  __ get_dispatch();
+
+#ifndef PRODUCT
+  // tell the simulator that the caller method has been reentered
+  if (NotifySimulator) {
+    __ get_method(rmethod);
+    __ notify(Assembler::method_reentry);
+  }
+#endif
+  // Entry point for exceptions thrown within interpreter code
+  Interpreter::_throw_exception_entry = __ pc();
+  // If we came here via a NullPointerException on the receiver of a
+  // method, rmethod may be corrupt.
+  __ get_method(rmethod);
+  // expression stack is undefined here
+  // r0: exception
+  // rbcp: exception bcp
+  __ verify_oop(r0);
+  __ mov(c_rarg1, r0);
+
+  // expression stack must be empty before entering the VM in case of
+  // an exception
+  __ empty_expression_stack();
+  // find exception handler address and preserve exception oop
+  __ call_VM(r3,
+             CAST_FROM_FN_PTR(address,
+                          InterpreterRuntime::exception_handler_for_exception),
+             c_rarg1);
+
+  // Calculate stack limit
+  __ ldrh(rscratch1, Address(rmethod, methodOopDesc::max_stack_offset()));
+  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size()
+         + (EnableInvokeDynamic ? 2 : 0) + 2);
+  __ ldr(rscratch2,
+         Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3);
+  __ andr(sp, rscratch1, -16);
+
+  // r0: exception handler entry point
+  // r3: preserved exception oop
+  // rbcp: bcp for exception handler
+  __ push_ptr(r3); // push exception which is now the only value on the stack
+  __ br(r0); // jump to exception handler (may be _remove_activation_entry!)
+
+  // If the exception is not handled in the current frame the frame is
+  // removed and the exception is rethrown (i.e. exception
+  // continuation is _rethrow_exception).
+  //
+  // Note: At this point the bci still refers to the instruction
+  // which caused the exception and the expression stack is
+  // empty. Thus, for any VM calls at this point, GC will find a legal
+  // oop map (with empty expression stack).
+
+  //
+  // JVMTI PopFrame support
+  //
+
+  Interpreter::_remove_activation_preserving_args_entry = __ pc();
+  __ empty_expression_stack();
+  // Set the popframe_processing bit in pending_popframe_condition
+  // indicating that we are currently handling popframe, so that
+  // call_VMs that may happen later do not trigger new popframe
+  // handling cycles.
+  __ ldrw(r3, Address(rthread, JavaThread::popframe_condition_offset()));
+  __ orr(r3, r3, JavaThread::popframe_processing_bit);
+  __ strw(r3, Address(rthread, JavaThread::popframe_condition_offset()));
+
+  {
+    // Check to see whether we are returning to a deoptimized frame.
+    // (The PopFrame call ensures that the caller of the popped frame is
+    // either interpreted or compiled and deoptimizes it if compiled.)
+    // In this case, we can't call dispatch_next() after the frame is
+    // popped, but instead must save the incoming arguments and restore
+    // them after deoptimization has occurred.
+    //
+    // Note that we don't compare the return PC against the
+    // deoptimization blob's unpack entry because of the presence of
+    // adapter frames in C2.
+    Label caller_not_deoptimized;
+    __ ldr(c_rarg1, Address(rfp, frame::return_addr_offset * wordSize));
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                               InterpreterRuntime::interpreter_contains), c_rarg1);
+    __ cbnz(r0, caller_not_deoptimized);
+
+    // Compute size of arguments for saving when returning to
+    // deoptimized caller
+    __ get_method(r0);
+    __ load_unsigned_short(r0, Address(r0, in_bytes(methodOopDesc::
+                                              size_of_parameters_offset())));
+    __ lsl(r0, r0, Interpreter::logStackElementSize);
+    __ restore_locals(); // XXX do we need this?
+    __ sub(rlocals, rlocals, r0);
+    __ add(rlocals, rlocals, wordSize);
+    // Save these arguments
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                           Deoptimization::
+                                           popframe_preserve_args),
+                          rthread, r0, rlocals);
+
+    __ remove_activation(vtos,
+                         /* throw_monitor_exception */ false,
+                         /* install_monitor_exception */ false,
+                         /* notify_jvmdi */ false);
+
+    // Inform deoptimization that it is responsible for restoring
+    // these arguments
+    __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit);
+    __ strw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
+
+    // Continue in deoptimization handler
+    __ ret(lr);
+
+    __ bind(caller_not_deoptimized);
+  }
+
+  __ remove_activation(vtos,
+                       /* throw_monitor_exception */ false,
+                       /* install_monitor_exception */ false,
+                       /* notify_jvmdi */ false);
+
+  // Restore the last_sp and null it out
+  __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(rmethod);
+  __ get_dispatch();
+
+  // The method data pointer was incremented already during
+  // call profiling. We have to restore the mdp for the current bcp.
+  if (ProfileInterpreter) {
+    __ set_method_data_pointer_for_bcp();
+  }
+
+  // Clear the popframe condition flag
+  __ strw(zr, Address(rthread, JavaThread::popframe_condition_offset()));
+  assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive");
+
+  if (EnableInvokeDynamic) {
+    Label L_done;
+
+    __ ldrb(rscratch1, Address(rbcp, 0));
+    __ cmpw(rscratch1, Bytecodes::_invokestatic);
+    __ br(Assembler::NE, L_done);
+
+    // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
+    // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
+
+    __ ldr(c_rarg0, Address(rlocals, 0));
+    __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp);
+
+    __ cbz(r0, L_done);
+
+    __ str(r0, Address(esp, 0));
+    __ bind(L_done);
+  }
+
+  // Restore machine SP
+  __ ldrh(rscratch1, Address(rmethod, methodOopDesc::max_stack_offset()));
+  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size()
+         + (EnableInvokeDynamic ? 2 : 0));
+  __ ldr(rscratch2,
+         Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3);
+  __ andr(sp, rscratch1, -16);
+
+  __ dispatch_next(vtos);
+  // end of PopFrame support
+
+  Interpreter::_remove_activation_entry = __ pc();
+
+  // preserve exception over this code sequence
+  __ pop_ptr(r0);
+  __ str(r0, Address(rthread, JavaThread::vm_result_offset()));
+  // remove the activation (without doing throws on illegalMonitorExceptions)
+  __ remove_activation(vtos, false, true, false);
+  // restore exception
+  __ get_vm_result(r0, rthread);
+
+  // In between activations - previous activation type unknown yet
+  // compute continuation point - the continuation point expects the
+  // following registers set up:
+  //
+  // r0: exception
+  // lr: return address/pc that threw exception
+  // esp: expression stack of caller
+  // rfp: fp of caller
+  __ stp(r0, lr, Address(__ pre(sp, -2 * wordSize)));  // save exception & return address
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                          SharedRuntime::exception_handler_for_return_address),
+                        rthread, lr);
+  __ mov(r1, r0);                               // save exception handler
+  __ ldp(r0, lr, Address(__ post(sp, 2 * wordSize)));  // restore exception & return address
+  // We might be returning to a deopt handler that expects r3 to
+  // contain the exception pc
+  __ mov(r3, lr);
+  // Note that an "issuing PC" is actually the next PC after the call
+  __ br(r1);                                    // jump to exception
+                                                // handler of caller
+}
+
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
+  address entry = __ pc();
+
+  __ restore_bcp();
+  __ restore_locals();
+  __ empty_expression_stack();
+  __ load_earlyret_value(state);
+
+  __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+  Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset());
+
+  // Clear the earlyret state
+  assert(JvmtiThreadState::earlyret_inactive == 0, "should be");
+  __ str(zr, cond_addr);
+
+  __ remove_activation(state,
+                       false, /* throw_monitor_exception */
+                       false, /* install_monitor_exception */
+                       true); /* notify_jvmdi */
+  __ ret(lr);
+
+  return entry;
+} // end of ForceEarlyReturn support
+
+
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+                                                         address& bep,
+                                                         address& cep,
+                                                         address& sep,
+                                                         address& aep,
+                                                         address& iep,
+                                                         address& lep,
+                                                         address& fep,
+                                                         address& dep,
+                                                         address& vep) {
+  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
+  Label L;
+  aep = __ pc();  __ push_ptr();  __ b(L);
+  fep = __ pc();  __ push_f();    __ b(L);
+  dep = __ pc();  __ push_d();    __ b(L);
+  lep = __ pc();  __ push_l();    __ b(L);
+  bep = cep = sep =
+  iep = __ pc();  __ push_i();
+  vep = __ pc();
+  __ bind(L);
+  generate_and_dispatch(t);
+}
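+// The entry points generated above are laid out roughly as:
+//   aep: push_ptr ; b L        // atos callers spill tos to the stack first
+//   fep: push_f   ; b L
+//   dep: push_d   ; b L
+//   lep: push_l   ; b L
+//   iep: push_i                // bep/cep/sep alias the itos entry
+//   vep:                       // vtos callers fall straight through
+//   L:  generate_and_dispatch(t)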
+
+//-----------------------------------------------------------------------------
+// Generation of individual instructions
+
+// helpers for generate_and_dispatch
+
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+  : TemplateInterpreterGenerator(code) {
+   generate_all(); // down here so it can be "virtual"
+}
+
+//-----------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+  address entry = __ pc();
+
+  __ push(lr);
+  __ push(state);
+  __ push(RegSet::range(r0, r15), sp);
+  __ mov(c_rarg2, r0);  // Pass itos
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode),
+             c_rarg1, c_rarg2, c_rarg3);
+  __ pop(RegSet::range(r0, r15), sp);
+  __ pop(state);
+  __ pop(lr);
+  __ ret(lr);                                   // return from result handler
+
+  return entry;
+}
+
+void TemplateInterpreterGenerator::count_bytecode() {
+  Register rscratch3 = r0;
+  __ push(rscratch1);
+  __ push(rscratch2);
+  __ push(rscratch3);
+  Label L;
+  __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
+  __ bind(L);
+  __ ldxr(rscratch1, rscratch2);
+  __ add(rscratch1, rscratch1, 1);
+  __ stxr(rscratch3, rscratch1, rscratch2);
+  __ cbnzw(rscratch3, L);
+  __ pop(rscratch3);
+  __ pop(rscratch2);
+  __ pop(rscratch1);
+}
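+// The ldxr/stxr pair above forms a load-/store-exclusive retry loop, i.e. an
+// atomic increment of BytecodeCounter::_counter_value, roughly:
+//   do {
+//     tmp    = load_exclusive(counter_addr);            // ldxr
+//     failed = store_exclusive(counter_addr, tmp + 1);  // stxr: 0 on success
+//   } while (failed);                                   // cbnzw retries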
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; }
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; }
+
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Call a little run-time stub to avoid blow-up for each bytecode.
+  // The run-time stub saves the right registers, depending on
+  // the tosca in-state for the given template.
+
+  assert(Interpreter::trace_code(t->tos_in()) != NULL,
+         "entry must have been generated");
+  __ bl(Interpreter::trace_code(t->tos_in()));
+  __ reinit_heapbase();
+}
+
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+  Label L;
+  __ push(rscratch1);
+  __ mov(rscratch1, (address) &BytecodeCounter::_counter_value);
+  __ ldr(rscratch1, Address(rscratch1));
+  __ mov(rscratch2, StopInterpreterAt);
+  __ cmpw(rscratch1, rscratch2);
+  __ br(Assembler::NE, L);
+  __ brk(0);
+  __ bind(L);
+  __ pop(rscratch1);
+}
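+// Non-product debugging aid: run a debug build with -XX:StopInterpreterAt=<n>
+// and the brk(0) above fires once the global bytecode counter reaches n.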
+
+#ifdef BUILTIN_SIM
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+
+extern "C" {
+  static int PAGESIZE = getpagesize();
+  int is_mapped_address(u_int64_t address)
+  {
+    address = (address & ~((u_int64_t)PAGESIZE - 1));
+    if (msync((void *)address, PAGESIZE, MS_ASYNC) == 0) {
+      return true;
+    }
+    if (errno != ENOMEM) {
+      return true;
+    }
+    return false;
+  }
+
+  void bccheck1(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode)
+  {
+    if (method != 0) {
+      method[0] = '\0';
+    }
+    if (bcidx != 0) {
+      *bcidx = -2;
+    }
+    if (decode != 0) {
+      decode[0] = 0;
+    }
+
+    if (framesize != 0) {
+      *framesize = -1;
+    }
+
+    if (Interpreter::contains((address)pc)) {
+      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+      methodOop meth;
+      address bcp;
+      if (fp) {
+#define FRAME_SLOT_METHOD 3
+#define FRAME_SLOT_BCP 7
+        meth = (methodOop)sim->getMemory()->loadU64(fp - (FRAME_SLOT_METHOD << 3));
+        bcp = (address)sim->getMemory()->loadU64(fp - (FRAME_SLOT_BCP << 3));
+#undef FRAME_SLOT_METHOD
+#undef FRAME_SLOT_BCP
+      } else {
+        meth = (methodOop)sim->getCPUState().xreg(RMETHOD, 0);
+        bcp = (address)sim->getCPUState().xreg(RBCP, 0);
+      }
+      if (meth->is_native()) {
+        return;
+      }
+      if (method && meth->is_method()) {
+        ResourceMark rm;
+        method[0] = 'I';
+        method[1] = ' ';
+        meth->name_and_sig_as_C_string(method + 2, 398);
+      }
+      if (bcidx) {
+        if (meth->contains(bcp)) {
+          *bcidx = meth->bci_from(bcp);
+        } else {
+          *bcidx = -2;
+        }
+      }
+      if (decode) {
+        if (!BytecodeTracer::closure()) {
+          BytecodeTracer::set_closure(BytecodeTracer::std_closure());
+        }
+        stringStream str(decode, 400);
+        BytecodeTracer::trace(meth, bcp, &str);
+      }
+    } else {
+      if (method) {
+        CodeBlob *cb = CodeCache::find_blob((address)pc);
+        if (cb != NULL) {
+          if (cb->is_nmethod()) {
+            ResourceMark rm;
+            nmethod* nm = (nmethod*)cb;
+            method[0] = 'C';
+            method[1] = ' ';
+            nm->method()->name_and_sig_as_C_string(method + 2, 398);
+          } else if (cb->is_adapter_blob()) {
+            strcpy(method, "B adapter blob");
+          } else if (cb->is_runtime_stub()) {
+            strcpy(method, "B runtime stub");
+          } else if (cb->is_exception_stub()) {
+            strcpy(method, "B exception stub");
+          } else if (cb->is_deoptimization_stub()) {
+            strcpy(method, "B deoptimization stub");
+          } else if (cb->is_safepoint_stub()) {
+            strcpy(method, "B safepoint stub");
+          } else if (cb->is_uncommon_trap_stub()) {
+            strcpy(method, "B uncommon trap stub");
+          } else if (cb->contains((address)StubRoutines::call_stub())) {
+            strcpy(method, "B call stub");
+          } else {
+            strcpy(method, "B unknown blob : ");
+            strcat(method, cb->name());
+          }
+          if (framesize != NULL) {
+            *framesize = cb->frame_size();
+          }
+        }
+      }
+    }
+  }
+
+
+  JNIEXPORT void bccheck(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode)
+  {
+    bccheck1(pc, fp, method, bcidx, framesize, decode);
+  }
+}
+
+#endif // BUILTIN_SIM
+#endif // !PRODUCT
+#endif // ! CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP
+#define CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP
+
+
+  protected:
+
+  // Size of interpreter code.  Increase if too small.  Interpreter will
+ * fail with a guarantee ("not enough space for interpreter generation")
+ * if it is too small.
+  // Run with +PrintInterpreter to get the VM to print out the size.
+  // Max size with JVMTI
+  const static int InterpreterCodeSize = 200 * 1024;
+
+#endif // CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,3909 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+
+#ifndef CC_INTERP
+
+#define __ _masm->
+
+// Platform-dependent initialization
+
+void TemplateTable::pd_initialize() {
+  // No AArch64 specific initialization
+}
+
+// Address computation: local variables
+
+static inline Address iaddress(int n) {
+  return Address(rlocals, Interpreter::local_offset_in_bytes(n));
+}
+
+static inline Address laddress(int n) {
+  return iaddress(n + 1);
+}
+
+static inline Address faddress(int n) {
+  return iaddress(n);
+}
+
+static inline Address daddress(int n) {
+  return laddress(n);
+}
+
+static inline Address aaddress(int n) {
+  return iaddress(n);
+}
+
+static inline Address iaddress(Register r) {
+  return Address(rlocals, r, Address::lsl(3));
+}
+
+static inline Address laddress(Register r, Register scratch,
+                               InterpreterMacroAssembler* _masm) {
+  __ lea(scratch, Address(rlocals, r, Address::lsl(3)));
+  return Address(scratch, Interpreter::local_offset_in_bytes(1));
+}
+
+static inline Address faddress(Register r) {
+  return iaddress(r);
+}
+
+static inline Address daddress(Register r, Register scratch,
+                               InterpreterMacroAssembler* _masm) {
+  return laddress(r, scratch, _masm);
+}
+
+static inline Address aaddress(Register r) {
+  return iaddress(r);
+}
+
+static inline Address at_rsp() {
+  return Address(esp, 0);
+}
+
+// At the top of the Java expression stack, which may be different from
+// esp().  It isn't for category 1 objects.
+static inline Address at_tos   () {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(0));
+}
+
+static inline Address at_tos_p1() {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(1));
+}
+
+static inline Address at_tos_p2() {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(2));
+}
+
+static inline Address at_tos_p3() {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(3));
+}
+
+static inline Address at_tos_p4() {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(4));
+}
+
+static inline Address at_tos_p5() {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(5));
+}
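+// Note: assuming Interpreter::expr_offset_in_bytes(i) == i * wordSize (as on
+// other 64-bit ports), at_tos_pN() is just Address(esp, N * wordSize), i.e.
+// the Nth element below the current top of the expression stack.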
+
+// Condition conversion
+static Assembler::Condition j_not(TemplateTable::Condition cc) {
+  switch (cc) {
+  case TemplateTable::equal        : return Assembler::NE;
+  case TemplateTable::not_equal    : return Assembler::EQ;
+  case TemplateTable::less         : return Assembler::GE;
+  case TemplateTable::less_equal   : return Assembler::GT;
+  case TemplateTable::greater      : return Assembler::LE;
+  case TemplateTable::greater_equal: return Assembler::LT;
+  }
+  ShouldNotReachHere();
+  return Assembler::EQ;
+}
+
+
+// Miscellaneous helper routines
+// Store an oop (or NULL) at the Address described by obj.
+// If val == noreg this means store a NULL
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == r0, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        if (obj.index() == noreg && obj.offset() == 0) {
+          if (obj.base() != r3) {
+            __ mov(r3, obj.base());
+          }
+        } else {
+          __ lea(r3, obj);
+        }
+        __ g1_write_barrier_pre(r3 /* obj */,
+                                r1 /* pre_val */,
+                                rthread /* thread */,
+                                r10  /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
+        if (val == noreg) {
+          __ store_heap_oop_null(Address(r3, 0));
+        } else {
+          // G1 barrier needs uncompressed oop for region cross check.
+          Register new_val = val;
+          if (UseCompressedOops) {
+            new_val = rscratch2;
+            __ mov(new_val, val);
+          }
+          __ store_heap_oop(Address(r3, 0), val);
+          __ g1_write_barrier_post(r3 /* store_adr */,
+                                   new_val /* new_val */,
+                                   rthread /* thread */,
+                                   r10 /* tmp */,
+                                   r1 /* tmp2 */);
+        }
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ store_heap_oop_null(obj);
+        } else {
+          __ store_heap_oop(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.offset() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ lea(r3, obj);
+            __ store_check(r3);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ store_heap_oop_null(obj);
+      } else {
+        __ store_heap_oop(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
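+// Typical uses (see aastore below): store a value through the collector's
+// barrier, or store a NULL by passing noreg as the value register:
+//   do_oop_store(_masm, element_address, r0, _bs->kind(), true);
+//   do_oop_store(_masm, element_address, noreg, _bs->kind(), true);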
+
+Address TemplateTable::at_bcp(int offset) {
+  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+  return Address(rbcp, offset);
+}
+
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+                                   int byte_no)
+{
+  if (!RewriteBytecodes)  return;
+  Label L_patch_done;
+
+  switch (bc) {
+  case Bytecodes::_fast_aputfield:
+  case Bytecodes::_fast_bputfield:
+  case Bytecodes::_fast_zputfield:
+  case Bytecodes::_fast_cputfield:
+  case Bytecodes::_fast_dputfield:
+  case Bytecodes::_fast_fputfield:
+  case Bytecodes::_fast_iputfield:
+  case Bytecodes::_fast_lputfield:
+  case Bytecodes::_fast_sputfield:
+    {
+      // We skip bytecode quickening for putfield instructions when
+      // the put_code written to the constant pool cache is zero.
+      // This is required so that every execution of this instruction
+      // calls out to InterpreterRuntime::resolve_get_put to do
+      // additional, required work.
+      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
+      __ movw(bc_reg, bc);
+      __ cmpw(temp_reg, (unsigned) 0);
+      __ br(Assembler::EQ, L_patch_done);  // don't patch
+    }
+    break;
+  default:
+    assert(byte_no == -1, "sanity");
+    // the pair bytecodes have already done the load.
+    if (load_bc_into_bc_reg) {
+      __ movw(bc_reg, bc);
+    }
+  }
+
+  if (JvmtiExport::can_post_breakpoint()) {
+    Label L_fast_patch;
+    // if a breakpoint is present we can't rewrite the stream directly
+    __ load_unsigned_byte(temp_reg, at_bcp(0));
+    __ cmpw(temp_reg, Bytecodes::_breakpoint);
+    __ br(Assembler::NE, L_fast_patch);
+    // Let breakpoint table handling rewrite to quicker bytecode
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), rmethod, rbcp, bc_reg);
+    __ b(L_patch_done);
+    __ bind(L_fast_patch);
+  }
+
+#ifdef ASSERT
+  Label L_okay;
+  __ load_unsigned_byte(temp_reg, at_bcp(0));
+  __ cmpw(temp_reg, (int) Bytecodes::java_code(bc));
+  __ br(Assembler::EQ, L_okay);
+  __ cmpw(temp_reg, bc_reg);
+  __ br(Assembler::EQ, L_okay);
+  __ stop("patching the wrong bytecode");
+  __ bind(L_okay);
+#endif
+
+  // patch bytecode
+  __ strb(bc_reg, at_bcp(0));
+  __ bind(L_patch_done);
+}
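+// Example: iload() below peeks at the following bytecode and then quickens
+// the current instruction in place with
+//   patch_bytecode(Bytecodes::_iload, bc, r1, false);
+// where bc holds _fast_iload, _fast_iload2 or _fast_icaload as appropriate.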
+
+
+// Individual instructions
+
+void TemplateTable::nop() {
+  transition(vtos, vtos);
+  // nothing to do
+}
+
+void TemplateTable::shouldnotreachhere() {
+  transition(vtos, vtos);
+  __ stop("shouldnotreachhere bytecode");
+}
+
+void TemplateTable::aconst_null()
+{
+  transition(vtos, atos);
+  __ mov(r0, 0);
+}
+
+void TemplateTable::iconst(int value)
+{
+  transition(vtos, itos);
+  __ mov(r0, value);
+}
+
+void TemplateTable::lconst(int value)
+{
+  __ mov(r0, value);
+}
+
+void TemplateTable::fconst(int value)
+{
+  transition(vtos, ftos);
+  switch (value) {
+  case 0:
+    __ fmovs(v0, zr);
+    break;
+  case 1:
+    __ fmovs(v0, 1.0);
+    break;
+  case 2:
+    __ fmovs(v0, 2.0);
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void TemplateTable::dconst(int value)
+{
+  transition(vtos, dtos);
+  switch (value) {
+  case 0:
+    __ fmovd(v0, zr);
+    break;
+  case 1:
+    __ fmovd(v0, 1.0);
+    break;
+  case 2:
+    __ fmovd(v0, 2.0);
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void TemplateTable::bipush()
+{
+  transition(vtos, itos);
+  __ load_signed_byte32(r0, at_bcp(1));
+}
+
+void TemplateTable::sipush()
+{
+  transition(vtos, itos);
+  __ load_unsigned_short(r0, at_bcp(1));
+  __ revw(r0, r0);
+  __ asrw(r0, r0, 16);
+}
+
+void TemplateTable::ldc(bool wide)
+{
+  transition(vtos, vtos);
+  Label call_ldc, notFloat, notClass, Done;
+
+  if (wide) {
+    __ get_unsigned_2_byte_index_at_bcp(r1, 1);
+  } else {
+    __ load_unsigned_byte(r1, at_bcp(1));
+  }
+  __ get_cpool_and_tags(r2, r0);
+
+  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
+  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+
+  // get type
+  __ add(r3, r1, tags_offset);
+  __ lea(r3, Address(r0, r3));
+  __ ldarb(r3, r3);
+
+  // unresolved string - get the resolved string
+  __ cmp(r3, JVM_CONSTANT_UnresolvedString);
+  __ br(Assembler::EQ, call_ldc);
+
+  // unresolved class - get the resolved class
+  __ cmp(r3, JVM_CONSTANT_UnresolvedClass);
+  __ br(Assembler::EQ, call_ldc);
+
+  // unresolved class in error state - call into runtime to throw the error
+  // from the first resolution attempt
+  __ cmp(r3, JVM_CONSTANT_UnresolvedClassInError);
+  __ br(Assembler::EQ, call_ldc);
+
+  // resolved class - need to call vm to get java mirror of the class
+  __ cmp(r3, JVM_CONSTANT_Class);
+  __ br(Assembler::NE, notClass);
+
+  __ bind(call_ldc);
+  __ mov(c_rarg1, wide);
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
+  __ push_ptr(r0);
+  __ verify_oop(r0);
+  __ b(Done);
+
+  __ bind(notClass);
+  __ cmp(r3, JVM_CONSTANT_Float);
+  __ br(Assembler::NE, notFloat);
+  // ftos
+  __ adds(r1, r2, r1, Assembler::LSL, 3);
+  __ ldrs(v0, Address(r1, base_offset));
+  __ push_f();
+  __ b(Done);
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  {
+    Label L;
+    __ cmp(r3, JVM_CONSTANT_Integer);
+    __ br(Assembler::EQ, L);
+    __ cmp(r3, JVM_CONSTANT_String);
+    __ br(Assembler::EQ, L);
+    __ cmp(r3, JVM_CONSTANT_Object);
+    __ br(Assembler::EQ, L);
+    __ stop("unexpected tag type in ldc");
+    __ bind(L);
+  }
+#endif
+  // atos and itos
+  Label isOop;
+  __ cmp(r3, JVM_CONSTANT_Integer);
+  __ br(Assembler::NE, isOop);
+  __ adds(r1, r2, r1, Assembler::LSL, 3);
+  __ ldrw(r0, Address(r1, base_offset));
+  __ push_i(r0);
+  __ b(Done);
+
+  __ bind(isOop);
+  __ adds(r1, r2, r1, Assembler::LSL, 3);
+  __ ldr(r0, Address(r1, base_offset));
+  __ push_ptr(r0);
+
+  if (VerifyOops) {
+    __ verify_oop(r0);
+  }
+
+  __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+// %%% We should use this to handle Class and String constants also.
+// %%% It will simplify the ldc/primitive path considerably.
+void TemplateTable::fast_aldc(bool wide)
+{
+  transition(vtos, atos);
+
+  if (!EnableInvokeDynamic) {
+    // We should not encounter this bytecode if !EnableInvokeDynamic.
+    // The verifier will stop it.  However, if we get past the verifier,
+    // this will stop the thread in a reasonable way, without crashing the JVM.
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                     InterpreterRuntime::throw_IncompatibleClassChangeError));
+    // the call_VM checks for exception, so we should never return here.
+    __ should_not_reach_here();
+    return;
+  }
+
+  const Register cache = r2;
+  const Register index = r3;
+
+  resolve_cache_and_index(f12_oop, r0, cache, index, wide ? sizeof(u2) : sizeof(u1));
+  if (VerifyOops) {
+    __ verify_oop(r0);
+  }
+
+  Label L_done, L_throw_exception;
+
+  const Register con_klass_temp = r2;  // same as cache
+  const Register array_klass_temp = r3;  // same as index
+  __ load_klass(con_klass_temp, r0);
+  __ lea(array_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
+  __ ldr(array_klass_temp, Address(array_klass_temp, 0));
+  __ cmp(con_klass_temp, array_klass_temp);
+  __ br(Assembler::NE, L_done);
+  __ ldrw(rscratch1, Address(r0, arrayOopDesc::length_offset_in_bytes()));
+  __ cbnz(rscratch1, L_throw_exception);
+  __ mov(r0, zr);
+  __ b(L_done);
+
+  __ bind(L_throw_exception);
+  __ load_heap_oop(r0, Address(r0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ b(ExternalAddress(Interpreter::throw_exception_entry()));
+
+  __ bind(L_done);
+}
+
+void TemplateTable::ldc2_w()
+{
+  transition(vtos, vtos);
+  Label Long, Done;
+  __ get_unsigned_2_byte_index_at_bcp(r0, 1);
+
+  __ get_cpool_and_tags(r1, r2);
+  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
+  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+
+  // get type
+  __ lea(r2, Address(r2, r0, Address::lsl(0)));
+  __ load_unsigned_byte(r2, Address(r2, tags_offset));
+  __ cmpw(r2, (int)JVM_CONSTANT_Double);
+  __ br(Assembler::NE, Long);
+  // dtos
+  __ lea (r2, Address(r1, r0, Address::lsl(3)));
+  __ ldrd(v0, Address(r2, base_offset));
+  __ push_d();
+  __ b(Done);
+
+  __ bind(Long);
+  // ltos
+  __ lea(r0, Address(r1, r0, Address::lsl(3)));
+  __ ldr(r0, Address(r0, base_offset));
+  __ push_l();
+
+  __ bind(Done);
+}
+
+void TemplateTable::locals_index(Register reg, int offset)
+{
+  __ ldrb(reg, at_bcp(offset));
+  __ neg(reg, reg);
+}
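+// Note: the index is negated because iaddress(Register) scales it by wordSize
+// (lsl(3)) off rlocals and the locals sit at decreasing addresses from
+// rlocals, so slot i is read at rlocals - 8 * i (this assumes
+// Interpreter::local_offset_in_bytes(i) == -i * wordSize, which the register
+// and constant forms of iaddress must agree on).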
+
+void TemplateTable::iload()
+{
+  transition(vtos, itos);
+  if (RewriteFrequentPairs) {
+    Label rewrite, done;
+    Register bc = r4;
+
+    // get next bytecode
+    __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
+
+    // if _iload, wait to rewrite to _fast_iload2.  We only want to rewrite
+    // the last two iloads in a pair.  Comparing against _fast_iload means
+    // that the next bytecode is neither an iload nor a caload, and therefore
+    // an iload pair.
+    __ cmpw(r1, Bytecodes::_iload);
+    __ br(Assembler::EQ, done);
+
+    // if _fast_iload rewrite to _fast_iload2
+    __ cmpw(r1, Bytecodes::_fast_iload);
+    __ movw(bc, Bytecodes::_fast_iload2);
+    __ br(Assembler::EQ, rewrite);
+
+    // if _caload rewrite to _fast_icaload
+    __ cmpw(r1, Bytecodes::_caload);
+    __ movw(bc, Bytecodes::_fast_icaload);
+    __ br(Assembler::EQ, rewrite);
+
+    // else rewrite to _fast_iload
+    __ movw(bc, Bytecodes::_fast_iload);
+
+    // rewrite
+    // bc: new bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_iload, bc, r1, false);
+    __ bind(done);
+
+  }
+
+  // do iload, get the local value into tos
+  locals_index(r1);
+  __ ldr(r0, iaddress(r1));
+
+}
+
+void TemplateTable::fast_iload2()
+{
+  transition(vtos, itos);
+  locals_index(r1);
+  __ ldr(r0, iaddress(r1));
+  __ push(itos);
+  locals_index(r1, 3);
+  __ ldr(r0, iaddress(r1));
+}
+
+void TemplateTable::fast_iload()
+{
+  transition(vtos, itos);
+  locals_index(r1);
+  __ ldr(r0, iaddress(r1));
+}
+
+void TemplateTable::lload()
+{
+  transition(vtos, ltos);
+  __ ldrb(r1, at_bcp(1));
+  __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
+  __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::fload()
+{
+  transition(vtos, ftos);
+  locals_index(r1);
+  // n.b. we use ldrd here because this is a 64 bit slot
+  // this is comparable to the iload case
+  __ ldrd(v0, faddress(r1));
+}
+
+void TemplateTable::dload()
+{
+  transition(vtos, dtos);
+  __ ldrb(r1, at_bcp(1));
+  __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
+  __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::aload()
+{
+  transition(vtos, atos);
+  locals_index(r1);
+  __ ldr(r0, iaddress(r1));
+}
+
+void TemplateTable::locals_index_wide(Register reg) {
+  __ ldrh(reg, at_bcp(2));
+  __ rev16w(reg, reg);
+  __ neg(reg, reg);
+}
+
+void TemplateTable::wide_iload() {
+  transition(vtos, itos);
+  locals_index_wide(r1);
+  __ ldr(r0, iaddress(r1));
+}
+
+void TemplateTable::wide_lload()
+{
+  transition(vtos, ltos);
+  __ ldrh(r1, at_bcp(2));
+  __ rev16w(r1, r1);
+  __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
+  __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::wide_fload()
+{
+  transition(vtos, ftos);
+  locals_index_wide(r1);
+  // n.b. we use ldrd here because this is a 64 bit slot
+  // this is comparable to the iload case
+  __ ldrd(v0, faddress(r1));
+}
+
+void TemplateTable::wide_dload()
+{
+  transition(vtos, dtos);
+  __ ldrh(r1, at_bcp(2));
+  __ rev16w(r1, r1);
+  __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
+  __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::wide_aload()
+{
+  transition(vtos, atos);
+  locals_index_wide(r1);
+  __ ldr(r0, aaddress(r1));
+}
+
+void TemplateTable::index_check(Register array, Register index)
+{
+  // destroys r1, rscratch1
+  // check array
+  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
+  // sign extend index for use by indexed load
+  // __ movl2ptr(index, index);
+  // check index
+  Register length = rscratch1;
+  __ ldrw(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
+  __ cmpw(index, length);
+  if (index != r1) {
+    // ??? convention: move aberrant index into r1 for exception message
+    assert(r1 != array, "different registers");
+    __ mov(r1, index);
+  }
+  Label ok;
+  __ br(Assembler::LO, ok);
+  __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ br(rscratch1);
+  __ bind(ok);
+}
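+// In effect: if ((unsigned)index >= (unsigned)array->length()) jump to
+// Interpreter::_throw_ArrayIndexOutOfBoundsException_entry with the offending
+// index in r1 for the exception message; otherwise fall through with the
+// index in r1.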
+
+void TemplateTable::iaload()
+{
+  transition(itos, itos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1, Address(r0, r1, Address::uxtw(2)));
+  __ ldrw(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_INT)));
+}
+
+void TemplateTable::laload()
+{
+  transition(itos, ltos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1, Address(r0, r1, Address::uxtw(3)));
+  __ ldr(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_LONG)));
+}
+
+void TemplateTable::faload()
+{
+  transition(itos, ftos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(2)));
+  __ ldrs(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+}
+
+void TemplateTable::daload()
+{
+  transition(itos, dtos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(3)));
+  __ ldrd(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
+}
+
+void TemplateTable::aaload()
+{
+  transition(itos, atos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  int s = (UseCompressedOops ? 2 : 3);
+  __ lea(r1, Address(r0, r1, Address::uxtw(s)));
+  __ load_heap_oop(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+}
+
+void TemplateTable::baload()
+{
+  transition(itos, itos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(0)));
+  __ load_signed_byte(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_BYTE)));
+}
+
+void TemplateTable::caload()
+{
+  transition(itos, itos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
+  __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+// iload followed by caload frequent pair
+void TemplateTable::fast_icaload()
+{
+  transition(vtos, itos);
+  // load index out of locals
+  locals_index(r2);
+  __ ldr(r1, iaddress(r2));
+
+  __ pop_ptr(r0);
+
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
+  __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+void TemplateTable::saload()
+{
+  transition(itos, itos);
+  __ mov(r1, r0);
+  __ pop_ptr(r0);
+  // r0: array
+  // r1: index
+  index_check(r0, r1); // leaves index in r1, kills rscratch1
+  __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
+  __ load_signed_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+}
+
+void TemplateTable::iload(int n)
+{
+  transition(vtos, itos);
+  __ ldr(r0, iaddress(n));
+}
+
+void TemplateTable::lload(int n)
+{
+  transition(vtos, ltos);
+  __ ldr(r0, laddress(n));
+}
+
+void TemplateTable::fload(int n)
+{
+  transition(vtos, ftos);
+  __ ldrs(v0, faddress(n));
+}
+
+void TemplateTable::dload(int n)
+{
+  transition(vtos, dtos);
+  __ ldrd(v0, daddress(n));
+}
+
+void TemplateTable::aload(int n)
+{
+  transition(vtos, atos);
+  __ ldr(r0, iaddress(n));
+}
+
+void TemplateTable::aload_0()
+{
+  // According to bytecode histograms, the pairs:
+  //
+  // _aload_0, _fast_igetfield
+  // _aload_0, _fast_agetfield
+  // _aload_0, _fast_fgetfield
+  //
+  // occur frequently. If RewriteFrequentPairs is set, the (slow)
+  // _aload_0 bytecode checks if the next bytecode is either
+  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+  // rewrites the current bytecode into a pair bytecode; otherwise it
+  // rewrites the current bytecode into _fast_aload_0 that doesn't do
+  // the pair check anymore.
+  //
+  // Note: If the next bytecode is _getfield, the rewrite must be
+  //       delayed, otherwise we may miss an opportunity for a pair.
+  //
+  // Also rewrite frequent pairs
+  //   aload_0, aload_1
+  //   aload_0, iload_1
+  // These bytecodes, which take only a small amount of code, are the most
+  // profitable to rewrite.
+  if (RewriteFrequentPairs) {
+    Label rewrite, done;
+    const Register bc = r4;
+
+    // get next bytecode
+    __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+    // do actual aload_0
+    aload(0);
+
+    // if _getfield then wait with rewrite
+    __ cmpw(r1, Bytecodes::_getfield);
+    __ br(Assembler::EQ, done);
+
+    // if _igetfield then rewrite to _fast_iaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_igetfield);
+    __ movw(bc, Bytecodes::_fast_iaccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // if _agetfield then rewrite to _fast_aaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_agetfield);
+    __ movw(bc, Bytecodes::_fast_aaccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // if _fgetfield then rewrite to _fast_faccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_fgetfield);
+    __ movw(bc, Bytecodes::_fast_faccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // else rewrite to _fast_aload_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ movw(bc, Bytecodes::_fast_aload_0);
+
+    // rewrite
+    // bc: new bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_aload_0, bc, r1, false);
+
+    __ bind(done);
+  } else {
+    aload(0);
+  }
+}
+
+void TemplateTable::istore()
+{
+  transition(itos, vtos);
+  locals_index(r1);
+  // FIXME: We're being very pernickety here storing a jint in a
+  // local with strw, which costs an extra instruction over what we'd
+  // be able to do with a simple str.  We should just store the whole
+  // word.
+  __ lea(rscratch1, iaddress(r1));
+  __ strw(r0, Address(rscratch1));
+}
+
+void TemplateTable::lstore()
+{
+  transition(ltos, vtos);
+  locals_index(r1);
+  __ str(r0, laddress(r1, rscratch1, _masm));
+}
+
+void TemplateTable::fstore() {
+  transition(ftos, vtos);
+  locals_index(r1);
+  __ lea(rscratch1, iaddress(r1));
+  __ strs(v0, Address(rscratch1));
+}
+
+void TemplateTable::dstore() {
+  transition(dtos, vtos);
+  locals_index(r1);
+  __ strd(v0, daddress(r1, rscratch1, _masm));
+}
+
+void TemplateTable::astore()
+{
+  transition(vtos, vtos);
+  __ pop_ptr(r0);
+  locals_index(r1);
+  __ str(r0, aaddress(r1));
+}
+
+void TemplateTable::wide_istore() {
+  transition(vtos, vtos);
+  __ pop_i();
+  locals_index_wide(r1);
+  __ lea(rscratch1, iaddress(r1));
+  __ strw(r0, Address(rscratch1));
+}
+
+void TemplateTable::wide_lstore() {
+  transition(vtos, vtos);
+  __ pop_l();
+  locals_index_wide(r1);
+  __ str(r0, laddress(r1, rscratch1, _masm));
+}
+
+void TemplateTable::wide_fstore() {
+  transition(vtos, vtos);
+  __ pop_f();
+  locals_index_wide(r1);
+  __ lea(rscratch1, faddress(r1));
+  __ strs(v0, rscratch1);
+}
+
+void TemplateTable::wide_dstore() {
+  transition(vtos, vtos);
+  __ pop_d();
+  locals_index_wide(r1);
+  __ strd(v0, daddress(r1, rscratch1, _masm));
+}
+
+void TemplateTable::wide_astore() {
+  transition(vtos, vtos);
+  __ pop_ptr(r0);
+  locals_index_wide(r1);
+  __ str(r0, aaddress(r1));
+}
+
+void TemplateTable::iastore() {
+  transition(itos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // r0: value
+  // r1: index
+  // r3: array
+  index_check(r3, r1); // prefer index in r1
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
+  __ strw(r0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_INT)));
+}
+
+void TemplateTable::lastore() {
+  transition(ltos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // r0: value
+  // r1: index
+  // r3: array
+  index_check(r3, r1); // prefer index in r1
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
+  __ str(r0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_LONG)));
+}
+
+void TemplateTable::fastore() {
+  transition(ftos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // v0: value
+  // r1:  index
+  // r3:  array
+  index_check(r3, r1); // prefer index in r1
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
+  __ strs(v0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+}
+
+void TemplateTable::dastore() {
+  transition(dtos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // v0: value
+  // r1:  index
+  // r3:  array
+  index_check(r3, r1); // prefer index in r1
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
+  __ strd(v0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
+}
+
+void TemplateTable::aastore() {
+  Label is_null, ok_is_subtype, done;
+  transition(vtos, vtos);
+  // stack: ..., array, index, value
+  __ ldr(r0, at_tos());    // value
+  __ ldr(r2, at_tos_p1()); // index
+  __ ldr(r3, at_tos_p2()); // array
+
+  Address element_address(r4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+  index_check(r3, r2);     // kills r1
+  __ lea(r4, Address(r3, r2, Address::uxtw(UseCompressedOops? 2 : 3)));
+
+  // do array store check - check for NULL value first
+  __ cbz(r0, is_null);
+
+  // Move subklass into r1
+  __ load_klass(r1, r0);
+  // Move superklass into r0
+  __ load_klass(r0, r3);
+  __ ldr(r0, Address(r0,
+                     objArrayKlass::element_klass_offset()));
+  // Compress array + index*oopSize + 12 into a single register.  Frees r2.
+
+  // Generate subtype check.  Blows r2, r5
+  // Superklass in r0.  Subklass in r1.
+  __ gen_subtype_check(r1, ok_is_subtype);
+
+  // Come here on failure
+  // object is at TOS
+  __ b(Interpreter::_throw_ArrayStoreException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+
+  // Get the value we will store
+  __ ldr(r0, at_tos());
+  // Now store using the appropriate barrier
+  do_oop_store(_masm, element_address, r0, _bs->kind(), true);
+  __ b(done);
+
+  // Have a NULL in r0, r3=array, r2=index.  Store NULL at ary[idx]
+  __ bind(is_null);
+  __ profile_null_seen(r2);
+
+  // Store a NULL
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
+
+  // Pop stack arguments
+  __ bind(done);
+  __ add(esp, esp, 3 * Interpreter::stackElementSize);
+}
+
+void TemplateTable::bastore()
+{
+  transition(itos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // r0: value
+  // r1: index
+  // r3: array
+  index_check(r3, r1); // prefer index in r1
+
+  // Need to check whether array is boolean or byte
+  // since both types share the bastore bytecode.
+  __ load_klass(r2, r3);
+  __ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
+  int diffbit = Klass::layout_helper_boolean_diffbit();
+  __ andw(rscratch1, r2, diffbit);
+  Label L_skip;
+  __ cbzw(rscratch1, L_skip);
+  __ andw(r0, r0, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
+  __ bind(L_skip);
+
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
+  __ strb(r0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_BYTE)));
+}
+
+void TemplateTable::castore()
+{
+  transition(itos, vtos);
+  __ pop_i(r1);
+  __ pop_ptr(r3);
+  // r0: value
+  // r1: index
+  // r3: array
+  index_check(r3, r1); // prefer index in r1
+  __ lea(rscratch1, Address(r3, r1, Address::uxtw(1)));
+  __ strh(r0, Address(rscratch1,
+                      arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+void TemplateTable::sastore()
+{
+  castore();
+}
+
+void TemplateTable::istore(int n)
+{
+  transition(itos, vtos);
+  __ str(r0, iaddress(n));
+}
+
+void TemplateTable::lstore(int n)
+{
+  transition(ltos, vtos);
+  __ str(r0, laddress(n));
+}
+
+void TemplateTable::fstore(int n)
+{
+  transition(ftos, vtos);
+  __ strs(v0, faddress(n));
+}
+
+void TemplateTable::dstore(int n)
+{
+  transition(dtos, vtos);
+  __ strd(v0, daddress(n));
+}
+
+void TemplateTable::astore(int n)
+{
+  transition(vtos, vtos);
+  __ pop_ptr(r0);
+  __ str(r0, iaddress(n));
+}
+
+void TemplateTable::pop()
+{
+  transition(vtos, vtos);
+  __ add(esp, esp, Interpreter::stackElementSize);
+}
+
+void TemplateTable::pop2()
+{
+  transition(vtos, vtos);
+  __ add(esp, esp, 2 * Interpreter::stackElementSize);
+}
+
+void TemplateTable::dup()
+{
+  transition(vtos, vtos);
+  __ ldr(r0, Address(esp, 0));
+  __ push(r0);
+  // stack: ..., a, a
+}
+
+void TemplateTable::dup_x1()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ldr(r0, at_tos());  // load b
+  __ ldr(r2, at_tos_p1());  // load a
+  __ str(r0, at_tos_p1());  // store b
+  __ str(r2, at_tos());  // store a
+  __ push(r0);                  // push b
+  // stack: ..., b, a, b
+}
+
+void TemplateTable::dup_x2()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b, c
+  __ ldr(r0, at_tos());  // load c
+  __ ldr(r2, at_tos_p2());  // load a
+  __ str(r0, at_tos_p2());  // store c in a
+  __ push(r0);      // push c
+  // stack: ..., c, b, c, c
+  __ ldr(r0, at_tos_p2());  // load b
+  __ str(r2, at_tos_p2());  // store a in b
+  // stack: ..., c, a, c, c
+  __ str(r0, at_tos_p1());  // store b in c
+  // stack: ..., c, a, b, c
+}
+
+void TemplateTable::dup2()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ldr(r0, at_tos_p1());  // load a
+  __ push(r0);                  // push a
+  __ ldr(r0, at_tos_p1());  // load b
+  __ push(r0);                  // push b
+  // stack: ..., a, b, a, b
+}
+
+void TemplateTable::dup2_x1()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b, c
+  __ ldr(r2, at_tos());  // load c
+  __ ldr(r0, at_tos_p1());  // load b
+  __ push(r0);                  // push b
+  __ push(r2);                  // push c
+  // stack: ..., a, b, c, b, c
+  __ str(r2, at_tos_p3());  // store c in b
+  // stack: ..., a, c, c, b, c
+  __ ldr(r2, at_tos_p4());  // load a
+  __ str(r2, at_tos_p2());  // store a in 2nd c
+  // stack: ..., a, c, a, b, c
+  __ str(r0, at_tos_p4());  // store b in a
+  // stack: ..., b, c, a, b, c
+}
+
+void TemplateTable::dup2_x2()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b, c, d
+  __ ldr(r2, at_tos());  // load d
+  __ ldr(r0, at_tos_p1());  // load c
+  __ push(r0);                  // push c
+  __ push(r2);                  // push d
+  // stack: ..., a, b, c, d, c, d
+  __ ldr(r0, at_tos_p4());  // load b
+  __ str(r0, at_tos_p2());  // store b in d
+  __ str(r2, at_tos_p4());  // store d in b
+  // stack: ..., a, d, c, b, c, d
+  __ ldr(r2, at_tos_p5());  // load a
+  __ ldr(r0, at_tos_p3());  // load c
+  __ str(r2, at_tos_p3());  // store a in c
+  __ str(r0, at_tos_p5());  // store c in a
+  // stack: ..., c, d, a, b, c, d
+}
+
+void TemplateTable::swap()
+{
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ldr(r2, at_tos_p1());  // load a
+  __ ldr(r0, at_tos());  // load b
+  __ str(r2, at_tos());  // store a in b
+  __ str(r0, at_tos_p1());  // store b in a
+  // stack: ..., b, a
+}
+
+void TemplateTable::iop2(Operation op)
+{
+  transition(itos, itos);
+  // r0 <== r1 op r0
+  __ pop_i(r1);
+  switch (op) {
+  case add  : __ addw(r0, r1, r0); break;
+  case sub  : __ subw(r0, r1, r0); break;
+  case mul  : __ mulw(r0, r1, r0); break;
+  case _and : __ andw(r0, r1, r0); break;
+  case _or  : __ orrw(r0, r1, r0); break;
+  case _xor : __ eorw(r0, r1, r0); break;
+  case shl  : __ lslvw(r0, r1, r0); break;
+  case shr  : __ asrvw(r0, r1, r0); break;
+  case ushr : __ lsrvw(r0, r1, r0);break;
+  default   : ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::lop2(Operation op)
+{
+  transition(ltos, ltos);
+  // r0 <== r1 op r0
+  __ pop_l(r1);
+  switch (op) {
+  case add  : __ add(r0, r1, r0); break;
+  case sub  : __ sub(r0, r1, r0); break;
+  case mul  : __ mul(r0, r1, r0); break;
+  case _and : __ andr(r0, r1, r0); break;
+  case _or  : __ orr(r0, r1, r0); break;
+  case _xor : __ eor(r0, r1, r0); break;
+  default   : ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::idiv()
+{
+  transition(itos, itos);
+  // explicitly check for div0
+  Label no_div0;
+  __ cbnzw(r0, no_div0);
+  __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
+  __ br(rscratch1);
+  __ bind(no_div0);
+  __ pop_i(r1);
+  // r0 <== r1 idiv r0
+  __ corrected_idivl(r0, r1, r0, /* want_remainder */ false);
+}
+
+void TemplateTable::irem()
+{
+  transition(itos, itos);
+  // explicitly check for div0
+  Label no_div0;
+  __ cbnzw(r0, no_div0);
+  __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
+  __ br(rscratch1);
+  __ bind(no_div0);
+  __ pop_i(r1);
+  // r0 <== r1 irem r0
+  __ corrected_idivl(r0, r1, r0, /* want_remainder */ true);
+}
+
+void TemplateTable::lmul()
+{
+  transition(ltos, ltos);
+  __ pop_l(r1);
+  __ mul(r0, r0, r1);
+}
+
+void TemplateTable::ldiv()
+{
+  transition(ltos, ltos);
+  // explicitly check for div0
+  Label no_div0;
+  __ cbnz(r0, no_div0);
+  __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
+  __ br(rscratch1);
+  __ bind(no_div0);
+  __ pop_l(r1);
+  // r0 <== r1 ldiv r0
+  __ corrected_idivq(r0, r1, r0, /* want_remainder */ false);
+}
+
+void TemplateTable::lrem()
+{
+  transition(ltos, ltos);
+  // explicitly check for div0
+  Label no_div0;
+  __ cbnz(r0, no_div0);
+  __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
+  __ br(rscratch1);
+  __ bind(no_div0);
+  __ pop_l(r1);
+  // r0 <== r1 lrem r0
+  __ corrected_idivq(r0, r1, r0, /* want_remainder */ true);
+}
+
+void TemplateTable::lshl()
+{
+  transition(itos, ltos);
+  // shift count is in r0
+  __ pop_l(r1);
+  __ lslv(r0, r1, r0);
+}
+
+void TemplateTable::lshr()
+{
+  transition(itos, ltos);
+  // shift count is in r0
+  __ pop_l(r1);
+  __ asrv(r0, r1, r0);
+}
+
+void TemplateTable::lushr()
+{
+  transition(itos, ltos);
+  // shift count is in r0
+  __ pop_l(r1);
+  __ lsrv(r0, r1, r0);
+}
+
+void TemplateTable::fop2(Operation op)
+{
+  transition(ftos, ftos);
+  switch (op) {
+  case add:
+    // n.b. use ldrd because this is a 64 bit slot
+    __ pop_f(v1);
+    __ fadds(v0, v1, v0);
+    break;
+  case sub:
+    __ pop_f(v1);
+    __ fsubs(v0, v1, v0);
+    break;
+  case mul:
+    __ pop_f(v1);
+    __ fmuls(v0, v1, v0);
+    break;
+  case div:
+    __ pop_f(v1);
+    __ fdivs(v0, v1, v0);
+    break;
+  case rem:
+    __ fmovs(v1, v0);
+    __ pop_f(v0);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::frem),
+                         0, 2, MacroAssembler::ret_type_float);
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void TemplateTable::dop2(Operation op)
+{
+  transition(dtos, dtos);
+  switch (op) {
+  case add:
+    // n.b. use ldrd because this is a 64 bit slot
+    __ pop_d(v1);
+    __ faddd(v0, v1, v0);
+    break;
+  case sub:
+    __ pop_d(v1);
+    __ fsubd(v0, v1, v0);
+    break;
+  case mul:
+    __ pop_d(v1);
+    __ fmuld(v0, v1, v0);
+    break;
+  case div:
+    __ pop_d(v1);
+    __ fdivd(v0, v1, v0);
+    break;
+  case rem:
+    __ fmovd(v1, v0);
+    __ pop_d(v0);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::drem),
+                         0, 2, MacroAssembler::ret_type_double);
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void TemplateTable::ineg()
+{
+  transition(itos, itos);
+  __ negw(r0, r0);
+
+}
+
+void TemplateTable::lneg()
+{
+  transition(ltos, ltos);
+  __ neg(r0, r0);
+}
+
+void TemplateTable::fneg()
+{
+  transition(ftos, ftos);
+  __ fnegs(v0, v0);
+}
+
+void TemplateTable::dneg()
+{
+  transition(dtos, dtos);
+  __ fnegd(v0, v0);
+}
+
+void TemplateTable::iinc()
+{
+  transition(vtos, vtos);
+  __ load_signed_byte(r1, at_bcp(2)); // get constant
+  locals_index(r2);
+  __ ldr(r0, iaddress(r2));
+  __ addw(r0, r0, r1);
+  __ str(r0, iaddress(r2));
+}
+
+void TemplateTable::wide_iinc()
+{
+  transition(vtos, vtos);
+  // __ mov(r1, zr);
+  __ ldrw(r1, at_bcp(2)); // get constant and index
+  __ rev16(r1, r1);
+  __ ubfx(r2, r1, 0, 16);
+  __ neg(r2, r2);
+  __ sbfx(r1, r1, 16, 16);
+  __ ldr(r0, iaddress(r2));
+  __ addw(r0, r0, r1);
+  __ str(r0, iaddress(r2));
+}
+
+void TemplateTable::convert()
+{
+  // Checking
+#ifdef ASSERT
+  {
+    TosState tos_in  = ilgl;
+    TosState tos_out = ilgl;
+    switch (bytecode()) {
+    case Bytecodes::_i2l: // fall through
+    case Bytecodes::_i2f: // fall through
+    case Bytecodes::_i2d: // fall through
+    case Bytecodes::_i2b: // fall through
+    case Bytecodes::_i2c: // fall through
+    case Bytecodes::_i2s: tos_in = itos; break;
+    case Bytecodes::_l2i: // fall through
+    case Bytecodes::_l2f: // fall through
+    case Bytecodes::_l2d: tos_in = ltos; break;
+    case Bytecodes::_f2i: // fall through
+    case Bytecodes::_f2l: // fall through
+    case Bytecodes::_f2d: tos_in = ftos; break;
+    case Bytecodes::_d2i: // fall through
+    case Bytecodes::_d2l: // fall through
+    case Bytecodes::_d2f: tos_in = dtos; break;
+    default             : ShouldNotReachHere();
+    }
+    switch (bytecode()) {
+    case Bytecodes::_l2i: // fall through
+    case Bytecodes::_f2i: // fall through
+    case Bytecodes::_d2i: // fall through
+    case Bytecodes::_i2b: // fall through
+    case Bytecodes::_i2c: // fall through
+    case Bytecodes::_i2s: tos_out = itos; break;
+    case Bytecodes::_i2l: // fall through
+    case Bytecodes::_f2l: // fall through
+    case Bytecodes::_d2l: tos_out = ltos; break;
+    case Bytecodes::_i2f: // fall through
+    case Bytecodes::_l2f: // fall through
+    case Bytecodes::_d2f: tos_out = ftos; break;
+    case Bytecodes::_i2d: // fall through
+    case Bytecodes::_l2d: // fall through
+    case Bytecodes::_f2d: tos_out = dtos; break;
+    default             : ShouldNotReachHere();
+    }
+    transition(tos_in, tos_out);
+  }
+#endif // ASSERT
+  // static const int64_t is_nan = 0x8000000000000000L;
+
+  // Conversion
+  switch (bytecode()) {
+  case Bytecodes::_i2l:
+    __ sxtw(r0, r0);
+    break;
+  case Bytecodes::_i2f:
+    __ scvtfws(v0, r0);
+    break;
+  case Bytecodes::_i2d:
+    __ scvtfwd(v0, r0);
+    break;
+  case Bytecodes::_i2b:
+    __ sxtbw(r0, r0);
+    break;
+  case Bytecodes::_i2c:
+    __ uxthw(r0, r0);
+    break;
+  case Bytecodes::_i2s:
+    __ sxthw(r0, r0);
+    break;
+  case Bytecodes::_l2i:
+    __ uxtw(r0, r0);
+    break;
+  case Bytecodes::_l2f:
+    __ scvtfs(v0, r0);
+    break;
+  case Bytecodes::_l2d:
+    __ scvtfd(v0, r0);
+    break;
+  case Bytecodes::_f2i:
+  {
+    Label L_Okay;
+    __ clear_fpsr();
+    __ fcvtzsw(r0, v0);
+    __ get_fpsr(r1);
+    __ cbzw(r1, L_Okay);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
+                         0, 1, MacroAssembler::ret_type_integral);
+    __ bind(L_Okay);
+  }
+    break;
+  case Bytecodes::_f2l:
+  {
+    Label L_Okay;
+    __ clear_fpsr();
+    __ fcvtzs(r0, v0);
+    __ get_fpsr(r1);
+    __ cbzw(r1, L_Okay);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2l),
+                         0, 1, MacroAssembler::ret_type_integral);
+    __ bind(L_Okay);
+  }
+    break;
+  case Bytecodes::_f2d:
+    __ fcvts(v0, v0);
+    break;
+  case Bytecodes::_d2i:
+  {
+    Label L_Okay;
+    __ clear_fpsr();
+    __ fcvtzdw(r0, v0);
+    __ get_fpsr(r1);
+    __ cbzw(r1, L_Okay);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
+                         0, 1, MacroAssembler::ret_type_integral);
+    __ bind(L_Okay);
+  }
+    break;
+  case Bytecodes::_d2l:
+  {
+    Label L_Okay;
+    __ clear_fpsr();
+    __ fcvtzd(r0, v0);
+    __ get_fpsr(r1);
+    __ cbzw(r1, L_Okay);
+    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2l),
+                         0, 1, MacroAssembler::ret_type_integral);
+    __ bind(L_Okay);
+  }
+    break;
+  case Bytecodes::_d2f:
+    __ fcvtd(v0, v0);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::lcmp()
+{
+  transition(ltos, itos);
+  Label done;
+  __ pop_l(r1);
+  __ cmp(r1, r0);
+  __ mov(r0, (u_int64_t)-1L);
+  __ br(Assembler::LT, done);
+  // __ mov(r0, 1UL);
+  // __ csel(r0, r0, zr, Assembler::NE);
+  // and here is a faster way
+  __ csinc(r0, zr, zr, Assembler::EQ);
+  __ bind(done);
+}
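+// csinc r0, zr, zr, EQ yields r0 = (Z set ? 0 : 1), so together with the
+// mov/branch above the result is -1, 0 or 1 for r1 <, ==, > r0, matching the
+// Java lcmp contract.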
+
+void TemplateTable::float_cmp(bool is_float, int unordered_result)
+{
+  Label done;
+  if (is_float) {
+    // XXX get rid of pop here, use ... reg, mem32
+    __ pop_f(v1);
+    __ fcmps(v1, v0);
+  } else {
+    // XXX get rid of pop here, use ... reg, mem64
+    __ pop_d(v1);
+    __ fcmpd(v1, v0);
+  }
+  if (unordered_result < 0) {
+    // we want -1 for unordered or less than, 0 for equal and 1 for
+    // greater than.
+    __ mov(r0, (u_int64_t)-1L);
+    // for FP LT tests less than or unordered
+    __ br(Assembler::LT, done);
+    // install 0 for EQ otherwise 1
+    __ csinc(r0, zr, zr, Assembler::EQ);
+  } else {
+    // we want -1 for less than, 0 for equal and 1 for unordered or
+    // greater than.
+    __ mov(r0, 1L);
+    // for FP HI tests greater than or unordered
+    __ br(Assembler::HI, done);
+    // install 0 for EQ otherwise ~0
+    __ csinv(r0, zr, zr, Assembler::EQ);
+
+  }
+  __ bind(done);
+}
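+// Result summary (Java fcmp<l|g> / dcmp<l|g> semantics): -1 if v1 < v0, 0 if
+// equal, 1 if v1 > v0; a NaN operand produces unordered_result, i.e. -1 for
+// the *l forms and +1 for the *g forms.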
+
+void TemplateTable::branch(bool is_jsr, bool is_wide)
+{
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
+  __ profile_taken_branch(r0, r1);
+  const ByteSize be_offset = methodOopDesc::backedge_counter_offset() +
+                             InvocationCounter::counter_offset();
+  const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() +
+                              InvocationCounter::counter_offset();
+  const int method_offset = frame::interpreter_frame_method_offset * wordSize;
+
+  // load branch displacement
+  if (!is_wide) {
+    __ ldrh(r2, at_bcp(1));
+    __ rev16(r2, r2);
+    // sign extend the 16 bit value in r2
+    __ sbfm(r2, r2, 0, 15);
+  } else {
+    __ ldrw(r2, at_bcp(1));
+    __ revw(r2, r2);
+    // sign extend the 32 bit value in r2
+    __ sbfm(r2, r2, 0, 31);
+  }
+
+  // Handle all the JSR stuff here, then exit.
+  // It's much shorter and cleaner than intermingling with the non-JSR
+  // normal-branch stuff occurring below.
+
+  if (is_jsr) {
+    // Pre-load the next target bytecode into rscratch1
+    __ load_unsigned_byte(rscratch1, Address(rbcp, r2));
+    // compute return address as bci
+    __ ldr(rscratch2, Address(rmethod, methodOopDesc::const_offset()));
+    __ add(rscratch2, rscratch2,
+           in_bytes(constMethodOopDesc::codes_offset()) - (is_wide ? 5 : 3));
+    __ sub(r1, rbcp, rscratch2);
+    __ push_i(r1);
+    // Adjust the bcp by the displacement in r2 (16- or 32-bit, sign extended above)
+    __ add(rbcp, rbcp, r2);
+    __ dispatch_only(vtos);
+    return;
+  }
+
+  // Normal (non-jsr) branch handling
+
+  // Adjust the bcp by the displacement in r2
+  __ add(rbcp, rbcp, r2);
+
+  assert(UseLoopCounter || !UseOnStackReplacement,
+         "on-stack-replacement requires loop counters");
+  Label backedge_counter_overflow;
+  Label profile_method;
+  Label dispatch;
+  if (UseLoopCounter) {
+    // increment backedge counter for backward branches
+    // r0: MDO
+    // w1: MDO bumped taken-count
+    // r2: target offset
+    __ cmp(r2, zr);
+    __ br(Assembler::GT, dispatch); // count only if backward branch
+
+    if (TieredCompilation) {
+      Label no_mdo;
+      int increment = InvocationCounter::count_increment;
+      int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
+      if (ProfileInterpreter) {
+        // Are we profiling?
+        __ ldr(r1, Address(rmethod, in_bytes(methodOopDesc::method_data_offset())));
+        __ cbz(r1, no_mdo);
+        // Increment the MDO backedge counter
+        const Address mdo_backedge_counter(r1, in_bytes(methodDataOopDesc::backedge_counter_offset()) +
+                                           in_bytes(InvocationCounter::counter_offset()));
+        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+                                   r0, false, Assembler::EQ, &backedge_counter_overflow);
+        __ b(dispatch);
+      }
+      __ bind(no_mdo);
+      // Increment backedge counter in methodOop
+      __ increment_mask_and_jump(Address(rmethod, be_offset), increment, mask,
+                                 r0, false, Assembler::EQ, &backedge_counter_overflow);
+    } else {
+      // increment counter
+      __ ldrw(r0, Address(rmethod, be_offset));        // load backedge counter
+      __ addw(rscratch1, r0, InvocationCounter::count_increment); // increment counter
+      __ strw(rscratch1, Address(rmethod, be_offset)); // store counter
+
+      __ ldrw(r0, Address(rmethod, inv_offset));    // load invocation counter
+      __ andw(r0, r0, (unsigned)InvocationCounter::count_mask_value); // and the status bits
+      __ addw(r0, r0, rscratch1);        // add both counters
+
+      if (ProfileInterpreter) {
+        // Test to see if we should create a method data oop
+        __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
+        __ ldrw(rscratch1, rscratch1);
+        __ cmpw(r0, rscratch1);
+        __ br(Assembler::LT, dispatch);
+
+        // if no method data exists, go to profile method
+        __ test_method_data_pointer(r0, profile_method);
+
+        if (UseOnStackReplacement) {
+          // check for overflow against w1 which is the MDO taken count
+          __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
+          __ ldrw(rscratch1, rscratch1);
+          __ cmpw(r1, rscratch1);
+          __ br(Assembler::LO, dispatch); // Intel == Assembler::below
+
+          // When ProfileInterpreter is on, the backedge_count comes
+          // from the methodDataOop, whose value does not get reset on
+          // the call to frequency_counter_overflow().  To avoid
+          // excessive calls to the overflow routine while the method is
+          // being compiled, add a second test to make sure the overflow
+          // function is called only once every overflow_frequency.
+          const int overflow_frequency = 1024;
+          __ andsw(r1, r1, overflow_frequency - 1);
+          __ br(Assembler::EQ, backedge_counter_overflow);
+
+        }
+      } else {
+        if (UseOnStackReplacement) {
+          // check for overflow against w0, which is the sum of the
+          // counters
+          __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
+          __ ldrw(rscratch1, rscratch1);
+          __ cmpw(r0, rscratch1);
+          __ br(Assembler::HS, backedge_counter_overflow); // Intel == Assembler::aboveEqual
+        }
+      }
+    }
+  }
+  __ bind(dispatch);
+
+  // Pre-load the next target bytecode into rscratch1
+  __ load_unsigned_byte(rscratch1, Address(rbcp, 0));
+
+  // continue with the bytecode @ target
+  // rscratch1: target bytecode
+  // rbcp: target bcp
+  __ dispatch_only(vtos);
+
+  if (UseLoopCounter) {
+    if (ProfileInterpreter) {
+      // Out-of-line code to allocate method data oop.
+      __ bind(profile_method);
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
+      __ set_method_data_pointer_for_bcp();
+      __ b(dispatch);
+    }
+
+    if (TieredCompilation || UseOnStackReplacement) {
+      // invocation counter overflow
+      __ bind(backedge_counter_overflow);
+      __ neg(r2, r2);
+      __ add(r2, r2, rbcp);     // branch bcp
+      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address,
+                                  InterpreterRuntime::frequency_counter_overflow),
+                 r2);
+      if (!UseOnStackReplacement)
+        __ b(dispatch);
+    }
+
+    if (UseOnStackReplacement) {
+      __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
+
+      // r0: osr nmethod (osr ok) or NULL (osr not possible)
+      // w1: target bytecode
+      // r2: scratch
+      __ cbz(r0, dispatch);     // test result -- no osr if null
+      // nmethod may have been invalidated (VM may block upon call_VM return)
+      __ ldrw(r2, Address(r0, nmethod::entry_bci_offset()));
+      // InvalidOSREntryBci == -2, which cannot be encoded as a cmp immediate,
+      // so use cmn against -InvalidOSREntryBci, which sets the same flags
+      __ cmn(r2, -InvalidOSREntryBci);
+      __ br(Assembler::EQ, dispatch);
+
+      // We have the address of an on stack replacement routine in r0
+      // We need to prepare to execute the OSR method. First we must
+      // migrate the locals and monitors off of the stack.
+
+      __ mov(r19, r0);                             // save the nmethod
+
+      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+      // r0 is OSR buffer, move it to expected parameter location
+      __ mov(j_rarg0, r0);
+
+      // remove activation
+      // get sender esp
+      __ ldr(esp,
+          Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
+      // remove frame anchor
+      __ leave();
+      // Ensure compiled code always sees stack at proper alignment
+      __ andr(sp, esp, -16);
+
+      // and begin the OSR nmethod
+      __ ldr(rscratch1, Address(r19, nmethod::osr_entry_point_offset()));
+      __ br(rscratch1);
+    }
+  }
+}
+
+
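+// if_0cmp branches on a comparison of the int on tos against zero.  For
+// eq/ne the compare-and-branch instructions cbnzw/cbzw are used directly;
+// the other conditions set the flags with andsw and take a conditional
+// branch on the negated condition.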
+void TemplateTable::if_0cmp(Condition cc)
+{
+  transition(itos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  if (cc == equal)
+    __ cbnzw(r0, not_taken);
+  else if (cc == not_equal)
+    __ cbzw(r0, not_taken);
+  else {
+    __ andsw(zr, r0, r0);
+    __ br(j_not(cc), not_taken);
+  }
+
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(r0);
+}
+
+void TemplateTable::if_icmp(Condition cc)
+{
+  transition(itos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  __ pop_i(r1);
+  __ cmpw(r1, r0, Assembler::LSL);
+  __ br(j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(r0);
+}
+
+void TemplateTable::if_nullcmp(Condition cc)
+{
+  transition(atos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  if (cc == equal)
+    __ cbnz(r0, not_taken);
+  else
+    __ cbz(r0, not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(r0);
+}
+
+void TemplateTable::if_acmp(Condition cc)
+{
+  transition(atos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  __ pop_ptr(r1);
+  __ cmp(r1, r0);
+  __ br(j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(r0);
+}
+
+void TemplateTable::ret() {
+  transition(vtos, vtos);
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
+  locals_index(r1);
+  __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
+  __ profile_ret(r1, r2);
+  __ ldr(rbcp, Address(rmethod, methodOopDesc::const_offset()));
+  __ lea(rbcp, Address(rbcp, r1));
+  __ add(rbcp, rbcp, in_bytes(constMethodOopDesc::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+void TemplateTable::wide_ret() {
+  transition(vtos, vtos);
+  locals_index_wide(r1);
+  __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
+  __ profile_ret(r1, r2);
+  __ ldr(rbcp, Address(rmethod, methodOopDesc::const_offset()));
+  __ lea(rbcp, Address(rbcp, r1));
+  __ add(rbcp, rbcp, in_bytes(constMethodOopDesc::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+
+void TemplateTable::tableswitch() {
+  Label default_case, continue_execution;
+  transition(itos, vtos);
+  // align rbcp
+  __ lea(r1, at_bcp(BytesPerInt));
+  __ andr(r1, r1, -BytesPerInt);
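+  // The tableswitch operands are 4-byte aligned after the opcode:
+  // default offset, low, high, then (high - low + 1) jump offsets, all
+  // stored big-endian.  r1 now points at the default offset.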
+  // load lo & hi
+  __ ldrw(r2, Address(r1, BytesPerInt));
+  __ ldrw(r3, Address(r1, 2 * BytesPerInt));
+  __ rev32(r2, r2);
+  __ rev32(r3, r3);
+  // check against lo & hi
+  __ cmpw(r0, r2);
+  __ br(Assembler::LT, default_case);
+  __ cmpw(r0, r3);
+  __ br(Assembler::GT, default_case);
+  // lookup dispatch offset
+  __ subw(r0, r0, r2);
+  __ lea(r3, Address(r1, r0, Address::uxtw(2)));
+  __ ldrw(r3, Address(r3, 3 * BytesPerInt));
+  __ profile_switch_case(r0, r1, r2);
+  // continue execution
+  __ bind(continue_execution);
+  __ rev32(r3, r3);
+  __ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
+  __ add(rbcp, rbcp, r3, ext::sxtw);
+  __ dispatch_only(vtos);
+  // handle default
+  __ bind(default_case);
+  __ profile_switch_default(r0);
+  __ ldrw(r3, Address(r1, 0));
+  __ b(continue_execution);
+}
+
+void TemplateTable::lookupswitch() {
+  transition(itos, itos);
+  __ stop("lookupswitch bytecode should have been rewritten");
+}
+
+void TemplateTable::fast_linearswitch() {
+  transition(itos, vtos);
+  Label loop_entry, loop, found, continue_execution;
+  // bswap r0 so we can avoid bswapping the table entries
+  __ rev32(r0, r0);
+  // align rbcp
+  __ lea(r19, at_bcp(BytesPerInt)); // btw: should be able to get rid of
+                                    // this instruction (change offsets
+                                    // below)
+  __ andr(r19, r19, -BytesPerInt);
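+  // r19 now points at the 4-byte aligned default offset; npairs follows
+  // at r19 + BytesPerInt and the (match, offset) pairs start at
+  // r19 + 2 * BytesPerInt, all stored big-endian.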
+  // set counter
+  __ ldrw(r1, Address(r19, BytesPerInt));
+  __ rev32(r1, r1);
+  __ b(loop_entry);
+  // table search
+  __ bind(loop);
+  __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
+  __ ldrw(rscratch1, Address(rscratch1, 2 * BytesPerInt));
+  __ cmpw(r0, rscratch1);
+  __ br(Assembler::EQ, found);
+  __ bind(loop_entry);
+  __ subs(r1, r1, 1);
+  __ br(Assembler::PL, loop);
+  // default case
+  __ profile_switch_default(r0);
+  __ ldrw(r3, Address(r19, 0));
+  __ b(continue_execution);
+  // entry found -> get offset
+  __ bind(found);
+  __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
+  __ ldrw(r3, Address(rscratch1, 3 * BytesPerInt));
+  __ profile_switch_case(r1, r0, r19);
+  // continue execution
+  __ bind(continue_execution);
+  __ rev32(r3, r3);
+  __ add(rbcp, rbcp, r3, ext::sxtw);
+  __ ldrb(rscratch1, Address(rbcp, 0));
+  __ dispatch_only(vtos);
+}
+
+void TemplateTable::fast_binaryswitch() {
+  transition(itos, vtos);
+  // Implementation using the following core algorithm:
+  //
+  // int binary_search(int key, LookupswitchPair* array, int n) {
+  //   // Binary search according to "Methodik des Programmierens" by
+  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+  //   int i = 0;
+  //   int j = n;
+  //   while (i+1 < j) {
+  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+  //     // with      Q: for all i: 0 <= i < n: key < a[i]
+  //     // where a stands for the array and assuming that the (non-existent)
+  //     // element a[n] is infinitely big.
+  //     int h = (i + j) >> 1;
+  //     // i < h < j
+  //     if (key < array[h].fast_match()) {
+  //       j = h;
+  //     } else {
+  //       i = h;
+  //     }
+  //   }
+  //   // R: a[i] <= key < a[i+1] or Q
+  //   // (i.e., if key is within array, i is the correct index)
+  //   return i;
+  // }
+
+  // Register allocation
+  const Register key   = r0; // already set (tosca)
+  const Register array = r1;
+  const Register i     = r2;
+  const Register j     = r3;
+  const Register h     = rscratch1;
+  const Register temp  = rscratch2;
+
+  // Find array start
+  __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
+                                          // get rid of this
+                                          // instruction (change
+                                          // offsets below)
+  __ andr(array, array, -BytesPerInt);
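+  // 'array' now points at the first (match, offset) pair of the
+  // lookupswitch; npairs is at array - BytesPerInt and the default
+  // offset at array - 2 * BytesPerInt, all stored big-endian.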
+
+  // Initialize i & j
+  __ mov(i, 0);                            // i = 0;
+  __ ldrw(j, Address(array, -BytesPerInt)); // j = length(array);
+
+  // Convert j into native byte ordering
+  __ rev32(j, j);
+
+  // And start
+  Label entry;
+  __ b(entry);
+
+  // binary search loop
+  {
+    Label loop;
+    __ bind(loop);
+    // int h = (i + j) >> 1;
+    __ addw(h, i, j);                           // h = i + j;
+    __ lsrw(h, h, 1);                           // h = (i + j) >> 1;
+    // if (key < array[h].fast_match()) {
+    //   j = h;
+    // } else {
+    //   i = h;
+    // }
+    // Convert array[h].match to native byte-ordering before compare
+    __ ldr(temp, Address(array, h, Address::lsl(3)));
+    __ rev32(temp, temp);
+    __ cmpw(key, temp);
+    // j = h if (key <  array[h].fast_match())
+    __ csel(j, h, j, Assembler::LT);
+    // i = h if (key >= array[h].fast_match())
+    __ csel(i, h, i, Assembler::GE);
+    // while (i+1 < j)
+    __ bind(entry);
+    __ addw(h, i, 1);          // i+1
+    __ cmpw(h, j);             // i+1 < j
+    __ br(Assembler::LT, loop);
+  }
+
+  // end of binary search, result index is i (must check again!)
+  Label default_case;
+  // Convert array[i].match to native byte-ordering before compare
+  __ ldr(temp, Address(array, i, Address::lsl(3)));
+  __ rev32(temp, temp);
+  __ cmpw(key, temp);
+  __ br(Assembler::NE, default_case);
+
+  // entry found -> j = offset
+  __ add(j, array, i, ext::uxtx, 3);
+  __ ldrw(j, Address(j, BytesPerInt));
+  __ profile_switch_case(i, key, array);
+  __ rev32(j, j);
+  __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
+  __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
+  __ dispatch_only(vtos);
+
+  // default case -> j = default offset
+  __ bind(default_case);
+  __ profile_switch_default(i);
+  __ ldrw(j, Address(array, -2 * BytesPerInt));
+  __ rev32(j, j);
+  __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
+  __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
+  __ dispatch_only(vtos);
+}
+
+
+void TemplateTable::_return(TosState state)
+{
+  transition(state, state);
+  assert(_desc->calls_vm(),
+         "inconsistent calls_vm information"); // call in remove_activation
+
+  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+    assert(state == vtos, "only valid state");
+
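+    // _return_register_finalizer is the rewritten form of the return in
+    // Object.<init>: if the receiver's class has a finalizer, register
+    // the newly constructed object with the VM before returning.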
+    __ ldr(c_rarg1, aaddress(0));
+    __ load_klass(r3, c_rarg1);
+    __ ldrw(r3, Address(r3, Klass::access_flags_offset()));
+    __ tst(r3, JVM_ACC_HAS_FINALIZER);
+    Label skip_register_finalizer;
+    __ br(Assembler::EQ, skip_register_finalizer);
+
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
+
+    __ bind(skip_register_finalizer);
+  }
+
+  // Issue a StoreStore barrier after all stores but before return
+  // from any constructor of any class with a final field.  We don't
+  // know whether this return is from such a constructor, so we always
+  // emit the barrier.
+  if (_desc->bytecode() == Bytecodes::_return)
+    __ membar(MacroAssembler::StoreStore);
+
+  // Narrow result if state is itos but result type is smaller.
+  // Need to narrow in the return bytecode rather than in generate_return_entry
+  // since compiled code callers expect the result to already be narrowed.
+  if (state == itos) {
+    __ narrow(r0);
+  }
+
+  __ remove_activation(state);
+  __ ret(lr);
+}
+
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPU's
+// in order.  Store buffers on most chips allow reads & writes to
+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
+// without some kind of memory barrier (i.e., it's not sufficient that
+// the interpreter does not reorder volatile references, the hardware
+// also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized wrt to each other.  ALSO reads &
+//     writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that
+//     happen after the read float up to before the read.  It's OK for
+//     non-volatile memory refs that happen before the volatile read to
+//     float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile
+//     memory refs that happen BEFORE the write float down to after the
+//     write.  It's OK for non-volatile memory refs that happen after the
+//     volatile write to float up before it.
+//
+// We only put in barriers around volatile refs (they are expensive),
+// not _between_ memory refs (that would require us to track the
+// flavor of the previous memory refs).  Requirements (2) and (3)
+// require some barriers before volatile stores and after volatile
+// loads.  These nearly cover requirement (1) but miss the
+// volatile-store-volatile-load case.  This final case is placed after
+// volatile-stores although it could just as well go before
+// volatile-loads.
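+//
+// In the code below this maps onto membar calls as follows: a
+// LoadLoad|LoadStore barrier after a volatile load, a StoreStore|LoadStore
+// barrier before a volatile store, and a StoreLoad|StoreStore barrier
+// after a volatile store.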
+
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register result,
+                                            Register Rcache,
+                                            Register index,
+                                            size_t index_size) {
+  const Register temp = r19;
+  assert_different_registers(result, Rcache, index, temp);
+
+  Label resolved;
+  if (byte_no == f12_oop) {
+    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
+    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
+    // there is a 1-1 relation between bytecode type and CP entry type.
+    // The caller will also load a methodOop from f2.
+    assert(result != noreg, ""); // else do cmpptr(Address(...), (int32_t) NULL_WORD)
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+    __ ldr(result, Address(Rcache, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+    __ cbnz(result, resolved);
+  } else {
+    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+    assert(result == noreg, "");  //else change code for setting result
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
+    __ cmp(temp, (int) bytecode());  // have we resolved this bytecode?
+    __ br(Assembler::EQ, resolved);
+  }
+
+  // resolve first time through
+  address entry;
+  switch (bytecode()) {
+  case Bytecodes::_getstatic:
+  case Bytecodes::_putstatic:
+  case Bytecodes::_getfield:
+  case Bytecodes::_putfield:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
+    break;
+  case Bytecodes::_invokevirtual:
+  case Bytecodes::_invokespecial:
+  case Bytecodes::_invokestatic:
+  case Bytecodes::_invokeinterface:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
+    break;
+  case Bytecodes::_invokehandle:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
+    break;
+  case Bytecodes::_invokedynamic:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
+    break;
+  case Bytecodes::_fast_aldc:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+    break;
+  case Bytecodes::_fast_aldc_w:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+    break;
+  default:
+    fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
+    break;
+  }
+  __ mov(temp, (int) bytecode());
+  __ call_VM(noreg, entry, temp);
+
+  // Update registers with resolved info
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+  // n.b. unlike x86, Rcache is now rcpool plus the indexed offset,
+  // so all clients of this method must be modified accordingly
+  if (result != noreg)
+    __ ldr(result, Address(Rcache, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+  __ bind(resolved);
+}
+
+// The Rcache and index registers must be set before the call.
+// n.b. unlike x86, the cache register already includes the index offset.
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+  assert_different_registers(cache, index, flags, off);
+
+  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+  // Field offset
+  __ ldr(off, Address(cache, in_bytes(cp_base_offset +
+                                          ConstantPoolCacheEntry::f2_offset())));
+  // Flags
+  __ ldrw(flags, Address(cache, in_bytes(cp_base_offset +
+                                           ConstantPoolCacheEntry::flags_offset())));
+
+  // klass overwrite register
+  if (is_static) {
+    __ ldr(obj, Address(cache, in_bytes(cp_base_offset +
+                                        ConstantPoolCacheEntry::f1_offset())));
+  }
+}
+
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal, /*unused*/
+                                               bool is_invokedynamic) {
+  // setup registers
+  const Register cache = rscratch2;
+  const Register index = r4;
+  assert_different_registers(method, flags);
+  assert_different_registers(method, cache, index);
+  assert_different_registers(itable_index, flags);
+  assert_different_registers(itable_index, cache, index);
+  // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+  const int method_offset = in_bytes(
+    constantPoolCacheOopDesc::base_offset() +
+      (is_invokevirtual
+       ? ConstantPoolCacheEntry::f2_offset()
+       : ConstantPoolCacheEntry::f1_offset()));
+  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset());
+  // access constant pool cache fields
+  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                    ConstantPoolCacheEntry::f2_offset());
+
+  if (byte_no == f12_oop) {
+    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
+    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
+    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
+
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
+    __ ldr(method, Address(cache, index_offset));
+    itable_index = noreg;  // hack to disable load below
+  } else {
+    resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+    __ ldr(method, Address(cache, method_offset));
+  }
+  if (itable_index != noreg) {
+    // pick up itable index from f2 also:
+    assert(byte_no == f1_byte, "already picked up f1");
+    __ ldr(itable_index, Address(cache, index_offset));
+  }
+  __ ldrw(flags, Address(cache, flags_offset));
+}
+
+
+// The cache and index registers are expected to be set before the call.
+// Correct values of the cache and index registers are preserved.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
+                                            bool is_static, bool has_tos) {
+  // do the JVMTI work here to avoid disturbing the register state below
+  // We use c_rarg registers here because we want to use the register used in
+  // the call to the VM
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we
+    // take the time to call into the VM.
+    Label L1;
+    assert_different_registers(cache, index, r0);
+    __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
+    __ ldrw(r0, Address(rscratch1));
+    __ cbzw(r0, L1);
+
+    __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
+    __ lea(c_rarg2, Address(c_rarg2, in_bytes(constantPoolCacheOopDesc::base_offset())));
+
+    if (is_static) {
+      __ mov(c_rarg1, zr); // NULL object reference
+    } else {
+      __ ldr(c_rarg1, at_tos()); // get object pointer without popping it
+      __ verify_oop(c_rarg1);
+    }
+    // c_rarg1: object pointer or NULL
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on the stack
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                       InterpreterRuntime::post_field_access),
+               c_rarg1, c_rarg2, c_rarg3);
+    __ get_cache_and_index_at_bcp(cache, index, 1);
+    __ bind(L1);
+  }
+}
+
+void TemplateTable::pop_and_check_object(Register r)
+{
+  __ pop_ptr(r);
+  __ null_check(r);  // for field access must check obj.
+  __ verify_oop(r);
+}
+
+void TemplateTable::getfield_or_static(int byte_no, bool is_static)
+{
+  const Register cache = r2;
+  const Register index = r3;
+  const Register obj   = r4;
+  const Register off   = r19;
+  const Register flags = r0;
+  const Register bc    = r4; // uses same reg as obj, so don't mix them
+
+  resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+  jvmti_post_field_access(cache, index, is_static, false);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+  if (!is_static) {
+    // obj is on the stack
+    pop_and_check_object(obj);
+  }
+
+  const Address field(obj, off);
+
+  Label Done, notByte, notBool, notInt, notShort, notChar,
+              notLong, notFloat, notObj, notDouble;
+
+  // x86 uses a shift and mask, or just a shift plus an assert that
+  // the mask is not needed.  aarch64 simply uses a bitfield extract.
+  __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
+
+  assert(btos == 0, "change code, btos != 0");
+  __ cbnz(flags, notByte);
+
+  // btos
+  __ load_signed_byte(r0, field);
+  __ push(btos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notByte);
+  __ cmp(flags, ztos);
+  __ br(Assembler::NE, notBool);
+
+  // ztos (same code as btos)
+  __ ldrsb(r0, field);
+  __ push(ztos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    // use btos rewriting, no truncating to t/f bit is needed for getfield.
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notBool);
+  __ cmp(flags, atos);
+  __ br(Assembler::NE, notObj);
+  // atos
+  __ load_heap_oop(r0, field);
+  __ push(atos);
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notObj);
+  __ cmp(flags, itos);
+  __ br(Assembler::NE, notInt);
+  // itos
+  __ ldrw(r0, field);
+  __ push(itos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_igetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notInt);
+  __ cmp(flags, ctos);
+  __ br(Assembler::NE, notChar);
+  // ctos
+  __ load_unsigned_short(r0, field);
+  __ push(ctos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_cgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notChar);
+  __ cmp(flags, stos);
+  __ br(Assembler::NE, notShort);
+  // stos
+  __ load_signed_short(r0, field);
+  __ push(stos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_sgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notShort);
+  __ cmp(flags, ltos);
+  __ br(Assembler::NE, notLong);
+  // ltos
+  __ ldr(r0, field);
+  __ push(ltos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_lgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notLong);
+  __ cmp(flags, ftos);
+  __ br(Assembler::NE, notFloat);
+  // ftos
+  __ ldrs(v0, field);
+  __ push(ftos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_fgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  __ cmp(flags, dtos);
+  __ br(Assembler::NE, notDouble);
+#endif
+  // dtos
+  __ ldrd(v0, field);
+  __ push(dtos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_dgetfield, bc, r1);
+  }
+#ifdef ASSERT
+  __ b(Done);
+
+  __ bind(notDouble);
+  __ stop("Bad state");
+#endif
+
+  __ bind(Done);
+  // It's really not worth bothering to check whether this field
+  // really is volatile in the slow case.
+  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad |
+                                                    MacroAssembler::LoadStore));
+}
+
+
+void TemplateTable::getfield(int byte_no)
+{
+  getfield_or_static(byte_no, false);
+}
+
+void TemplateTable::getstatic(int byte_no)
+{
+  getfield_or_static(byte_no, true);
+}
+
+// The cache and index registers are expected to be set before the call.
+// The function may destroy various registers, just not the cache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
+  transition(vtos, vtos);
+
+  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+
+  if (JvmtiExport::can_post_field_modification()) {
+    // Check to see if a field modification watch has been set before
+    // we take the time to call into the VM.
+    Label L1;
+    assert_different_registers(cache, index, r0);
+    __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+    __ ldrw(r0, Address(rscratch1));
+    __ cbz(r0, L1);
+
+    __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
+
+    if (is_static) {
+      // Life is simple.  Null out the object pointer.
+      __ mov(c_rarg1, zr);
+    } else {
+      // Life is harder. The stack holds the value on top, followed by
+      // the object.  We don't know the size of the value, though; it
+      // could be one or two words depending on its type. As a result,
+      // we must find the type to determine where the object is.
+      __ ldrw(c_rarg3, Address(c_rarg2,
+                               in_bytes(cp_base_offset +
+                                        ConstantPoolCacheEntry::flags_offset())));
+      __ lsr(c_rarg3, c_rarg3,
+             ConstantPoolCacheEntry::tos_state_shift);
+      ConstantPoolCacheEntry::verify_tos_state_shift();
+      Label nope2, done, ok;
+      __ ldr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
+      __ cmpw(c_rarg3, ltos);
+      __ br(Assembler::EQ, ok);
+      __ cmpw(c_rarg3, dtos);
+      __ br(Assembler::NE, nope2);
+      __ bind(ok);
+      __ ldr(c_rarg1, at_tos_p2()); // ltos (two word jvalue)
+      __ bind(nope2);
+    }
+    // cache entry pointer
+    __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
+    // object (tos)
+    __ mov(c_rarg3, esp);
+    // c_rarg1: object pointer set up above (NULL if static)
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on the stack
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_modification),
+               c_rarg1, c_rarg2, c_rarg3);
+    __ get_cache_and_index_at_bcp(cache, index, 1);
+    __ bind(L1);
+  }
+}
+
+void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+  transition(vtos, vtos);
+
+  const Register cache = r2;
+  const Register index = r3;
+  const Register obj   = r2;
+  const Register off   = r19;
+  const Register flags = r0;
+  const Register bc    = r4;
+
+  resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+  jvmti_post_field_mod(cache, index, is_static);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+  Label Done;
+  __ mov(r5, flags);
+
+  {
+    Label notVolatile;
+    __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore |
+                                               MacroAssembler::LoadStore));
+    __ bind(notVolatile);
+  }
+
+  // field address
+  const Address field(obj, off);
+
+  Label notByte, notBool, notInt, notShort, notChar,
+        notLong, notFloat, notObj, notDouble;
+
+  // x86 uses a shift and mask, or just a shift plus an assert that
+  // the mask is not needed.  aarch64 simply uses a bitfield extract.
+  __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
+
+  assert(btos == 0, "change code, btos != 0");
+  __ cbnz(flags, notByte);
+
+  // btos
+  {
+    __ pop(btos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strb(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notByte);
+  __ cmp(flags, ztos);
+  __ br(Assembler::NE, notBool);
+
+  // ztos
+  {
+    __ pop(ztos);
+    if (!is_static) pop_and_check_object(obj);
+    __ andw(r0, r0, 0x1);
+    __ strb(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notBool);
+  __ cmp(flags, atos);
+  __ br(Assembler::NE, notObj);
+
+  // atos
+  {
+    __ pop(atos);
+    if (!is_static) pop_and_check_object(obj);
+    // Store into the field
+    do_oop_store(_masm, field, r0, _bs->kind(), false);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notObj);
+  __ cmp(flags, itos);
+  __ br(Assembler::NE, notInt);
+
+  // itos
+  {
+    __ pop(itos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strw(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notInt);
+  __ cmp(flags, ctos);
+  __ br(Assembler::NE, notChar);
+
+  // ctos
+  {
+    __ pop(ctos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strh(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notChar);
+  __ cmp(flags, stos);
+  __ br(Assembler::NE, notShort);
+
+  // stos
+  {
+    __ pop(stos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strh(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notShort);
+  __ cmp(flags, ltos);
+  __ br(Assembler::NE, notLong);
+
+  // ltos
+  {
+    __ pop(ltos);
+    if (!is_static) pop_and_check_object(obj);
+    __ str(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notLong);
+  __ cmp(flags, ftos);
+  __ br(Assembler::NE, notFloat);
+
+  // ftos
+  {
+    __ pop(ftos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strs(v0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  __ cmp(flags, dtos);
+  __ br(Assembler::NE, notDouble);
+#endif
+
+  // dtos
+  {
+    __ pop(dtos);
+    if (!is_static) pop_and_check_object(obj);
+    __ strd(v0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no);
+    }
+  }
+
+#ifdef ASSERT
+  __ b(Done);
+
+  __ bind(notDouble);
+  __ stop("Bad state");
+#endif
+
+  __ bind(Done);
+
+  {
+    Label notVolatile;
+    __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad |
+                                               MacroAssembler::StoreStore));
+    __ bind(notVolatile);
+  }
+}
+
+void TemplateTable::putfield(int byte_no)
+{
+  putfield_or_static(byte_no, false);
+}
+
+void TemplateTable::putstatic(int byte_no) {
+  putfield_or_static(byte_no, true);
+}
+
+void TemplateTable::jvmti_post_fast_field_mod()
+{
+  if (JvmtiExport::can_post_field_modification()) {
+    // Check to see if a field modification watch has been set before
+    // we take the time to call into the VM.
+    Label L2;
+    __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+    __ ldrw(c_rarg3, Address(rscratch1));
+    __ cbzw(c_rarg3, L2);
+    __ pop_ptr(r19);                  // copy the object pointer from tos
+    __ verify_oop(r19);
+    __ push_ptr(r19);                 // put the object pointer back on tos
+    // Save tos values before call_VM() clobbers them. Since we have
+    // to do it for every data type, we use the saved values as the
+    // jvalue object.
+    switch (bytecode()) {          // load values into the jvalue object
+    case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
+    case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
+    case Bytecodes::_fast_sputfield: // fall through
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ push_i(r0); break;
+    case Bytecodes::_fast_dputfield: __ push_d(); break;
+    case Bytecodes::_fast_fputfield: __ push_f(); break;
+    case Bytecodes::_fast_lputfield: __ push_l(r0); break;
+
+    default:
+      ShouldNotReachHere();
+    }
+    __ mov(c_rarg3, esp);             // points to jvalue on the stack
+    // access constant pool cache entry
+    __ get_cache_entry_pointer_at_bcp(c_rarg2, r0, 1);
+    __ verify_oop(r19);
+    // r19: object pointer copied above
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on the stack
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_modification),
+               r19, c_rarg2, c_rarg3);
+
+    switch (bytecode()) {             // restore tos values
+    case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
+    case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
+    case Bytecodes::_fast_sputfield: // fall through
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
+    case Bytecodes::_fast_dputfield: __ pop_d(); break;
+    case Bytecodes::_fast_fputfield: __ pop_f(); break;
+    case Bytecodes::_fast_lputfield: __ pop_l(r0); break;
+    }
+    __ bind(L2);
+  }
+}
+
+void TemplateTable::fast_storefield(TosState state)
+{
+  transition(state, vtos);
+
+  ByteSize base = constantPoolCacheOopDesc::base_offset();
+
+  jvmti_post_fast_field_mod();
+
+  // access constant pool cache
+  __ get_cache_and_index_at_bcp(r2, r1, 1);
+
+  // test for volatile with r3
+  __ ldrw(r3, Address(r2, in_bytes(base +
+                                   ConstantPoolCacheEntry::flags_offset())));
+
+  // replace index with field offset from cache entry
+  __ ldr(r1, Address(r2, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
+
+  {
+    Label notVolatile;
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore |
+                                               MacroAssembler::LoadStore));
+    __ bind(notVolatile);
+  }
+
+  Label notVolatile;
+
+  // Get object from stack
+  pop_and_check_object(r2);
+
+  // field address
+  const Address field(r2, r1);
+
+  // access field
+  switch (bytecode()) {
+  case Bytecodes::_fast_aputfield:
+    do_oop_store(_masm, field, r0, _bs->kind(), false);
+    break;
+  case Bytecodes::_fast_lputfield:
+    __ str(r0, field);
+    break;
+  case Bytecodes::_fast_iputfield:
+    __ strw(r0, field);
+    break;
+  case Bytecodes::_fast_zputfield:
+    __ andw(r0, r0, 0x1);  // boolean is true if LSB is 1
+    // fall through to bputfield
+  case Bytecodes::_fast_bputfield:
+    __ strb(r0, field);
+    break;
+  case Bytecodes::_fast_sputfield:
+    // fall through
+  case Bytecodes::_fast_cputfield:
+    __ strh(r0, field);
+    break;
+  case Bytecodes::_fast_fputfield:
+    __ strs(v0, field);
+    break;
+  case Bytecodes::_fast_dputfield:
+    __ strd(v0, field);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  {
+    Label notVolatile;
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad |
+                                               MacroAssembler::StoreStore));
+    __ bind(notVolatile);
+  }
+}
+
+
+void TemplateTable::fast_accessfield(TosState state)
+{
+  transition(atos, state);
+  // Do the JVMTI work here to avoid disturbing the register state below
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we
+    // take the time to call into the VM.
+    Label L1;
+    __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
+    __ ldrw(r2, Address(rscratch1));
+    __ cbzw(r2, L1);
+    // access constant pool cache entry
+    __ get_cache_entry_pointer_at_bcp(c_rarg2, rscratch2, 1);
+    __ verify_oop(r0);
+    __ push_ptr(r0);  // save object pointer before call_VM() clobbers it
+    __ mov(c_rarg1, r0);
+    // c_rarg1: object pointer copied above
+    // c_rarg2: cache entry pointer
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_access),
+               c_rarg1, c_rarg2);
+    __ pop_ptr(r0); // restore object pointer
+    __ bind(L1);
+  }
+
+  // access constant pool cache
+  __ get_cache_and_index_at_bcp(r2, r1, 1);
+  __ ldr(r1, Address(r2, in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                  ConstantPoolCacheEntry::f2_offset())));
+  // !!! FIXME AARCH64 -- this load is commented out in jdk7/8 x86, as is
+  // the membar planted after the switch below.  We think aarch64 does
+  // need it, but this should be checked!
+  __ ldrw(r3, Address(r2, in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                   ConstantPoolCacheEntry::flags_offset())));
+
+  // r0: object
+  __ verify_oop(r0);
+  __ null_check(r0);
+  const Address field(r0, r1);
+
+  // access field
+  switch (bytecode()) {
+  case Bytecodes::_fast_agetfield:
+    __ load_heap_oop(r0, field);
+    __ verify_oop(r0);
+    break;
+  case Bytecodes::_fast_lgetfield:
+    __ ldr(r0, field);
+    break;
+  case Bytecodes::_fast_igetfield:
+    __ ldrw(r0, field);
+    break;
+  case Bytecodes::_fast_bgetfield:
+    __ load_signed_byte(r0, field);
+    break;
+  case Bytecodes::_fast_sgetfield:
+    __ load_signed_short(r0, field);
+    break;
+  case Bytecodes::_fast_cgetfield:
+    __ load_unsigned_short(r0, field);
+    break;
+  case Bytecodes::_fast_fgetfield:
+    __ ldrs(v0, field);
+    break;
+  case Bytecodes::_fast_dgetfield:
+    __ ldrd(v0, field);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  // !!! FIXME AARCH64 -- this is commented out in jdk7/8 x86.  We think
+  // aarch64 does need it, but this should be checked!
+  {
+    Label notVolatile;
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad |
+                                               MacroAssembler::LoadStore));
+    __ bind(notVolatile);
+  }
+}
+
+void TemplateTable::fast_xaccess(TosState state)
+{
+  transition(vtos, state);
+
+  // get receiver
+  __ ldr(r0, aaddress(0));
+  // access constant pool cache
+  __ get_cache_and_index_at_bcp(r2, r3, 2);
+  __ ldr(r1, Address(r2, in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                  ConstantPoolCacheEntry::f2_offset())));
+  // make sure exception is reported in correct bcp range (getfield is
+  // next instruction)
+  __ increment(rbcp);
+  __ null_check(r0);
+  switch (state) {
+  case itos:
+    __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
+    break;
+  case atos:
+    __ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
+    __ verify_oop(r0);
+    break;
+  case ftos:
+    __ ldrs(v0, Address(r0, r1, Address::lsl(0)));
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  // !!! FIXME AARCH64 -- this is commented out in jdk7/8 x86.  We think
+  // aarch64 does need it, but this should be checked!
+  {
+    Label notVolatile;
+    __ ldrw(r3, Address(r2, in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                     ConstantPoolCacheEntry::flags_offset())));
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+    __ bind(notVolatile);
+  }
+
+  __ decrement(rbcp);
+}
+
+
+
+//-----------------------------------------------------------------------------
+// Calls
+
+void TemplateTable::count_calls(Register method, Register temp)
+{
+  __ call_Unimplemented();
+}
+
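+// prepare_invoke resolves the constant pool cache entry for the invoke
+// bytecode, loads the target method (and the itable index or appendix
+// where relevant), optionally loads the receiver, and installs the
+// interpreter return address in lr according to the callee's result type.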
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method, // linked method (or i-klass)
+                                   Register index,  // itable index, MethodType, etc.
+                                   Register recv,   // if caller wants to see it
+                                   Register flags   // if caller wants to test it
+                                   ) {
+  // determine flags
+  Bytecodes::Code code = bytecode();
+  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
+  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+  const bool load_receiver       = (recv  != noreg);
+  const bool save_flags          = (flags != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
+  assert(flags == noreg || flags == r3, "");
+  assert(recv  == noreg || recv  == r2, "");
+
+  // setup registers & access constant pool cache
+  if (recv  == noreg)  recv  = r2;
+  if (flags == noreg)  flags = r3;
+  assert_different_registers(method, index, recv, flags);
+
+  // save 'interpreter return address'
+  __ save_bcp();
+
+  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+  // maybe push appendix to arguments (just before return address)
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ verify_oop(index);
+    __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push);
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (note: no return address pushed yet)
+  if (load_receiver) {
+    __ andw(recv, flags, ConstantPoolCacheEntry::parameter_size_mask);
+    // FIXME -- is this actually correct? looks like it should be 2
+    // const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
+    // const int receiver_is_at_end      = -1;  // back off one slot to get receiver
+    // Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
+    // __ movptr(recv, recv_addr);
+    __ add(rscratch1, esp, recv, ext::uxtx, 3); // FIXME: uxtb here?
+    __ ldr(recv, Address(rscratch1, -Interpreter::expr_offset_in_bytes(1)));
+    __ verify_oop(recv);
+  }
+
+  // compute return type
+  // x86 uses a shift and mask, or just a shift plus an assert that
+  // the mask is not needed.  aarch64 simply uses a bitfield extract.
+  __ ubfxw(rscratch2, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
+  // load return address
+  {
+    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
+        (address)Interpreter::return_5_addrs_by_index_table() :
+        (address)Interpreter::return_3_addrs_by_index_table();
+    __ mov(rscratch1, table_addr);
+    __ ldr(lr, Address(rscratch1, rscratch2, Address::lsl(3)));
+  }
+}
+
+
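+// invokevirtual_helper either calls the target directly (vfinal case,
+// where f2 holds the methodOop) or dispatches through the receiver's
+// vtable (where f2 holds the vtable index).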
+void TemplateTable::invokevirtual_helper(Register index,
+                                         Register recv,
+                                         Register flags)
+{
+  // Uses temporary registers r0, r3
+  assert_different_registers(index, recv, r0, r3);
+  // Test for an invoke of a final method
+  Label notFinal;
+  __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal);
+
+  const Register method = index;  // method must be rmethod
+  assert(method == rmethod,
+         "methodOop must be rmethod for interpreter calling convention");
+
+  // do the call - the index is actually the method to call
+  // that is, f2 is a vtable index if !is_vfinal, else f2 is a methodOop
+  __ verify_oop(method);
+
+  // It's final, need a null check here!
+  __ null_check(recv);
+
+  // profile this call
+  __ profile_final_call(r0);
+
+  __ jump_from_interpreted(method, r0);
+
+  __ bind(notFinal);
+
+  // get receiver klass
+  __ null_check(recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(r0, recv);
+  __ verify_oop(r0);
+
+  // profile this call
+  __ profile_virtual_call(r0, rlocals, r3);
+
+  // get target methodOop & entry point
+  __ lookup_virtual_method(r0, index, method);
+  // FIXME -- this looks completely redundant. is it?
+  // __ ldr(r3, Address(method, methodOopDesc::interpreter_entry_offset()));
+  __ jump_from_interpreted(method, r3);
+}
+
+void TemplateTable::invokevirtual(int byte_no)
+{
+  transition(vtos, vtos);
+  assert(byte_no == f2_byte, "use this argument");
+
+  prepare_invoke(byte_no, rmethod, noreg, r2, r3);
+
+  // rmethod: index (actually a methodOop)
+  // r2: receiver
+  // r3: flags
+
+  invokevirtual_helper(rmethod, r2, r3);
+}
+
+void TemplateTable::invokespecial(int byte_no)
+{
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, rmethod, noreg,  // get f1 methodOop
+                 r2);  // get receiver also for null check
+  __ verify_oop(r2);
+  __ null_check(r2);
+  // do the call
+  __ verify_oop(rmethod);
+  __ profile_call(r0);
+  __ jump_from_interpreted(rmethod, r0);
+}
+
+void TemplateTable::invokestatic(int byte_no)
+{
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, rmethod);  // get f1 methodOop
+  // do the call
+  __ verify_oop(rmethod);
+  __ profile_call(r0);
+  __ jump_from_interpreted(rmethod, r0);
+}
+
+void TemplateTable::fast_invokevfinal(int byte_no)
+{
+  __ call_Unimplemented();
+}
+
+void TemplateTable::invokeinterface(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, r0, rmethod,  // get f1 klassOop, f2 itable index
+                 r2, r3); // recv, flags
+
+  // r0: interface klass (from f1)
+  // rmethod: itable index (from f2)
+  // r2: receiver
+  // r3: flags
+
+  // Special case of invokeinterface called for virtual method of
+  // java.lang.Object.  See cpCacheOop.cpp for details.
+  // This code isn't produced by javac, but could be produced by
+  // another compliant java compiler.
+  Label notMethod;
+  __ tbz(r3, ConstantPoolCacheEntry::is_forced_virtual_shift, notMethod);
+
+  invokevirtual_helper(rmethod, r2, r3);
+  __ bind(notMethod);
+
+  // Get receiver klass into r3 - also a null check
+  __ restore_locals();
+  __ null_check(r2, oopDesc::klass_offset_in_bytes());
+  __ load_klass(r3, r2);
+  __ verify_oop(r3);
+
+  // profile this call
+  __ profile_virtual_call(r3, r13, r19);
+
+  Label no_such_interface, no_such_method;
+
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             r3, r0, rmethod,
+                             // outputs: method, scan temp. reg
+                             rmethod, r13,
+                             no_such_interface);
+
+  // rmethod: methodOop to call
+  // r2: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ cbz(rmethod, no_such_method);
+
+  // do the call
+  // r2: receiver
+  // rmethod: methodOop
+  __ jump_from_interpreted(rmethod, r3);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
+  // throw exception
+  __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
+  __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_IncompatibleClassChangeError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+  return;
+}
+
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+
+  if (!EnableInvokeDynamic) {
+    // rewriter does not generate this bytecode
+    __ should_not_reach_here();
+    return;
+  }
+
+  prepare_invoke(byte_no, rmethod, r0, r2);
+  __ verify_oop(rmethod);
+  __ verify_oop(r2);
+  __ null_check(r2);
+
+  // FIXME: profile the LambdaForm also
+
+  // r13 is safe to use here as a scratch reg because it is about to
+  // be clobbered by jump_from_interpreted().
+  __ profile_final_call(r13);
+
+  __ jump_from_interpreted(rmethod, r0);
+}
+
+void TemplateTable::invokedynamic(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+
+  if (!EnableInvokeDynamic) {
+    // We should not encounter this bytecode if !EnableInvokeDynamic.
+    // The verifier will stop it.  However, if we get past the verifier,
+    // this will stop the thread in a reasonable way, without crashing the JVM.
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                     InterpreterRuntime::throw_IncompatibleClassChangeError));
+    // the call_VM checks for exception, so we should never return here.
+    __ should_not_reach_here();
+    return;
+  }
+
+  prepare_invoke(byte_no, rmethod, r0);
+
+  // r0: CallSite object (from f1)
+  // rmethod: MH.linkToCallSite method (from f2)
+
+  // Note:  r0_callsite is already pushed by prepare_invoke
+
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
+  // profile this call
+  __ profile_call(rbcp);
+
+  __ verify_oop(r0);
+
+  __ jump_from_interpreted(rmethod, r0);
+}
+
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+void TemplateTable::_new() {
+  transition(vtos, atos);
+
+  __ get_unsigned_2_byte_index_at_bcp(r3, 1);
+  Label slow_case;
+  Label done;
+  Label initialize_header;
+  Label initialize_object; // including clearing the fields
+  Label allocate_shared;
+
+  __ get_cpool_and_tags(r4, r0);
+  // Make sure the class we're about to instantiate has been resolved.
+  // This is done before loading instanceKlass to be consistent with the order
+  // in which the constant pool is updated (see constantPoolOopDesc::klass_at_put)
+  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+  __ lea(rscratch1, Address(r0, r3, Address::lsl(0)));
+  __ lea(rscratch1, Address(rscratch1, tags_offset));
+  __ ldarb(rscratch1, rscratch1);
+  __ cmp(rscratch1, JVM_CONSTANT_Class);
+  __ br(Assembler::NE, slow_case);
+
+  // get instanceKlass
+  __ lea(r4, Address(r4, r3, Address::lsl(3)));
+  __ ldr(r4, Address(r4, sizeof(constantPoolOopDesc)));
+
+  // make sure klass is initialized & doesn't have finalizer
+  // make sure klass is fully initialized
+  __ ldrb(rscratch1, Address(r4, instanceKlass::init_state_offset()));
+  __ cmp(rscratch1, instanceKlass::fully_initialized);
+  __ br(Assembler::NE, slow_case);
+
+  // get instance_size in instanceKlass (scaled to a count of bytes)
+  __ ldrw(r3,
+          Address(r4,
+                  Klass::layout_helper_offset()));
+  // test to see if it has a finalizer or is malformed in some way
+  __ tbnz(r3, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case);
+
+  // Allocate the instance
+  // 1) Try to allocate in the TLAB
+  // 2) if fail and the object is large allocate in the shared Eden
+  // 3) if the above fails (or is not applicable), go to a slow case
+  // (creates a new TLAB, etc.)
+
+  const bool allow_shared_alloc =
+    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
+
+  if (UseTLAB) {
+    __ tlab_allocate(r0, r3, 0, noreg, r1,
+                     allow_shared_alloc ? allocate_shared : slow_case);
+
+    if (ZeroTLAB) {
+      // the fields have already been cleared
+      __ b(initialize_header);
+    } else {
+      // initialize both the header and fields
+      __ b(initialize_object);
+    }
+  }
+
+  // Allocation in the shared Eden, if allowed.
+  //
+  // r3: instance size in bytes
+  if (allow_shared_alloc) {
+    __ bind(allocate_shared);
+
+    __ eden_allocate(r0, r3, 0, r10, slow_case);
+    __ incr_allocated_bytes(rthread, r3, 0, rscratch1);
+  }
+
+  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
+    // The object is initialized before the header.  If the object size is
+    // zero, go directly to the header initialization.
+    __ bind(initialize_object);
+    __ sub(r3, r3, sizeof(oopDesc));
+    __ cbz(r3, initialize_header);
+
+    // Initialize object fields
+    {
+      __ add(r2, r0, sizeof(oopDesc));
+      Label loop;
+      __ bind(loop);
+      __ str(zr, Address(__ post(r2, BytesPerLong)));
+      __ sub(r3, r3, BytesPerLong);
+      __ cbnz(r3, loop);
+    }
+
+    // initialize object header only.
+    __ bind(initialize_header);
+    if (UseBiasedLocking) {
+      __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset()));
+    } else {
+      __ mov(rscratch1, (intptr_t)markOopDesc::prototype());
+    }
+    __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
+    __ store_klass_gap(r0, zr);  // zero klass gap for compressed oops
+    __ store_klass(r0, r4);      // store klass last
+
+    {
+      SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
+      // Trigger dtrace event for fastpath
+      __ push(atos); // save the return value
+      __ call_VM_leaf(
+           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), r0);
+      __ pop(atos); // restore the return value
+
+    }
+    __ b(done);
+  }
+
+  // slow case
+  __ bind(slow_case);
+  __ get_constant_pool(c_rarg1);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
+  __ verify_oop(r0);
+
+  // continue
+  __ bind(done);
+  // Must prevent reordering of stores for object initialization with stores that publish the new object.
+  __ membar(Assembler::StoreStore);
+}
+
+void TemplateTable::newarray() {
+  transition(itos, atos);
+  __ load_unsigned_byte(c_rarg1, at_bcp(1));
+  __ mov(c_rarg2, r0);
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
+          c_rarg1, c_rarg2);
+  __ membar(Assembler::StoreStore);
+}
+
+void TemplateTable::anewarray() {
+  transition(itos, atos);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+  __ get_constant_pool(c_rarg1);
+  __ mov(c_rarg3, r0);
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+          c_rarg1, c_rarg2, c_rarg3);
+  __ membar(Assembler::StoreStore);
+}
+
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+  __ null_check(r0, arrayOopDesc::length_offset_in_bytes());
+  __ ldrw(r0, Address(r0, arrayOopDesc::length_offset_in_bytes()));
+}
+
+void TemplateTable::checkcast()
+{
+  transition(atos, atos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ cbz(r0, is_null);
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
+  __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
+  // See if bytecode has already been quicked
+  __ add(rscratch1, r3, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
+  __ lea(r1, Address(rscratch1, r19));
+  __ ldarb(r1, r1);
+  __ cmp(r1, JVM_CONSTANT_Class);
+  __ br(Assembler::EQ, quicked);
+
+  __ push(atos); // save receiver for result, and for GC
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ pop(r3); // restore receiver
+  __ b(resolved);
+
+  // Get superklass in r0 and subklass in r3
+  __ bind(quicked);
+  __ mov(r3, r0); // Save object in r3; r0 needed for subtype check
+  __ lea(r0, Address(r2, r19, Address::lsl(3)));
+  __ ldr(r0, Address(r0, sizeof(constantPoolOopDesc)));
+
+  __ bind(resolved);
+  __ load_klass(r19, r3);
+
+  // Generate subtype check.  Blows r2, r5.  Object in r3.
+  // Superklass in r0.  Subklass in r19.
+  __ gen_subtype_check(r19, ok_is_subtype);
+
+  // Come here on failure
+  __ push(r3);
+  // object is at TOS
+  __ b(Interpreter::_throw_ClassCastException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+  __ mov(r0, r3); // Restore object (saved in r3) back to r0
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ b(done);
+    __ bind(is_null);
+    __ profile_null_seen(r2);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+  __ bind(done);
+}
+
+void TemplateTable::instanceof() {
+  transition(atos, itos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ cbz(r0, is_null);
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
+  __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
+  // See if bytecode has already been quicked
+  __ add(rscratch1, r3, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
+  __ lea(r1, Address(rscratch1, r19));
+  __ ldarb(r1, r1);
+  __ cmp(r1, JVM_CONSTANT_Class);
+  __ br(Assembler::EQ, quicked);
+
+  __ push(atos); // save receiver for result, and for GC
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ pop(r3); // restore receiver
+  __ verify_oop(r3);
+  __ load_klass(r3, r3);
+  __ b(resolved);
+
+  // Get superklass in r0 and subklass in r3
+  __ bind(quicked);
+  __ load_klass(r3, r0);
+  __ lea(r0, Address(r2, r19, Address::lsl(3)));
+  __ ldr(r0, Address(r0, sizeof(constantPoolOopDesc)));
+
+  __ bind(resolved);
+
+  // Generate subtype check.  Blows r2, r5
+  // Superklass in r0.  Subklass in r3.
+  __ gen_subtype_check(r3, ok_is_subtype);
+
+  // Come here on failure
+  __ mov(r0, 0);
+  __ b(done);
+  // Come here on success
+  __ bind(ok_is_subtype);
+  __ mov(r0, 1);
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ b(done);
+    __ bind(is_null);
+    __ profile_null_seen(r2);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+  __ bind(done);
+  // r0 = 0: obj == NULL or  obj is not an instanceof the specified klass
+  // r0 = 1: obj != NULL and obj is     an instanceof the specified klass
+}
+
+//-----------------------------------------------------------------------------
+// Breakpoints
+void TemplateTable::_breakpoint() {
+  // Note: We get here even if we are single stepping.
+  // jbug insists on setting breakpoints at every bytecode
+  // even if we are in single step mode.
+
+  transition(vtos, vtos);
+
+  // get the unpatched byte code
+  __ get_method(c_rarg1);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::get_original_bytecode_at),
+             c_rarg1, rbcp);
+  __ mov(r19, r0);
+
+  // post the breakpoint event
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
+             rmethod, rbcp);
+
+  // complete the execution of original bytecode
+  __ mov(rscratch1, r19);
+  __ dispatch_only_normal(vtos);
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateTable::athrow() {
+  transition(atos, vtos);
+  __ null_check(r0);
+  __ b(Interpreter::throw_exception_entry());
+}
+
+//-----------------------------------------------------------------------------
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines; which is reflected
+//       in the assembly code structure as well
+//
+// Stack layout:
+//
+// [expressions  ] <--- esp               = expression stack top
+// ..
+// [expressions  ]
+// [monitor entry] <--- monitor block top = expression stack bot
+// ..
+// [monitor entry]
+// [frame data   ] <--- monitor block bot
+// ...
+// [saved rbp    ] <--- rbp
+void TemplateTable::monitorenter()
+{
+  transition(atos, vtos);
+
+  // check for NULL object
+  __ null_check(r0);
+
+  const Address monitor_block_top(
+        rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  const Address monitor_block_bot(
+        rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+  Label allocated;
+
+  // initialize entry pointer
+  __ mov(c_rarg1, zr); // points to free slot or NULL
+
+  // find a free slot in the monitor block (result in c_rarg1)
+  {
+    Label entry, loop, exit;
+    __ ldr(c_rarg3, monitor_block_top); // points to current entry,
+                                        // starting with top-most entry
+    __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
+
+    __ b(entry);
+
+    __ bind(loop);
+    // check if current entry is used
+    // if not used then remember entry in c_rarg1
+    __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
+    __ cmp(zr, rscratch1);
+    __ csel(c_rarg1, c_rarg3, c_rarg1, Assembler::EQ);
+    // check if current entry is for same object
+    __ cmp(r0, rscratch1);
+    // if same object then stop searching
+    __ br(Assembler::EQ, exit);
+    // otherwise advance to next entry
+    __ add(c_rarg3, c_rarg3, entry_size);
+    __ bind(entry);
+    // check if bottom reached
+    __ cmp(c_rarg3, c_rarg2);
+    // if not at bottom then check this entry
+    __ br(Assembler::NE, loop);
+    __ bind(exit);
+  }
+
+  __ cbnz(c_rarg1, allocated); // check if a slot has been found and
+                            // if found, continue with that one
+
+  // allocate one if there's no free slot
+  {
+    Label entry, loop;
+    // 1. compute new pointers            // rsp: old expression stack top
+    __ ldr(c_rarg1, monitor_block_bot);   // c_rarg1: old expression stack bottom
+    __ sub(esp, esp, entry_size);         // move expression stack top
+    __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
+    __ mov(c_rarg3, esp);                 // set start value for copy loop
+    __ str(c_rarg1, monitor_block_bot);   // set new monitor block bottom
+
+    __ sub(sp, sp, entry_size);           // make room for the monitor
+
+    __ b(entry);
+    // 2. move expression stack contents
+    __ bind(loop);
+    __ ldr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
+                                                   // word from old location
+    __ str(c_rarg2, Address(c_rarg3, 0));          // and store it at new location
+    __ add(c_rarg3, c_rarg3, wordSize);            // advance to next word
+    __ bind(entry);
+    __ cmp(c_rarg3, c_rarg1);        // check if bottom reached
+    __ br(Assembler::NE, loop);      // if not at bottom then
+                                     // copy next word
+  }
+
+  // call run-time routine
+  // c_rarg1: points to monitor entry
+  __ bind(allocated);
+
+  // Increment bcp to point to the next bytecode, so exception
+  // handling for async. exceptions works correctly.
+  // The object has already been popped from the stack, so the
+  // expression stack looks correct.
+  __ increment(rbcp);
+
+  // store object
+  __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+  __ lock_object(c_rarg1);
+
+  // check to make sure this monitor doesn't cause stack overflow after locking
+  __ save_bcp();  // in case of exception
+  __ generate_stack_overflow_check(0);
+
+  // The bcp has already been incremented. Just need to dispatch to
+  // next instruction.
+  __ dispatch_next(vtos);
+}
+
+
+void TemplateTable::monitorexit()
+{
+  transition(atos, vtos);
+
+  // check for NULL object
+  __ null_check(r0);
+
+  const Address monitor_block_top(
+        rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  const Address monitor_block_bot(
+        rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+  Label found;
+
+  // find matching slot
+  {
+    Label entry, loop;
+    __ ldr(c_rarg1, monitor_block_top); // points to current entry,
+                                        // starting with top-most entry
+    __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
+                                        // of monitor block
+    __ b(entry);
+
+    __ bind(loop);
+    // check if current entry is for same object
+    __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+    __ cmp(r0, rscratch1);
+    // if same object then stop searching
+    __ br(Assembler::EQ, found);
+    // otherwise advance to next entry
+    __ add(c_rarg1, c_rarg1, entry_size);
+    __ bind(entry);
+    // check if bottom reached
+    __ cmp(c_rarg1, c_rarg2);
+    // if not at bottom then check this entry
+    __ br(Assembler::NE, loop);
+  }
+
+  // error handling. Unlocking was not block-structured
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+  __ should_not_reach_here();
+
+  // call run-time routine
+  __ bind(found);
+  __ push_ptr(r0); // make sure object is on stack (contract with oopMaps)
+  __ unlock_object(c_rarg1);
+  __ pop_ptr(r0); // discard object
+}
+
+
+// Wide instructions
+void TemplateTable::wide()
+{
+  __ load_unsigned_byte(r19, at_bcp(1));
+  __ mov(rscratch1, (address)Interpreter::_wentry_point);
+  __ ldr(rscratch1, Address(rscratch1, r19, Address::uxtw(3)));
+  __ br(rscratch1);
+}
+
+
+// Multi arrays
+void TemplateTable::multianewarray() {
+  transition(vtos, atos);
+  __ load_unsigned_byte(r0, at_bcp(3)); // get number of dimensions
+  // last dim is on top of stack; we want address of first one:
+  // first_addr = last_addr + (ndims - 1) * wordSize
+  __ lea(c_rarg1, Address(esp, r0, Address::uxtw(3)));
+  __ sub(c_rarg1, c_rarg1, wordSize);
+  call_VM(r0,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
+          c_rarg1);
+  __ load_unsigned_byte(r1, at_bcp(3));
+  __ lea(esp, Address(esp, r1, Address::uxtw(3)));
+}
+#endif // !CC_INTERP
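
The allocation path in TemplateTable::_new() above follows a fixed order: try the thread-local allocation buffer, then (when inline contiguous allocation is supported and CMS incremental mode is off) the shared Eden, and finally fall back to the InterpreterRuntime::_new runtime call. The following is a minimal standalone C++ sketch of just that decision order; BumpRegion, allocate_instance and slow_path_allocate are hypothetical names standing in for the real TLAB/Eden machinery, not VM code.

// Sketch only: models the TLAB -> shared Eden -> slow path order used above.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cstdio>

struct BumpRegion {                       // bump-pointer region (TLAB or Eden model)
  std::uint8_t* top;
  std::uint8_t* end;
  void* allocate(std::size_t bytes) {
    if (top + bytes > end) return nullptr;  // not enough room in this region
    void* obj = top;
    top += bytes;
    return obj;
  }
};

static void* slow_path_allocate(std::size_t bytes) {
  // Stand-in for the runtime call (InterpreterRuntime::_new in the patch).
  return ::operator new(bytes);
}

void* allocate_instance(BumpRegion& tlab, BumpRegion& eden, std::size_t bytes,
                        bool allow_shared_alloc) {
  if (void* obj = tlab.allocate(bytes)) return obj;      // 1) try the TLAB
  if (allow_shared_alloc) {
    if (void* obj = eden.allocate(bytes)) return obj;    // 2) try the shared Eden
  }
  return slow_path_allocate(bytes);                      // 3) slow case
}

int main() {
  std::uint8_t tlab_buf[256], eden_buf[1024];
  BumpRegion tlab{tlab_buf, tlab_buf + sizeof(tlab_buf)};
  BumpRegion eden{eden_buf, eden_buf + sizeof(eden_buf)};
  void* p = allocate_instance(tlab, eden, 64, true);
  std::memset(p, 0, 64);                  // zero the body, as in initialize_object
  std::printf("allocated at %p\n", p);
  return 0;
}

As in initialize_object/initialize_header above, the real code also writes the mark word and klass pointer after clearing the fields; the model omits the header entirely.
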
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP
+#define CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP
+
+static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
+  static void invokevirtual_helper(Register index, Register recv,
+                                   Register flags);
+
+  // Helpers
+  static void index_check(Register array, Register index);
+  static void index_check_without_pop(Register array, Register index);
+
+#endif // CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vmStructs_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP
+#define CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* JavaCallWrapper            */                                                                                                   \
+  /******************************/                                                                                                   \
+  /******************************/                                                                                                   \
+  /* JavaFrameAnchor            */                                                                                                   \
+  /******************************/                                                                                                   \
+  volatile_nonstatic_field(JavaFrameAnchor,     _last_Java_fp,                                    intptr_t*)                              \
+                                                                                                                                     \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used  */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
+  /* be present there)                                                */
+
+
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry)                               \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used  */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must    */
+  /* be present there)                                                */
+
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used        */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must  */
+  /* be present there)                                                      */
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used         */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must  */
+  /* be present there)                                                       */
+
+#endif // CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
+ * Copyright (c) 2015, Red Hat Inc. All rights reserved.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "utilities/sizes.hpp"
+#include "assembler_aarch64.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_aarch64.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "os_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "os_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "os_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "os_bsd.inline.hpp"
+#endif
+
+#ifndef BUILTIN_SIM
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#else
+#define getauxval(hwcap) 0
+#endif
+
+#ifndef HWCAP_AES
+#define HWCAP_AES   (1<<3)
+#endif
+
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1<<7)
+#endif
+
+int VM_Version::_cpu;
+int VM_Version::_model;
+int VM_Version::_model2;
+int VM_Version::_variant;
+int VM_Version::_revision;
+int VM_Version::_stepping;
+int VM_Version::_cpuFeatures;
+const char*           VM_Version::_features_str = "";
+VM_Version::PsrInfo VM_Version::_psr_info   = { 0, };
+
+static BufferBlob* stub_blob;
+static const int stub_size = 550;
+
+extern "C" {
+  typedef void (*getPsrInfo_stub_t)(void*);
+}
+static getPsrInfo_stub_t getPsrInfo_stub = NULL;
+
+
+class VM_Version_StubGenerator: public StubCodeGenerator {
+ public:
+
+  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+  address generate_getPsrInfo() {
+    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
+#   define __ _masm->
+    address start = __ pc();
+
+#ifdef BUILTIN_SIM
+    __ c_stub_prolog(1, 0, MacroAssembler::ret_type_void);
+#endif
+
+    // void getPsrInfo(VM_Version::PsrInfo* psr_info);
+
+    address entry = __ pc();
+
+    __ enter();
+
+    __ get_dczid_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset())));
+
+    __ get_ctr_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset())));
+
+    __ leave();
+    __ ret(lr);
+
+#   undef __
+
+    return start;
+  }
+};
+
+
+void VM_Version::get_processor_features() {
+  _supports_cx8 = true;
+  _supports_atomic_getset4 = true;
+  _supports_atomic_getadd4 = true;
+  _supports_atomic_getset8 = true;
+  _supports_atomic_getadd8 = true;
+
+  getPsrInfo_stub(&_psr_info);
+
+  int dcache_line = VM_Version::dcache_line_size();
+
+  // Limit AllocatePrefetchDistance so that it does not exceed the
+  // constraint in AllocatePrefetchDistanceConstraintFunc.
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance))
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3*dcache_line));
+
+  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize))
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line);
+  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes))
+    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line);
+  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes))
+    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line);
+
+  if (PrefetchCopyIntervalInBytes != -1 &&
+       ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
+    warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
+    PrefetchCopyIntervalInBytes &= ~7;
+    if (PrefetchCopyIntervalInBytes >= 32768)
+      PrefetchCopyIntervalInBytes = 32760;
+  }
+  FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
+
+#ifndef BUILTIN_SIM
+  unsigned long auxv = getauxval(AT_HWCAP);
+
+  char buf[512];
+
+  strcpy(buf, "simd");
+  if (auxv & HWCAP_CRC32) strcat(buf, ", crc");
+  if (auxv & HWCAP_AES)   strcat(buf, ", aes");
+
+  _features_str = strdup(buf);
+  _cpuFeatures = auxv;
+
+  int cpu_lines = 0;
+  if (FILE *f = fopen("/proc/cpuinfo", "r")) {
+    char buf[128], *p;
+    while (fgets(buf, sizeof (buf), f) != NULL) {
+      if (p = strchr(buf, ':')) {
+        long v = strtol(p+1, NULL, 0);
+        if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) {
+          _cpu = v;
+          cpu_lines++;
+        } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) {
+          _variant = v;
+        } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) {
+          if (_model != v)  _model2 = _model;
+          _model = v;
+        } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) {
+          _revision = v;
+        }
+      }
+    }
+    fclose(f);
+  }
+
+  // Enable vendor specific features
+  if (_cpu == CPU_CAVIUM && _variant == 0) _cpuFeatures |= CPU_DMB_ATOMICS;
+  if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _cpuFeatures |= CPU_A53MAC;
+  if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _cpuFeatures |= CPU_STXR_PREFETCH;
+  // If an old style /proc/cpuinfo (cpu_lines == 1) reports an A57 (0xd07),
+  // assume the worst: we could be on a big.LITTLE system with undisclosed
+  // A53 cores that we might be migrated to at any time.
+  if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _cpuFeatures |= CPU_A53MAC;
+
+  if (FLAG_IS_DEFAULT(UseCRC32)) {
+    UseCRC32 = (auxv & HWCAP_CRC32) != 0;
+  }
+  if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
+    warning("UseCRC32 specified, but not supported on this CPU");
+  }
+  if (auxv & HWCAP_AES) {
+    UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
+    UseAESIntrinsics =
+        UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
+    if (UseAESIntrinsics && !UseAES) {
+      warning("UseAESIntrinsics enabled, but UseAES not, enabling");
+      UseAES = true;
+    }
+  } else {
+    if (UseAES) {
+      warning("UseAES specified, but not supported on this CPU");
+    }
+    if (UseAESIntrinsics) {
+      warning("UseAESIntrinsics specified, but not supported on this CPU");
+    }
+  }
+#endif
+
+  if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+    UseCRC32Intrinsics = true;
+  }
+
+  if (is_zva_enabled()) {
+    if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
+      FLAG_SET_DEFAULT(UseBlockZeroing, true);
+    }
+    if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) {
+      FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * VM_Version::zva_length());
+    }
+  } else if (UseBlockZeroing) {
+    warning("DC ZVA is not available on this CPU");
+    FLAG_SET_DEFAULT(UseBlockZeroing, false);
+  }
+
+  // This machine allows unaligned memory accesses
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
+
+#ifdef COMPILER2
+  if (FLAG_IS_DEFAULT(OptoScheduling)) {
+    OptoScheduling = true;
+  }
+#else
+  if (ReservedCodeCacheSize > 128*M) {
+    vm_exit_during_initialization("client compiler does not support ReservedCodeCacheSize > 128M");
+  }
+#endif
+}
+
+void VM_Version::initialize() {
+  ResourceMark rm;
+
+  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
+  if (stub_blob == NULL) {
+    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
+  }
+
+  CodeBuffer c(stub_blob);
+  VM_Version_StubGenerator g(&c);
+  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
+                                   g.generate_getPsrInfo());
+
+  get_processor_features();
+}
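
get_processor_features() above derives the feature string and _cpuFeatures from getauxval(AT_HWCAP) and falls back to parsing /proc/cpuinfo only for the implementer/variant/part/revision fields. A Linux-only sketch of the same HWCAP probe in isolation follows; the HWCAP_AES/HWCAP_CRC32 fallback defines mirror the ones in the patch, and this is an illustration rather than VM code.

// Sketch only: query the AArch64 hardware capability bits checked above.
#include <sys/auxv.h>
#include <cstdio>

#ifndef HWCAP_AES
#define HWCAP_AES   (1 << 3)
#endif
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);   // same source the VM uses
  std::printf("aes:   %s\n", (hwcap & HWCAP_AES)   ? "yes" : "no");
  std::printf("crc32: %s\n", (hwcap & HWCAP_CRC32) ? "yes" : "no");
  return 0;
}
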
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP
+#define CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP
+
+#include "runtime/globals_extension.hpp"
+#include "runtime/vm_version.hpp"
+
+class VM_Version : public Abstract_VM_Version {
+public:
+protected:
+  static int _cpu;
+  static int _model;
+  static int _model2;
+  static int _variant;
+  static int _revision;
+  static int _stepping;
+  static int _cpuFeatures;     // features returned by the "cpuid" instruction
+                               // 0 if this instruction is not available
+  static const char* _features_str;
+
+  struct PsrInfo {
+    uint32_t dczid_el0;
+    uint32_t ctr_el0;
+  };
+  static PsrInfo _psr_info;
+  static void get_processor_features();
+
+public:
+  // Initialization
+  static void initialize();
+
+  // Asserts
+  static void assert_is_initialized() {
+  }
+
+  enum {
+    CPU_ARM       = 'A',
+    CPU_BROADCOM  = 'B',
+    CPU_CAVIUM    = 'C',
+    CPU_DEC       = 'D',
+    CPU_INFINEON  = 'I',
+    CPU_MOTOROLA  = 'M',
+    CPU_NVIDIA    = 'N',
+    CPU_AMCC      = 'P',
+    CPU_QUALCOM   = 'Q',
+    CPU_MARVELL   = 'V',
+    CPU_INTEL     = 'i',
+  } cpuFamily;
+
+  enum {
+    CPU_FP           = (1<<0),
+    CPU_ASIMD        = (1<<1),
+    CPU_EVTSTRM      = (1<<2),
+    CPU_AES          = (1<<3),
+    CPU_PMULL        = (1<<4),
+    CPU_SHA1         = (1<<5),
+    CPU_SHA2         = (1<<6),
+    CPU_CRC32        = (1<<7),
+    CPU_STXR_PREFETCH= (1 << 29),
+    CPU_A53MAC       = (1 << 30),
+    CPU_DMB_ATOMICS  = (1 << 31),
+  } cpuFeatureFlags;
+
+  static const char* cpu_features()           { return _features_str; }
+  static int cpu_family()                     { return _cpu; }
+  static int cpu_model()                      { return _model; }
+  static int cpu_variant()                    { return _variant; }
+  static int cpu_revision()                   { return _revision; }
+  static int cpu_cpuFeatures()                { return _cpuFeatures; }
+  static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
+  static ByteSize ctr_el0_offset()   { return byte_offset_of(PsrInfo, ctr_el0); }
+  static bool is_zva_enabled() {
+    // Check the DZP bit (bit 4) of dczid_el0 is zero
+    // and block size (bit 0~3) is not zero.
+    return ((_psr_info.dczid_el0 & 0x10) == 0 &&
+            (_psr_info.dczid_el0 & 0xf) != 0);
+  }
+  static int zva_length() {
+    assert(is_zva_enabled(), "ZVA not available");
+    return 4 << (_psr_info.dczid_el0 & 0xf);
+  }
+  static int icache_line_size() {
+    return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4;
+  }
+  static int dcache_line_size() {
+    return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4;
+  }
+};
+
+#endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP
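
zva_length(), icache_line_size() and dcache_line_size() above are pure bit arithmetic on the registers captured by the getPsrInfo stub: the low four bits of DCZID_EL0 give the DC ZVA block size as log2(words), and CTR_EL0 carries IminLine in bits 0-3 and DminLine in bits 16-19, also as log2(words). A small sketch applying the same formulas to example register values (the values themselves are hypothetical):

// Sketch only: the cache-geometry arithmetic from vm_version_aarch64.hpp.
#include <cstdint>
#include <cstdio>

static int zva_length(std::uint32_t dczid_el0)     { return 4 << (dczid_el0 & 0xf); }
static int icache_line_size(std::uint32_t ctr_el0) { return (1 << (ctr_el0 & 0x0f)) * 4; }
static int dcache_line_size(std::uint32_t ctr_el0) { return (1 << ((ctr_el0 >> 16) & 0x0f)) * 4; }

int main() {
  std::uint32_t dczid_el0 = 0x4;        // DZP clear, block size field = 4
  std::uint32_t ctr_el0   = 0x00040004; // IminLine = 4, DminLine = 4
  std::printf("zva length:  %d bytes\n", zva_length(dczid_el0));
  std::printf("icache line: %d bytes\n", icache_line_size(ctr_el0));
  std::printf("dcache line: %d bytes\n", dcache_line_size(ctr_el0));
  return 0;
}

With dczid_el0 = 0x4 the DZP bit (0x10) is clear and the block size field is 4, so is_zva_enabled() would return true and zva_length() would be 4 << 4 = 64 bytes; both cache line sizes likewise work out to 64 bytes.
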
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vmreg_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+
+
+void VMRegImpl::set_regName() {
+  Register reg = ::as_Register(0);
+  int i;
+  for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) {
+    regName[i++] = reg->name();
+    regName[i++] = reg->name();
+    reg = reg->successor();
+  }
+
+  FloatRegister freg = ::as_FloatRegister(0);
+  for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
+    regName[i++] = freg->name();
+    regName[i++] = freg->name();
+    freg = freg->successor();
+  }
+
+  for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
+    regName[i] = "NON-GPR-FPR";
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vmreg_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_VMREG_AARCH64_HPP
+#define CPU_AARCH64_VM_VMREG_AARCH64_HPP
+
+  bool is_Register();
+  Register as_Register();
+
+  bool is_FloatRegister();
+  FloatRegister as_FloatRegister();
+#endif // CPU_AARCH64_VM_VMREG_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vmreg_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
+
+inline VMReg RegisterImpl::as_VMReg() {
+  if( this==noreg ) return VMRegImpl::Bad();
+  return VMRegImpl::as_VMReg(encoding() << 1 );
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() {
+  return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
+}
+
+inline bool VMRegImpl::is_Register() {
+  return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
+}
+
+inline bool VMRegImpl::is_FloatRegister() {
+  return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
+}
+
+inline Register VMRegImpl::as_Register() {
+
+  assert( is_Register(), "must be");
+  // Yuk
+  return ::as_Register(value() >> 1);
+}
+
+inline FloatRegister VMRegImpl::as_FloatRegister() {
+  assert( is_FloatRegister() && is_even(value()), "must be" );
+  // Yuk
+  return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1);
+}
+
+inline   bool VMRegImpl::is_concrete() {
+  assert(is_reg(), "must be");
+  return is_even(value());
+}
+
+#endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
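
The conversions above encode each machine register as two consecutive VMReg slots: general registers occupy slots below max_gpr at encoding << 1, float registers follow from max_gpr, and only even slots are "concrete". The sketch below captures just that slot arithmetic; NUM_GPRS and MAX_GPR are hypothetical stand-ins for the AArch64 register count and ConcreteRegisterImpl::max_gpr.

// Sketch only: the two-slots-per-register VMReg numbering used above.
#include <cassert>
#include <cstdio>

constexpr int SLOTS_PER_REG = 2;
constexpr int NUM_GPRS      = 32;                  // hypothetical register count
constexpr int MAX_GPR       = NUM_GPRS * SLOTS_PER_REG;

int gpr_to_slot(int gpr)  { return gpr << 1; }                 // RegisterImpl::as_VMReg
int fpr_to_slot(int fpr)  { return (fpr << 1) + MAX_GPR; }     // FP registers follow the GPRs
int slot_to_gpr(int slot) { assert(slot < MAX_GPR); return slot >> 1; }  // as_Register

int main() {
  std::printf("r5 -> slot %d -> r%d\n", gpr_to_slot(5), slot_to_gpr(gpr_to_slot(5)));
  std::printf("v0 -> slot %d\n", fpr_to_slot(0));
  return 0;
}
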
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_aarch64.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
+// initialize its code
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread,
+                                          oop receiver,
+                                          int index);
+#endif
+
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+  const int aarch64_code_length = VtableStub::pd_code_size_limit(true);
+  VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index);
+  ResourceMark rm;
+  CodeBuffer cb(s->entry_point(), aarch64_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+#ifndef PRODUCT
+  if (CountCompiledCalls) {
+    __ lea(r19, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+    __ incrementw(Address(r19));
+  }
+#endif
+
+  // get receiver (need to skip return address on top of stack)
+  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
+  // get receiver klass
+  address npe_addr = __ pc();
+  __ load_klass(r19, j_rarg0);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label L;
+    // check offset vs vtable length
+    __ ldrw(rscratch1, Address(r19, instanceKlass::vtable_length_offset() * wordSize));
+    __ cmpw(rscratch1, vtable_index * vtableEntry::size());
+    __ br(Assembler::GT, L);
+    __ enter();
+    __ mov(r2, vtable_index);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
+    __ leave();
+    __ bind(L);
+  }
+#endif // PRODUCT
+
+  __ lookup_virtual_method(r19, vtable_index, rmethod);
+
+  if (DebugVtables) {
+    Label L;
+    __ cbz(rmethod, L);
+    __ ldr(rscratch1, Address(rmethod, methodOopDesc::from_compiled_offset()));
+    __ cbnz(rscratch1, L);
+    __ stop("Vtable entry is NULL");
+    __ bind(L);
+  }
+  // r0: receiver klass
+  // rmethod: methodOopDesc
+  // r2: receiver
+  address ame_addr = __ pc();
+  __ ldr(rscratch1, Address(rmethod, methodOopDesc::from_compiled_offset()));
+  __ br(rscratch1);
+
+  __ flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
+  s->set_exception_points(npe_addr, ame_addr);
+  return s;
+}
+
+
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
+  // Note well: pd_code_size_limit is the absolute minimum we can get
+  // away with.  If you add code here, bump the code stub size
+  // returned by pd_code_size_limit!
+  const int code_length = VtableStub::pd_code_size_limit(false);
+  VtableStub* s = new(code_length) VtableStub(false, itable_index);
+  ResourceMark rm;
+  CodeBuffer cb(s->entry_point(), code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+#ifndef PRODUCT
+  if (CountCompiledCalls) {
+    __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+    __ incrementw(Address(r10));
+  }
+#endif
+
+  // Entry arguments:
+  //  rscratch2: Interface
+  //  j_rarg0: Receiver
+
+  // Free registers (non-args) are r0 (interface), rmethod
+
+  // get receiver (need to skip return address on top of stack)
+
+  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+  // get receiver klass (also an implicit null-check)
+  address npe_addr = __ pc();
+
+  // Most registers are in use; we'll use r0, rmethod, r10, r11
+  __ load_klass(r10, j_rarg0);
+
+  Label throw_icce;
+
+  // Get methodOop and entrypoint for compiler
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             r10, rscratch2, itable_index,
+                             // outputs: method, scan temp. reg
+                             rmethod, r11,
+                             throw_icce);
+
+  // method (rmethod): methodOop
+  // j_rarg0: receiver
+
+#ifdef ASSERT
+  if (DebugVtables) {
+    Label L2;
+    __ cbz(rmethod, L2);
+    __ ldr(rscratch1, Address(rmethod, methodOopDesc::from_compiled_offset()));
+    __ cbnz(rscratch1, L2);
+    __ stop("compiler entrypoint is null");
+    __ bind(L2);
+  }
+#endif // ASSERT
+
+  // rmethod: methodOop
+  // j_rarg0: receiver
+  address ame_addr = __ pc();
+  __ ldr(rscratch1, Address(rmethod, methodOopDesc::from_compiled_offset()));
+  __ br(rscratch1);
+
+  __ bind(throw_icce);
+  __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+
+  __ flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
+  s->set_exception_points(npe_addr, ame_addr);
+  return s;
+}
+
+
+int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
+  int size = DebugVtables ? 216 : 0;
+  if (CountCompiledCalls)
+    size += 6 * 4;
+   // FIXME
+  if (is_vtable_stub)
+    size += 52;
+  else
+    size += 104;
+  return size;
+
+  // In order to tune these parameters, run the JVM with VM options
+  // +PrintMiscellaneous and +WizardMode to see information about
+  // actual itable stubs.  Run it with -Xmx31G -XX:+UseCompressedOops.
+  //
+  // If Universe::narrow_klass_base is nonzero, decoding a compressed
+  // class can take several instructions.  Run it with -Xmx31G
+  // -XX:+UseCompressedOops.
+  //
+  // The JVM98 app. _202_jess has a megamorphic interface call.
+  // The itable code looks like this:
+  // Decoding VtableStub itbl[1]@12
+  //     ldr     w10, [x1,#8]
+  //     lsl     x10, x10, #3
+  //     ldr     w11, [x10,#280]
+  //     add     x11, x10, x11, uxtx #3
+  //     add     x11, x11, #0x1b8
+  //     ldr     x12, [x11]
+  //     cmp     x9, x12
+  //     b.eq    success
+  // loop:
+  //     cbz     x12, throw_icce
+  //     add     x11, x11, #0x10
+  //     ldr     x12, [x11]
+  //     cmp     x9, x12
+  //     b.ne    loop
+  // success:
+  //     ldr     x11, [x11,#8]
+  //     ldr     x12, [x10,x11]
+  //     ldr     x8, [x12,#72]
+  //     br      x8
+  // throw_icce:
+  //     b      throw_ICCE_entry
+}
+
+int VtableStub::pd_code_alignment() { return 4; }
--- a/src/cpu/ppc/vm/ppc.ad	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/ppc/vm/ppc.ad	Mon Apr 13 16:44:26 2020 +0100
@@ -6638,11 +6638,11 @@
   interface(CONST_INTER);
 %}
 
-// constant 'float +0.0'.
+// Float Immediate: +0.0f.
 operand immF_0() %{
-  predicate((n->getf() == 0) &&
-            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
+  predicate(jint_cast(n->getf()) == 0);
   match(ConF);
+  
   op_cost(0);
   format %{ %}
   interface(CONST_INTER);
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "macroAssembler_ppc.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/java.hpp"
+#include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "utilities/defaultStream.hpp"
 #include "vm_version_ppc.hpp"
@@ -110,7 +111,7 @@
                (has_vand()    ? " vand"    : "")
                // Make sure number of %s matches num_features!
               );
-  _features_str = strdup(buf);
+  _features_str = os::strdup(buf);
   NOT_PRODUCT(if (Verbose) print_features(););
 
   // PPC64 supports 8-byte compare-exchange operations (see
@@ -142,6 +143,11 @@
     AllocatePrefetchStyle = 1; // Fall back if inappropriate.
 
   assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
+  
+  // This machine allows unaligned memory accesses
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
 }
 
 void VM_Version::print_features() {
--- a/src/cpu/sparc/vm/sparc.ad	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/sparc/vm/sparc.ad	Mon Apr 13 16:44:26 2020 +0100
@@ -3766,13 +3766,9 @@
   interface(CONST_INTER);
 %}
 
+// Double Immediate: +0.0d
 operand immD0() %{
-#ifdef _LP64
-  // on 64-bit architectures this comparision is faster
   predicate(jlong_cast(n->getd()) == 0);
-#else
-  predicate((n->getd() == 0) && (fpclass(n->getd()) == FP_PZERO));
-#endif
   match(ConD);
 
   op_cost(0);
@@ -3789,9 +3785,9 @@
   interface(CONST_INTER);
 %}
 
-// Float Immediate: 0
-operand immF0() %{
-  predicate((n->getf() == 0) && (fpclass(n->getf()) == FP_PZERO));
+// Float Immediate: +0.0f
+operand immF0() %{ 
+  predicate(jint_cast(n->getf()) == 0);  
   match(ConF);
 
   op_cost(0);
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #include "assembler_sparc.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/java.hpp"
+#include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "vm_version_sparc.hpp"
 #ifdef TARGET_OS_FAMILY_linux
@@ -37,6 +38,7 @@
 
 int VM_Version::_features = VM_Version::unknown_m;
 const char* VM_Version::_features_str = "";
+unsigned int VM_Version::_L2_data_cache_line_size = 0;
 
 void VM_Version::initialize() {
   _features = determine_features();
@@ -205,7 +207,7 @@
   }
 
   assert(BlockZeroingLowLimit > 0, "invalid value");
-  if (has_block_zeroing()) {
+  if (has_block_zeroing() && cache_line_size > 0) {
     if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
       FLAG_SET_DEFAULT(UseBlockZeroing, true);
     }
@@ -215,7 +217,7 @@
   }
 
   assert(BlockCopyLowLimit > 0, "invalid value");
-  if (has_block_zeroing()) { // has_blk_init() && is_T4(): core's local L2 cache
+  if (has_block_zeroing() && cache_line_size > 0) { // has_blk_init() && is_T4(): core's local L2 cache
     if (FLAG_IS_DEFAULT(UseBlockCopy)) {
       FLAG_SET_DEFAULT(UseBlockCopy, true);
     }
@@ -260,7 +262,7 @@
                (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
 
   // buf is started with ", " or is empty
-  _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf);
+  _features_str = os::strdup(strlen(buf) > 2 ? buf + 2 : buf);
 
   // UseVIS is set to the smallest of what hardware supports and what
   // the command line requires.  I.e., you cannot set UseVIS to 3 on
@@ -309,8 +311,16 @@
     }
   }
 
+  // This machine does not allow unaligned memory accesses
+  if (UseUnalignedAccesses) {
+    if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
+      warning("Unaligned memory access is not available on this CPU");
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+  }
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
+    tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0) {
       tty->print_cr(": no prefetching");
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -90,6 +90,9 @@
   static int  _features;
   static const char* _features_str;
 
+  static unsigned int _L2_data_cache_line_size;
+  static unsigned int L2_data_cache_line_size() { return _L2_data_cache_line_size; }
+
   static void print_features();
   static int  determine_features();
   static int  platform_features(int features);
@@ -158,9 +161,8 @@
 
   static const char* cpu_features()     { return _features_str; }
 
-  static intx prefetch_data_size()  {
-    return is_T4() && !is_T7() ? 32 : 64;  // default prefetch block size on sparc
-  }
+  // default prefetch block size on sparc
+  static intx prefetch_data_size()      { return L2_data_cache_line_size();  }
 
   // Prefetch
   static intx prefetch_copy_interval_in_bytes() {
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -11085,7 +11085,7 @@
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -11100,7 +11100,7 @@
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -11359,7 +11359,8 @@
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }
 
@@ -11493,7 +11494,8 @@
 
         BIND(L_check_fill_8_bytes);
         // clean upper bits of YMM registers
-        vzeroupper();
+        movdl(xtmp, value);
+        pshufd(xtmp, xtmp, 0);
       } else {
         // Fill 32-byte chunks
         pshufd(xtmp, xtmp, 0);
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -89,7 +89,7 @@
 
 // Heap related flags
 define_pd_global(uintx,PermSize,    ScaleForWordSize(16*M));
-define_pd_global(uintx,MaxPermSize, ScaleForWordSize(64*M));
+define_pd_global(uintx,MaxPermSize, ScaleForWordSize(128*M));
 
 // Ergonomics related flags
 define_pd_global(bool, NeverActAsServerClassMachine, false);
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -867,7 +867,8 @@
 
     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
     __ addl(qword_count, 8);
     __ jccb(Assembler::zero, L_exit);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1357,7 +1357,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
@@ -1434,7 +1435,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -26,6 +26,7 @@
 #include "assembler_x86.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/java.hpp"
+#include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "vm_version_x86.hpp"
 #ifdef TARGET_OS_FAMILY_linux
@@ -454,7 +455,7 @@
                (supports_tsc() ? ", tsc": ""),
                (supports_tscinv_bit() ? ", tscinvbit": ""),
                (supports_tscinv() ? ", tscinv": ""));
-  _features_str = strdup(buf);
+  _features_str = os::strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
   // the command line requires.  I.e., you cannot set UseSSE to 2 on
@@ -752,6 +753,11 @@
   PrefetchFieldsAhead         = prefetch_fields_ahead();
 #endif
 
+  // This machine allows unaligned memory accesses
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("Logical CPUs per core: %u",
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -521,7 +521,7 @@
       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
                cores_per_cpu();
     }
-    return result;
+    return (result == 0 ? 1 : result);
   }
 
   static intx prefetch_data_size()  {
--- a/src/cpu/x86/vm/x86_64.ad	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/x86/vm/x86_64.ad	Mon Apr 13 16:44:26 2020 +0100
@@ -3948,6 +3948,23 @@
   %}
 %}
 
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP reg (ConvI2L idx)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
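// Standalone illustration of why the predicate above requires the index
// type to have _lo >= 0: the matched 32-bit index register is used directly
// in a 64-bit addressing mode, which is effectively a zero extension, while
// ConvI2L semantics are sign extension.  The two agree only for
// non-negative indices.  (A sketch of the reasoning, not HotSpot code.)
#include <cassert>
#include <cstdint>

static int64_t sign_extended(int32_t i) { return (int64_t)i; }            // ConvI2L
static int64_t zero_extended(int32_t i) { return (int64_t)(uint32_t)i; }  // 32-bit reg as 64-bit index

int main() {
  assert(sign_extended(42) == zero_extended(42));   // safe to fold into [base + idx + disp]
  assert(sign_extended(-1) != zero_extended(-1));   // folding would compute the wrong address
  return 0;
}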
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -4099,6 +4116,23 @@
   %}
 %}
 
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -4283,11 +4317,11 @@
 // case of this is memory operands.
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
-               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
+               indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
                indCompressedOopOffset,
                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
-               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
+               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -5321,6 +5355,17 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
+%{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 %{
   match(Set dst mem);
@@ -5405,6 +5450,18 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 %{
   predicate(Universe::narrow_oop_shift() == 0);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/arm32JIT.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,8583 @@
+/*
+ * Copyright 2009, 2010 Edward Nevill
+ * Copyright 2012, 2013 Red Hat
+ * Copyright 2014, 2015 Linaro Ltd
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#if defined(__arm__)
+
+#undef T2JIT
+#if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
+#define T2JIT
+#endif
+
+unsigned int Thumb2 = 1;
+
+#ifdef T2JIT
+
+// setting DISABLE_THUMB2_JVMTI at build time disables notification
+// of JVMTI dynamic_generate and compiled_method_load events
+#undef THUMB2_JVMTI
+#if !defined(DISABLE_THUMB2_JVMTI)
+#define THUMB2_JVMTI
+#endif
+
+#define T2_PRINT_COMPILATION
+#define T2_PRINT_STATISTICS
+#define T2_PRINT_DISASS
+#define T2_PRINT_REGUSAGE
+
+#define T2EE_PRINT_REGUSAGE
+#define CODE_ALIGN 64
+
+#define SLOW_ENTRY_OFFSET 24
+#define FAST_ENTRY_OFFSET 40
+
+#ifdef T2_PRINT_STATISTICS
+static char *t2_print_statistics;
+#endif
+
+#ifdef T2_PRINT_REGUSAGE
+static char *t2_print_regusage;
+#endif
+
+static char *t2_ospace;
+#define OSPACE t2_ospace
+
+#ifdef PRODUCT
+#define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
+#else
+#define THUMB2_CODEBUF_SIZE (4 * 1024 * 1024)
+#endif
+#define THUMB2_MAX_BYTECODE_SIZE 10000
+#define THUMB2_MAX_T2CODE_SIZE 65000
+#define THUMB2_MAXLOCALS 1000
+
+#include <sys/mman.h>
+#include <ucontext.h>
+#include "precompiled.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "compiler/compilerOracle.hpp"
+
+#define opc_nop			0x00
+#define opc_aconst_null		0x01
+#define opc_iconst_m1		0x02
+#define opc_iconst_0		0x03
+#define opc_iconst_1		0x04
+#define opc_iconst_2		0x05
+#define opc_iconst_3		0x06
+#define opc_iconst_4		0x07
+#define opc_iconst_5		0x08
+#define opc_lconst_0		0x09
+#define opc_lconst_1		0x0a
+#define opc_fconst_0		0x0b
+#define opc_fconst_1		0x0c
+#define opc_fconst_2		0x0d
+#define opc_dconst_0		0x0e
+#define opc_dconst_1		0x0f
+#define opc_bipush		0x10
+#define opc_sipush		0x11
+#define opc_ldc			0x12
+#define opc_ldc_w		0x13
+#define opc_ldc2_w		0x14
+#define opc_iload		0x15
+#define opc_lload		0x16
+#define opc_fload		0x17
+#define opc_dload		0x18
+#define opc_aload		0x19
+#define opc_iload_0		0x1a
+#define opc_iload_1		0x1b
+#define opc_iload_2		0x1c
+#define opc_iload_3		0x1d
+#define opc_lload_0		0x1e
+#define opc_lload_1		0x1f
+#define opc_lload_2		0x20
+#define opc_lload_3		0x21
+#define opc_fload_0		0x22
+#define opc_fload_1		0x23
+#define opc_fload_2		0x24
+#define opc_fload_3		0x25
+#define opc_dload_0		0x26
+#define opc_dload_1		0x27
+#define opc_dload_2		0x28
+#define opc_dload_3		0x29
+#define opc_aload_0		0x2a
+#define opc_aload_1		0x2b
+#define opc_aload_2		0x2c
+#define opc_aload_3		0x2d
+#define opc_iaload		0x2e
+#define opc_laload		0x2f
+#define opc_faload		0x30
+#define opc_daload		0x31
+#define opc_aaload		0x32
+#define opc_baload		0x33
+#define opc_caload		0x34
+#define opc_saload		0x35
+#define opc_istore		0x36
+#define opc_lstore		0x37
+#define opc_fstore		0x38
+#define opc_dstore		0x39
+#define opc_astore		0x3a
+#define opc_istore_0		0x3b
+#define opc_istore_1		0x3c
+#define opc_istore_2		0x3d
+#define opc_istore_3		0x3e
+#define opc_lstore_0		0x3f
+#define opc_lstore_1		0x40
+#define opc_lstore_2		0x41
+#define opc_lstore_3		0x42
+#define opc_fstore_0		0x43
+#define opc_fstore_1		0x44
+#define opc_fstore_2		0x45
+#define opc_fstore_3		0x46
+#define opc_dstore_0		0x47
+#define opc_dstore_1		0x48
+#define opc_dstore_2		0x49
+#define opc_dstore_3		0x4a
+#define opc_astore_0		0x4b
+#define opc_astore_1		0x4c
+#define opc_astore_2		0x4d
+#define opc_astore_3		0x4e
+#define opc_iastore		0x4f
+#define opc_lastore		0x50
+#define opc_fastore		0x51
+#define opc_dastore		0x52
+#define opc_aastore		0x53
+#define opc_bastore		0x54
+#define opc_castore		0x55
+#define opc_sastore		0x56
+#define opc_pop			0x57
+#define opc_pop2		0x58
+#define opc_dup			0x59
+#define opc_dup_x1		0x5a
+#define opc_dup_x2		0x5b
+#define opc_dup2		0x5c
+#define opc_dup2_x1		0x5d
+#define opc_dup2_x2		0x5e
+#define opc_swap		0x5f
+#define opc_iadd		0x60
+#define opc_ladd		0x61
+#define opc_fadd		0x62
+#define opc_dadd		0x63
+#define opc_isub		0x64
+#define opc_lsub		0x65
+#define opc_fsub		0x66
+#define opc_dsub		0x67
+#define opc_imul		0x68
+#define opc_lmul		0x69
+#define opc_fmul		0x6a
+#define opc_dmul		0x6b
+#define opc_idiv		0x6c
+#define opc_ldiv		0x6d
+#define opc_fdiv		0x6e
+#define opc_ddiv		0x6f
+#define opc_irem		0x70
+#define opc_lrem		0x71
+#define opc_frem		0x72
+#define opc_drem		0x73
+#define opc_ineg		0x74
+#define opc_lneg		0x75
+#define opc_fneg		0x76
+#define opc_dneg		0x77
+#define opc_ishl		0x78
+#define opc_lshl		0x79
+#define opc_ishr		0x7a
+#define opc_lshr		0x7b
+#define opc_iushr		0x7c
+#define opc_lushr		0x7d
+#define opc_iand		0x7e
+#define opc_land		0x7f
+#define opc_ior			0x80
+#define opc_lor			0x81
+#define opc_ixor		0x82
+#define opc_lxor		0x83
+#define opc_iinc		0x84
+#define opc_i2l			0x85
+#define opc_i2f			0x86
+#define opc_i2d			0x87
+#define opc_l2i			0x88
+#define opc_l2f			0x89
+#define opc_l2d			0x8a
+#define opc_f2i			0x8b
+#define opc_f2l			0x8c
+#define opc_f2d			0x8d
+#define opc_d2i			0x8e
+#define opc_d2l			0x8f
+#define opc_d2f			0x90
+#define opc_i2b			0x91
+#define opc_i2c			0x92
+#define opc_i2s			0x93
+#define opc_lcmp		0x94
+#define opc_fcmpl		0x95
+#define opc_fcmpg		0x96
+#define opc_dcmpl		0x97
+#define opc_dcmpg		0x98
+#define opc_ifeq		0x99
+#define opc_ifne		0x9a
+#define opc_iflt		0x9b
+#define opc_ifge		0x9c
+#define opc_ifgt		0x9d
+#define opc_ifle		0x9e
+#define opc_if_icmpeq		0x9f
+#define opc_if_icmpne		0xa0
+#define opc_if_icmplt		0xa1
+#define opc_if_icmpge		0xa2
+#define opc_if_icmpgt		0xa3
+#define opc_if_icmple		0xa4
+#define opc_if_acmpeq		0xa5
+#define opc_if_acmpne		0xa6
+#define opc_goto		0xa7
+#define opc_jsr			0xa8
+#define opc_ret			0xa9
+#define opc_tableswitch		0xaa
+#define opc_lookupswitch	0xab
+#define opc_ireturn		0xac
+#define opc_lreturn		0xad
+#define opc_freturn		0xae
+#define opc_dreturn		0xaf
+#define opc_areturn		0xb0
+#define opc_return		0xb1
+#define opc_getstatic		0xb2
+#define opc_putstatic		0xb3
+#define opc_getfield		0xb4
+#define opc_putfield		0xb5
+#define opc_invokevirtual	0xb6
+#define opc_invokespecial	0xb7
+#define opc_invokestatic	0xb8
+#define opc_invokeinterface	0xb9
+#define opc_invokedynamic	0xba
+#define opc_new			0xbb
+#define opc_newarray		0xbc
+#define opc_anewarray		0xbd
+#define opc_arraylength		0xbe
+#define opc_athrow		0xbf
+#define opc_checkcast		0xc0
+#define opc_instanceof		0xc1
+#define opc_monitorenter	0xc2
+#define opc_monitorexit		0xc3
+#define opc_wide		0xc4
+#define opc_multianewarray	0xc5
+#define opc_ifnull		0xc6
+#define opc_ifnonnull		0xc7
+#define opc_goto_w		0xc8
+#define opc_jsr_w		0xc9
+#define opc_breakpoint		0xca
+
+#define OPC_LAST_JAVA_OP	0xca
+
+#define opc_fast_aldc		Bytecodes::_fast_aldc
+#define opc_fast_aldc_w		Bytecodes::_fast_aldc_w
+
+#define opc_bgetfield			0xcc
+#define opc_cgetfield			0xcd
+#define opc_igetfield			0xd0
+#define opc_lgetfield			0xd1
+#define opc_sgetfield			0xd2
+#define opc_aputfield			0xd3
+#define opc_bputfield			0xd4
+#define opc_cputfield			0xd5
+#define opc_iputfield			0xd8
+#define opc_lputfield			0xd9
+#define opc_iaccess_0			0xdb
+#define opc_iaccess_1			0xdc
+#define opc_iaccess_2			0xdd
+#define opc_iaccess_3			0xde
+#define opc_invokeresolved		0xdf
+#define opc_invokespecialresolved	0xe0
+#define opc_invokestaticresolved	0xe1
+#define opc_invokevfinal		0xe2
+#define opc_iload_iload			0xe3
+
+#define opc_return_register_finalizer   0xe7
+#define opc_dmac                        0xe8
+#define opc_iload_0_iconst_N            0xe9
+#define opc_iload_1_iconst_N            0xea
+#define opc_iload_2_iconst_N            0xeb
+#define opc_iload_3_iconst_N            0xec
+#define opc_iload_iconst_N              0xed
+#define opc_iadd_istore_N               0xee
+#define opc_isub_istore_N               0xef
+#define opc_iand_istore_N               0xf0
+#define opc_ior_istore_N                0xf1
+#define opc_ixor_istore_N               0xf2
+#define opc_iadd_u4store                0xf3
+#define opc_isub_u4store                0xf4
+#define opc_iand_u4store                0xf5
+#define opc_ior_u4store                 0xf6
+#define opc_ixor_u4store                0xf7
+#define opc_iload_0_iload               0xf8
+#define opc_iload_1_iload               0xf9
+#define opc_iload_2_iload               0xfa
+#define opc_iload_3_iload               0xfb
+#define opc_iload_0_iload_N             0xfc
+#define opc_iload_1_iload_N             0xfd
+#define opc_iload_2_iload_N             0xfe
+#define opc_iload_3_iload_N             0xff
+
+
+#define H_IREM				0
+#define H_IDIV				1
+#define H_LDIV				2
+#define H_LREM				3
+#define H_FREM				4
+#define H_DREM				5
+#define	H_LDC				6
+#define H_NEW				8
+#define H_I2F				9
+#define H_I2D				10
+#define H_L2F				11
+#define H_L2D				12
+#define H_F2I				13
+#define H_F2L				14
+#define H_F2D				15
+#define H_D2I				16
+#define H_D2L				17
+#define H_D2F				18
+#define H_NEWARRAY			19
+#define H_ANEWARRAY			20
+#define H_MULTIANEWARRAY		21
+#define H_INSTANCEOF			22
+#define H_CHECKCAST			23
+#define H_AASTORE			24
+#define H_APUTFIELD			25
+#define H_SYNCHRONIZED_ENTER		26
+#define H_SYNCHRONIZED_EXIT		27
+
+#define H_EXIT_TO_INTERPRETER		28
+
+#define H_RET				H_EXIT_TO_INTERPRETER
+#define H_DEADCODE			H_EXIT_TO_INTERPRETER
+#define H_ATHROW			H_EXIT_TO_INTERPRETER
+
+#define H_HANDLE_EXCEPTION		29
+#define H_ARRAYBOUND			30
+
+#define H_LDC_W				31
+
+#define H_DEBUG_METHODENTRY		32
+#define H_DEBUG_METHODEXIT		33
+#define H_DEBUG_METHODCALL		34
+
+#define H_INVOKEINTERFACE		35
+#define H_INVOKEVIRTUAL			36
+#define H_INVOKESTATIC			37
+#define H_INVOKESPECIAL			38
+
+#define H_GETFIELD_WORD			39
+#define H_GETFIELD_SH			40
+#define H_GETFIELD_H			41
+#define H_GETFIELD_SB			42
+#define H_GETFIELD_DW			43
+
+#define H_PUTFIELD_WORD			44
+#define H_PUTFIELD_H			45
+#define H_PUTFIELD_B			46
+#define H_PUTFIELD_A			47
+#define H_PUTFIELD_DW			48
+
+#define H_GETSTATIC_WORD		49
+#define H_GETSTATIC_SH			50
+#define H_GETSTATIC_H			51
+#define H_GETSTATIC_SB			52
+#define H_GETSTATIC_DW			53
+
+#define H_PUTSTATIC_WORD		54
+#define H_PUTSTATIC_H			55
+#define H_PUTSTATIC_B			56
+#define H_PUTSTATIC_A			57
+#define H_PUTSTATIC_DW			58
+
+#define H_STACK_OVERFLOW		59
+
+#define H_HANDLE_EXCEPTION_NO_REGS	60
+
+#define H_INVOKESTATIC_RESOLVED		61
+#define H_INVOKESPECIAL_RESOLVED	62
+#define H_INVOKEVIRTUAL_RESOLVED	63
+#define H_INVOKEVFINAL			64
+
+#define H_MONITORENTER			65
+#define H_MONITOREXIT			66
+
+#define H_SAFEPOINT              	67
+
+#define H_LAST                          68  // Not used
+
+unsigned handlers[H_LAST];
+
+#define LEAF_STACK_SIZE			200
+#define STACK_SPARE			40
+
+#define COMPILER_RESULT_FAILED	1	// Failed to compile this method
+#define COMPILER_RESULT_FATAL	2	// Fatal - don't try to compile anything ever again
+
+#include <setjmp.h>
+
+static jmp_buf compiler_error_env;
+
+#define J_BogusImplementation() longjmp(compiler_error_env, COMPILER_RESULT_FAILED)
+
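// Minimal standalone sketch of the setjmp/longjmp bailout used by
// J_BogusImplementation(): the compiler driver establishes a recovery
// point with setjmp, and any deeply nested routine can abandon the
// compilation by longjmp-ing back with a result code.  The function
// names below are illustrative only.
#include <csetjmp>
#include <cstdio>

static jmp_buf error_env;
enum { RESULT_OK = 0, RESULT_FAILED = 1, RESULT_FATAL = 2 };

static void deeply_nested_compile_step(bool supported) {
  if (!supported)
    longjmp(error_env, RESULT_FAILED);   // abandon this compilation
}

static int compile_method(bool supported) {
  int rc = setjmp(error_env);
  if (rc != 0) return rc;                // arrived here via longjmp
  deeply_nested_compile_step(supported);
  return RESULT_OK;
}

int main() {
  std::printf("supported:   %d\n", compile_method(true));
  std::printf("unsupported: %d\n", compile_method(false));
  return 0;
}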
+#ifdef PRODUCT
+
+//#define JASSERT(cond, msg)	
+//#define J_Unimplemented() longjmp(compiler_error_env, COMPILER_RESULT_FATAL)
+#define JASSERT(cond, msg)	do { if (!(cond)) fatal(msg); } while (0)
+#define J_Unimplemented()       { report_unimplemented(__FILE__, __LINE__); BREAKPOINT; }
+#define JDEBUG_( _j_ )        
+
+#else
+
+#define JASSERT(cond, msg)	do { if (!(cond)) fatal(msg); } while (0)
+#define J_Unimplemented()       { report_unimplemented(__FILE__, __LINE__); BREAKPOINT; }
+#define JDEBUG_( _j_ )          _j_
+
+#endif // PRODUCT
+
+#define GET_NATIVE_U2(p)	(*(unsigned short *)(p))
+#define GET_NATIVE_U4(p)	(*(unsigned *)(p))
+
+#define GET_JAVA_S1(p)		(((signed char *)(p))[0])
+#define GET_JAVA_S2(p)  	((((signed char *)(p))[0] << 8) + (p)[1])
+#define GET_JAVA_U2(p)		(((p)[0] << 8) + (p)[1])
+#define GET_JAVA_U4(p)		(((p)[0] << 24) + ((p)[1] << 16) + ((p)[2] << 8) + (p)[3])
+
+#define BYTESEX_REVERSE(v) (((v)<<24) | (((v)<<8) & 0xff0000) | (((v)>>8) & 0xff00) | ((v)>>24))
+#define BYTESEX_REVERSE_U2(v) (((v)<<8) | ((v)>>8))
+
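// Standalone check of the byte-order helpers above: Java class-file data
// is big-endian, so GET_JAVA_U2/U4 assemble the value byte by byte and
// give the same answer on any host.  The two macros are copied verbatim.
#include <cassert>

#define GET_JAVA_U2(p)  (((p)[0] << 8) + (p)[1])
#define GET_JAVA_U4(p)  (((p)[0] << 24) + ((p)[1] << 16) + ((p)[2] << 8) + (p)[3])

int main() {
  const unsigned char buf[] = { 0x12, 0x34, 0x56, 0x78 };
  assert(GET_JAVA_U2(buf) == 0x1234);
  assert(GET_JAVA_U4(buf) == 0x12345678u);
  return 0;
}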
+// n.b. this value is chosen because it is an illegal thumb2 instruction
+#define THUMB2_POLLING_PAGE_MAGIC 0xdead
+#define ARM_POLLING_PAGE_MAGIC    0xf0bef0be
+
+typedef struct Thumb2_CodeBuf {
+  unsigned size;
+  char *sp;
+  char *hp;
+} Thumb2_CodeBuf;
+
+Thumb2_CodeBuf *thumb2_codebuf;
+
+// We support any arch >= ARM v6. The differences in the archs are:-
+// ARMv6: 	No Thumb2 support, No LDREXD/STREXD
+// ARMv6k:	No Thumb2 support, LDREXD/STREXD
+// >=ARMv6t2:	Thumb2 support, LDREXD/STREXD
+// Note: support for vanilla ARMv6 (as on the Raspberry Pi) is only available
+// on non-MP systems because the necessary LDREXD/STREXD instructions are
+// missing; fortunately the Raspberry Pi is single core.
+#define ARCH_GE_V6(cpuinfo)	(((cpuinfo) & 0xffff) >= (1<<6))
+#define ARCH_IS_V6(cpuinfo)	(((cpuinfo) & 0xffff) == (1<<6))
+#define ARCH_GE_V6T2(cpuinfo)   (ARCH_GE_V6(cpuinfo) && ((cpuinfo) & ARCH_THUMB2))
+#define ARCH_GE_V6K(cpuinfo)	(ARCH_GE_V6T2(cpuinfo) || ARCH_IS_V6(cpuinfo) && os::is_MP())
+
+extern unsigned CPUInfo;
+
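// Standalone sketch of the CPUInfo encoding implied by the macros above:
// the low 16 bits hold (1 << architecture_version), and a separate bit
// flags Thumb2 support.  ARCH_THUMB2's real value is defined elsewhere in
// the port; the bit used here is a stand-in for illustration only.
#include <cassert>

#define ARCH_THUMB2_STANDIN  (1u << 16)            // assumption: some bit above the low 16
#define ARCH_GE_V6(ci)       (((ci) & 0xffff) >= (1u << 6))
#define ARCH_GE_V6T2(ci)     (ARCH_GE_V6(ci) && ((ci) & ARCH_THUMB2_STANDIN))

int main() {
  unsigned v5    = (1u << 5);                        // too old
  unsigned v6    = (1u << 6);                        // plain ARMv6, no Thumb2
  unsigned v7_t2 = (1u << 7) | ARCH_THUMB2_STANDIN;  // ARMv7 with Thumb2
  assert(!ARCH_GE_V6(v5));
  assert(ARCH_GE_V6(v6) && !ARCH_GE_V6T2(v6));
  assert(ARCH_GE_V6T2(v7_t2));
  return 0;
}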
+unsigned bc_stackinfo[THUMB2_MAX_BYTECODE_SIZE];
+unsigned locals_info[1000];
+unsigned stack[1000];
+unsigned r_local[1000];
+
+#ifdef THUMB2_JVMTI
+// jvmti needs to map start address of generated code for a bytecode
+// to corresponding bytecode index so agents can correlate code address
+// ranges with bci and thence line number
+static jvmtiAddrLocationMap *address_bci_map = NULL;
+static jint address_bci_map_length = 0;
+
+static void *stub_gen_code_start = 0;
+static void *stub_gen_code_end = 0;
+
+// function used to lazily initialize the address to bci translation map
+// the first time a compiled method is generated.
+static void address_bci_map_init(JavaThread *thread)
+{
+  // The dynamic_code_generated event announcing generation of the
+  // stub code has to be posted lazily, because generation happens in
+  // Thumb2_Initialize under bci_init and the latter is called as a
+  // side-effect of loading libjvm.o.  We don't have a Java thread at
+  // that point nor, indeed, any agents to catch the notification, so
+  // the info cached by Thumb2_Initialize needs to be posted when the
+  // first compiled method load event is notified, at which point we
+  // will indeed have a current thread.
+
+  {
+    // a thread transition from in Java to in VM is required before
+    // calling into Jvmti
+
+    ThreadInVMfromJava transition(thread);
+
+    JvmtiExport::post_dynamic_code_generated("thumb2_dynamic_stubs_block",
+					   stub_gen_code_start,
+					   stub_gen_code_end);
+
+    // n.b. exiting this block reverts the thread state to in Java
+  }
+  
+
+  // the map is lazily allocated so we don't use the space unless we
+  // are actually using the JIT
+
+  // at worst we need a start address for every bytecode so
+  // the map size is limited by the compiler's bytecode limit
+  address_bci_map = new jvmtiAddrLocationMap[THUMB2_MAX_BYTECODE_SIZE];
+}
+
+// clear the address to bci translation map
+static void address_bci_map_reset(JavaThread *thread)
+{
+  // this only gets called after obtaining the compiler lock so there
+  // is no need to worry about races
+  
+  if (address_bci_map == NULL) {
+    address_bci_map_init(thread);
+  }
+
+  // this effectively clears the previous map
+
+  address_bci_map_length = 0;
+}
+
+// add an entry to the address to bci translation map
+// this will never exceed the available space
+static void address_bci_map_add(void *addr, unsigned bci)
+{
+    address_bci_map[address_bci_map_length].start_address = addr;
+    address_bci_map[address_bci_map_length].location = bci;
+    address_bci_map_length++;
+}
+#endif // THUMB2_JVMTI
+
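// Minimal sketch of the address -> bci map lifecycle described above:
// allocate lazily on first use, clear (by resetting the length) at the
// start of each compilation, and append one entry per generated bytecode.
// A plain struct stands in for jvmtiAddrLocationMap and no JVMTI events
// are posted; this only illustrates the bookkeeping.
#include <cstdio>

struct AddrLocation { void* start_address; unsigned location; };

static AddrLocation*  map     = nullptr;
static unsigned       map_len = 0;
static const unsigned MAP_CAP = 10000;   // mirrors the bytecode-count bound

static void map_reset() {
  if (map == nullptr) map = new AddrLocation[MAP_CAP];  // lazy allocation
  map_len = 0;                                          // drops the previous method's entries
}

static void map_add(void* addr, unsigned bci) {
  map[map_len].start_address = addr;
  map[map_len].location = bci;
  map_len++;
}

int main() {
  char code[16];
  map_reset();
  map_add(&code[0], 0);
  map_add(&code[4], 3);
  std::printf("%u entries recorded\n", map_len);
  return 0;
}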
+#ifdef T2_PRINT_DISASS
+
+short start_bci[THUMB2_MAX_T2CODE_SIZE];
+#define START_BCI(idx) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? start_bci[idx] : -1)
+#define SET_START_BCI(idx, bci) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? (start_bci[idx] = (bci)) : 0)
+
+short end_bci[THUMB2_MAX_T2CODE_SIZE];
+#define END_BCI(idx) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? end_bci[idx] : -1)
+#define SET_END_BCI(idx, bci) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? (end_bci[idx] = (bci)) : 0)
+
+#else
+
+#define START_BCI(idx)	-1
+#define SET_START_BCI(idx, bci) 0
+#define END_BCI(idx)	-1
+#define SET_END_BCI(idx, bci) 0
+
+#endif
+
+bool DebugSwitch = false;
+
+// XXX hardwired constants!
+#define ENTRY_FRAME             1
+#define INTERPRETER_FRAME       2
+#define SHARK_FRAME             3
+#define FAKE_STUB_FRAME         4
+
+#include "offsets_arm.s"
+
+#define BC_FLAGS_MASK		0xf0000000
+#define BC_VISITED		0x80000000
+#define BC_BRANCH_TARGET	0x40000000
+#define BC_COMPILED		0x20000000
+#define BC_BACK_TARGET		0x10000000
+
+#define IS_DEAD(x)	(((x) & BC_VISITED) == 0)
+
+#define LOCAL_MODIFIED		31
+#define LOCAL_REF		30
+#define LOCAL_DOUBLE		29
+#define LOCAL_FLOAT		28
+#define LOCAL_LONG		27
+#define LOCAL_INT		26
+#define LOCAL_ALLOCATED		25
+
+#define LOCAL_COUNT_BITS	10
+#define LOCAL_READ_POS		0
+#define LOCAL_WRITE_POS		LOCAL_COUNT_BITS
+
+#define LOCAL_READS(x)		(((x) >> LOCAL_READ_POS) & ((1<<LOCAL_COUNT_BITS)-1))
+#define LOCAL_WRITES(x)		(((x) >> LOCAL_WRITE_POS) & ((1<<LOCAL_COUNT_BITS)-1))
+#define LOCAL_SET_COUNTS(r, w)	(((r) << LOCAL_READ_POS) | (((w) << LOCAL_WRITE_POS)))
+#define LOCAL_INC_COUNT(c)	((c) < ((1<<LOCAL_COUNT_BITS)-1) ? (c)+1 : (c))
+
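// Standalone check of the packed read/write counters above: each local
// keeps a 10-bit read count and a 10-bit write count in one word, and
// LOCAL_INC_COUNT saturates at 1023 instead of wrapping.  The macros are
// copied verbatim from the definitions above.
#include <cassert>

#define LOCAL_COUNT_BITS   10
#define LOCAL_READ_POS     0
#define LOCAL_WRITE_POS    LOCAL_COUNT_BITS
#define LOCAL_READS(x)     (((x) >> LOCAL_READ_POS) & ((1<<LOCAL_COUNT_BITS)-1))
#define LOCAL_WRITES(x)    (((x) >> LOCAL_WRITE_POS) & ((1<<LOCAL_COUNT_BITS)-1))
#define LOCAL_SET_COUNTS(r, w) (((r) << LOCAL_READ_POS) | (((w) << LOCAL_WRITE_POS)))
#define LOCAL_INC_COUNT(c) ((c) < ((1<<LOCAL_COUNT_BITS)-1) ? (c)+1 : (c))

int main() {
  unsigned info = LOCAL_SET_COUNTS(5u, 2u);
  assert(LOCAL_READS(info) == 5 && LOCAL_WRITES(info) == 2);
  assert(LOCAL_INC_COUNT(7u) == 8u);
  assert(LOCAL_INC_COUNT(1023u) == 1023u);   // stays pinned at the maximum
  return 0;
}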
+#define STACK_REGS	4
+#define FP_STACK_REGS	4
+
+typedef unsigned	u32;
+typedef unsigned	Reg;
+
+#define	ARM_R0		0
+#define ARM_R1		1
+#define ARM_R2		2
+#define ARM_R3		3
+#define ARM_R4		4
+#define ARM_R5		5
+#define ARM_R6		6
+#define ARM_R7		7
+#define ARM_R8		8
+#define ARM_R9		9
+#define ARM_R10		10
+#define ARM_R11		11
+#define ARM_IP		12
+#define ARM_SP		13
+#define ARM_LR		14
+#define ARM_PC		15
+#define ARM_CPSR	16	// CPSR in sigcontext
+#define ARM_FAULT	17	// fault address in sigcontext
+
+#define CPSR_THUMB_BIT	(1<<5)
+
+#define VFP_S0		32
+#define VFP_S1		33
+#define VFP_S2		34
+#define VFP_S3		35
+#define VFP_S4		36
+#define VFP_S5		37
+#define VFP_S6		38
+#define VFP_S7		39
+
+#define VFP_D0		64
+#define VFP_D1		65
+#define VFP_D2		66
+#define VFP_D3		67
+#define VFP_D4		68
+#define VFP_D5		69
+#define VFP_D6		70
+#define VFP_D7		71
+
+#define PREGS	6
+
+#define JAZ_V1	ARM_R5
+#define JAZ_V2	ARM_R6
+#define JAZ_V3	ARM_R7
+#define JAZ_V4	ARM_R8
+#define JAZ_V5	ARM_R9
+#define JAZ_V6	ARM_R11
+
+#define Rstack		ARM_R4
+#define Rlocals		ARM_R7
+#define Ristate		ARM_R8
+#define Rthread		ARM_R10
+
+#define Rint_jpc	ARM_R5
+
+#define IS_ARM_INT_REG(r) ((r) <= ARM_PC)
+#define IS_ARM_FP_REG(r) (!IS_ARM_INT_REG(r))
+
+#define I_REGSET	((1<<ARM_R4) | (1<<ARM_R5) | (1<<ARM_R6) | (1<<ARM_R7) | \
+			 (1<<ARM_R9) | (1<<ARM_R10) | (1<<ARM_R11))
+#define C_REGSET	(1<<ARM_R8)
+
+#define LOG2(n) binary_log2(n)
+
+unsigned binary_log2(unsigned n)
+{
+  unsigned r = 0;
+  if ((n & 0xffff) == 0) r = 16, n >>= 16;
+  if ((n & 0xff) == 0) r += 8, n >>= 8;
+  if ((n & 0xf) == 0) r += 4, n >>= 4;
+  if ((n & 3) == 0) r += 2, n >>= 2;
+  if ((n & 1) == 0) r += 1;
+  return r;
+}
+
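// Quick standalone check of binary_log2() above: it returns the index of
// the lowest set bit, which is the exact base-2 logarithm when the
// argument is a power of two (the way LOG2 is used in this file).
#include <cassert>

static unsigned binary_log2_copy(unsigned n)   // verbatim copy of the helper above
{
  unsigned r = 0;
  if ((n & 0xffff) == 0) r = 16, n >>= 16;
  if ((n & 0xff) == 0) r += 8, n >>= 8;
  if ((n & 0xf) == 0) r += 4, n >>= 4;
  if ((n & 3) == 0) r += 2, n >>= 2;
  if ((n & 1) == 0) r += 1;
  return r;
}

int main() {
  assert(binary_log2_copy(1) == 0);
  assert(binary_log2_copy(64) == 6);
  assert(binary_log2_copy(1u << 31) == 31);
  assert(binary_log2_copy(12) == 2);   // lowest set bit of 0b1100
  return 0;
}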
+typedef struct Compiled_Method {
+    // All entry points aligned on a cache line boundary
+    //		.align	CODE_ALIGN
+    // slow_entry:				@ callee save interface
+    // 		push	{r4, r5, r6, r7, r9, r10, r11, lr}
+    // 		mov	Rthread, r2
+    // 		bl	fast_entry
+    // 		pop	{r4, r5, r6, r7, r9, r10, r11, pc}
+    unsigned slow_entry[4];
+    unsigned *osr_table;			// pointer to the osr table
+    unsigned *exception_table;
+    Compiled_Method *next;
+    // The next 6 halfwords give the register mapping for JAZ_V1 to JAZ_V6.
+    // This is used when recovering from an exception so we can push
+    // the registers back into the local variables pool.
+    short regusage[6];
+    unsigned header_end[1];
+    // fast_entry:
+    // 		push	{r8, lr}
+    // 		...	@ The compiled code
+    // 		pop	{r8, pc}
+    // 		.align	WORD_ALIGN
+    // code_handle:				@ from interpreted entry
+    // 		.word	slow_entry		@ bottom bit must be set!
+    // osr_table:
+    // 		.word	<no. of entries>
+    // @@@ For bytecode 0 and for each backwards branch target
+    // 		.short	<bytecode index>
+    // 		.short	<code offset>		@ offset in halfwords from slow_entry
+} Compiled_Method;
+
+Compiled_Method *compiled_method_list = 0;
+Compiled_Method **compiled_method_list_tail_ptr = &compiled_method_list;
+
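// Sketch of the tail-pointer idiom set up by the two declarations above:
// keeping a pointer to the last 'next' field makes appends O(1) and needs
// no special case for the empty list.  A simplified node type stands in
// for Compiled_Method.
#include <cstdio>

struct Node { int id; Node* next; };

static Node*  list          = nullptr;
static Node** list_tail_ptr = &list;

static void append(Node* n) {
  n->next = nullptr;
  *list_tail_ptr = n;          // link onto the end (or make it the head)
  list_tail_ptr = &n->next;    // advance the tail pointer
}

int main() {
  Node a = {1, nullptr}, b = {2, nullptr};
  append(&a);
  append(&b);
  for (Node* p = list; p != nullptr; p = p->next)
    std::printf("node %d\n", p->id);
  return 0;
}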
+typedef struct Thumb2_Entrypoint {
+  unsigned compiled_entrypoint;
+  short *regusage;
+} Thumb2_Entrypoint;
+
+typedef struct CodeBuf {
+    unsigned short *codebuf;
+    unsigned idx;
+    unsigned limit;
+} CodeBuf;
+
+typedef struct Thumb2_Stack {
+    unsigned *stack;
+    unsigned depth;
+} Thumb2_Stack;
+
+#define IS_SREG(r) ((r) < STACK_REGS)
+
+typedef struct Thumb2_Registers {
+    unsigned *r_local;
+    unsigned npregs;
+    unsigned pregs[PREGS];
+    int mapping[PREGS];
+} Thumb2_Registers;
+
+typedef struct Thumb2_Info {
+    JavaThread *thread;
+    methodOop method;
+    unsigned *bc_stackinfo;
+    unsigned *locals_info;
+    jubyte *code_base;
+    unsigned code_size;
+    CodeBuf *codebuf;
+    Thumb2_Stack *jstack;
+    Thumb2_Registers *jregs;
+    unsigned compiled_return;
+    unsigned compiled_word_return[12];  // R0 .. R11
+    unsigned is_leaf;
+    unsigned use_istate;
+} Thumb2_Info;
+
+#define IS_INT_SIZE_BASE_TYPE(c) (c=='B' || c=='C' || c=='F' || c=='I' || c=='S' || c=='Z')
+#define IS_INT_SIZE_TYPE(c) (IS_INT_SIZE_BASE_TYPE(c) || c == 'L' || c == '[')
+
+void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
+void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth);
+
+static int method_stackchange(const jbyte *base)
+{
+  jbyte c;
+  int stackchange = 0;
+
+  c = *base++;
+  JASSERT(c == '(', "Invalid signature, missing '('");
+  while ((c = *base++) != ')') {
+    stackchange -= 1;
+    if (c == 'J' || c == 'D') {
+      stackchange -= 1;
+    } else if (c == '[') {
+      do { c = *base++; } while (c == '[');
+      if (c == 'L')
+	do { c = *base++; } while (c != ';');
+    } else if (c == 'L') {
+      do { c = *base++; } while (c != ';');
+    } else {
+      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
+    }
+  }
+  JASSERT(c == ')', "Invalid signature, missing ')'");
+  c = *base++;
+  if (c == 'J' || c == 'D') stackchange += 2;
+  else if (c != 'V') {
+    stackchange += 1;
+    JASSERT(IS_INT_SIZE_TYPE(c), "Invalid signature, bad ret type");
+  }
+  return stackchange;
+}
+
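// Worked example of the net-stack-change computation above, using a
// trimmed copy of the parsing logic (the JASSERT checks are dropped).
// For "(IJLjava/lang/String;)D" the arguments pop 1 + 2 + 1 slots and the
// double result pushes 2, so the net change is -2.
#include <cassert>

static int stackchange_for(const char *base)
{
  char c;
  int stackchange = 0;
  c = *base++;                       // '('
  while ((c = *base++) != ')') {
    stackchange -= 1;
    if (c == 'J' || c == 'D') {
      stackchange -= 1;
    } else if (c == '[') {
      do { c = *base++; } while (c == '[');
      if (c == 'L')
        do { c = *base++; } while (c != ';');
    } else if (c == 'L') {
      do { c = *base++; } while (c != ';');
    }
  }
  c = *base++;                       // return type
  if (c == 'J' || c == 'D') stackchange += 2;
  else if (c != 'V') stackchange += 1;
  return stackchange;
}

int main() {
  assert(stackchange_for("(IJLjava/lang/String;)D") == -2);
  assert(stackchange_for("([I)V") == -1);
  assert(stackchange_for("()J") == 2);
  return 0;
}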
+static void Thumb2_local_info_from_sig(Thumb2_Info *jinfo, methodOop method,
+				       const jbyte *base)
+{
+  jbyte c;
+  unsigned arg = 0;
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned local_info;
+
+  if (!method->is_static()) locals_info[arg++] = 1 << LOCAL_REF;
+  c = *base++;
+  JASSERT(c == '(', "Invalid signature, missing '('");
+  while ((c = *base++) != ')') {
+    local_info = 1 << LOCAL_INT;
+    if (c == 'J') local_info = 1 << LOCAL_LONG;
+    else if (c == 'D') local_info = 1 << LOCAL_DOUBLE;
+    else if (c == '[') {
+      local_info = 1 << LOCAL_REF;
+      do { c = *base++; } while (c == '[');
+      if (c == 'L')
+	do { c = *base++; } while (c != ';');
+    } else if (c == 'L') {
+      local_info = 1 << LOCAL_REF;
+      do { c = *base++; } while (c != ';');
+    } else {
+      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
+    }
+    locals_info[arg++] = local_info;
+  }
+}
+
+#define UNDEFINED_32	0xf7f0a000
+#define UNDEFINED_16	0xde00
+
+static const char *local_types[] = { "int", "long", "float", "double", "ref" };
+
+#ifdef T2_PRINT_DISASS
+
+class Hsdis {
+public:
+
+  typedef void* (*decode_instructions_event_callback_ftype)  (void*, const char*, void*);
+
+  typedef void* (*decode_instructions_ftype)
+    (void* start, void* end,
+     decode_instructions_event_callback_ftype event_callback,
+     void* event_stream,
+     void* printf_callback,
+     void* printf_stream,
+     const char* options);
+
+  decode_instructions_ftype decode_instructions;
+
+  void *lib;
+
+  // Load hsdis-arm.so lazily.
+  Hsdis()
+  {
+    decode_instructions = NULL;
+
+    if (PrintAssembly) {
+      if (lib = dlopen("hsdis-arm.so", RTLD_NOW)) {
+	decode_instructions
+	  = (typeof decode_instructions)dlsym(lib, "decode_instructions");
+      }
+
+      if (! (decode_instructions)) {
+	fprintf (stderr, "PrintAssembly (or T2_PRINT_DISASS) is set, but\n"
+		 "hsdis-arm.so has not been found or is invalid.  If you want to\n"
+		 "see a disassembly, please ensure that a valid copy of\n"
+		 "hsdis-arm.so is present somewhere in your library load path.\n");
+	abort();
+      }
+    }
+  }
+};
+
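// Generic sketch of the lazy dlopen/dlsym lookup performed by the Hsdis
// wrapper above.  The library and symbol names below are placeholders; the
// real code resolves "decode_instructions" from hsdis-arm.so only when
// PrintAssembly is enabled.
#include <dlfcn.h>
#include <cstdio>

typedef int (*plugin_fn)(int);

int main() {
  void* lib = dlopen("libplugin-example.so", RTLD_NOW);   // placeholder library name
  if (lib == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  plugin_fn fn = (plugin_fn)dlsym(lib, "plugin_entry");   // placeholder symbol name
  if (fn == nullptr) {
    std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
    dlclose(lib);
    return 1;
  }
  std::printf("plugin_entry(41) = %d\n", fn(41));
  dlclose(lib);
  return 0;
}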
+static void *print_address(void *stream, const char *tag, void *data);
+
+void Thumb2_disass(Thumb2_Info *jinfo)
+{
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned nlocals = jinfo->method->max_locals();
+  int bci = 0;
+  int last_bci = -1;
+  int start_b, end_b;
+  unsigned nodisass;
+
+  unsigned short *codebuf = jinfo->codebuf->codebuf;
+  unsigned idx, compiled_len;
+
+  static Hsdis hsdis;
+
+  fflush(stdout);
+  fflush(stderr);
+
+  compiled_len = jinfo->codebuf->idx * 2;
+  for (idx = 0; idx < compiled_len; ) {
+    nodisass = 0;
+    start_b = START_BCI(idx/2);
+    end_b = END_BCI(idx/2);
+    if (start_b != -1) {
+      last_bci = -1;
+      for (bci = start_b; bci < end_b; ) {
+	unsigned stackinfo = bc_stackinfo[bci];
+	unsigned opcode;
+	int len;
+
+	if (stackinfo & BC_BRANCH_TARGET)
+	  fprintf(stderr, "----- Basic Block -----\n");
+	JASSERT(bci > last_bci, "disass not advancing");
+	last_bci = bci;
+	fprintf(stderr, "%c%4d : ", (stackinfo & BC_VISITED) ? ' ' : '?', bci);
+	opcode = code_base[bci];
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+	switch (opcode) {
+	  case opc_tableswitch: {
+	    int nbci = (bci & ~3) + 4;
+	    int low, high;
+	    unsigned w;
+	    unsigned *table;
+	    int def;
+	    unsigned n, i;
+
+	    fprintf(stderr, "%02x ", opcode);
+	    for (int i = 1; i < 5; i++)
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
+	    w = *(unsigned int *)(code_base + nbci + 4);
+	    low = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 8);
+	    high = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 0);
+	    def = (int)BYTESEX_REVERSE(w);
+	    table = (unsigned int *)(code_base + nbci + 12);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tlow:\t\t0x%08x\n", low);
+	    fprintf(stderr, "\thigh:\t\t0x%08x\n", high);
+	    n = high - low + 1;
+	    while (low <= high) {
+	      int off;
+
+	      w = *table++;
+	      off = (int)BYTESEX_REVERSE(w);
+	      fprintf(stderr, "\toffset %d:\t0x%08x\n", low, off);
+	      low++;
+	    }
+	    bci += len;
+	    if (Thumb2) {
+	      unsigned short *start = codebuf + idx/2;
+	      unsigned short *p = start;
+              while ((*p++ >> 4) != 0xe8d); p++;
+	      hsdis.decode_instructions((char*)start,
+					(char *)p,
+					print_address, NULL, NULL, stderr,
+					Thumb2 ? "force-thumb" : "");
+              idx += (p - start) * 2;
+	    } else {
+              unsigned *start = (unsigned *)((char *)codebuf + idx);
+              unsigned *p = start;
+              // skip to add pc, pc, ip, lsl #1
+              while (*p++ != 0xe08ff08c);
+	      hsdis.decode_instructions((char*)start,
+					(char *)p,
+					print_address, NULL, NULL, stderr,
+					Thumb2 ? "force-thumb" : "");
+              idx += (p - start) * 4;
+            }
+	    for (i = 0; i < n; i++) {
+	      fprintf(stderr, "0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(unsigned short *)((int)codebuf + idx));
+	      idx += 2;
+	    }
+            if (!Thumb2 && (idx & 2)) {
+	      fprintf(stderr, "0x%08x:\t.pad\t0x%04x\n", (int)codebuf+idx, *(unsigned short *)((int)codebuf + idx));
+              idx += 2;
+            }
+	    {
+	      char *p = (char*)codebuf + idx;
+	      int len = 0;
+	      while (len + idx < compiled_len
+		     && START_BCI((len + idx)/2) == -1)
+		len += 2;
+	      hsdis.decode_instructions((char*)p, (char*)p + len,
+					  print_address, NULL, NULL, stderr,
+					  Thumb2 ? "force-thumb" : "");
+	    }
+	    nodisass = 1;
+	    break;
+	  }
+	  case opc_lookupswitch: {
+	    unsigned w;
+	    unsigned nbci = (bci & ~3) + 4;
+	    int def;
+	    int npairs;	// The Java spec says signed but must be >= 0??
+	    unsigned *table;
+
+	    fprintf(stderr, "%02x ", opcode);
+	    for (int i = 1; i < 5; i++)
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
+
+	    w = *(unsigned int *)(code_base + nbci + 0);
+	    def = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 4);
+	    npairs = (int)BYTESEX_REVERSE(w);
+	    table = (unsigned int *)(code_base + nbci + 8);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tnpairs:\t\t0x%08x\n", npairs);
+	    for (int i = 0; i < npairs; i++) {
+	      unsigned match, off;
+	      w = table[0];
+	      match = BYTESEX_REVERSE(w);
+	      w = table[1];
+	      table += 2;
+	      off = BYTESEX_REVERSE(w);
+	      fprintf(stderr, "\t  match: 0x%08x, offset: 0x%08x\n", match, off);
+	    }
+	    break;
+	  }
+
+	  default:
+	    for (int i = 0; i < 5; i++) {
+	      if (i < len)
+		fprintf(stderr, "%02x ", code_base[bci+i]);
+	      else
+		fprintf(stderr, "   ");
+	    }
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
+	    break;
+	}
+	bci += len;
+      }
+    }
+    if (!nodisass) {
+      {
+	int len;
+	unsigned s1, s2;
+
+	s1 = *(unsigned short *)((int)codebuf + idx);
+	s2 = *(unsigned short *)((int)codebuf + idx + 2);
+	if (s1 == UNDEFINED_16 || ((s1 << 16) + s2) == UNDEFINED_32) {
+	  if (s1 == UNDEFINED_16) {
+	    fprintf(stderr, "undefined (0xde00) - UNPATCHED BRANCH???");
+	    len = 2;
+	  } else {
+	    fprintf(stderr, "undefined (0xf7f0a000) - UNPATCHED BRANCH???");
+	    len = 4;
+	  }
+	} else {
+	  char *p = (char*)codebuf + idx;
+	  len = 2;
+	  while (len + idx < compiled_len
+		 && START_BCI((len + idx)/2) == -1)
+	    len += 2;
+	  hsdis.decode_instructions((char*)p, (char*)p + len,
+				      print_address, NULL, NULL, stderr,
+				      Thumb2 ? "force-thumb" : "");
+	}
+	idx += len;
+      }
+    }
+  }
+  fflush(stderr);
+}
+// where
+static void *print_address(void *, const char *tag, void *data) {
+  if (strcmp(tag, "insn") == 0)
+    fprintf(stderr, "0x%08x:\t", (unsigned int)data);
+  return NULL;
+}
+#endif // T2_PRINT_DISASS
+
+#define BCI(len, pop, push, special, islocal, islocal_n, isstore, local_n, local_type) \
+	((len) | ((pop)<<3) | ((push)<<6) | (unsigned)((special) << 31) | ((islocal) << 30) | ((islocal_n) << 29) | ((isstore) << 28) | ((local_n) << 9) | ((local_type) << 11))
+
+#define BCI_LEN(x) 	((x) & 7)
+#define BCI_POP(x) 	(((x)>>3) & 7)
+#define BCI_PUSH(x) 	(((x)>>6) & 7)
+#define BCI_LOCAL_N(x)	(((x)>>9) & 3)
+#define BCI_LOCAL_TYPE(x) (((x) >> 11) & 7)
+
+#define BCI_TYPE_INT	0
+#define BCI_TYPE_LONG	1
+#define BCI_TYPE_FLOAT	2
+#define BCI_TYPE_DOUBLE	3
+#define BCI_TYPE_REF	4
+
+#define BCI_SPECIAL(x) 	((x) & 0x80000000)
+#define BCI_ISLOCAL(x)	((x) & 0x40000000)
+#define BCI_ISLOCAL_N(x) ((x) & 0x20000000)
+#define BCI_ISSTORE(x)	((x) & 0x10000000)
+
+static const unsigned bcinfo[256] = {
+	BCI(1, 0, 0, 0, 0, 0, 0, 0, 0),	// nop
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// aconst_null
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_m1
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_0
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_1
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_2
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_3
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_4
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_5
+	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_0
+	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_1
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_0
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_1
+	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_2
+	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_0
+	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_1
+	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// bipush
+	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// bipush
+	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc
+	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc_w
+	BCI(3, 0, 2, 0, 0, 0, 0, 0, 0),	// ldc2_w
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload
+	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_LONG),	// lload
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_FLOAT),	// fload
+	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_DOUBLE),	// dload
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_REF),	// aload
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3
+	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_LONG),	// lload_0
+	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_LONG),	// lload_1
+	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_LONG),	// lload_2
+	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_LONG),	// lload_3
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_FLOAT),	// fload_0
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_FLOAT),	// fload_1
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_FLOAT),	// fload_2
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_FLOAT),	// fload_3
+	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_DOUBLE),	// dload_0
+	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_DOUBLE),	// dload_1
+	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_DOUBLE),	// dload_2
+	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_DOUBLE),	// dload_3
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// aload_0
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// aload_1
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// aload_2
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// aload_3
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iaload
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// laload
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// faload
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// daload
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// aaload
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// baload
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// caload
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// saload
+	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// istore
+	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_LONG),	// lstore
+	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_FLOAT),	// fstore
+	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_DOUBLE),	// dstore
+	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_REF),	// astore
+	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_INT),	// istore_0
+	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_INT),	// istore_1
+	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_INT),	// istore_2
+	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_INT),	// istore_3
+	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_LONG),	// lstore_0
+	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_LONG),	// lstore_1
+	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_LONG),	// lstore_2
+	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_LONG),	// lstore_3
+	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_FLOAT),	// fstore_0
+	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_FLOAT),	// fstore_1
+	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_FLOAT),	// fstore_2
+	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_FLOAT),	// fstore_3
+	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_DOUBLE),	// dstore_0
+	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_DOUBLE),	// dstore_1
+	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_DOUBLE),	// dstore_2
+	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_DOUBLE),	// dstore_3
+	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_REF),	// astore_0
+	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_REF),	// astore_1
+	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_REF),	// astore_2
+	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_REF),	// astore_3
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// iastore
+	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// dastore
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// fastore
+	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// lastore
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// aastore
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// bastore
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// castore
+	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// sastore
+	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// pop
+	BCI(1, 2, 0, 0, 0, 0, 0, 0, 0),	// pop2
+	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// dup
+	BCI(1, 2, 3, 0, 0, 0, 0, 0, 0),	// dup_x1
+	BCI(1, 3, 4, 0, 0, 0, 0, 0, 0),	// dup_x2
+	BCI(1, 2, 4, 0, 0, 0, 0, 0, 0),	// dup2
+	BCI(1, 3, 5, 0, 0, 0, 0, 0, 0),	// dup2_x1
+	BCI(1, 4, 6, 0, 0, 0, 0, 0, 0),	// dup2_x2
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// swap
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ladd
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fadd
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dadd
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lsub
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fsub
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dsub
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// imul
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lmul
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fmul
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmul
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// idiv
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ldiv
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fdiv
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ddiv
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// irem
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lrem
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// frem
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// drem
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// ineg
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// lneg
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// fneg
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// dneg
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishl
+	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshl
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishr
+	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshr
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iushr
+	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lushr
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// land
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lor
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lxor
+	BCI(3, 0, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// iinc
+	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2l
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2f
+	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2d
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2i
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2f
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// l2d
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// f2i
+	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2l
+	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2d
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2i
+	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// d2l
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2f
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2b
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2c
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2s
+	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// lcmp
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpl
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpg
+	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpl
+	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpg
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifeq
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifne
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// iflt
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifge
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifgt
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifle
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpeq
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpne
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmplt
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpge
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpgt
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmple
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpeq
+	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpne
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// goto
+	BCI(3, 0, 1, 1, 0, 0, 0, 0, 0),	// jsr
+	BCI(2, 0, 0, 1, 0, 0, 0, 0, 0),	// ret
+	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// tableswitch
+	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// lookupswitch
+	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// ireturn
+	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// lreturn
+	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// freturn
+	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// dreturn
+	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// areturn
+	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getstatic
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putstatic
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getfield
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putfield
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevirtual
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecial
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestatic
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeinterface
+	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// invokedynamic
+	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// new
+	BCI(2, 1, 1, 0, 0, 0, 0, 0, 0),	// newarray
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// anewarray
+	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// arraylength
+	BCI(1, 1, 1, 1, 0, 0, 0, 0, 0),	// athrow
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// checkcast
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// instanceof
+	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorenter
+	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorexit
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// wide
+	BCI(4, 0, 0, 1, 0, 0, 0, 0, 0),	// multianewarray
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnull
+	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnonnull
+	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// goto_w
+	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// jsr_w
+	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// breakpoint
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcb
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// bgetfield
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// cgetfield
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xce
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcf
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// igetfield
+	BCI(3, 1, 2, 0, 0, 0, 0, 0, 0),	// lgetfield
+	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// sgetfield
+	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// aputfield
+	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// bputfield
+	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// cputfield
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd6
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd7
+	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// iputfield
+	BCI(3, 3, 0, 0, 0, 0, 0, 0, 0),	// lputfield
+	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xda
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// iaccess_0
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// iaccess_1
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// iaccess_2
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// iaccess_3
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeresolved
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecialresolved
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestaticresolved
+	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevfinal
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload_N
+	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc
+	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc_w
+	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return_register_finalizer
+	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmac
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iconst_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iconst_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iconst_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iconst_N
+	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iconst_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_istore_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_istore_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_istore_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_istore_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_istore_N
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_u4store
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_u4store
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_u4store
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_u4store
+	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_u4store
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload
+	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload_N
+	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload_N
+};
+
+void Thumb2_pass1(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
+{
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned *locals_info = jinfo->locals_info;
+  JDEBUG_ (
+   Symbol *name = jinfo->method->name();
+   Symbol *sig = jinfo->method->signature();
+  );
+  //constantPoolCacheOop cp = jinfo->method->constants()->cache();
+
+  bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+  while (bci < code_size) {
+    unsigned stackinfo = bc_stackinfo[bci];
+    unsigned bytecodeinfo;
+    unsigned opcode;
+
+    if (stackinfo & BC_VISITED) break;
+    JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
+    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED;
+    opcode = code_base[bci];
+//	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
+    bytecodeinfo = bcinfo[opcode];
+    if (!BCI_SPECIAL(bytecodeinfo)) {
+      if (BCI_ISLOCAL(bytecodeinfo)) {
+	unsigned local = BCI_LOCAL_N(bytecodeinfo);
+	unsigned local_type = BCI_LOCAL_TYPE(bytecodeinfo) + LOCAL_INT;
+	unsigned local_modified = 0;
+	unsigned linfo;
+	unsigned read_count, write_count;
+
+	if (!BCI_ISLOCAL_N(bytecodeinfo)) local = code_base[bci+1];
+	if (BCI_ISSTORE(bytecodeinfo)) local_modified = 1U << LOCAL_MODIFIED;
+	linfo = locals_info[local];
+	read_count = LOCAL_READS(linfo);
+	write_count = LOCAL_WRITES(linfo);
+	if (local_modified)
+	  write_count = LOCAL_INC_COUNT(write_count);
+	else
+	  read_count = LOCAL_INC_COUNT(read_count);
+	
+	locals_info[local] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
+	if (local_type == LOCAL_LONG || local_type == LOCAL_DOUBLE) {
+	  locals_info[local+1] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
+	}
+      }
+      bci += BCI_LEN(bytecodeinfo);
+      stackdepth += BCI_PUSH(bytecodeinfo) - BCI_POP(bytecodeinfo);
+      JASSERT(stackdepth <= (unsigned)jinfo->method->max_stack(), "stack over/under flow?");
+      continue;
+    }
+
+    switch (opcode) {
+
+      case opc_goto: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
+	break;
+      }
+      case opc_goto_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
+	break;
+      }
+
+      case opc_ifeq:
+      case opc_ifne:
+      case opc_iflt:
+      case opc_ifge:
+      case opc_ifgt:
+      case opc_ifle:
+      case opc_ifnull:
+      case opc_ifnonnull: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	stackdepth -= 1;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
+	bci += 3;
+	break;
+      }
+
+      case opc_if_icmpeq:
+      case opc_if_icmpne:
+      case opc_if_icmplt:
+      case opc_if_icmpge:
+      case opc_if_icmpgt:
+      case opc_if_icmple:
+      case opc_if_acmpeq:
+      case opc_if_acmpne: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	stackdepth -= 2;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
+	bci += 3;
+	break;
+      }
+
+      case opc_jsr: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
+	bci += 3;
+	stackdepth = 0;
+	break;
+      }
+      case opc_jsr_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
+	bci += 5;
+	break;
+      }
+
+      case opc_ireturn:
+      case opc_lreturn:
+      case opc_freturn:
+      case opc_dreturn:
+      case opc_areturn:
+      case opc_return:
+      case opc_return_register_finalizer:
+      case opc_ret:
+      case opc_athrow:
+	// The test for BC_VISITED above will break out of the loop!!!
+	break;
+
+      case opc_tableswitch: {
+	int low, high;
+	unsigned w;
+	unsigned *table;
+	unsigned nbci;
+	int def;
+
+	stackdepth -= 1;
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 8);
+	low = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 12);
+	high = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 16);
+
+	while (low <= high) {
+	  int off;
+	  w = *table++;
+	  off = (int)BYTESEX_REVERSE(w);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
+	  low++;
+	}
+
+	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
+	break;
+      }
+
+      case opc_lookupswitch: {
+	unsigned w;
+	unsigned nbci;
+	int def;
+	int npairs;	// The Java spec says signed but must be >= 0??
+	unsigned *table;
+
+	stackdepth -= 1;
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 8);
+	npairs = (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 16);
+
+	for (int i = 0; i < npairs; i++) {
+	  int off;
+	  w = *table;
+	  table += 2;
+	  off = (int)BYTESEX_REVERSE(w);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
+	}
+
+	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
+	break;
+      }
+
+      case opc_getstatic:
+      case opc_putstatic:
+      case opc_getfield:
+      case opc_putfield: {
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	constantPoolOop pool = jinfo->method->constants();
+	Symbol *sig = pool->signature_ref_at(index);
+	const jbyte *base = sig->base();
+	jbyte c = *base;
+	int stackchange;
+
+	opcode = code_base[bci];
+	if (opcode == opc_getfield || opcode == opc_putfield)
+	  stackdepth -= 1;
+	stackchange = 1;
+	if (c == 'J' || c == 'D') stackchange = 2;
+	if (opcode == opc_getfield || opcode == opc_getstatic)
+	  stackdepth += stackchange;
+	else
+	  stackdepth -= stackchange;
+	bci += 3;
+	break;
+      }
+
+      case opc_invokedynamic: {
+	int site_index = GET_NATIVE_U4(code_base+bci+1);
+	constantPoolOop pool = jinfo->method->constants();
+	int main_index = pool->cache()->secondary_entry_at(site_index)->main_entry_index();
+	JDEBUG_( int pool_index = pool->cache()->entry_at(main_index)->constant_pool_index(); );
+	Symbol *sig = pool->signature_ref_at(main_index);
+	const jbyte *base = sig->base();
+
+	JDEBUG_( tty->print("InvokeDynamic %d: %s: %s %d %d\n", opcode, name->as_C_string(), sig->as_C_string(), main_index, pool_index); );
+	stackdepth += method_stackchange(base);
+	opcode = code_base[bci];
+	bci += 5;
+	break;
+      }
+
+      case opc_invokeresolved:
+      case opc_invokespecialresolved:
+      case opc_invokestaticresolved:
+      case opc_invokevfinal:
+      case opc_invokeinterface:
+      case opc_invokevirtual:
+      case opc_invokespecial:
+      case opc_invokestatic: {
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	constantPoolOop pool = jinfo->method->constants();
+	Symbol *sig = pool->signature_ref_at(index);
+	const jbyte *base = sig->base();
+
+	jinfo->is_leaf = 0;
+	JDEBUG_( tty->print("%d: %s: %s\n", opcode, name->as_C_string(), sig->as_C_string()); );
+	stackdepth += method_stackchange(base);
+	opcode = code_base[bci];
+	bci += 3;
+	if (opcode == opc_invokeinterface) bci += 2;
+	if (opcode != opc_invokestatic && opcode != opc_invokestaticresolved)
+	  stackdepth -= 1;
+	JDEBUG_( tty->print("invoke %d: %s: %s %d %d %d\n", opcode, name->as_C_string(), sig->as_C_string(),
+	         bci, index, stackdepth); );
+	break;
+      }
+
+      case opc_multianewarray:
+	stackdepth = (stackdepth - code_base[bci+3]) + 1;
+	bci += 4;
+	break;
+
+      case opc_wide: {
+	opcode = code_base[bci+1];
+	if (opcode == opc_iinc) {
+	  bci += 6;
+	} else {
+	  bci += 4;
+	  if (opcode == opc_iload ||
+	  	opcode == opc_fload || opcode == opc_aload)
+	    stackdepth += 1;
+	  else if (opcode == opc_lload || opcode == opc_dload)
+	    stackdepth += 2;
+	  else if (opcode == opc_istore ||
+	  	opcode == opc_fstore || opcode == opc_astore)
+	    stackdepth -= 1;
+	  else if (opcode == opc_lstore || opcode == opc_dstore)
+	    stackdepth -= 2;
+	  else if (opcode != opc_ret)
+	    fatal(err_msg("Undefined wide opcode %d\n", opcode));
+	}
+	break;
+      }
+
+      default:
+	opcode = code_base[bci];
+	fatal(err_msg("Undefined opcode %d\n", opcode));
+	break;
+    }
+  }
+}
+
+void Thumb2_RegAlloc(Thumb2_Info *jinfo)
+{
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned i, j;
+  unsigned linfo;
+  unsigned score, max_score;
+  unsigned local;
+  unsigned nlocals = jinfo->method->max_locals();
+  unsigned *pregs = jinfo->jregs->pregs;
+  unsigned npregs = jinfo->jregs->npregs;
+
+  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
+  for (i = 0; i < npregs; i++) {
+    if (jinfo->use_istate && pregs[i] == Ristate) continue;
+    max_score = 0;
+    for (j = 0; j < nlocals; j++) {
+      linfo = locals_info[j];
+
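+      // Pick the highest scoring unallocated local: already-allocated and
+      // double locals are skipped, modified and long locals are heavily
+      // down-weighted, reference locals slightly.
+      // Illustrative example: an int local with 5 reads and 0 writes scores 5,
+      // while one with 5 reads and 3 writes (modified) scores (8+1)>>2 = 2.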
+      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
+      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
+      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
+      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
+      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
+      if (score > max_score) max_score = score, local = j;
+    }
+    if (max_score < (OSPACE ? 8 : 2)) break;
+    locals_info[local] |= 1<<LOCAL_ALLOCATED;
+    jinfo->jregs->r_local[local] = pregs[i];
+    jinfo->jregs->mapping[i] = local;
+  }
+#ifdef T2_PRINT_REGUSAGE
+  if (t2_print_regusage) {
+    fprintf(stderr, "Regalloc: %d physical registers allocated as follows\n", npregs);
+    for (j = 0; j < nlocals; j++) {
+      unsigned r = jinfo->jregs->r_local[j];
+      if (r) {
+	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
+	fprintf(stderr, "  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
+      }
+    }
+  }
+#endif
+}
+
+//-------------------------------------------------------------------------------------
+
+#define	DA	0
+#define	IA	1
+#define DB	2
+#define IB	3
+
+#define	PUSH_ED	0
+#define PUSH_EA	1
+#define	PUSH_FD	2
+#define	PUSH_FA	3
+
+#define	POP_FA	0
+#define	POP_FD	1
+#define	POP_EA	2
+#define	POP_ED	3
+
+#define ROR(imm, sh) (((imm) >> (sh)) | ((imm) << (32 - (sh))))
+#define ROL(imm, sh) (((imm) << (sh)) | ((imm) >> (32 - (sh))))
+
+#define abs(i) ((i) < 0 ? -(i) : (i))
+#define U(i) ((i) < 0 ? 0 : 1)
+
+#define LS_STR		0
+#define	LS_STRB		1
+#define	LS_STRH		2
+#define LS_LDRSB	3
+#define	LS_LDR		4
+#define LS_LDRB		5
+#define	LS_LDRH		6
+#define LS_LDRSH	7
+#define LS_LDRD		8
+#define LS_STRD		9
+
+#define LS_IS_LDR(op)	((op) >= LS_LDRSB)
+#define LS_IS_WORD(op)	(((op) & 3) == LS_STR)
+#define LS_IS_BYTE(op)	(((op) & 3) == LS_STRB || (op) == LS_LDRSB)
+#define LS_IS_HW(op)	(((op) & 3) == LS_STRH || (op) == LS_LDRSH)
+#define LS_IS_IMM12(op)	((op) == LS_STR || (op) == LS_STRB || (op) == LS_LDR || (op) == LS_LDRB)
+
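+// Register-offset load/store opcodes indexed by LS_*: even entries are the
+// 16-bit Thumb (T1) encodings, odd entries the 32-bit Thumb-2 (T2) encodings
+// (see T1_LS_OP/T2_LS_OP below).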
+static const unsigned t_ls_ops[16] = {
+	0x5000,		0xf8400000,
+	0x5400,		0xf8000000,
+	0x5200,		0xf8200000,
+	0x5600,		0xf9100000,
+	0x5800,		0xf8500000,
+	0x5c00,		0xf8100000,
+	0x5a00,		0xf8300000,
+	0x5e00,		0xf9300000,
+};
+
+static const unsigned a_ls_ops[20] = {
+	0xe4000000,	0xe6000000,	// str
+	0xe4400000,	0xe6400000,	// strb
+	0xe04000b0,	0xe00000b0,	// strh
+	0xe05000d0,	0xe01000d0,	// ldrsb
+	0xe4100000,	0xe6100000,	// ldr
+	0xe4500000,	0xe6500000,	// ldrb
+	0xe05000b0,	0xe01000b0,	// ldrh
+	0xe05000f0,	0xe01000f0,	// ldrsh
+	0xe04000d0,	0xe00000d0,	// ldrd
+	0xe04000f0,	0xe00000f0,	// strd
+};
+
+#define A_LS_OP_IMM(op)	a_ls_ops[(op)*2]
+#define A_LS_OP_REG(op)	a_ls_ops[(op)*2+1]
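+// For example, A_LS_OP_IMM(LS_LDR) is 0xe4100000 (the ARM immediate-offset
+// LDR base opcode) and A_LS_OP_REG(LS_LDR) is 0xe6100000 (the register-offset
+// form); the P/U/W bits are OR'd in by a_ldst_imm/a_ldst_reg.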
+
+
+#define DP_ADC	0
+#define DP_ADD	1
+#define DP_AND	2
+#define DP_ASR	3
+#define DP_BIC	4
+#define DP_CMN	5
+#define DP_CMP	6
+#define DP_EOR	7
+#define DP_LSL	8
+#define DP_LSR	9
+#define DP_MOV	10
+#define DP_MVN	11
+#define DP_ORN	12
+#define DP_ORR	13
+#define DP_ROR	14
+#define DP_RSB	15
+#define DP_SBC	16
+#define DP_SUB	17
+#define DP_TEQ	18
+#define DP_TST	19
+#define DP_MUL	20
+
+static const unsigned n_ops[] = {
+	DP_SBC,		// ADC	x, y == SBC x, ~y
+	DP_SUB,		// ADD	x, y == SUB x, -y
+	DP_BIC,		// AND	x, y == BIC x, ~y
+	(unsigned)-1,	// ASR
+	DP_AND,		// BIC	x, y == AND x, ~y
+	DP_CMP,		// CMN	x, y == CMP x, -y
+	DP_CMN,		// CMP	x, y == CMN x, -y
+	(unsigned)-1,	// EOR
+	(unsigned)-1,	// LSL
+	(unsigned)-1,	// LSR
+	DP_MVN,		// MOV	x, y == MVN x, ~y
+	DP_MOV,		// MVN	x, y == MOV x, ~y
+	DP_ORR,		// ORN	x, y == ORR x, ~y
+	DP_ORN,		// ORR	x, y == ORN x, ~y
+	(unsigned)-1,	// ROR
+	(unsigned)-1,	// RSB
+	DP_ADC,		// SBC	x, y == ADC x, ~y
+	DP_ADD,		// SUB	x, y == ADD x, -y
+	(unsigned)-1,	// TEQ
+	(unsigned)-1,	// TST
+	(unsigned)-1,	// MUL
+};
+
+#define N_OP(op)	n_ops[(op)]
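+// N_OP gives the data-processing op that takes the complemented (or negated)
+// immediate, so e.g. AND Rd, Rn, #0xffffff00, whose immediate cannot be
+// encoded, can be emitted as BIC Rd, Rn, #0xff instead (see dop_imm_s).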
+
+static const unsigned t_dop_ops[] = {
+//	Rd, Rm, #N	Rd, Rn, Rm
+	0xf1400000,	0xeb400000,	// ADC
+	0xf1000000,	0xeb000000,	// ADD
+	0xf0000000,	0xea000000,	// AND
+	0xea4f0020,	0xfa40f000,	// ASR
+	0xf0200000,	0xea200000,	// BIC
+	0xf1100f00,	0xeb100f00,	// CMN
+	0xf1b00f00,	0xebb00f00,	// CMP
+	0xf0800000,	0xea800000,	// EOR
+	0xea4f0000,	0xfa00f000,	// LSL
+	0xea4f0010,	0xfa20f000,	// LSR
+	0xf04f0000,	0xea4f0000,	// MOV
+	0xf06f0000,	0xea6f0000,	// MVN
+	0xf0600000,	0xea600000,	// ORN
+	0xf0400000,	0xea400000,	// ORR
+	0xea4f0030,	0xfa6f0000,	// ROR
+	0xf1c00000,	0xebc00000,	// RSB
+	0xf1600000,	0xeb600000,	// SBC
+	0xf1a00000,	0xeba00000,	// SUB
+	0xf0900f00,	0xea900f00,	// TEQ
+	0xf0100f00,	0xea100f00,	// TST
+	(unsigned)-1,	0xfb00f000,	// MUL
+};
+
+#define T_DP_IMM(op)	t_dop_ops[(op)*2]
+#define T_DP_REG(op)	t_dop_ops[(op)*2+1]
+
+static const unsigned a_dop_ops[] = {
+//	Rd, Rm, #N	Rd, Rn, Rm
+	0xe2a00000,	0xe0a00000,	// ADC
+	0xe2800000,	0xe0800000,	// ADD
+	0xe2000000,	0xe0000000,	// AND
+	0xe1a00040,	0xe1a00050,	// ASR
+	0xe3c00000,	0xe1c00000,	// BIC
+	0xe3700000,	0xe1700000,	// CMN
+	0xe3500000,	0xe1500000,	// CMP
+	0xe2200000,	0xe0200000,	// EOR
+	0xe1a00000,	0xe1a00010,	// LSL
+	0xe1a00020,	0xe1a00030,	// LSR
+	0xe3a00000,	0xe1a00000,	// MOV
+	0xe3e00000,	0xe1e00000,	// MVN
+	(unsigned)-1,	(unsigned)-1,	// ORN - only Thumb
+	0xe3800000,	0xe1800000,	// ORR
+	0xe1a00060,	0xe1a00070,	// ROR
+	0xe2600000,	0xe0600000,	// RSB
+	0xe2c00000,	0xe0c00000,	// SBC
+	0xe2400000,	0xe0400000,	// SUB
+	0xe3300000,	0xe1300000,	// TEQ
+	0xe3100000,	0xe1100000,	// TST
+	(unsigned)-1,	0xe0000090,	// MUL
+};
+
+#define A_DP_IMM(op)	a_dop_ops[(op)*2]
+#define A_DP_REG(op)	a_dop_ops[(op)*2+1]
+
+#define VP_ADD	0
+#define VP_SUB	1
+#define VP_MUL	2
+#define VP_DIV	3
+#define VP_SQRT 4
+
+static const unsigned t_vop_ops[] = {
+	0xee300a00,			// VADD
+	0xee300a40,			// VSUB
+	0xee200a00,			// VMUL
+	0xee800a00,			// VDIV
+	0xeeb10bc0			// VSQRT
+};
+
+#define VP_REG(op)	t_vop_ops[op]
+
+#define T1_LS_OP(op)	t_ls_ops[(op)*2]
+#define T2_LS_OP(op)	t_ls_ops[(op)*2+1]
+
+#define SHIFT_LSL	0
+#define SHIFT_LSR	1
+#define SHIFT_ASR	2
+#define SHIFT_ROR	3
+#define SHIFT_RRX	3
+
+//------------------------------------------------------------------------------------
+
+#define A_BX(src)	(0x012fff10 | (src))
+#define A_MOV(dst, src)	(0x01a00000 | ((dst) << 12) | (src))
+#define A_MOVW_IMM16(r, imm) \
+		(0x03000000 | (((imm) & 0xf000) << (16-12)) | ((imm) & 0xfff) | ((r) << 12))
+#define A_MOVT_IMM16(r, imm) \
+		(0x03400000 | (((imm) & 0xf000) << (16-12)) | ((imm) & 0xfff) | ((r) << 12))
+
+#define A_DOP_IMM(op, dst, src, ror, imm)	((op) | ((dst) << 12) | ((src) << 16) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_DOP_REG(op, dst, lho, rho, st, sh)	((op) | ((dst) << 12) | ((lho) <<16) | (rho) | \
+		((st) << 5) | ((sh) << 7))
+#define A_SHIFT_IMM(op, dst, src, imm)	((op) | ((dst) << 12) | (src) | ((imm) << 7))
+#define A_SHIFT_REG(op, dst, lho, rho)  ((op) | ((dst) << 12) | (lho) | ((rho) << 8))
+#define A_MUL(dst, lho, rho) (0x00000090 | ((dst) << 16) | ((rho) << 8) | (lho))
+
+#define A_MOV_IMM(r, ror, imm)	\
+		(0xe3a00000 | ((r) << 12) | ((ror) << (12-5)) | ((imm) & 0xff))
+#define A_MVN_IMM(r, ror, imm)	\
+		(0xe3e00000 | ((r) << 12) | ((ror) << (12-5)) | ((imm) & 0xff))
+#define A_ORR_IMM(dst, src, ror, imm) \
+		(0xe3800000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_BIC_IMM(dst, src, ror, imm) \
+		(0xe3c00000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_ADD_IMM(dst, src, ror, imm) \
+		(0xe2800000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_SUB_IMM(dst, src, ror, imm) \
+		(0xe2400000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+
+#define A_STM(base, regset, st, wb)	(0xe8000000 | ((st) << 23) | ((wb) << 21) |	\
+					((base) << 16) | (regset))
+#define A_LDM(base, regset, st, wb)	(0xe8100000 | ((st) << 23) | ((wb) << 21) |     \
+					((base) << 16) | (regset))
+
+#define A_LDR_STR_REG(op, xfer, base, off, sh, pre, wb) ((op) | ((base)<<16) | \
+		((xfer)<<12) | (off) | ((sh) << 7) | (SHIFT_LSL << 5) | (pre<<24) | \
+		(1<<23) | (wb<<21))
+
+#define A_LDREX(dst, base)	  (0xe1900f9f | ((base) << 16) | ((dst) << 12))
+#define A_STREX(dst, src, base)  (0xe1800f90 | ((base) << 16) | ((dst) << 12) | (src))
+#define A_LDREXD(dst, base)	  (0xe1b00f9f | ((base) << 16) | ((dst) << 12))
+#define A_STREXD(dst, src, base) (0xe1a00f90 | ((base) << 16) | ((dst) << 12) | (src))
+
+#define A_DMB(option)			(0xf57ff050 | (option))
+
+#define A_SXTB(dst, src)	(0xe6af0070 | ((dst) << 12) | (src))
+#define A_SXTH(dst, src)	(0xe6bf0070 | ((dst) << 12) | (src))
+#define A_UXTH(dst, src)	(0xe6ff0070 | ((dst) << 12) | (src))
+
+#define A_MLA(res, lho, rho, a) \
+		(0xe0200090 | ((res) << 16) | (lho) | ((rho) << 8) | ((a) << 12))
+#define A_UMULL(res_lo, res_hi, lho, rho) \
+		(0xe0800090 | ((res_lo) << 12) | ((res_hi) << 16) | (lho) | ((rho) << 8))
+
+
+#define TBIT 1
+
+#define T_MOV_IMM8(r, imm8)		(0x2000 | ((r)<<8) | (imm8))
+#define T_MOV_BYTELANE(r, typ, b)	(0xf04f0000 | ((typ) << 12) | ((r) << 8) | (b))
+#define T_MOV_ROT_IMM(r, ror, imm)	\
+		(0xf04f0000 | (((ror) & 0x10) << (26-4)) | (((ror) & 0xe) << (12-1)) |	\
+		(((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
+#define T_MOVW_IMM16(r, imm)		\
+		(0xf2400000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
+		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
+#define T_MOVT_IMM16(r, imm)		\
+		(0xf2c00000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
+		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
+#define T_MVN_BYTELANE(r, typ, b)	(0xf06f0000 | ((typ) << 12) | ((r) << 8) | (b))
+#define T_MVN_ROT_IMM(r, ror, imm)	(0xf06f0000 | (((ror) & 0x10) << (26-4)) |	\
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
+
+#define T_ORR_ROT_IMM(dst, src, ror, imm)	(0xf0400000 | (((ror) & 0x10) << (26-4)) | \
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
+		((dst) << 8) | ((imm) & 0x7f))
+#define T_ORN_ROT_IMM(dst, src, ror, imm)	(0xf0600000 | (((ror) & 0x10) << (26-4)) | \
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
+		((dst) << 8) | ((imm) & 0x7f))
+
+#define T_STR_IMM5(src, base, imm5)	(0x6000 | ((imm5) << 6) | ((base) << 3) | (src))
+#define T_STR_SP_IMM8(src, imm8)	(0x9000 | ((src) << 8) | (imm8))
+#define T_STR_IMM12(src, base, imm12)	(0xf8c00000 | ((src)<<12) | ((base)<<16) | (imm12))
+#define T_STR_IMM8(src, base, imm8, pre, wb)	(0xf8400800 | ((src)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDR_IMM5(dst, base, imm5)	(0x6800 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_LDR_SP_IMM8(dst, imm8)	(0x9800 | ((dst) << 8) | (imm8))
+#define T_LDR_IMM12(dst, base, imm12)	(0xf8d00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDR_IMM8(dst, base, imm8, pre, wb)	(0xf8500800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_STRB_IMM5(src, base, imm5)	(0x7000 | ((imm5) << 6) | ((base) << 3) | (src))
+#define T_STRB_IMM12(src, base, imm12)	(0xf8800000 | ((src)<<12) | ((base)<<16) | (imm12))
+#define T_STRB_IMM8(src, base, imm8, pre, wb)	(0xf8000800 | ((src)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRB_IMM5(dst, base, imm5)	(0x7800 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_LDRB_IMM12(dst, base, imm12)	(0xf8900000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRB_IMM8(dst, base, imm8, pre, wb)	(0xf8100800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_STRH_IMM5(dst, base, imm5)	(0x8000 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_STRH_IMM12(dst, base, imm12)	(0xf8a00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_STRH_IMM8(dst, base, imm8, pre, wb)	(0xf8200800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRH_IMM5(dst, base, imm5)	(0x8800 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_LDRH_IMM12(dst, base, imm12)	(0xf8b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRH_IMM8(dst, base, imm8, pre, wb)	(0xf8300800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRSH_IMM12(dst, base, imm12)	(0xf9b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRSH_IMM8(dst, base, imm8, pre, wb)	(0xf9300800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRSB_IMM12(dst, base, imm12)	(0xf9900000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRSB_IMM8(dst, base, imm8, pre, wb)	(0xf9100800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8500000 | ((base)<<16) |		\
+		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
+#define T_STRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8400000 | ((base)<<16) |		\
+		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
+
+#define T_LDREX(dst, base, off) (0xe8500f00 | ((base) << 16) | ((dst) << 12) | ((off) >> 2))
+#define T_STREX(dst, src, base, off) (0xe8400000 | ((base) << 16) | \
+		((src) << 12) | ((dst) << 8) | ((off >> 2)))
+
+#define T_LDREXD(dst1, dst2, base) (0xe8d0007f | ((base) << 16) | ((dst1) << 12) | (dst2 << 8))
+#define T_STREXD(dst, src1, src2, base) (0xe8c00070 | ((base) << 16) | ((src1) << 12) | (src2 << 8) | dst)
+
+#define T_STM8(base, regset)		(0xc000 | ((base) << 8) | (regset))
+#define T_STM16(base, regset, st, wb)	(0xe8000000 | ((st) << 23) | ((wb) << 21) |	\
+		((base) << 16) | (regset))
+
+#define T_LDM8(base, regset)		(0xc800 | ((base) << 8) | (regset))
+#define	T_LDM16(base, regset, st, wb)	(0xe8100000 | ((st) << 23) | ((wb) << 21) |	\
+		((base) << 16) | (regset))
+#define T_POP(regset)	(0xbc00 | (((regset & (1<<ARM_PC)) >> ARM_PC) << 8) | (regset & 0xff))
+#define T_PUSH(regset)	(0xb400 | (((regset & (1<<ARM_LR)) >> ARM_LR) << 8) | (regset & 0xff))
+
+#define	T1_LDR_STR_REG(op, xfer, base, off) 	((op) | ((off) << 6) | ((base) << 3) | (xfer))
+#define T2_LDR_STR_REG(op, xfer, base, off, sh)	((op) | ((base) << 16) | ((xfer) << 12) | \
+		((sh)<<4) | (off))
+
+#define T_CHKA(size, idx)		(0xca00 | (((size) & 8) << (7-3)) | ((idx) << 3) | ((size) & 7))
+#define T_HBL(handler)			(0xc300 | (handler))
+#define T_MISC_CONTROL(op, option)	(0xf3bf8f00 | ((op)<<4) | option)
+#define T_ENTER_LEAVE(enter)		(T_MISC_CONTROL(enter, 0xf))
+#define T_DMB(option)			(T_MISC_CONTROL(5, option))
+
+#define T1_ADD_IMM(dst, src, imm3)	(0x1c00 | ((imm3) << 6) | ((src) << 3) | (dst))
+#define T2_ADD_IMM(r, imm8)		(0x3000 | ((r) << 8) | (imm8))
+#define T3_ADD_BYTELANE(dst, src, typ, b) (0xf1000000 | ((src) << 16) | ((typ) << 12) | \
+		((dst) << 8) | (b))
+#define T3_ADD_ROT_IMM(dst, src, ror, imm) (0xf1000000 | ((src) << 16) | ((dst) << 8) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T4_ADD_IMM(dst, src, imm)	(0xf2000000 | ((src) << 16) | ((dst) << 8) | \
+		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
+
+#define T1_SUB_IMM(dst, src, imm3)	(0x1e00 | ((imm3) << 6) | ((src) << 3) | (dst))
+#define T2_SUB_IMM(r, imm8)		(0x3800 | ((r) << 8) | (imm8))
+#define T3_SUB_BYTELANE(dst, src, typ, b) (0xf1a00000 | ((src) << 16) | ((typ) << 12) | \
+		((dst) << 8) | (b))
+#define T3_SUB_ROT_IMM(dst, src, ror, imm) (0xf1a00000 | ((src) << 16) | ((dst) << 8) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T4_SUB_IMM(dst, src, imm)	(0xf2a00000 | ((src) << 16) | ((dst) << 8) | \
+		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
+
+#define T_DOP_BYTELANE(op, dst, src, typ, b)	((op) | ((dst) << 8) | ((src) << 16) | \
+		((typ) << 12) | (b))
+#define T_DOP_ROT_IMM(op, dst, src, ror, imm)	((op) | ((dst) << 8) | ((src) << 16) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T_SHIFT_IMM(op, dst, src, imm)	((op) | ((dst) << 8) | (src) | \
+		(((imm) & 3) << 6) | (((imm) & 0x1c) << (12-2)))
+#define T_DOP_REG(op, dst, lho, rho, st, sh)	((op) | ((dst) << 8) | ((lho) << 16) | (rho) | \
+		((st) << 4) | (((sh) & 0x1c) << (12-2)) | (((sh) & 3) << 6))
+
+#define T_CMP_IMM(src, imm)		(0x2800 | ((src) << 8) | (imm))
+#define T_CMP_REG(lho, rho)		(0x4280 | ((rho) << 3) | (lho))
+
+#define T_NEG(dst, src)		(0x4240 | (dst) | ((src) << 3))
+#define T_MVN(dst, src)		(0x43c0 | (dst) | ((src) << 3))
+#define T_MOV(dst, src)		(0x4600 | (((dst) & 8) << (7-3)) | ((src) << 3) | ((dst) & 7))
+
+#define T_VMOVS_TOARM(dst, src)	\
+	(0xee100a10 | ((dst) << 12) | (((src) & 1) << 7) | (((src) & 0x1e)<<(16-1)))
+#define T_VMOVS_TOVFP(dst, src) \
+	(0xee000a10 | ((src) << 12) | (((dst) & 1) << 7) | (((dst) & 0x1e)<<(16-1)))
+
+#define T_VMOVD_TOARM(dst_lo, dst_hi, src) \
+  (0xec500b10 | ((dst_lo) << 12) | ((dst_hi) << 16) | (((src) & 0x10)<<(5-4)) | ((src) & 0x0f))
+#define T_VMOVD_TOVFP(dst, src_lo, src_hi) \
+  (0xec400b10 | ((src_lo) << 12) | ((src_hi) << 16) | (((dst) & 0x10)<<(5-4)) | ((dst) & 0x0f))
+
+// VFP reg to VFP reg move.
+#define T_VMOVD_VFP_TOVFP(dst, src) (0xeeb00b40 | (((dst) & 0x0f) << 12) | ((src) & 0x0f))
+
+#define T_VOP_REG_S(op, dst, lho, rho)	((op) |				\
+		(((dst) & 1) << 22) | (((dst) & 0x1e) << (12-1)) | 	\
+		(((lho) & 1) << 7) | (((lho) & 0x1e) << (16-1))	 |	\
+		(((rho) & 1) << 5) | (((rho) & 0x1e) >> 1))
+#define T_VOP_REG_D(op, dst, lho, rho)	((op) |	(1 << 8) |		\
+		(((dst) & 0x10) << (22-4)) | (((dst) & 0xf) << 12) | 	\
+		(((lho) & 0x10) << (7-4)) | (((lho) & 0xf) << 16)   |	\
+		(((rho) & 0x10) << (5-4)) | ((rho) & 0xf))
+
+#define T_VCMP_S(lho, rho, e)		(0xeeb40a40 | ((e) << 7) |	\
+		(((lho) & 1) << 22) | (((lho) & 0x1e) << (12-1)) |	\
+		(((rho) & 1) << 5) | (((rho) & 0x1e) >>1))
+#define T_VCMP_D(lho, rho, e)		(0xeeb40b40 | ((e) << 7) |	\
+		(((lho) & 0x10) << (22-4)) | (((lho) & 0x0f) << 12) |	\
+		(((rho) & 0x10) << (5-4)) | ((rho) & 0x0f))
+#define T_VMRS(dst)	(0xeef10a10 | ((dst) << 12))
+
+#define T_MLA(res, lho, rho, a) \
+		(0xfb000000 | ((res) << 8) | ((lho) << 16) | (rho) | ((a) << 12))
+#define T_UMULL(res_lo, res_hi, lho, rho) \
+		(0xfba00000 | ((res_lo) << 12) | ((res_hi) << 8) | ((lho) << 16) | (rho))
+
+#define T_BX(src)		(0x4700 | ((src) << 3))
+#define T_TBH(base, idx)	(0xe8d0f010 | ((base) << 16) | (idx))
+
+#define T_SXTB(dst, src)	(0xb240 | ((src) << 3) | (dst))
+#define T_SXTH(dst, src)	(0xb200 | ((src) << 3) | (dst))
+#define T2_SXTB(dst, src)	(0xfa4ff080 | ((dst) << 8) | (src))
+#define T2_SXTH(dst, src)	(0xfa0ff080 | ((dst) << 8) | (src))
+#define T_UXTH(dst, src)	(0xb280 | ((src) << 3) | (dst))
+#define T2_UXTH(dst, src)	(0xfa1ff080 | ((dst) << 8) | (src))
+
+#define COND_EQ 0
+#define COND_NE 1
+#define COND_LT	2
+#define COND_GE 3
+#define COND_GT 4
+#define COND_LE 5
+#define COND_CS 6
+#define COND_CC 7
+#define COND_MI 8
+#define COND_PL 9
+#define COND_AL 10
+
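+// Map the COND_* values above to the ARM condition-code field values
+// (EQ=0x0, NE=0x1, LT=0xb, GE=0xa, GT=0xc, LE=0xd, CS=0x2, CC=0x3,
+// MI=0x4, PL=0x5, AL=0xe).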
+static unsigned conds[] = {
+	0x0,
+	0x1,
+	0xb,
+	0xa,
+	0xc,
+	0xd,
+	0x2,
+	0x3,
+	0x4,
+	0x5,
+	0xe,
+};
+
+#define IT_MASK_T	8
+#define IT_MASK_TE	0x0c
+#define IT_MASK_TEE	0x0e
+
+#define T_IT(cond, mask) (0xbf00 | (conds[cond] << 4) | (mask))
+
+int out_16_data(CodeBuf *codebuf, u32 s)
+{
+  if (codebuf->idx >= codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  codebuf->codebuf[codebuf->idx++] = s;
+  return 0;
+}
+
+int out_16(CodeBuf *codebuf, u32 s)
+{
+  JASSERT(Thumb2, "must be Thumb2 in out16");
+  if (codebuf->idx >= codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  codebuf->codebuf[codebuf->idx++] = s;
+  return 0;
+}
+
+int out_16x2(CodeBuf *codebuf, u32 sx2)
+{
+  unsigned s1 = sx2 >> 16;
+  unsigned s2 = sx2 & 0xffff;
+
+  out_16(codebuf, s1);
+  return out_16(codebuf, s2);
+}
+
+int out_32(CodeBuf *codebuf, u32 w)
+{
+  if (codebuf->idx + 2 > codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  *(u32 *)&(codebuf->codebuf[codebuf->idx]) = w;
+  // printf("%p: 0x%08x\n", &(codebuf->codebuf[codebuf->idx]), w);
+  codebuf->idx += 2;
+  return 0;
+}
+
+int out_arm32(CodeBuf *codebuf, u32 w, unsigned cond)
+{
+  if ((w >> 28) == 0x0f) {
+    JASSERT(cond == COND_AL, "cannot apply cond to always inst");
+  } else {
+    JASSERT((w >> 28) == 0x0e || (w >> 28) == 0, "must be");
+    w &= ~0xf0000000;
+    w |= conds[cond] << 28;
+  }
+  return out_32(codebuf, w);
+}
+
+int out_arm32(CodeBuf *codebuf, u32 w)
+{
+  return out_arm32(codebuf, w, COND_AL);
+}
+
+int out_armthumb32(CodeBuf *codebuf, u32 w)
+{
+  if (Thumb2) return out_16x2(codebuf, w);
+  else return out_32(codebuf, w);
+}
+
+u32 out_pos(CodeBuf *codebuf)
+{
+  return (u32)&(codebuf->codebuf[codebuf->idx]);
+}
+
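+// out_pos() returns the current output address, out_loc() the current byte
+// offset into the code buffer (idx counts 16-bit units, hence the * 2).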
+u32 out_loc(CodeBuf *codebuf)
+{
+  return codebuf->idx * 2;
+}
+
+u32 out_align(CodeBuf *codebuf, unsigned align)
+{
+  while ((out_pos(codebuf) & (align-1)) != 0)
+    ((out_pos(codebuf) & 2) ? out_16_data:out_32)(codebuf, 0);
+  return out_pos(codebuf);
+}
+
+u32 out_align_offset(CodeBuf *codebuf, unsigned align, unsigned offset)
+{
+  while ((out_pos(codebuf) & (align-1)) != offset)
+    ((out_pos(codebuf) & 2) ? out_16:out_32)(codebuf, 0);
+  return out_pos(codebuf);
+}
+
+int it(CodeBuf *codebuf, unsigned cond, unsigned mask)
+{
+  JASSERT(Thumb2, "Must be");
+  if (cond & 1) {
+    // If this is a negated condition, flip all the bits above the
+    // least significant bit that is 1.  Note that at least one bit is
+    // always 1 in mask
+    switch (mask & (-mask)) {
+    case 8:
+      break;
+    case 4:
+      mask ^= 8;
+      break;
+    case 2:
+      mask ^= 0x0c;
+      break;
+    case 1:
+      mask ^= 0x0e;
+      break;
+    default:
+      // Impossible unless someone specified an incorrect mask
+      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+    }
+  }
+
+  return out_16(codebuf, T_IT(cond, mask));
+}
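+
+// Illustrative example of it(): it(codebuf, COND_NE, IT_MASK_TE) sees a
+// negated condition (COND_NE is odd); the lowest set bit of 0x0c is bit 2,
+// so the mask is flipped to 0x04 before being encoded with T_IT.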
+
+int thumb_single_shift(unsigned imm)
+{
+  unsigned lsl;
+
+  if (!imm) return -1;
+  lsl = 0;
+  while (!(imm & 0x80000000)) {
+    imm <<= 1;
+    lsl++;
+  }
+  if (lsl >= 24) return -1;
+  if ((imm & 0xff000000) == imm) return lsl+8;
+  return -1;
+}
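+
+// Illustrative example: thumb_single_shift(0x00ff0000) returns 16, and
+// mov_imm then emits T_MOV_ROT_IMM(r, 16, ROL(0x00ff0000, 16)), i.e. a
+// rotate-right-by-16 encoding of the 8-bit value 0xff.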
+
+unsigned a_imm_shift(unsigned imm)
+{
+        unsigned shl, maxshl, z, maxz, i;
+
+// ECN: Need to be careful with the selection of the shifts here.
+// Consider, for example, the constant 0xf0000ffc
+// This can be done in 2 instructions if we choose the correct start
+//      MOV Rn, #0xf000000c / ORR Rn, Rn, #0x00000ff0
+// However, choose the wrong start and it takes 3 instructions
+//      MOV Rn, #0x000003fc / ORR Rn, Rn, #0x00000c00 / ORR Rn, Rn, #0xf0000000
+// To get this right we find the longest sequence of 00 bits.
+
+// First shift the immediate down so we have a non zero bit in bits 0,1
+	// short circuit simple imm and keep rotate 0 for readability
+        if (!(imm & ~0xff)) return 0;
+        shl = 0;
+        while ((imm & 3) == 0) {
+                imm >>= 2;
+                shl += 2;
+        }
+	// short circuit where it is all in bottom 16 bits - always 1 or 2
+//	if (!(imm & ~0xffff)) return 32 - shl;
+        maxz = 0;
+        maxshl = 0;
+        z = 0;
+        for (i = 0; i < 32; i += 2) {
+                if ((imm & 3) == 0) {
+                        z++;
+                } else {
+                        if (z > maxz) { maxz = z; maxshl = i; }
+                        z = 0;
+                }
+                imm = ROR(imm, 2);
+        }
+        if (z > maxz) maxshl = 0;
+        return -(shl + maxshl) & 0x1f;
+}
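+
+// Illustrative trace of the example above: a_imm_shift(0xf0000ffc) returns 4
+// (so the first byte lane is 0xf000000c), and after that lane is cleared
+// a_imm_shift(0x00000ff0) returns 28, giving the two-instruction MOV/ORR
+// sequence shown in the comment.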
+
+int arm_single_shift(unsigned imm)
+{
+    unsigned sh;
+    unsigned mask;
+
+    sh = a_imm_shift(imm);
+    mask = ROR(0xff, sh);
+    if (imm & ~mask) return -1;
+    return (int)sh;
+}
+
+int thumb_bytelane(u32 imm)
+{
+    unsigned b1 = imm & 0xff;
+    unsigned b2 = (imm >> 8) & 0xff;
+    unsigned b3 = (imm >> 16) & 0xff;
+    unsigned b4 = imm >> 24;
+    int mov_type = -1;
+
+    if (b1 == b3 && b2 == 0 && b4 == 0) mov_type = 1;
+    if (b1 == b2 && b1 == b3 && b1 == b4) mov_type = 3;
+    if (b2 == b4 && b1 == 0 && b3 == 0) mov_type = 2;
+    if (imm < 256) mov_type = 0;
+    return mov_type;
+}
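+
+// thumb_bytelane() recognises the Thumb-2 "byte lane" modified immediates:
+// type 0 = 0x000000XY, type 1 = 0x00XY00XY, type 2 = 0xXY00XY00,
+// type 3 = 0xXYXYXYXY; anything else returns -1.
+// E.g. thumb_bytelane(0x00340034) == 1 and thumb_bytelane(0x7f7f7f7f) == 3.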
+
+int mov_imm(CodeBuf *codebuf, Reg r, u32 imm, unsigned cond = COND_AL)
+{
+  int mov_type, rol;
+
+  if (Thumb2) {
+    JASSERT(cond == COND_AL, "only COND_AL in Thumb2");
+    if (r < ARM_R8 && imm < 256)
+      return out_16(codebuf, T_MOV_IMM8(r, imm));
+    mov_type = thumb_bytelane(imm);
+    if (mov_type >= 0) {
+      if (mov_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T_MOV_BYTELANE(r, mov_type, (imm & 0xff)));
+    }
+    mov_type = thumb_bytelane(~imm);
+    if (mov_type >= 0) {
+      imm = ~imm;
+      if (mov_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T_MVN_BYTELANE(r, mov_type, (imm & 0xff)));
+    }
+    rol = thumb_single_shift(imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T_MOV_ROT_IMM(r, rol, ROL(imm, rol)));
+    rol = thumb_single_shift(~imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T_MVN_ROT_IMM(r, rol, ROL(~imm, rol)));
+    if ((imm & ~0xffff) == 0)
+      return out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
+    if (r < ARM_R8) {
+      rol = thumb_single_shift(imm & ~0xff);
+      if (rol >= 0) {
+	out_16(codebuf, T_MOV_IMM8(r, imm & 0xff));
+	return out_16x2(codebuf, T_ORR_ROT_IMM(r, r, rol, ROL(imm & ~0xff, rol)));
+      }
+    }
+    out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
+    return out_16x2(codebuf, T_MOVT_IMM16(r, imm >> 16));
+  }
+  if (ARCH_GE_V6T2(CPUInfo)) {
+    // ARMV6T2 or greater, we can use movw/movt
+    int sh;
+    if ((sh = arm_single_shift(imm)) >= 0)
+      return out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+    if ((sh = arm_single_shift(~imm)) >= 0)
+      return out_arm32(codebuf, A_MVN_IMM(r, sh, ROL(~imm, sh)), cond);
+    out_arm32(codebuf, A_MOVW_IMM16(r, imm & 0xffff), cond);
+    if ((imm >> 16) == 0) return 0;
+    return out_arm32(codebuf, A_MOVT_IMM16(r, imm >> 16), cond);
+  }
+  {
+// The following generates an optimal sequence of either
+//   MOV dst, #N followed by up to 3 ORR dst, dst, #N
+//   MVN dst, #N followed by up to 3 BIC dst, dst, #N
+// The following is a profile of how many instructions are required for all
+// integers in the 32 bit range
+// 1 instruction sequence - 6146 times
+// 2 instruction sequence - 5308392 times
+// 3 instruction sequence - 1071714202 times
+// 4 instruction sequence - 3217938556 times
+//
+    unsigned mov_count, mov_shifts[4];
+    unsigned mvn_count, mvn_shifts[4];
+    unsigned im, sh;
+    unsigned mask;
+    unsigned i;
+
+    im = imm;
+    mov_count = 0;
+    do {
+      sh = mov_shifts[mov_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (mov_count == 1) {
+      // short circuit the common case of 1 instruction
+      sh = mov_shifts[0];
+      return out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+    }
+    im = ~imm;
+    mvn_count = 0;
+    do {
+      sh = mvn_shifts[mvn_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (mov_count <= mvn_count) {
+      sh = mov_shifts[0];
+      out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+      for (i = 1; i < mov_count; i++) {
+	sh = mov_shifts[i];
+	out_arm32(codebuf, A_ORR_IMM(r, r, sh, ROL(imm, sh)), cond);
+      }
+    } else {
+      imm = ~imm;
+      sh = mvn_shifts[0];
+      out_arm32(codebuf, A_MVN_IMM(r, sh, ROL(imm, sh)), cond);
+      for (i = 1; i < mvn_count; i++) {
+	sh = mvn_shifts[i];
+	out_arm32(codebuf, A_BIC_IMM(r, r, sh, ROL(imm,sh)), cond);
+      }
+    }
+    return 0;
+  }
+}
+
+static int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho);
+
+int a_ldst_reg(CodeBuf *codebuf, u32 op, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  if (pre == 0) wb = 0;
+  return out_32(codebuf, A_LS_OP_REG(op) | ((base) << 16) | ((src) << 12) |
+		(offset) | (pre<<24) | (1<<23) | (wb<<21));
+}
+
+int load_store_reg(CodeBuf *codebuf, u32 op, Reg xfer, Reg base, Reg offset,
+							  int pre, int wb)
+{
+  if (Thumb2) {
+    JASSERT(pre, "post not supported");
+    if (xfer < ARM_R8 && base < ARM_R8 && offset < ARM_R8)
+      out_16(codebuf, T1_LDR_STR_REG(T1_LS_OP(op), xfer, base, offset));
+    else
+      out_16x2(codebuf, T2_LDR_STR_REG(T2_LS_OP(op), xfer, base, offset, 0));
+    if (wb) add_reg(codebuf, base, base, offset);
+    return 0;
+  }
+  return a_ldst_reg(codebuf, op, xfer, base, offset, pre, wb);
+}
+
+int str_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STR, src, base, offset, pre, wb);
+}
+
+int ldr_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDR, dst, base, offset, pre, wb);
+}
+
+int strb_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STRB, src, base, offset, pre, wb);
+}
+
+int ldrb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRB, dst, base, offset, pre, wb);
+}
+
+int strh_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STRH, src, base, offset, pre, wb);
+}
+
+int ldrh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRH, dst, base, offset, pre, wb);
+}
+
+int ldrsh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRSH, dst, base, offset, pre, wb);
+}
+
+int ldrsb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRSB, dst, base, offset, pre, wb);
+}
+
+int ldrex_imm(CodeBuf *codebuf, Reg dst, Reg base)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_LDREX(dst, base, 0));
+  return out_32(codebuf, A_LDREX(dst, base));
+}
+
+int strex_imm(CodeBuf *codebuf, Reg dst, Reg src, Reg base)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_STREX(dst, src, base, 0));
+  return out_32(codebuf, A_STREX(dst, src, base));
+}
+
+int ldrexd(CodeBuf *codebuf, Reg dst0, Reg dst1, Reg base)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_LDREXD(dst0, dst1, base));
+  JASSERT(dst1 == dst0+1, "must be reg pair for ldrexd");
+  return out_32(codebuf, A_LDREXD(dst0, base));
+}
+
+int strexd(CodeBuf *codebuf, Reg dst, Reg src0, Reg src1, Reg base)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_STREXD(dst, src0, src1, base));
+  JASSERT(src1 == src0+1, "must be reg pair for strexd");
+  return out_32(codebuf, A_STREXD(dst, src0, base));
+}
+
+int a_ldst_imm(CodeBuf *codebuf, u32 op, Reg src, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff = (unsigned)offset;
+  int is_imm12 = LS_IS_IMM12(op);
+  unsigned uoff_limit = is_imm12 ? (1<<12) : (1<<8);
+
+  if (pre == 0) wb = 0;
+  if (offset < 0) uoff = (unsigned)-offset;
+  if (uoff < uoff_limit) {
+    if (!is_imm12) uoff = (uoff & 0xf) | ((uoff & 0xf0) << 4);
+    return out_32(codebuf, A_LS_OP_IMM(op) | ((base) << 16) | ((src) << 12) |
+	          (pre<<24) | ((offset>=0)<<23) | (wb<<21) | uoff);
+  }
+  // Use ARM_LR as it is always the last tmp
+  mov_imm(codebuf, ARM_LR, offset);
+  return a_ldst_reg(codebuf, op, src, base, ARM_LR, pre, wb);
+}
+
+int str_imm_wb(CodeBuf *codebuf, Reg src, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff;
+
+  if (!pre && !wb) pre = 1, offset = 0;
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (pre && !wb && offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 128 && (uoff & 3) == 0)
+	return out_16(codebuf, T_STR_IMM5(src, base, uoff>>2));
+      if (base == ARM_SP && src < ARM_R8 && uoff < 1024 && (uoff &3) ==0)
+	return out_16(codebuf, T_STR_SP_IMM8(src, uoff>>2));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STR_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STR_IMM8(src, base, offset, pre, wb));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return str_reg(codebuf, src, base, ARM_IP, pre, wb);
+  }
+  a_ldst_imm(codebuf, LS_STR, src, base, offset, pre, wb);
+}
+
+int str_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  return str_imm_wb(codebuf, src, base, offset, 1, 0);
+}
+
+int ldr_imm_wb(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff;
+
+  if (!pre && !wb) pre = 1, offset = 0;
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (pre && !wb && offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 128 && (uoff & 3) ==0)
+	return out_16(codebuf, T_LDR_IMM5(dst, base, uoff>>2));
+      if (base == ARM_SP && dst < ARM_R8 && uoff < 1024 && (uoff & 3) == 0)
+	return out_16(codebuf, T_LDR_SP_IMM8(dst, uoff>>2));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDR_IMM12(dst, base, uoff));
+    } else {
+      if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDR_IMM8(dst, base, offset, pre, wb));
+    }
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldr_reg(codebuf, dst, base, ARM_IP, pre, wb);
+  }
+  return a_ldst_imm(codebuf, LS_LDR, dst, base, offset, pre, wb);
+}
+
+int ldr_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  return ldr_imm_wb(codebuf, dst, base, offset, 1, 0);
+}
+
+int strb_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 32)
+	return out_16(codebuf, T_STRB_IMM5(src, base, uoff));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STRB_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STRB_IMM8(src, base, offset, 1, 0));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return strb_reg(codebuf, src, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_STRB, src, base, offset, 1, 0);
+}
+
+int ldrb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 32)
+	return out_16(codebuf, T_LDRB_IMM5(dst, base, uoff));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRB_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRB_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrb_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRB, dst, base, offset, 1, 0);
+}
+
+int strh_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
+	return out_16(codebuf, T_STRH_IMM5(src, base, uoff>>1));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STRH_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STRH_IMM8(src, base, offset, 1, 0));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return strh_reg(codebuf, src, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_STRH, src, base, offset, 1, 0);
+}
+
+int ldrh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
+	return out_16(codebuf, T_LDRH_IMM5(dst, base, uoff>>1));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRH_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRH_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrh_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRH, dst, base, offset, 1, 0);
+}
+
+int ldrsh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRSH_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRSH_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrsh_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRSH, dst, base, offset, 1, 0);
+}
+
+int ldrsb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRSB_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRSB_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrsb_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRSB, dst, base, offset, 1, 0);
+}
+
+int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm);
+
+int mov_reg(CodeBuf *codebuf, u32 dst, u32 src, unsigned cond = COND_AL)
+{
+  if (dst == src) return 0;
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    if (dst == ARM_PC) return out_16(codebuf, T_BX(src));
+    return out_16(codebuf, T_MOV(dst, src));
+  }
+  if (dst == ARM_PC) return out_arm32(codebuf, A_BX(src), cond);
+  return out_arm32(codebuf, A_MOV(dst, src), cond);
+}
+
+int stm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
+{
+  JASSERT(regset != 0, "regset != 0 in stm");
+  if (Thumb2) {
+    if (base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
+      return out_16(codebuf, T_STM8(base, regset));
+    if (base == ARM_SP) {
+      if ((regset & ~0x40ff) == 0 && st == DB && wb)
+	return out_16(codebuf, T_PUSH(regset));
+    }
+    if ((regset & -regset) == regset)
+      return str_imm_wb(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
+    JASSERT(st == PUSH_EA || st == PUSH_FD, "only PUSH_EA or PUSH_FD available on Thumb");
+    return out_16x2(codebuf, T_STM16(base, regset, st, wb));
+  }
+  return out_32(codebuf, A_STM(base, regset, st, wb));
+}
+
+int ldm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
+{
+  JASSERT(regset != 0, "regset != 0 in ldm");
+  if (Thumb2) {
+    if (base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
+      return out_16(codebuf, T_LDM8(base, regset));
+    if (base == ARM_SP) {
+      if ((regset & ~0x80ff) == 0 && st == IA && wb)
+	return out_16(codebuf, T_POP(regset));
+    }
+    if ((regset & -regset) == regset)
+      return ldr_imm_wb(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
+    JASSERT(st == POP_EA || st == POP_FD, "only POP_EA or POP_FD available on Thumb");
+    return out_16x2(codebuf, T_LDM16(base, regset, st, wb));
+  }
+  return out_32(codebuf, A_LDM(base, regset, st, wb));
+}
+
+// Use this macro before calling ldrd_imm to ensure the regs are in the right order
+// for an ldm when compiling for ARM. If the registers are in the wrong order it does
+// a SWAP (note: this does not actually swap the regs, just renames them)
+#define LDRD_PRE(jstack, lo, hi) do { \
+		if (lo > hi && !Thumb2) { \
+		  Reg tmp = lo; \
+		  lo = hi; \
+		  hi = tmp; \
+		  SWAP(jstack); \
+		} \
+              } while (0)
+
+int ldrd_imm(CodeBuf *codebuf, Reg dst_lo, Reg dst_hi, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
+      return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, base, offset>>2, 1, 0));
+    add_imm(codebuf, ARM_IP, base, offset);
+    return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, ARM_IP, 0, 1, 0));
+  }
+  if (dst_hi == dst_lo + 1 && !(dst_lo & 1))
+    return a_ldst_imm(codebuf, LS_LDRD, dst_lo, base, offset, 1, 0);
+
+  // The ARM instruction set only allows for a single register
+  // in the ldrd instruction, the high register is assumed to
+  // be the low register + 1, even though the Thumb instruction
+  // set allows a pair of registers to be specified.
+  // In addition the low register must be an even register, and
+  // it raises an exception if this is not the case, even though
+  // the very same processor can handle an odd register in Thumb.
+  // So we use LDM instead. Note: We must use LDM rather than
+  // 2 x LDR because it is required to be atomic (on non MP core)
+
+  // LDM requires that the regs are in order and the caller must
+  // use LDRD_PRE to ensure this
+  JASSERT(dst_lo < dst_hi, "regs must be in order for ldm");
+
+  // Note: We only do LDMIA (offset=0) and LDMIB (offset=4)
+  if (offset != 0 && offset != 4) {
+    add_imm(codebuf, ARM_IP, base, offset);
+    base = ARM_IP;
+    offset = 0;
+  }
+  return ldm(codebuf, (1<<dst_lo)|(1<<dst_hi), base, offset ? IB:IA, 0);
+}
+
+int strd_imm(CodeBuf *codebuf, Reg src_lo, Reg src_hi, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
+      return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, base, offset>>2, 1, 0));
+    add_imm(codebuf, ARM_IP, base, offset);
+    return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, ARM_IP, 0, 1, 0));
+  }
+  // See comments above in ldrd_imm
+  if (src_hi == src_lo +1 && !(src_lo & 1))
+    return a_ldst_imm(codebuf, LS_STRD, src_lo, base, offset, 1, 0);
+
+  // If the registers are out of order we cannot simply rename them like we did for
+  // ldm because the values have already been loaded. So use tmp to ensure
+  // the registers are in order.
+  Reg tmp = ARM_LR;
+  if (src_lo > src_hi) {
+    mov_reg(codebuf, tmp, src_hi);
+    tmp = src_hi;
+    src_hi = ARM_LR;
+  }
+  JASSERT(src_lo < src_hi, "regs must be in order for stm");
+  if (offset != 0 && offset != 4) {
+    add_imm(codebuf, tmp, base, offset);
+    base = tmp;
+    offset = 0;
+  }
+  return stm(codebuf, (1<<src_lo)|(1<<src_hi), base, offset ? IB:IA, 0);
+}
+
+int dop_reg(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho, u32 sh_typ, u32 shift, unsigned cond = COND_AL)
+{
+  unsigned s = 0;
+  if (op != DP_MUL && dst != ARM_PC) s = 1 << 20;
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    return out_16x2(codebuf, T_DOP_REG(T_DP_REG(op)|s, dst, lho, rho, sh_typ, shift));
+  }
+  if (op == DP_MUL) return out_arm32(codebuf, A_MUL(dst, lho, rho), cond);
+  if (op == DP_LSL || op == DP_LSR || op == DP_ASR)
+    return out_arm32(codebuf, A_SHIFT_REG(A_DP_REG(op)|s, dst, lho, rho), cond);
+  return out_arm32(codebuf, A_DOP_REG(A_DP_REG(op)|s, dst, lho, rho, sh_typ, shift), cond);
+}
+
+int sxtb(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_SXTB(dst, src));
+    return out_16x2(codebuf, T2_SXTB(dst, src));
+  }
+  return out_32(codebuf, A_SXTB(dst, src));
+}
+
+int sxth(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_SXTH(dst, src));
+    return out_16x2(codebuf, T2_SXTH(dst, src));
+  }
+  return out_32(codebuf, A_SXTH(dst, src));
+}
+
+int uxth(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_UXTH(dst, src));
+    return out_16x2(codebuf, T2_UXTH(dst, src));
+  }
+  return out_32(codebuf, A_UXTH(dst, src));
+}
+
+int nop_16(CodeBuf *codebuf)
+{
+  return out_16(codebuf, T_MOV(ARM_R0, ARM_R0));
+}
+
+int nop_32(CodeBuf *codebuf)
+{
+  return mov_reg(codebuf, ARM_R8, ARM_R8);
+}
+
+int mvn_reg(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2 && dst < ARM_R8 && src < ARM_R8)
+    return out_16(codebuf, T_MVN(dst, src));
+  return dop_reg(codebuf, DP_MVN, dst, 0, src, SHIFT_LSL, 0);
+}
+
+int vmov_reg_s_toVFP(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVS_TOVFP(dst, src));
+}
+
+int vmov_reg_s_toARM(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVS_TOARM(dst, src));
+}
+
+int vmov_reg_d_toVFP(CodeBuf *codebuf, u32 dst, u32 src_lo, u32 src_hi)
+{
+  return out_armthumb32(codebuf, T_VMOVD_TOVFP(dst, src_lo, src_hi));
+}
+
+int vmov_reg_d_VFP_to_VFP(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVD_VFP_TOVFP(dst, src));
+}
+
+int vmov_reg_d_toARM(CodeBuf *codebuf, u32 dst_lo, u32 dst_hi, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVD_TOARM(dst_lo, dst_hi, src));
+}
+
+int vop_reg_s(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, T_VOP_REG_S(VP_REG(op), dst, lho, rho));
+}
+
+int vop_reg_d(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, T_VOP_REG_D(VP_REG(op), dst, lho, rho));
+}
+
+int vcmp_reg_s(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
+{
+  return out_armthumb32(codebuf, T_VCMP_S(lho, rho, e));
+}
+
+int vcmp_reg_d(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
+{
+  return out_armthumb32(codebuf, T_VCMP_D(lho, rho, e));
+}
+
+int vmrs(CodeBuf *codebuf, u32 dst)
+{
+  return out_armthumb32(codebuf, T_VMRS(dst));
+}
+
+int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho)
+{
+  // ECN: FIXME: Thumb has a 16 bit ADD dst, lho, rho
+  return dop_reg(codebuf, DP_ADD, dst, lho, rho, SHIFT_LSL, 0);
+}
+
+int cmp_reg(CodeBuf *codebuf, Reg lho, Reg rho, unsigned cond = COND_AL)
+{
+  if (Thumb2 && lho < ARM_R8 && rho < ARM_R8) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    return out_16(codebuf, T_CMP_REG(lho, rho));
+  }
+  return dop_reg(codebuf, DP_CMP, 0x0f, lho, rho, SHIFT_LSL, 0, cond);
+}
+
+int add_reg_shift(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho, u2 sh_typ, u32 shift)
+{
+  return dop_reg(codebuf, DP_ADD, dst, lho, rho, sh_typ, shift);
+}
+
+int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  int imm_type, rol;
+
+  if (imm == 0) return mov_reg(codebuf, dst, src);
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8) {
+      if (imm < 8)
+	return out_16(codebuf, T1_ADD_IMM(dst, src, imm));
+      if (-imm < 8)
+	return out_16(codebuf, T1_SUB_IMM(dst, src, -imm));
+      if (src == dst) {
+	if (imm < 256)
+	  return out_16(codebuf, T2_ADD_IMM(src, imm));
+	if (-imm < 256)
+	  return out_16(codebuf, T2_SUB_IMM(src, -imm));
+      }
+    }
+    imm_type = thumb_bytelane(imm);
+    if (imm_type >= 0) {
+      if (imm_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T3_ADD_BYTELANE(dst, src, imm_type, (imm & 0xff)));
+    }
+    imm_type = thumb_bytelane(-imm);
+    if (imm_type >= 0) {
+      imm = -imm;
+      if (imm_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T3_SUB_BYTELANE(dst, src, imm_type, (imm & 0xff)));
+    }
+    rol = thumb_single_shift(imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T3_ADD_ROT_IMM(dst, src, rol, ROL(imm, rol)));
+    rol = thumb_single_shift(-imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T3_SUB_ROT_IMM(dst, src, rol, ROL(-imm, rol)));
+    if (imm < (1 << 12))
+      return out_16x2(codebuf, T4_ADD_IMM(dst, src, imm));
+    if (-imm < (1 << 12))
+      return out_16x2(codebuf, T4_SUB_IMM(dst, src, -imm));
+    mov_imm(codebuf, ARM_IP, imm);
+    return add_reg(codebuf, dst, src, ARM_IP);
+  }
+  {
+    unsigned add_count, add_shifts[4];
+    unsigned sub_count, sub_shifts[4];
+    unsigned im, sh;
+    unsigned mask;
+    unsigned i;
+
+    im = imm;
+    add_count = 0;
+    do {
+      sh = add_shifts[add_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (add_count == 1) {
+      // short circuit the common case of 1 instruction
+      sh = add_shifts[0];
+      return out_32(codebuf, A_ADD_IMM(dst, src, sh, ROL(imm, sh)));
+    }
+    im = -imm;
+    sub_count = 0;
+    do {
+      sh = sub_shifts[sub_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (add_count <= sub_count) {
+      for (i = 0; i < add_count; i++) {
+	sh = add_shifts[i];
+	out_32(codebuf, A_ADD_IMM(dst, src, sh, ROL(imm, sh)));
+	src = dst;
+      }
+    } else {
+      imm = -imm;
+      for (i = 0; i < sub_count; i++) {
+	sh = sub_shifts[i];
+	out_32(codebuf, A_SUB_IMM(dst, src, sh, ROL(imm,sh)));
+	src = dst;
+      }
+    }
+    return 0;
+  }
+}
+
+int sub_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return add_imm(codebuf, dst, src, -imm);
+}
+
+// ECN: This is suboptimal. Need to rewrite to have 3 states
+// (set flags, preserve flags, don't care). Then rewrite
+// mov ip, #N; op dst, src, ip as op dst, src, #N; op dst, dst, #N ...
+int dop_imm_s(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm, unsigned s)
+{
+    int imm_type, rol;
+    unsigned n_op, n_imm;
+
+    JASSERT(op == DP_ADC || op == DP_ADD || op == DP_AND || op == DP_BIC || op == DP_CMN ||
+		op == DP_CMP || op == DP_EOR || op == DP_MOV || op == DP_MVN ||
+		op == DP_ORN || op == DP_ORR || op == DP_RSB || op == DP_SBC ||
+		op == DP_SUB || op == DP_TEQ || op == DP_TST, "bad op");
+    if (op == DP_CMP || op == DP_CMN || op == DP_TEQ || op == DP_TST) dst = 0x0f;
+    if (op == DP_MOV || op == DP_MVN) src = 0x0f;
+    if (Thumb2) {
+      imm_type = thumb_bytelane(imm);
+      if (imm_type >= 0) {
+	if (imm_type == 2) imm >>= 8;
+	return out_16x2(codebuf, T_DOP_BYTELANE(T_DP_IMM(op)|s, dst, src, imm_type, (imm & 0xff)));
+      }
+      rol = thumb_single_shift(imm);
+      if (rol >= 0)
+	return out_16x2(codebuf, T_DOP_ROT_IMM(T_DP_IMM(op)|s, dst, src, rol, ROL(imm, rol)));
+      n_op = N_OP(op);
+      if (n_op != (unsigned)-1) {
+	n_imm = ~imm;
+	if (op == DP_ADD || op == DP_SUB || op == DP_CMP || op == DP_CMN) n_imm = -imm;
+	imm_type = thumb_bytelane(n_imm);
+	if (imm_type >= 0) {
+	  if (imm_type == 2) n_imm >>= 8;
+	  return out_16x2(codebuf, T_DOP_BYTELANE(T_DP_IMM(n_op)|s, dst, src, imm_type, (n_imm & 0xff)));
+	}
+	rol = thumb_single_shift(n_imm);
+	if (rol >= 0)
+	  return out_16x2(codebuf, T_DOP_ROT_IMM(T_DP_IMM(n_op)|s, dst, src, rol, ROL(n_imm, rol)));
+      }
+      mov_imm(codebuf, ARM_IP, imm);
+      return out_16x2(codebuf, T_DOP_REG(T_DP_REG(op)|s, dst, src, ARM_IP, SHIFT_LSL, 0));
+    }
+    if (dst == 0x0f) dst = 0;
+    if (src == 0x0f) src = 0;
+    if (op == DP_ORN) op = DP_ORR, imm = ~imm; // no ORN in arm
+    rol = arm_single_shift(imm);
+    if (rol >= 0)
+      return out_32(codebuf, A_DOP_IMM(A_DP_IMM(op)|s, dst, src, rol, ROL(imm, rol)));
+    n_op = N_OP(op);
+    if (n_op != (unsigned)-1 && n_op != DP_ORN) {
+      n_imm = ~imm;
+      if (op == DP_ADD || op == DP_SUB || op == DP_CMP || op == DP_CMN) n_imm = -imm;
+      rol = arm_single_shift(n_imm);
+      if (rol >= 0)
+        return out_32(codebuf, A_DOP_IMM(A_DP_IMM(n_op)|s, dst, src, rol, ROL(n_imm, rol)));
+    }
+    mov_imm(codebuf, ARM_IP, imm);
+    return out_32(codebuf, A_DOP_REG(A_DP_REG(op)|s, dst, src, ARM_IP, SHIFT_LSL, 0));
+}
+
+int dop_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    return dop_imm_s(codebuf, op, dst, src, imm, 1<<20);
+}
+
+int dop_imm_preserve(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    return dop_imm_s(codebuf, op, dst, src, imm, 0);
+}
+
+int shift_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    imm &= 31;
+    if (imm == 0)
+      return mov_reg(codebuf, dst, src);
+    if (Thumb2)
+      return out_16x2(codebuf, T_SHIFT_IMM(T_DP_IMM(op), dst, src, imm));
+    return out_32(codebuf, A_SHIFT_IMM(A_DP_IMM(op), dst, src, imm));
+}
+
+int rsb_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  if (Thumb2 && dst < ARM_R8 && src < ARM_R8 && imm == 0)
+    return out_16(codebuf, T_NEG(dst, src));
+  return dop_imm(codebuf, DP_RSB, dst, src, imm);
+}
+
+int adc_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_ADC, dst, src, imm);
+}
+
+int asr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return shift_imm(codebuf, DP_ASR, dst, src, imm);
+}
+
+int lsl_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return shift_imm(codebuf, DP_LSL, dst, src, imm);
+}
+
+int eor_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_EOR, dst, src, imm);
+}
+
+int and_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_AND, dst, src, imm);
+}
+
+int orr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_ORR, dst, src, imm);
+}
+
+int cmp_imm(CodeBuf *codebuf, Reg src, u32 imm)
+{
+  if (Thumb2 && src < ARM_R8 && imm < 256)
+    return out_16(codebuf, T_CMP_IMM(src, imm));
+  return dop_imm(codebuf, DP_CMP, 0x0f, src, imm);
+}
+
+int tst_imm(CodeBuf *codebuf, Reg src, u32 imm)
+{
+  return dop_imm(codebuf, DP_TST, 0x0f, src, imm);
+}
+
+int fullBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) {
+    if (Thumb2)
+      return out_16x2(codebuf, T_DMB(0xf));
+    return out_32(codebuf, A_DMB(0xf));
+  }
+  return 0;	// no barrier needed on a uniprocessor
+}
+
+int storeBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) {
+    if (Thumb2)
+      return out_16x2(codebuf, T_DMB(0xe));
+    return out_32(codebuf, A_DMB(0xe));
+  }
+  return 0;	// no barrier needed on a uniprocessor
+}
+
+int tbh(CodeBuf *codebuf, Reg base, Reg idx)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_TBH(base, idx));
+  // For the moment we emulate the behaviour of TBH in ARM code.
+  // It may be better to review the tableswitch generation sometime.
+  //   	lsl	ip, idx, #1
+  //	ldrh	ip, [pc, ip]
+  //	add	pc, pc, ip, lsl #1
+  lsl_imm(codebuf, ARM_IP, idx, 1);
+  ldrh_reg(codebuf, ARM_IP, ARM_PC, ARM_IP, 1, 0);
+  // return add_reg(codebuf, ARM_PC, ARM_PC, ARM_IP);
+  return dop_reg(codebuf, DP_ADD, ARM_PC, ARM_PC, ARM_IP, SHIFT_LSL, 1);
+}
+
+int umull(CodeBuf *codebuf, u32 res_lo, u32 res_hi, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, Thumb2 ? T_UMULL(res_lo, res_hi, lho, rho) :
+                                      A_UMULL(res_lo, res_hi, lho, rho));
+}
+
+int mla(CodeBuf *codebuf, u32 res, u32 lho, u32 rho, u32 a)
+{
+  return out_armthumb32(codebuf, Thumb2 ? T_MLA(res, lho, rho, a) :
+                                      A_MLA(res, lho, rho, a));
+}
+
+#define NEG_COND(cond)	((cond) ^ 1)
+
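+// Thumb2 (T32) wide branch encodings. Bit 23 of the halfword offset is
+// the sign bit S; the J1/J2 fields below are NOT(I1 EOR S) and
+// NOT(I2 EOR S), as required by the B/BL/BLX immediate encodings.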
+#define T_B(uoff)	(0xe000 | ((uoff) & 0x7ff))
+#define T_BW(uoff)	(0xf0009000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BL(uoff)	(0xf000d000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BLX(uoff)	(0xf000c000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BCC(cond, uoff) (0xd000 | (conds[cond] << 8) | ((uoff) & 0xff))
+#define T_BCCW(cond, uoff) (0xf0008000 | \
+			     (conds[cond] << 22) | \
+			     (((uoff) & (1<<19)) << (26-19)) | \
+			     (((uoff) & (1<<18)) >> (18-11)) | \
+			     (((uoff) & (1<<17)) >> (17-13)) | \
+			     (((uoff) & 0x1f800) << (16-11)) | \
+			     ((uoff) & 0x7ff))
+#define T_BLX_REG(r)	(0x4780 | ((r) << 3))
+#define T_CBZ(r, uoff)	(0xb100 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))
+#define T_CBNZ(r, uoff)	(0xb900 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))
+
+#define A_B(uoff)	 (0xea000000 | ((uoff) & 0xffffff))
+#define A_BL(cond, uoff) (0x0b000000 | (conds[cond] << 28) | ((uoff) & 0xffffff))
+#define A_BCC(cond, uoff) (0x0a000000 | (conds[cond] << 28) | ((uoff) & 0xffffff))
+#define A_BLX_REG(r)     (0xe12fff30 | (r))
+
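+// PATCH(loc) ... HCTAP bracket code that goes back and overwrites the
+// instruction previously emitted at halfword offset 'loc', then restore
+// the saved output index.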
+#define PATCH(loc)	do {						\
+	  unsigned oldidx = codebuf->idx;				\
+	  codebuf->idx = (loc) >> 1;					\
+
+#define HCTAP								\
+	  codebuf->idx = oldidx;					\
+    	} while (0)
+
+int forward_short(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  if (Thumb2) out_16(codebuf, UNDEFINED_16);
+  else out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+int forward_long(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+int forward_cb(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  if (Thumb2) return forward_short(codebuf);
+  out_32(codebuf, UNDEFINED_32);
+  out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+int branch_uncond(CodeBuf *codebuf, unsigned dest)
+{
+  unsigned loc = (codebuf->idx * 2);
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<10) && offset < (1<<10))
+      return out_16(codebuf, T_B(uoff));
+    if (offset >= -(1<<23) && offset < (1<<23))
+      return out_16x2(codebuf, T_BW(uoff));
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<22) && offset < (1<<22)) 
+      return out_32(codebuf, A_B(uoff));
+  }
+  J_Unimplemented();
+}
+
+int branch_uncond_patch(CodeBuf *codebuf, unsigned loc, unsigned dest)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  int rc;
+
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<23) && offset < (1<<23)) {
+      uoff = offset & ((1<<24)-1);
+      rc = out_16x2(codebuf, T_BW(uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_B(uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+int branch_narrow_patch(CodeBuf *codebuf, unsigned loc)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  if (!Thumb2) return branch_uncond_patch(codebuf, loc, dest);
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  loc += 4;
+  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+  dest >>= 1;
+  loc >>= 1;
+  offset = dest - loc;
+  uoff = offset;
+  if (offset >= -(1<<10) && offset < (1<<10)) {
+    rc = out_16(codebuf, T_B(uoff));
+    codebuf->idx = oldidx;
+    return rc;
+  }
+  J_Unimplemented();
+}
+
+int branch(CodeBuf *codebuf, unsigned cond, unsigned dest)
+{
+  unsigned loc = (codebuf->idx * 2);
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<7) && offset < (1<<7)) {
+      return out_16(codebuf, T_BCC(cond, uoff));
+    }
+    if (offset >= -(1<<19) && offset < (1<<19)) {
+      return out_16x2(codebuf, T_BCCW(cond, uoff));
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<22) && offset < (1<<22)) 
+      return out_32(codebuf, A_BCC(cond, uoff));
+  }
+  J_Unimplemented();
+}
+
+int bcc_patch(CodeBuf *codebuf, unsigned cond, unsigned loc)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    if (offset >= -(1<<7) && offset < (1<<7)) {
+      uoff = offset;
+      rc = out_16(codebuf, T_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+int bl(CodeBuf *codebuf, unsigned dest, unsigned cond = COND_AL)
+{
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<23) && offset < (1<<23)) {
+      uoff = offset;
+      return out_16x2(codebuf, T_BL(uoff));
+    }
+  } else {
+    unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 8;
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<23) && offset < (1<<23))
+      return out_32(codebuf, A_BL(cond, uoff));
+  }
+  J_Unimplemented();
+}
+
+int blx(CodeBuf *codebuf, unsigned dest)
+{
+  unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
+  int offset;
+  unsigned uoff;
+
+  if (!Thumb2) return bl(codebuf, dest); // Already in ARM
+  JASSERT((dest & 3) == 0 && (loc & 1) == 0, "unaligned code");
+  dest >>= 1;
+  loc >>= 1;
+  loc &= ~1;
+  offset = dest - loc;
+  if (offset >= -(1<<23) && offset < (1<<23)) {
+    uoff = offset;
+    return out_16x2(codebuf, T_BLX(uoff));
+  }
+  J_Unimplemented();
+}
+
+int branch_patch(CodeBuf *codebuf, unsigned cond, unsigned loc, unsigned dest)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  int rc;
+
+  oldidx = codebuf->idx;
+  if (Thumb2) {
+    codebuf->idx = loc >> 1;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<19) && offset < (1<<19)) {
+      uoff = offset & ((1<<20)-1);
+      rc = out_16x2(codebuf, T_BCCW(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    codebuf->idx = loc >> 1;
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+int blx_reg(CodeBuf *codebuf, Reg r)
+{
+  if (Thumb2)
+    return out_16(codebuf, T_BLX_REG(r));
+  return out_32(codebuf, A_BLX_REG(r));
+}
+
+int cbz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
+{
+  unsigned offset;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    loc += 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    JASSERT(r < ARM_R8 && offset < 64, "cbz_patch: register or offset out of range");
+    rc = out_16(codebuf, T_CBZ(r, offset));
+  } else {
+    cmp_imm(codebuf, r, 0);
+    loc += 12;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    JASSERT(offset < (1<<22), "cbz_patch: branch offset out of range");
+    rc = out_32(codebuf, A_BCC(COND_EQ, offset));
+  }
+  codebuf->idx = oldidx;
+  return rc;
+}
+
+int cbnz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
+{
+  unsigned offset;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    loc += 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    JASSERT(r < ARM_R8 && offset < 64, "cbnz_patch: register or offset out of range");
+    rc = out_16(codebuf, T_CBNZ(r, offset));
+  } else {
+    cmp_imm(codebuf, r, 0);
+    loc += 12;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    JASSERT(offset < (1<<22), "cbnz_patch: branch offset out of range");
+    rc = out_32(codebuf, A_BCC(COND_NE, offset));
+  }
+  codebuf->idx = oldidx;
+  return rc;
+}
+
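+// Array bounds check: compare the index against the array length and
+// call the array-bounds handler when index >= length (unsigned compare,
+// hence COND_CS).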
+int chka(CodeBuf *codebuf, u32 size, u32 idx)
+{
+  cmp_reg(codebuf, idx, size);
+  return bl(codebuf, handlers[H_ARRAYBOUND], COND_CS);
+}
+
+//-----------------------------------------------------------------------------------
+
+// An example of some debugging logic that you can use to trigger a
+// breakpoint when a particular method is executing.
+#define EQ(S1, S2) (S1 && (strncmp(S1, S2, strlen(S2)) == 0))
+extern "C" void Debug(interpreterState istate)
+{
+  char valuebuf[8192];
+  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf);
+  if (EQ(valuebuf, "java.util.Hashtable.get(Ljava/lang/Object;)")
+      // && istate->method()->bci_from(istate->bcp()) == 45
+      ) {
+    asm("nop");
+  }
+}
+#undef EQ
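+// Typical use (not prescribed by the code above): set a debugger
+// breakpoint on the asm("nop") and arrange for Debug(istate) to be
+// called from the code of interest, or call it by hand from gdb.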
+
+void Thumb2_Push_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
+{
+  unsigned regset = 0;
+  unsigned regmask;
+  unsigned i;
+  Reg r;
+
+  JASSERT(nregs > 0, "nregs must be > 0");
+  if (nregs == 1) {
+    str_imm_wb(codebuf, regs[0], Rstack, -4, 1, 1);
+    return;
+  }
+  for (i = 0; i < nregs; i++) {
+    r = regs[i];
+    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
+    regmask = 1<<r;
+    if (regset != 0 && regmask >= (regset & -regset)) {
+      stm(codebuf, regset, Rstack, PUSH_FD, 1);
+      regset = 0;
+    }
+    regset |= regmask;
+  }
+  stm(codebuf, regset, Rstack, PUSH_FD, 1);
+}
+
+void Thumb2_Pop_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
+{
+  unsigned regset = 0;
+  unsigned regmask;
+  unsigned i;
+  Reg r;
+
+  if (nregs == 0)
+    return;
+  JASSERT(nregs > 0, "nregs must be > 0");
+  if (nregs == 1) {
+    ldr_imm_wb(codebuf, regs[0], Rstack, 4, 0, 1);
+    return;
+  }
+  i = nregs;
+  do {
+    i--;
+    r = regs[i];
+    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
+    regmask = 1<<r;
+    if (regmask <= (regset & -regset)) {
+      ldm(codebuf, regset, Rstack, POP_FD, 1);
+      regset = 0;
+    }
+    regset |= regmask;
+  } while (i > 0);
+  ldm(codebuf, regset, Rstack, POP_FD, 1);
+}
+
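+// Parallel register move: after this, each dst[u] holds the old value of
+// the corresponding src[u]. Moves are ordered so that no source is
+// clobbered before it is read, with ARM_IP used to break a cycle.
+// NB the dst/src arrays are used as scratch space.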
+int mov_multiple(CodeBuf *codebuf, Reg *dst, Reg *src, unsigned nregs)
+{
+  unsigned u, n, p;
+  unsigned smask = 0;
+  unsigned dmask = 0;
+  unsigned free_mask, free_reg;
+
+  for (u = 0, n = 0; u < nregs; u++) {
+    JASSERT(dst[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
+    JASSERT(src[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
+    if (dst[u] != src[u]) {
+      dst[n] = dst[u];
+      src[n++] = src[u];
+    }
+  }
+  while (n) {
+    // Find a reg which is in the dst reg set but not the src reg set
+    smask = 0;
+    dmask = 0;
+    for (u = 0; u < n; u++) {
+      smask |= (1 << src[u]);
+      dmask |= (1 << dst[u]);
+    }
+    free_mask = dmask & ~smask;
+    if (!free_mask) {
+      // No such reg => must use IP
+      Reg r = dst[0];
+      mov_reg(codebuf, ARM_IP, r);
+      for (u = 0; u < n; u++) {
+	if (src[u] == r) src[u] = ARM_IP;
+      }
+      smask ^= (1<<r) | (1<<ARM_IP);
+      free_mask = dmask & ~smask;
+      JASSERT(free_mask, "still no free reg after using ARM_IP?");
+    }
+    free_reg = LOG2(free_mask);
+    for (u = 0, p = 0; u < n; u++) {
+      if (dst[u] == free_reg) {
+	mov_reg(codebuf, dst[u], src[u]);
+      } else {
+	dst[p] = dst[u];
+	src[p++] = src[u];
+      }
+    }
+    n--;
+  }
+  return 0;
+}
+
+#define TOS(jstack)	((jstack)->stack[(jstack)->depth-1])
+#define TOSM1(jstack)	((jstack)->stack[(jstack)->depth-2])
+#define TOSM2(jstack)	((jstack)->stack[(jstack)->depth-3])
+#define TOSM3(jstack)	((jstack)->stack[(jstack)->depth-4])
+
+#define SWAP(jstack) do { \
+		      Reg r = (jstack)->stack[(jstack)->depth-1]; \
+		      (jstack)->stack[(jstack)->depth-1] = (jstack)->stack[(jstack)->depth-2]; \
+		      (jstack)->stack[(jstack)->depth-2] = r; \
+		    } while (0)
+
+#define JSTACK_REG(jstack)		jstack_reg(jstack)
+#define JSTACK_PREFER(jstack, prefer)	jstack_prefer(jstack, prefer)
+
+int PUSH(Thumb2_Stack *jstack, unsigned reg) {
+  jstack->stack[jstack->depth] = reg;
+  jstack->depth++;
+  return reg;
+}
+
+int POP(Thumb2_Stack *jstack) {
+  jstack->depth--;
+  return jstack->stack[jstack->depth];
+}
+
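+// For a 4-bit mask of in-use stack registers, last_clear_bit[mask] is the
+// highest-numbered register still free -- the one the allocator hands out
+// next.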
+static const unsigned last_clear_bit[] = {
+	3,	//	0000
+	3,	//	0001
+	3,	//	0010
+	3,	//	0011
+	3,	//	0100
+	3,	//	0101
+	3,	//	0110
+	3,	//	0111
+	2,	//	1000
+	2,	//	1001
+	2,	//	1010
+	2,	//	1011
+	1,	//	1100
+	1,	//	1101
+	0,	//	1110
+	0,	//	1111 // No registers available...
+};
+
+#define LAST_CLEAR_BIT(mask) last_clear_bit[mask]
+
+unsigned long thumb2_register_allocation_failures = 0;
+
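+// Allocate a free stack register, i.e. one not currently caching a java
+// expression stack value. The caller must have made room first (see
+// Thumb2_Spill below) -- if every register is in use this is counted as
+// an allocation failure.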
+unsigned jstack_reg(Thumb2_Stack *jstack)
+{
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned r;
+  unsigned i;
+
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask &= (1 << STACK_REGS) - 1;
+  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
+    thumb2_register_allocation_failures++;
+    J_BogusImplementation();
+  }
+  r = LAST_CLEAR_BIT(mask);
+  return r;
+}
+
+unsigned jstack_prefer(Thumb2_Stack *jstack, Reg prefer)
+{
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned r;
+  unsigned i;
+
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask &= (1 << STACK_REGS) - 1;
+  if ((prefer & ~mask) & 0x0f) mask |= (~prefer & ((1 << STACK_REGS) - 1));
+  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
+    thumb2_register_allocation_failures++;
+    J_BogusImplementation();
+  }
+  r = LAST_CLEAR_BIT(mask);
+  return r;
+}
+
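+// Make sure at least 'required' java expression stack values are cached
+// in registers, popping any missing ones from the memory stack into
+// freshly allocated registers beneath the values already held.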
+void Thumb2_Fill(Thumb2_Info *jinfo, unsigned required)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned tofill;
+  unsigned r, i;
+
+  if (depth >= required) return;
+  tofill = required - depth;
+  for (i = depth; i > 0;) {
+    i--;
+    mask |= 1 << stack[i];
+    stack[i+tofill] = stack[i];
+  }
+  mask &= (1 << STACK_REGS) - 1;
+  for (i = 0; i < tofill; i++) {
+    JASSERT(mask != (1 << STACK_REGS) - 1, "Fill failed!!!");
+    r = LAST_CLEAR_BIT(mask);
+    mask |= (1 << r);
+    stack[i] = r;
+  }
+  jstack->depth = depth + tofill;
+  Thumb2_Pop_Multiple(jinfo->codebuf, stack, tofill);
+}
+
+static const unsigned bitcount[] = {
+	0,	// 0000
+	1,	// 0001
+	1,	// 0010
+	2,	// 0011
+	1,	// 0100
+	2,	// 0101
+	2,	// 0110
+	3,	// 0111
+	1,	// 1000
+	2,	// 1001
+	2,	// 1010
+	3,	// 1011
+	2,	// 1100
+	3,	// 1101
+	3,	// 1110
+	4,	// 1111
+};
+
+#define BITCOUNT(mask) bitcount[mask]
+
+// Thumb2_Spill:-
+// 	required - ensure that at least this many registers are available
+// 	exclude - bitmask, do not count these registers as available
+//
+// 	The no. of available regs (STACK_REGS) less the no. of registers in
+// 	exclude must be >= the number required, otherwise this function loops!
+//
+// 	Typical usage is
+//
+// 	Thumb2_Spill(jinfo, 2, 0);	// get 2 free regs
+// 	r_res_lo = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
+// 	r_res_hi = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
+//
+//	Use the exclude mask when you do not want a subsequent call to
+//	JSTACK_REG to return a particular register or registers. This can
+//	be useful, for example, with long (64) bit operations. Eg. In the
+//	following we use it to ensure that the hi inputs are not clobbered
+//	by the lo result as part of the intermediate calculation.
+//
+//	Thumb2_Fill(jinfo, 4);
+//	exclude = (1<<rho_hi)|(1<<lho_hi);
+//	rho_lo = POP(jstack);
+//	rho_hi = POP(jstack);
+//	lho_lo = POP(jstack);
+//	lho_hi = POP(jstack);
+//	Thumb2_Spill(jinfo, 2, exclude);
+//	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
+//	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
+//	dop_reg(jinfo->codebuf, DP_ADD, res_lo, lho_lo, rho_lo, SHIFT_LSL, 0); 
+//	dop_reg(jinfo->codebuf, DP_ADC, res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
+//	
+void Thumb2_Spill(Thumb2_Info *jinfo, unsigned required, unsigned exclude)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask;
+  unsigned i;
+  unsigned tospill = 0;
+
+  exclude &= (1 << STACK_REGS) - 1;
+  if (depth <= (STACK_REGS - required) && exclude == 0) return;
+  while (1) {
+    mask = 0;
+    for (i = tospill; i < depth; i++) mask |= 1 << stack[i];
+    mask &= ((1 << STACK_REGS) - 1);
+    mask |= exclude;
+    if (STACK_REGS - BITCOUNT(mask) >= required) break;
+    tospill++;
+  }
+  if (tospill == 0) return;
+  Thumb2_Push_Multiple(jinfo->codebuf, stack, tospill);
+  for (i = tospill; i < depth; i++)
+    stack[i-tospill] = stack[i];
+  jstack->depth = depth - tospill;
+  JASSERT((int)jstack->depth >= 0, "Stack underflow");
+}
+
+// Thumb2_Tmp:-
+// 	Allocate a temp reg for use in local code generation.
+// 	exclude is a bit mask of regs not to use.
+// 	A max of 2 regs can be guaranteed (ARM_IP & ARM_LR)
+// 	If allocating 2 regs you must include the reg you got the
+// 	first time in the exclude list. Otherwise you just get
+// 	the same reg again.
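+// 	e.g.	Reg t1 = Thumb2_Tmp(jinfo, 0);
+// 		Reg t2 = Thumb2_Tmp(jinfo, 1<<t1);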
+Reg Thumb2_Tmp(Thumb2_Info *jinfo, unsigned exclude)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask;
+  unsigned i;
+
+  mask = 0;
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask |= exclude;
+  for (i = 0; i < STACK_REGS; i++)
+    if ((mask & (1<<i)) == 0) return i;
+  if ((mask & (1<<ARM_IP)) == 0) return ARM_IP;
+  if ((mask & (1<<ARM_LR)) == 0) return ARM_LR;
+  JASSERT(0, "failed to allocate a tmp reg");
+  return ARM_IP;	// not reached if the assert fires
+}
+
+void Thumb2_Flush(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  if (jstack->depth > 0)
+    Thumb2_Push_Multiple(jinfo->codebuf, jstack->stack, jstack->depth);
+  jstack->depth = 0;
+}
+
+// SAVE_STACK and RESTORE_STACK save the stack state so that it's
+// possible to do a stack flush to memory and restore that stack state
+// to the same registers.
+#define SAVE_STACK(JSTACK)					\
+  unsigned saved_stack_elements[JSTACK->depth];			\
+  unsigned saved_stack_depth;					\
+  memcpy(saved_stack_elements, JSTACK->stack,			\
+	 JSTACK->depth * sizeof saved_stack_elements[0]);	\
+  saved_stack_depth = JSTACK->depth;
+#define RESTORE_STACK(JSTACK, CODEBUF)					\
+  Thumb2_Pop_Multiple(CODEBUF, saved_stack_elements, saved_stack_depth); \
+  memcpy(JSTACK->stack, saved_stack_elements,				\
+	 saved_stack_depth * sizeof saved_stack_elements[0]);		\
+  JSTACK->depth = saved_stack_depth;
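+
+// Typical usage (see Thumb2_Safepoint below):
+//	SAVE_STACK(jinfo->jstack);
+//	Thumb2_Flush(jinfo);
+//	... call a helper that may clobber the stack registers ...
+//	RESTORE_STACK(jinfo->jstack, jinfo->codebuf);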
+
+// Call this when we are about to corrupt a local
+// The local may already be on the stack
+// For example
+// 	iload	0
+// 	iconst	2
+// 	istore	0
+// 	istore	1
+// Without this check the code generated would be (r4 is local 0, r5 is local 1)
+// 	mov	r4, #2
+//	mov	r5, r4
+// With this check the code should be
+// 	mov	r3, r4
+// 	mov	r4, #2
+// 	mov	r5, r3
+// This is not ideal, but is better than the previous:-)
+//
+void Thumb2_Corrupt(Thumb2_Info *jinfo, unsigned r, unsigned ignore)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned r_new, mask;
+  unsigned i;
+
+  if (ignore >= depth) return;
+//  JASSERT(depth >= ignore, "Cant ignore more than the whole stack!!");
+  if (IS_SREG(r)) return;
+  depth -= ignore;
+  for (i = 0; i < depth; i++) {
+    if (r == stack[i]) {
+      Thumb2_Spill(jinfo, 1, 0);
+      depth = jstack->depth - ignore;
+      r_new = JSTACK_REG(jstack);
+      mov_reg(jinfo->codebuf, r_new, r);
+      for (i = 0; i < depth; i++) if (r == stack[i]) stack[i] = r_new;
+      break;
+    }
+  }
+}
+
+unsigned Thumb2_ResultLocal(Thumb2_Info *jinfo, unsigned bci)
+{
+  unsigned opc = jinfo->code_base[bci];
+  if (jinfo->bc_stackinfo[bci] & BC_BRANCH_TARGET) return 0;
+  if (opc < opc_istore || opc > opc_astore_3) return 0;
+  if (opc == opc_istore || opc == opc_fstore || opc == opc_astore)
+    return jinfo->jregs->r_local[jinfo->code_base[bci+1]];
+  if ((opc >= opc_istore_0 && opc <= opc_istore_3) ||
+	(opc >= opc_fstore_0 && opc <= opc_fstore_3) ||
+	(opc >= opc_astore_0 && opc <= opc_astore_3))
+    return jinfo->jregs->r_local[(opc-opc_istore_0)&3];
+  return 0;
+}
+
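+// Maps a data-processing bytecode to its ARM/VFP op, indexed by
+// (bytecode - opc_iadd). Each group of four covers the i/l/f/d variants;
+// for the long forms the entry at [opc-opc_ladd+1] supplies the high-word
+// op (ADC/SBC), as used by Thumb2_lOp. Zero entries are bytecodes handled
+// elsewhere.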
+static const unsigned char dOps[] = {
+	DP_ADD, DP_ADC, VP_ADD, VP_ADD,
+	DP_SUB, DP_SBC, VP_SUB, VP_SUB,
+	DP_MUL, 0, VP_MUL, VP_MUL,
+	0, 0, VP_DIV, VP_DIV,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	DP_LSL, 0,
+	DP_ASR, 0,
+	DP_LSR, 0,
+	DP_AND, DP_AND, DP_ORR, DP_ORR, DP_EOR, DP_EOR,
+};
+
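+// Try to fold an integer constant into the bytecode that follows it:
+// store it directly into a register-allocated local, or combine it with
+// a following add/sub/shift/logical op or a divide by a power of two.
+// Returns the number of extra bytecodes consumed (0 if the constant was
+// simply pushed).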
+unsigned Thumb2_Imm(Thumb2_Info *jinfo, unsigned imm, unsigned next_bci)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r;
+  unsigned next_op;
+
+  if (!(jinfo->bc_stackinfo[next_bci] & BC_BRANCH_TARGET)) {
+    next_op = jinfo->code_base[next_bci];
+    if (next_op > OPC_LAST_JAVA_OP) {
+      if (Bytecodes::is_defined((Bytecodes::Code)next_op))
+	next_op = (unsigned)Bytecodes::java_code((Bytecodes::Code)next_op);
+    }
+    switch (next_op) {
+      case opc_istore:
+      case opc_fstore:
+      case opc_astore: {
+	unsigned local = jinfo->code_base[next_bci+1];
+	r = jinfo->jregs->r_local[local];
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  mov_imm(jinfo->codebuf, r, imm);
+	  return 2;
+	}
+	break;
+      }
+      case opc_istore_0:
+      case opc_istore_1:
+      case opc_istore_2:
+      case opc_istore_3:
+      case opc_fstore_0:
+      case opc_fstore_1:
+      case opc_fstore_2:
+      case opc_fstore_3:
+      case opc_astore_0:
+      case opc_astore_1:
+      case opc_astore_2:
+      case opc_astore_3: {
+	unsigned local = (jinfo->code_base[next_bci]-opc_istore_0) & 3;
+	r = jinfo->jregs->r_local[local];
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  mov_imm(jinfo->codebuf, r, imm);
+	  return 1;
+	}
+	break;
+      }
+      case opc_iadd:
+      case opc_isub:
+      case opc_ishl:
+      case opc_ishr:
+      case opc_iushr:
+      case opc_iand:
+      case opc_ior:
+      case opc_ixor: {
+	unsigned len = 0;
+	unsigned r_lho;
+
+	Thumb2_Fill(jinfo, 1);
+	r_lho = POP(jstack);
+
+	r = Thumb2_ResultLocal(jinfo, next_bci+1);
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
+	} else {
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	}
+	if (next_op == opc_ishl || next_op == opc_ishr || next_op == opc_iushr)
+	  shift_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
+	else
+	  dop_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
+	return 1+len;
+      }
+
+      case opc_idiv: {
+	unsigned len = 0;
+	unsigned r_lho;
+	unsigned abs_imm = abs((int)imm);
+
+	if ((imm & -imm) == abs_imm) {
+	  unsigned l2_imm = LOG2(abs_imm);
+	  unsigned r_lho;
+
+	  if (imm == 0) break;
+	  if (imm == 1) return 1;
+
+	  Thumb2_Fill(jinfo, 1);
+	  r_lho = POP(jstack);
+
+	  r = Thumb2_ResultLocal(jinfo, next_bci+1);
+	  if (r) {
+	    Thumb2_Corrupt(jinfo, r, 0);
+	    len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
+	  } else {
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	  }
+
+	  if (abs_imm != 1) {
+	    unsigned r_tmp = r_lho;
+	    if (abs_imm != 2) {
+	      r_tmp = Thumb2_Tmp(jinfo, (1<<r_lho));
+	      asr_imm(jinfo->codebuf, r_tmp, r_lho, 31);
+	    }
+	    add_reg_shift(jinfo->codebuf, r, r_lho, r_tmp, SHIFT_LSR, 32-l2_imm);
+	    asr_imm(jinfo->codebuf, r, r, l2_imm);
+	    if ((int)imm < 0)
+	      rsb_imm(jinfo->codebuf, r, r, 0);
+	  } else {
+	    // abs_imm == 1 here means imm == -1: the result is just -lho
+	    rsb_imm(jinfo->codebuf, r, r_lho, 0);
+	  }
+	  return 1+len;
+	}
+	break;
+      }
+    }
+  }
+  Thumb2_Spill(jinfo, 1, 0);
+  r = JSTACK_REG(jstack);
+  PUSH(jstack, r);
+  mov_imm(jinfo->codebuf, r, imm);
+  return 0;
+}
+
+void Thumb2_ImmX2(Thumb2_Info *jinfo, unsigned lo, unsigned hi)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+
+  Thumb2_Spill(jinfo, 2, 0);
+  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+  mov_imm(jinfo->codebuf, r_lo, lo);
+  mov_imm(jinfo->codebuf, r_hi, hi);
+}
+
+#define LOCAL_OFFSET(local, stackdepth, nlocals) ((stackdepth)*4 + FRAME_SIZE + ((nlocals)-1-(local))*4)
+#define ISTATE_REG(jinfo)	  ((jinfo)->use_istate ? Ristate : Rstack)
+#define ISTATE(jinfo, stackdepth) ((jinfo)->use_istate ? 0 : (((stackdepth)-(jinfo)->jstack->depth)*4))
+#define ISTATE_OFFSET(jinfo, stackdepth, offset) (ISTATE(jinfo, stackdepth) + (offset))
+
+void load_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  if (jinfo->use_istate)
+    ldr_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4);
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals));
+}
+
+void store_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  if (jinfo->use_istate)
+    str_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4);
+  else
+    str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals));
+}
+
+void load_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{
+  if (jinfo->use_istate)
+    ldr_imm(jinfo->codebuf, r, Ristate, istate_offset);
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset));
+}
+
+void store_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{
+  if (jinfo->use_istate)
+    str_imm(jinfo->codebuf, r, Ristate, istate_offset);
+  else
+    str_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset));
+}
+
+void Thumb2_Load(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r;
+
+  r = jinfo->jregs->r_local[local];
+  if (r) {
+    PUSH(jstack, r);
+  } else {
+    int nlocals = jinfo->method->max_locals();
+
+    Thumb2_Spill(jinfo, 1, 0);
+    JASSERT(stackdepth >= jstack->depth, "negative stack offset?");
+    stackdepth -= jstack->depth;
+    r = JSTACK_REG(jstack);
+    PUSH(jstack, r);
+    load_local(jinfo, r, local, stackdepth);
+  }
+}
+
+void Thumb2_LoadX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  int nlocals = jinfo->method->max_locals();
+
+  r_hi = jinfo->jregs->r_local[local];
+  if (r_hi) {
+    r_lo = jinfo->jregs->r_local[local+1];
+    if (r_lo) {
+      PUSH(jstack, r_hi);
+      PUSH(jstack, r_lo);
+    } else {
+      Thumb2_Spill(jinfo, 1, 0);
+      stackdepth -= jstack->depth;
+      PUSH(jstack, r_hi);
+      r_lo = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_lo, local+1, stackdepth);
+    }
+  } else {
+    r_lo = jinfo->jregs->r_local[local+1];
+    if (r_lo) {
+      Thumb2_Spill(jinfo, 1, 0);
+      stackdepth -= jstack->depth;
+      r_hi = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_hi, local, stackdepth);
+      PUSH(jstack, r_lo);
+    } else {
+      Thumb2_Spill(jinfo, 2, 0);
+      stackdepth -= jstack->depth;
+      r_hi = PUSH(jstack, JSTACK_REG(jstack));
+      r_lo = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_hi, local, stackdepth);
+      load_local(jinfo, r_lo, local+1, stackdepth);
+    }
+  }
+}
+
+void Thumb2_Store(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r, r_local;
+  int nlocals = jinfo->method->max_locals();
+
+  Thumb2_Fill(jinfo, 1);
+  stackdepth -= jstack->depth;
+  r = POP(jstack);
+  r_local = jinfo->jregs->r_local[local];
+  if (r_local) {
+    Thumb2_Corrupt(jinfo, r_local, 0);
+    mov_reg(jinfo->codebuf, r_local, r);
+  } else {
+    store_local(jinfo, r, local, stackdepth);
+  }
+}
+
+void Thumb2_StoreX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  unsigned r_local_lo, r_local_hi;
+  int nlocals = jinfo->method->max_locals();
+
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  stackdepth -= 2;
+
+  r_local_hi = jinfo->jregs->r_local[local];
+  if (r_local_hi) {
+    Thumb2_Corrupt(jinfo, r_local_hi, 0);
+    mov_reg(jinfo->codebuf, r_local_hi, r_hi);
+  } else {
+    store_local(jinfo, r_hi, local, stackdepth-jstack->depth);
+  }
+
+  r_local_lo = jinfo->jregs->r_local[local+1];
+  if (r_local_lo) {
+    Thumb2_Corrupt(jinfo, r_local_lo, 0);
+    mov_reg(jinfo->codebuf, r_local_lo, r_lo);
+  } else {
+    store_local(jinfo, r_lo, local+1, stackdepth-jstack->depth);
+  }
+}
+
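+// Array element load: the length word at offset 8 in the array object is
+// fetched and bounds-checked with chka(), then the element is read from
+// offset 12 (16 for the 8-byte types) plus the scaled index.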
+void Thumb2_Xaload(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_index, r_array, r_value;
+  unsigned op = opc - (unsigned)opc_iaload;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 2);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
+  r_value = JSTACK_REG(jstack);
+  PUSH(jstack, r_value);
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  if (opc == opc_baload) {
+    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
+    ldrsb_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_caload) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    ldrh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_saload) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    ldrsh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
+    ldr_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  }
+}
+
+void Thumb2_X2aload(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_index, r_array, r_lo, r_hi;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 2);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  Thumb2_Spill(jinfo, 2, 0);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
+  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
+  LDRD_PRE(jstack, r_lo, r_hi);
+  ldrd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16);
+}
+
+void Thumb2_Xastore(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_value, r_index, r_array;
+  unsigned op = opc - (unsigned)opc_iastore;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 3);
+  r_value = POP(jstack);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_value));
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  if (opc == opc_bastore) {
+    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
+    strb_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_castore || opc == opc_sastore) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    strh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
+    str_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  }
+}
+
+void Thumb2_X2astore(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi, r_index, r_array;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 4);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_lo)|(1<<r_hi));
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
+  strd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16);
+}
+
+void Thumb2_Pop(Thumb2_Info *jinfo, unsigned n)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  while (n > 0 && jstack->depth > 0) {
+    POP(jstack);
+    n--;
+  }
+  if (n > 0) add_imm(jinfo->codebuf, Rstack, Rstack, n * 4);
+}
+
+void Thumb2_Dup(Thumb2_Info *jinfo, unsigned n)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth;
+  unsigned i;
+
+  Thumb2_Fill(jinfo, n+1);
+  depth = jstack->depth;
+  for (i = 0; i <= n; i++)
+    stack[depth-i] = stack[depth-i-1];
+  stack[depth-n-1] = stack[depth];
+  jstack->depth = depth + 1;
+}
+
+void Thumb2_Dup2(Thumb2_Info *jinfo, unsigned n)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth;
+  unsigned i;
+
+  Thumb2_Fill(jinfo, n+2);
+  depth = jstack->depth;
+  for (i = 0; i <= n+1; i++)
+    stack[depth-i+1] = stack[depth-i-1];
+  stack[depth-n-1] = stack[depth+1];
+  stack[depth-n-2] = stack[depth];
+  jstack->depth = depth + 2;
+}
+
+void Thumb2_Swap(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  Thumb2_Fill(jinfo, 2);
+  SWAP(jstack);
+}
+
+void Thumb2_iOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lho, r_rho, r;
+
+  Thumb2_Fill(jinfo, 2);
+  r_rho = POP(jstack);
+  r_lho = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r = JSTACK_REG(jstack);
+  PUSH(jstack, r);
+  switch (opc) {
+  case opc_ishl:
+  case opc_ishr:
+  case opc_iushr:
+    {
+      unsigned tmp_reg = Thumb2_Tmp(jinfo, 1 << r_lho | 1 << r_rho | 1 << r);
+      and_imm(jinfo->codebuf, tmp_reg, r_rho, 31);
+      r_rho = tmp_reg;
+      break;
+    }
+  }
+  dop_reg(jinfo->codebuf, dOps[opc-opc_iadd], r, r_lho, r_rho, 0, 0);
+}
+
+void Thumb2_iNeg(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_src, r;
+
+  Thumb2_Fill(jinfo, 1);
+  r_src = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r = JSTACK_REG(jstack);
+  PUSH(jstack, r);
+  rsb_imm(jinfo->codebuf, r, r_src, 0);
+}
+
+void Thumb2_lNeg(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
+  Thumb2_Spill(jinfo, 1, (1<<r_hi));
+  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
+  JASSERT(r_res_lo != r_res_hi, "oops");
+  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
+  rsb_imm(jinfo->codebuf, r_res_lo, r_lo, 0);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_hi)|(1<<r_res_lo));
+  mov_imm(jinfo->codebuf, r_tmp, 0);
+  dop_reg(jinfo->codebuf, DP_SBC, r_res_hi, r_tmp, r_hi, SHIFT_LSL, 0);
+}
+
+void Thumb2_fNeg(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r, r_result;
+
+  Thumb2_Fill(jinfo, 1);
+  r = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_result = PUSH(jstack, JSTACK_REG(jstack));
+  eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
+}
+
+// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
+static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
+
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
+  Thumb2_Spill(jinfo, 1, (1<<r_hi));
+  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
+  JASSERT(r_res_lo != r_res_hi, "oops");
+  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
+  mov_reg(jinfo->codebuf, r_res_lo, r_lo);
+  dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
+}
+
+void Thumb2_dNeg(Thumb2_Info *jinfo)
+{
+  Thumb2_dUnaryOp(jinfo, DP_EOR);
+}
+
+void Thumb2_dAbs(Thumb2_Info *jinfo)
+{
+  Thumb2_dUnaryOp(jinfo, DP_BIC);
+}
+
+void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned res_lo, res_hi;
+  unsigned lho_lo, lho_hi;
+  unsigned rho_lo, rho_hi;
+
+  Thumb2_Fill(jinfo, 4);
+  rho_lo = POP(jstack);
+  rho_hi = POP(jstack);
+  lho_lo = POP(jstack);
+  lho_hi = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  res_hi = PUSH(jstack, JSTACK_REG(jstack));
+  Thumb2_Spill(jinfo, 1, (1<<lho_hi)|(1<<rho_hi));
+  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_hi)|(1<<rho_hi))));
+  JASSERT(res_lo != rho_hi && res_lo != lho_hi, "res_lo != rho_hi && res_lo != lho_hi");
+  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd], res_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
+  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd+1], res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
+}
+
+void Thumb2_lmul(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned res_lo, res_hi;
+  unsigned lho_lo, lho_hi;
+  unsigned rho_lo, rho_hi;
+  unsigned r_tmp_lo, r_tmp_hi;
+  unsigned op_mask;
+
+  Thumb2_Fill(jinfo, 4);
+  rho_lo = POP(jstack);
+  rho_hi = POP(jstack);
+  lho_lo = POP(jstack);
+  lho_hi = POP(jstack);
+  op_mask = (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi);
+  Thumb2_Spill(jinfo, 2, 0);
+  res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
+  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
+  r_tmp_lo = res_lo;
+  r_tmp_hi = res_hi;
+  if (op_mask & (1<<r_tmp_lo)) r_tmp_lo = Thumb2_Tmp(jinfo, op_mask);
+  if (op_mask & (1<<r_tmp_hi)) r_tmp_hi = Thumb2_Tmp(jinfo, op_mask|(1<<r_tmp_lo));
+  umull(jinfo->codebuf, r_tmp_lo, r_tmp_hi, rho_lo, lho_lo);
+  mla(jinfo->codebuf, r_tmp_hi, rho_lo, lho_hi, r_tmp_hi);
+  mla(jinfo->codebuf, res_hi, rho_hi, lho_lo, r_tmp_hi);
+  mov_reg(jinfo->codebuf, res_lo, r_tmp_lo);
+}
+
+void Thumb2_fOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned rho, lho, res;
+
+  Thumb2_Fill(jinfo, 2);
+  rho = POP(jstack);
+  lho = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  res = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
+  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
+  vop_reg_s(jinfo->codebuf, dOps[opc-opc_iadd], VFP_S0, VFP_S0, VFP_S1);
+  vmov_reg_s_toARM(jinfo->codebuf, res, VFP_S0);
+}
+
+void Thumb2_dOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned rho_lo, rho_hi, lho_lo, lho_hi, res_lo, res_hi;
+
+  Thumb2_Fill(jinfo, 4);
+  rho_lo = POP(jstack);
+  rho_hi = POP(jstack);
+  lho_lo = POP(jstack);
+  lho_hi = POP(jstack);
+  Thumb2_Spill(jinfo, 2, 0);
+  res_hi = PUSH(jstack, JSTACK_REG(jstack));
+  res_lo = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D0, lho_lo, lho_hi);
+  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D1, rho_lo, rho_hi);
+  vop_reg_d(jinfo->codebuf, dOps[opc-opc_iadd], VFP_D0, VFP_D0, VFP_D1);
+  vmov_reg_d_toARM(jinfo->codebuf, res_lo, res_hi, VFP_D0);
+}
+
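+// Debugging/trap stub (note the 'handler' argument is currently unused):
+// load the opcode and bci into R0/R1, then store through a null pointer
+// so the resulting fault can be caught, e.g. in a debugger.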
+void Thumb2_Handler(Thumb2_Info *jinfo, unsigned handler, unsigned opcode, unsigned bci)
+{
+  mov_imm(jinfo->codebuf, ARM_R0, opcode);
+  mov_imm(jinfo->codebuf, ARM_R1, bci);
+  mov_imm(jinfo->codebuf, ARM_IP, 0);
+  str_imm(jinfo->codebuf, ARM_IP, ARM_IP, 0);
+}
+
+void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start);
+
+// called from the SEGV handling code to see if a polling page read
+// is from a legitimate safepoint address
+int Thumb2_Install_Safepoint_PC(ucontext_t *uc, int magicByteOffset)
+{
+  mcontext_t *mc = &uc->uc_mcontext;
+  unsigned long arm_pc = mc->arm_pc;
+  // ensure the faulting instruction lies in JITted code
+  if (arm_pc < (unsigned long)(thumb2_codebuf + 1)) {
+    return false;
+  }
+  if (arm_pc >= (unsigned long)thumb2_codebuf->sp) {
+    return false;
+  }
+  // skip to the MAGIC word and check it is valid
+  arm_pc += magicByteOffset;
+  if (Thumb2) {
+    if (*((short*)arm_pc) != (short)THUMB2_POLLING_PAGE_MAGIC) {
+      return false;
+    }
+    // skip the magic word 
+    arm_pc += 2;
+  } else {
+    if (*((unsigned*)arm_pc) != (unsigned)ARM_POLLING_PAGE_MAGIC) {
+      return false;
+    }
+    // skip the magic word 
+    arm_pc += 4;
+  }
+  mc->arm_pc = arm_pc;
+
+  return true;
+}
+
+// Insert code to poll the SafepointSynchronize state and call
+// Helper_SafePoint.
+// -- if offset is negative it identifies a bytecode index which
+// should be jumped to via an unconditional backward branch
+// taken either before or after executing the safepoint check
+// -- if offset is zero or positive then a return or conditional
+// branch, respectively, needs to be compiled so control should
+// flow to end of the safepoint check whether or not it is executed
+
+void Thumb2_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci, int offset)
+{
+  // normal case: read the polling page and branch to skip
+  // the safepoint test
+  // abnormal case: read the polling page, trap to handler
+  // which resets return address into the safepoint check code
+  //
+  // with a negative offset the generated code will look like
+  //    movw r_tmp, #polling_page
+  //    movt r_tmp, #polling_page
+  //    ldr r_tmp, [r_tmp, #K] ; K == 2 * byte offset to the magic word
+  //    b.n #branchtarget
+  //    #POLLING_PAGE_MAGIC ; magic data word
+  //    <
+  //     safepoint check  code
+  //    >
+  //    b.n #branchtarget
+  //
+  // i.e. the generated code includes the branch backwards twice
+  // and relies on a fault at the ldr to skip into the safepoint code
+  //
+  // with a zero or positive offset the caller will plant the return
+  // (zero) or conditional branch (positive) code after the check so
+  // the normal path skips round the safepoint check code and the
+  // abnormal path just drops through. the generated code will look
+  // like
+  //
+  //    movw r_tmp, #polling_page
+  //    movt r_tmp, #polling_page
+  //    ldr r_tmp, [r_tmp, #0]
+  //    b.n L1
+  //    POLLING_PAGE_MAGIC ; data
+  //    <
+  //     safepoint check  code
+  //    >
+  // L1:
+  //    <caller plants branch/return here>
+  //
+  //  n.b. for a return there is no need save or restore locals
+
+  bool is_return = offset == 0; // This is some kind of return bytecode
+
+  int r_tmp = Thumb2_Tmp(jinfo, 0);
+  unsigned dest;
+  if (offset < 0) {
+    // the index of the backward branch target in the code buffer
+    dest = jinfo->bc_stackinfo[bci+offset] & ~BC_FLAGS_MASK;
+  } else {
+    dest = 0;
+  }
+  mov_imm(jinfo->codebuf, r_tmp, (u32)os::get_polling_page());
+  // this encodes the offset from the read instruction to the magic
+  // word into the fault address, assuming it is 4 bytes. however, if
+  // we need to plant a wide backwards branch we may need to rewrite
+  // this instruction with offset 6. so stash the instruction location
+  // here just in case. n.b. the offset is doubled to ensure the fault
+  // address in aligned -- aligned reads always use a single 16-bit
+  // instruction whereas non-aligned reads require 2 x 16 bit words
+  unsigned read_loc = out_loc(jinfo->codebuf);
+  unsigned loc_fwd_branch = 0;
+  ldr_imm(jinfo->codebuf, r_tmp, r_tmp, Thumb2 ? 8 : 16);
+  if (offset < 0) {
+    branch_uncond(jinfo->codebuf, dest);
+    if (Thumb2) {
+      unsigned magic_loc = out_loc(jinfo->codebuf);
+      if (magic_loc - read_loc != 4) {
+	JASSERT(magic_loc - read_loc == 6, "bad safepoint offset to magic word");
+	// must have needed a wide branch so patch the load instruction
+	jinfo->codebuf->idx = read_loc >> 1;
+	ldr_imm(jinfo->codebuf, r_tmp, r_tmp, 12);
+	jinfo->codebuf->idx = magic_loc >> 1;
+      }
+    }
+  } else {
+    // leave space for the forward skip branch
+    // location of branch instruction is read_loc + 2
+    loc_fwd_branch = forward_short(jinfo->codebuf);
+  }
+  // now write a magic word after the branch so the signal handler can
+  // test that a polling page read is kosher
+  if (Thumb2)
+    out_16(jinfo->codebuf, THUMB2_POLLING_PAGE_MAGIC);
+  else
+    out_32(jinfo->codebuf, ARM_POLLING_PAGE_MAGIC);
+
+  {
+    // Flush the stack to memory and save its register state.
+    SAVE_STACK(jinfo->jstack);
+    Thumb2_Flush(jinfo);
+
+    // We don't save or restore locals if we're returning.
+    if (! is_return)
+      Thumb2_save_local_refs(jinfo, stackdepth);
+
+    // now the safepoint polling code itself
+    mov_imm(jinfo->codebuf, ARM_R1, bci+CONSTMETHOD_CODEOFFSET);
+    add_imm(jinfo->codebuf, ARM_R2, ISTATE_REG(jinfo),
+	    ISTATE_OFFSET(jinfo, stackdepth, 0));
+    bl(jinfo->codebuf, handlers[H_SAFEPOINT]);
+
+    if (! is_return)
+      Thumb2_restore_local_refs(jinfo, stackdepth);
+
+    RESTORE_STACK(jinfo->jstack, jinfo->codebuf);
+
+    if (offset < 0) {
+      // needs another unconditional backward branch
+      branch_uncond(jinfo->codebuf, dest);
+    } else {
+      // patch in the forward skip branch
+      branch_narrow_patch(jinfo->codebuf, loc_fwd_branch);
+    }
+  }
+}
+
+// If this is a backward branch, compile a safepoint check
+void Thumb2_Cond_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci) {
+  int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
+  unsigned dest_taken = bci + offset;
+
+  if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
+    // pass the offset as positive so the safepoint code plants a forward
+    // skip over the test rather than doing an unconditional backwards
+    // branch. that allows the condition test to be planted by
+    // whatever follows this call
+    Thumb2_Safepoint(jinfo, stackdepth, bci, -offset);
+  }
+}
+
+int Thumb2_Branch(Thumb2_Info *jinfo, unsigned bci, unsigned cond)
+{
+    int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
+    unsigned dest_taken = bci + offset;
+    unsigned dest_not_taken = bci + 3;
+    unsigned loc;
+
+    if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
+      branch(jinfo->codebuf, cond, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
+      return dest_not_taken;
+    }
+    loc = forward_long(jinfo->codebuf);
+    Thumb2_codegen(jinfo, dest_not_taken);
+    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in branch not compiled!!!");
+    branch_patch(jinfo->codebuf, cond, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
+    return -1;
+}
+
+int Thumb2_Goto(Thumb2_Info *jinfo, unsigned bci, int offset, int len, int stackdepth = -1)
+{
+    unsigned dest_taken = bci + offset;
+    unsigned dest_not_taken = bci + len;
+    unsigned loc;
+
+    if (stackdepth >= 0
+	&& jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
+      // n.b. the backwards branch will be planted by the safepoint routine
+      Thumb2_Safepoint(jinfo, stackdepth, bci, offset);
+      return dest_not_taken;
+    }
+    loc = forward_long(jinfo->codebuf);
+    Thumb2_codegen(jinfo, dest_not_taken);
+    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in goto not compiled!!!");
+    branch_uncond_patch(jinfo->codebuf, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
+    return -1;
+}
+
+void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if (locals_info[i] & (1<<LOCAL_REF)) {
+	load_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_save_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_restore_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+	load_local(jinfo, r, i, stackdepth);
+    }
+  }
+}
+
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
+{
+    Thumb2_Flush(jinfo);
+    Thumb2_save_all_locals(jinfo, stackdepth);
+    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[handler]);
+}
+
+void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode, int bci, int stackdepth)
+{
+  Symbol *name = jinfo->method->name();
+  Thumb2_Safepoint(jinfo, stackdepth, bci, 0);
+
+  Reg r_lo, r;
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  if (jinfo->method->has_monitor_bytecodes()) {
+    Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+  }
+
+  if (jinfo->method->is_synchronized()) {
+    unsigned loc_success1, loc_success2, loc_failed, loc_retry, loc_exception;
+    unsigned loc_illegal_monitor_state;
+    Thumb2_Flush(jinfo);
+//    Thumb2_save_local_refs(jinfo);
+    // Free the monitor
+    //
+    // 		add	r1, #<stackdepth>-8
+    // 		ldr	r2, [r1, #4]
+    //		cbz	r2, throw_illegal_monitor_state
+    //		ldr	r0, [r1, #0]
+    //		mov	r3, #0
+    //		str	r3, [r1, #4]
+    //		cbz	r0, success
+    //	retry:
+    //		ldrex	r3, [r2, #0]
+    //		cmp	r1, r3
+    //		bne	failed
+    //		strex	r3, r0, [r2, #0]
+    //		cbz	r3, success
+    //		b	retry
+    //	failed:
+    //		str	r2, [r1, #4]
+    //		...
+    //  success:
+    //
+    // JAZ_V1 == tmp2
+    // JAZ_V2 == tmp1
+    add_imm(jinfo->codebuf, ARM_R1, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) - frame::interpreter_frame_monitor_size()*wordSize);
+
+    ldr_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4);
+    loc_illegal_monitor_state = forward_cb(jinfo->codebuf);
+    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);
+    mov_imm(jinfo->codebuf, ARM_R3, 0);
+    str_imm(jinfo->codebuf, ARM_R3, ARM_R1, 4);
+    loc_success1 = forward_cb(jinfo->codebuf);
+    loc_retry = out_loc(jinfo->codebuf);
+    ldrex_imm(jinfo->codebuf, ARM_R3, ARM_R2);
+    cmp_reg(jinfo->codebuf, ARM_R1, ARM_R3);
+    loc_failed = forward_short(jinfo->codebuf);
+    strex_imm(jinfo->codebuf, ARM_R3, ARM_R0, ARM_R2);
+    loc_success2 = forward_cb(jinfo->codebuf);
+    branch_uncond(jinfo->codebuf, loc_retry);
+    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+    cbz_patch(jinfo->codebuf, ARM_R2, loc_illegal_monitor_state);
+    str_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4);
+    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_EXIT]);
+    loc_exception = forward_cb(jinfo->codebuf);
+    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_success1);
+    cbz_patch(jinfo->codebuf, ARM_R3, loc_success2);
+  }
+
+  if (opcode == opc_return) {
+    if (jinfo->compiled_return) {
+      unsigned ret_idx = jinfo->compiled_return;
+
+      branch_uncond(jinfo->codebuf, ret_idx);
+      return;
+    }
+    jinfo->compiled_return = jinfo->codebuf->idx * 2;
+  } else {
+    if (opcode == opc_lreturn || opcode == opc_dreturn) {
+      Thumb2_Fill(jinfo, 2);
+      r_lo = POP(jstack);
+      r = POP(jstack);
+    } else {
+      Thumb2_Fill(jinfo, 1);
+      r = POP(jstack);
+      if (jinfo->compiled_word_return[r]) {
+        unsigned ret_idx = jinfo->compiled_word_return[r];
+
+        branch_uncond(jinfo->codebuf, ret_idx);
+        return;
+      }
+      jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
+    }
+  }
+
+  mov_imm(jinfo->codebuf, ARM_LR, 0);
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_SP);
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_FP);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 0);
+
+  if (opcode == opc_return) {
+    add_imm(jinfo->codebuf, Rstack, Rstack, jinfo->method->max_locals() * sizeof(int) + 4);
+  } else {
+    if (opcode == opc_lreturn || opcode == opc_dreturn) {
+      str_imm(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int));
+      str_imm_wb(jinfo->codebuf, r_lo, Rstack, jinfo->method->max_locals() * sizeof(int)-4, 1, 1);
+    } else {
+      str_imm_wb(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int), 1, 1);
+    }
+  }
+
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_TOP_ZERO_FRAME);
+  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+
+  // deoptimized_frames = 0
+  // FIXME: This should be done in the slow entry, but only three
+  // words are allocated there for the instructions.
+  mov_imm(jinfo->codebuf, ARM_R0, 0);
+
+  ldm(jinfo->codebuf, C_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+}
+
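+// Thumb2_Accessor: compile the stylised accessor pattern
+// (aload_0; getfield; ireturn/areturn).  Returns 0 if the field is not yet
+// resolved, otherwise emits a compact stub that loads the field straight
+// from the receiver and returns 1.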
+int Thumb2_Accessor(Thumb2_Info *jinfo)
+{
+  jubyte *code_base = jinfo->code_base;
+  constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+  ConstantPoolCacheEntry* cache;
+  int index = GET_NATIVE_U2(code_base+2);
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned slow_entry;
+
+  JASSERT(code_base[0] == opc_aload_0 || code_base[0] == opc_iaccess_0, "not an aload_0 in accessor");
+  JASSERT(code_base[4] == opc_ireturn || code_base[4] == opc_areturn, "not an ireturn in accessor");
+  cache = cp->entry_at(index);
+  if (!cache->is_resolved((Bytecodes::Code)opc_getfield)) return 0;
+
+  TosState tos_type = cache->flag_state();
+  int field_offset = cache->f2_as_index();
+
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  slow_entry = out_pos(jinfo->codebuf);
+  stm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, slow_entry + FAST_ENTRY_OFFSET);
+  ldm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_align(jinfo->codebuf, sizeof(unsigned));
+
+  out_32(jinfo->codebuf, 0);	// pointer to osr table
+  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
+  out_32(jinfo->codebuf, 0);	// next compiled method
+
+  out_32(jinfo->codebuf, -1);    // regusage
+  out_32(jinfo->codebuf, -1);
+  out_32(jinfo->codebuf, -1);
+
+  out_align(jinfo->codebuf, CODE_ALIGN);
+
+  // fast entry point
+  bc_stackinfo[0] = (bc_stackinfo[0] & BC_FLAGS_MASK) | (jinfo->codebuf->idx * 2) | BC_COMPILED;
+  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_JAVA_SP);
+  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);
+  if (tos_type == btos)
+    ldrsb_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else if (tos_type == ctos)
+    ldrh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else if (tos_type == stos)
+    ldrsh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else
+    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  str_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);
+
+  if (cache->is_volatile())
+    fullBarrier(jinfo->codebuf);
+
+  // deoptimized_frames = 0
+  mov_imm(jinfo->codebuf, ARM_R0, 0);
+  mov_reg(jinfo->codebuf, ARM_PC, ARM_LR);
+
+  return 1;
+}
+
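+// STACKDEPTH: extract the expression stack depth from a bc_stackinfo word
+// (masking off the flag bits) and add the monitor area
+// (frame::interpreter_frame_monitor_size() words) kept on the stack by
+// synchronized methods.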
+#define STACKDEPTH(jinfo, stackinfo) (((stackinfo) & ~BC_FLAGS_MASK) + \
+	((jinfo)->method->is_synchronized() ? frame::interpreter_frame_monitor_size() : 0))
+
+
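+// Thumb2_Enter: emit the method prologue.  Generates the slow (callee-save)
+// entry stub and the metadata words that follow it, then the fast entry:
+// stack overflow check, frame construction (istate setup), zeroing of the
+// locals that need it, monitor acquisition for synchronized methods, and
+// loading of register-allocated parameters.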
+void Thumb2_Enter(Thumb2_Info *jinfo)
+{
+  int parms = jinfo->method->size_of_parameters();
+  int extra_locals = jinfo->method->max_locals() - parms;
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+  unsigned stackdepth = 0;
+  unsigned slow_entry;
+  Symbol *name = jinfo->method->name();
+
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  slow_entry = out_pos(jinfo->codebuf);
+  stm(jinfo->codebuf, I_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, slow_entry + FAST_ENTRY_OFFSET);
+  ldm(jinfo->codebuf, I_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_align(jinfo->codebuf, sizeof(unsigned));
+
+  out_32(jinfo->codebuf, 0);	// Space for osr_table pointer
+  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
+  out_32(jinfo->codebuf, 0);	// Pointer to next method
+
+  out_32(jinfo->codebuf, 0);    // regusage
+  out_32(jinfo->codebuf, 0);
+  out_32(jinfo->codebuf, 0);
+
+  out_align(jinfo->codebuf, CODE_ALIGN);
+
+  // Fast entry point == Slow entry + 64 - caller save
+  // R0 = method
+  // R2 = thread
+  stm(jinfo->codebuf, C_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+  {
+    unsigned stacksize;
+
+    stacksize = (extra_locals + jinfo->method->max_stack()) * sizeof(int);
+    stacksize += FRAME_SIZE + STACK_SPARE;
+    if (!jinfo->is_leaf || stacksize > LEAF_STACK_SIZE) {
+      ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_STACK_BASE);
+      sub_imm(jinfo->codebuf, ARM_R1, Rstack, stacksize + LEAF_STACK_SIZE);
+      cmp_reg(jinfo->codebuf, ARM_R3, ARM_R1);
+      bl(jinfo->codebuf, handlers[H_STACK_OVERFLOW], COND_CS);
+    }
+  }
+  mov_imm(jinfo->codebuf, ARM_R1, 0);
+
+  if (extra_locals > 0) {
+    sub_imm(jinfo->codebuf, Rstack, Rstack, extra_locals * 4);
+
+    for (i = 0; i < extra_locals; i++) {
+      unsigned linfo = locals_info[parms+i];
+      if (linfo & (1<< LOCAL_REF) || ((linfo >> LOCAL_INT) & 0x1f) == 0)
+	str_imm(jinfo->codebuf, ARM_R1, Rstack, (extra_locals-1 - i) * 4);
+    }
+  }
+
+  ldr_imm(jinfo->codebuf, ARM_IP, ARM_R0, METHOD_CONSTMETHOD);
+  ldr_imm(jinfo->codebuf, ARM_IP, ARM_IP, METHOD_CONSTANTS);
+
+  add_imm(jinfo->codebuf, Rlocals, Rstack, (jinfo->method->max_locals()-1) * sizeof(int));
+
+  sub_imm(jinfo->codebuf, Rstack, Rstack, FRAME_SIZE);
+
+  if (jinfo->use_istate) mov_reg(jinfo->codebuf, Ristate, Rstack);
+  store_istate(jinfo, Rstack, ISTATE_SELF_LINK, stackdepth);
+
+  store_istate(jinfo, Rstack, ISTATE_MONITOR_BASE, stackdepth);
+
+  store_istate(jinfo, Rlocals, ISTATE_LOCALS, stackdepth);
+
+  if (jinfo->method->is_synchronized()) {
+    sub_imm(jinfo->codebuf, Rstack, Rstack, frame::interpreter_frame_monitor_size()*wordSize);
+    stackdepth = frame::interpreter_frame_monitor_size();
+    if (jinfo->method->is_static()) {
+      ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_POOL_HOLDER);
+      ldr_imm(jinfo->codebuf, JAZ_V1, ARM_R3, KLASS_PART+KLASS_JAVA_MIRROR);
+    } else {
+      ldr_imm(jinfo->codebuf, JAZ_V1, Rlocals, 0);
+    }
+    str_imm(jinfo->codebuf, JAZ_V1, Rstack, 4);
+  }
+
+  store_istate(jinfo, ARM_R1, ISTATE_MSG, stackdepth);
+  store_istate(jinfo, ARM_R1, ISTATE_OOP_TEMP, stackdepth);
+
+  sub_imm(jinfo->codebuf, ARM_R3, Rstack, jinfo->method->max_stack() * sizeof(int));
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP);
+
+  store_istate(jinfo, Rstack, ISTATE_STACK_BASE, stackdepth);
+
+  sub_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
+  store_istate(jinfo, ARM_R3, ISTATE_STACK_LIMIT, stackdepth);
+
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME);
+  store_istate(jinfo, ARM_R3, ISTATE_NEXT_FRAME, stackdepth);
+
+  mov_imm(jinfo->codebuf, ARM_R3, INTERPRETER_FRAME);
+  store_istate(jinfo, ARM_R3, ISTATE_FRAME_TYPE, stackdepth);
+
+  mov_imm(jinfo->codebuf, ARM_R1, 0);   // set last SP to zero before
+                                        // setting FP
+  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP);
+  add_imm(jinfo->codebuf, ARM_R3, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) + ISTATE_NEXT_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_FP);
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_SP);
+
+  ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_CACHE);
+  store_istate(jinfo, ARM_R3, ISTATE_CONSTANTS, stackdepth);
+
+  store_istate(jinfo, Rthread, ISTATE_THREAD, stackdepth);
+  store_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
+
+  if (jinfo->method->is_synchronized()) {
+    unsigned loc_retry, loc_failed, loc_success, loc_exception;
+
+    // JAZ_V1 == monitor object
+    //
+    // Try to acquire the monitor. Seems very sub-optimal
+    // 		ldr	r3, [JAZ_V1, #0]
+    // 		orr	r3, r3, #1
+    // 		str	r3, [Rstack, #0]
+    // 	retry:
+    // 		ldrex	r0, [JAZ_V1, #0]
+    // 		cmp	r3, r0
+    // 		bne	failed
+    // 		strex	r0, Rstack, [JAZ_V1, #0]
+    // 		cbz	r0, success
+    // 		b	retry
+    // 	failed:
+    // 		<failed - someone else has the monitor - must yield>
+    //  success:
+    // 		<success - acquired the monitor>
+    //
+    ldr_imm(jinfo->codebuf, ARM_R3, JAZ_V1, 0);
+    orr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 1);
+    str_imm(jinfo->codebuf, ARM_R3, Rstack, 0);
+    loc_retry = out_loc(jinfo->codebuf);
+// retry:
+    ldrex_imm(jinfo->codebuf, ARM_R0, JAZ_V1);
+    cmp_reg(jinfo->codebuf, ARM_R3, ARM_R0);
+    loc_failed = forward_short(jinfo->codebuf);
+    strex_imm(jinfo->codebuf, ARM_R0, Rstack, JAZ_V1);
+    loc_success = forward_cb(jinfo->codebuf);
+    branch_uncond(jinfo->codebuf, loc_retry);
+    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+// failed:
+    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_ENTER]);
+    loc_exception = forward_cb(jinfo->codebuf);
+    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_success);
+// success:
+
+  }
+
+  {
+    int nlocals = jinfo->method->max_locals();
+
+    for (i = 0; i < nlocals; i++) {
+      Reg r = jinfo->jregs->r_local[i];
+      if (r) {
+	unsigned stackdepth = STACKDEPTH(jinfo, 0);
+        if (i < parms)
+	  load_local(jinfo, r, i, stackdepth);
+        else if (locals_info[i] & (1<<LOCAL_REF))
+          mov_reg(jinfo->codebuf, r, ARM_R1);
+      }
+    }
+  }
+}
+
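+// Helper-routine indices for the arithmetic and conversion bytecodes that
+// are not expanded inline, indexed by (opcode - opc_idiv).  A zero entry
+// means the bytecode is compiled inline; see OPCODE2HANDLER below.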
+unsigned opcode2handler[] = {
+  H_IDIV,
+  H_LDIV,
+  0, 0,			// fdiv, ddiv
+  H_IREM,
+  H_LREM,
+  H_FREM,
+  H_DREM,
+  0, 0, 0, 0,		// ineg, lneg, fneg, dneg
+  0, 0, 0, 0, 0, 0,	// shifts
+  0, 0, 0, 0, 0, 0,	// and, or, xor
+  0,			// iinc
+  0,			// i2l
+  H_I2F,
+  H_I2D,
+  0,			// l2i
+  H_L2F,
+  H_L2D,
+  H_F2I,
+  H_F2L,
+  H_F2D,
+  H_D2I,
+  H_D2L,
+  H_D2F,
+};
+
+// Generate code for a load of a jlong.
+
+void Thumb2_load_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
+		      int field_offset,
+		      bool is_volatile = false)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  CodeBuf *codebuf = jinfo->codebuf;
+  if (is_volatile && os::is_MP()) {
+    Reg r_addr = base;
+    if (field_offset) {
+      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
+      add_imm(codebuf, r_addr, base, field_offset);
+    }
+    ldrexd(codebuf, r_lo, r_hi, r_addr);
+  } else {
+    LDRD_PRE(jstack, r_lo, r_hi);
+    ldrd_imm(codebuf, r_lo, r_hi, base, field_offset);
+  }
+}
+
+// Generate code for a store of a jlong.  If the operand is volatile,
+// generate a sequence of the form
+//
+// .Ldst
+// 	ldrexd 	r2, r3, [dst]
+// 	strexd 	r2, r0, r1, [dst]
+// 	cmp 	r2, #0
+// 	bne 	.Ldst
+
+void Thumb2_store_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
+		      int field_offset,
+		      bool is_volatile = false)
+{
+  CodeBuf *codebuf = jinfo->codebuf;
+  if (is_volatile && os::is_MP()) {
+    Reg r_addr = base;
+    Reg tmp1 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
+    Reg tmp2 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1));
+    if (field_offset) {
+      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1) | (1<<tmp2));
+      add_imm(jinfo->codebuf, r_addr, base, field_offset);
+    }
+    int loc = out_loc(codebuf);
+    ldrexd(codebuf, tmp1, tmp2, r_addr);
+    strexd(codebuf, tmp1, r_lo, r_hi, r_addr);
+    cmp_imm(codebuf, tmp1, 0);
+    branch(codebuf, COND_NE, loc);
+  } else {
+    strd_imm(codebuf, r_lo, r_hi, base, field_offset);
+  }
+}
+
+#define OPCODE2HANDLER(opc) (handlers[opcode2handler[(opc)-opc_idiv]])
+
+extern "C" void _ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc(void);
+
+// Push VFP_REG to the java stack.
+static void vfp_to_jstack(Thumb2_Info *jinfo, int vfp_reg) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_d_toARM(jinfo->codebuf, r_lo, r_hi, vfp_reg);
+}
+
+// Pop the java stack to VFP_REG .
+static void jstack_to_vfp(Thumb2_Info *jinfo, int vfp_reg) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  vmov_reg_d_toVFP(jinfo->codebuf, vfp_reg, r_lo, r_hi);
+  Thumb2_Flush(jinfo);
+}
+
+// Expand a call to a "special" method.  These are usually inlines of
+// java.lang.Math methods.  Return true if the inlining succeeded.
+static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
+				  unsigned stackdepth) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  CodeBuf *codebuf = jinfo->codebuf;
+
+  const char *entry_name;
+
+  switch (callee->intrinsic_id()) {
+  case vmIntrinsics::_dabs:
+   {
+     Thumb2_dAbs(jinfo);
+     return true;
+    }
+
+#ifdef __ARM_PCS_VFP
+  case vmIntrinsics::_dsin:
+    entry_name = "Java_java_lang_StrictMath_sin";
+    break;
+
+  case vmIntrinsics::_dcos:
+    entry_name = "Java_java_lang_StrictMath_cos";
+    break;
+
+  case vmIntrinsics::_dtan:
+    entry_name = "Java_java_lang_StrictMath_tan";
+    break;
+
+  case vmIntrinsics::_dsqrt:
+    {
+      void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
+      if (! entry_point)
+	return false;
+
+      unsigned r_lo, r_hi, r_res_lo, r_res_hi;
+
+      // Make sure that canonical NaNs are returned, as per the spec.
+      //
+      // Generate:
+      // vsqrt.f64 d0, d1
+      // vcmp.f64 d0, d0
+      // vmrs APSR_nzcv, fpscr
+      // beq.n 0f
+      // vmov.f64 d0, d1
+      // blx Java_java_lang_StrictMath_sqrt
+      // 0:
+      jstack_to_vfp(jinfo, VFP_D1);
+      vop_reg_d(jinfo->codebuf, VP_SQRT, VFP_D0, 0, VFP_D1);
+      vcmp_reg_d(jinfo->codebuf, VFP_D0, VFP_D0, 0);
+      vmrs(jinfo->codebuf, ARM_PC);
+      int loc = forward_short(jinfo->codebuf);
+      vmov_reg_d_VFP_to_VFP(jinfo->codebuf, VFP_D0, VFP_D1);
+      // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
+      // parameter, so it's arguably pointless to pass it here.
+      add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
+      mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
+      blx_reg(jinfo->codebuf, ARM_IP);
+      bcc_patch(jinfo->codebuf, COND_EQ, loc);
+      vfp_to_jstack(jinfo, VFP_D0);
+
+      return true;
+    }
+
+  case vmIntrinsics::_dlog:
+    entry_name = "Java_java_lang_StrictMath_log";
+    break;
+
+  case vmIntrinsics::_dlog10:
+    entry_name = "Java_java_lang_StrictMath_log10";
+    break;
+#endif // __ARM_PCS_VFP
+
+  case vmIntrinsics::_compareAndSwapInt:
+   {
+      Thumb2_Fill(jinfo, 4);
+
+      unsigned update = POP(jstack);
+      unsigned expect = POP(jstack);
+      unsigned offset = POP(jstack);
+      POP(jstack);  // Actually the high part of the offset
+
+      // unsigned object = POP(jstack);
+      // unsigned unsafe = POP(jstack);  // Initially an instance of java.lang.Unsafe
+
+      Thumb2_Flush(jinfo);
+      // Get ourself a result reg that's not one of the inputs
+      unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
+      unsigned result = JSTACK_PREFER(jstack, ~exclude);
+
+      ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
+      add_reg(codebuf, result, offset, ARM_IP); // result now points to word
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0);  // Security check
+
+      fullBarrier(codebuf);
+
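+      // Compare-and-swap loop: load-exclusive the current value, bail out
+      // if it differs from the expected value, otherwise store-exclusive
+      // the update and retry if the exclusive store failed.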
+      int retry = out_loc(codebuf);
+      ldrex_imm(codebuf, ARM_LR, result);
+      cmp_reg(codebuf, ARM_LR, expect);
+      int loc_failed = forward_short(codebuf);
+      strex_imm(codebuf, ARM_IP, update, result);
+      cmp_imm(codebuf, ARM_IP, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
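+      // Materialise the boolean result: 1 if the exchange succeeded (EQ),
+      // 0 if the compare failed (NE), using an IT block in Thumb-2 state
+      // or conditional moves in ARM state.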
+      if (Thumb2) {
+	it(codebuf, COND_NE, IT_MASK_TE);
+	mov_imm(codebuf, result, 0);
+	mov_imm(codebuf, result, 1);
+      } else {
+	mov_imm(codebuf, result, 0, COND_NE);
+	mov_imm(codebuf, result, 1, COND_EQ);
+      }
+      fullBarrier(codebuf);
+
+      PUSH(jstack, result);
+    }
+    return true;
+
+  case vmIntrinsics::_compareAndSwapLong:
+    {
+      // Arch < V6K lacks ldrexd/strexd
+      if (!ARCH_GE_V6K(CPUInfo)) return false;
+
+      Thumb2_Fill(jinfo, 4);
+
+      unsigned update_lo = POP(jstack);
+      unsigned update_hi = POP(jstack);
+      unsigned expect_lo = POP(jstack);
+      unsigned expect_hi = POP(jstack);
+
+      Thumb2_Flush(jinfo);
+      Thumb2_save_all_locals(jinfo, stackdepth - 4); // 4 args popped above
+
+      // ldrexd/strexd can only take an even pair of registers in ARM mode
+      if (!Thumb2) {
+        if (update_hi != update_lo + 1 || (update_lo & 1)) {
+          mov_reg(codebuf, JAZ_V4, update_lo);
+          mov_reg(codebuf, JAZ_V5, update_hi);
+          update_lo = JAZ_V4;
+          update_hi = JAZ_V5;
+        }
+      }
+
+      // instance of java.lang.Unsafe:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize);
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0);  // Security check
+
+      // Object:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize);
+      // Offset:
+      ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize);
+      add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
+
+      fullBarrier(codebuf);
+
+      int retry = out_loc(codebuf);
+      ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
+      cmp_reg(codebuf, JAZ_V2, expect_lo);
+      cmp_reg(codebuf, JAZ_V3, expect_hi, COND_EQ);
+      int loc_failed = forward_short(codebuf);
+      strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
+      cmp_imm(codebuf, JAZ_V1, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+      unsigned result = JSTACK_REG(jinfo->jstack);
+
+      if (Thumb2) {
+	it(codebuf, COND_NE, IT_MASK_TE);
+	mov_imm(codebuf, result, 0);
+	mov_imm(codebuf, result, 1);
+      } else {
+	mov_imm(codebuf, result, 0, COND_NE);
+	mov_imm(codebuf, result, 1, COND_EQ);
+      }
+      fullBarrier(codebuf);
+
+      Thumb2_restore_all_locals(jinfo, stackdepth - 4); // 4 args popped above
+      add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
+      PUSH(jstack, result);
+    }
+    return true;
+
+  default:
+    return false;
+  }
+
+  void *entry_point = dlsym(NULL, entry_name);
+  if (! entry_point)
+    return false;
+
+  jstack_to_vfp(jinfo, VFP_D0);
+  // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
+  // parameter, so it's arguably pointless to pass it here.
+  add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
+  mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
+  blx_reg(jinfo->codebuf, ARM_IP);
+  vfp_to_jstack(jinfo, VFP_D0);
+
+  return true;
+}
+
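+// Thumb2_codegen: main code generation loop.  Walks the bytecodes from
+// START, flushing the cached stack state at branch targets, and emits
+// Thumb-2/ARM code for each reachable bytecode; unreachable bytecodes are
+// compiled as a call to the H_DEADCODE handler.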
+void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
+{
+  JDEBUG_ (
+  Symbol *name = jinfo->method->name();
+  Symbol *sig = jinfo->method->signature();
+  );
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  CodeBuf *codebuf = jinfo->codebuf;
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned bci;
+  unsigned opcode;
+  unsigned stackinfo;
+  int len;
+  unsigned stackdepth;
+
+  for (bci = start; bci < code_size; ) {
+    opcode = code_base[bci];
+    stackinfo = bc_stackinfo[bci];
+    unsigned start_idx;
+
+    if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
+
+    if (!OSPACE && (stackinfo & BC_BACK_TARGET)) {
+      if (out_pos(codebuf) & 0x02) nop_16(codebuf);
+      if (out_pos(codebuf) & 0x04) nop_32(codebuf);
+    }
+
+    start_idx = jinfo->codebuf->idx;
+    if (START_BCI(start_idx) == -1) SET_START_BCI(start_idx, bci);
+
+    JASSERT(!(stackinfo & BC_COMPILED), "code already compiled for this bytecode?");
+    stackdepth = STACKDEPTH(jinfo, stackinfo); // Stackdepth here is adjusted for monitors
+    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2) | BC_COMPILED;
+
+    if (opcode > OPC_LAST_JAVA_OP)
+      switch (opcode) {
+      default:
+	if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	  opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	break;
+      case opc_return_register_finalizer:
+      case opc_fast_aldc_w:
+      case opc_fast_aldc:
+	break;
+      }
+
+    len = Bytecodes::length_for((Bytecodes::Code)opcode);
+    if (len <= 0) {
+      Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+      len = (Bytecodes::special_length_at
+	     (code,
+	      (address)(code_base+bci), (address)(code_base+code_size)));
+    }
+
+    if (IS_DEAD(stackinfo)) {
+      unsigned zlen = 0;
+      unsigned s_bci = bci;
+
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
+      do {
+	zlen += len;
+	bci += len;
+	if (bci >= code_size) break;
+	opcode = code_base[bci];
+	stackinfo = bc_stackinfo[bci];
+
+	if (stackinfo & BC_BRANCH_TARGET) break;
+	if (!IS_DEAD(stackinfo)) break;
+
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+
+      } while (1);
+      SET_END_BCI(start_idx, s_bci + zlen);
+      continue;
+    }
+
+#if 0
+    if (bci >= 4) {
+      unsigned zlen = 0;
+      unsigned s_bci = bci;
+
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
+      do {
+	zlen += len;
+	bci += len;
+	if (bci >= code_size) break;
+	opcode = code_base[bci];
+	stackinfo = bc_stackinfo[bci];
+
+	if (stackinfo & BC_BRANCH_TARGET) break;
+
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+
+      } while (1);
+      SET_END_BCI(start_idx, s_bci + zlen);
+      continue;
+    }
+#endif
+
+    SET_END_BCI(start_idx, bci + len);
+
+#ifdef THUMB2_JVMTI
+    // emit a start address --> bci map entry before
+    // generating machine code for this bytecode
+
+    void *addr = (void *)(codebuf->codebuf + codebuf->idx);
+    address_bci_map_add(addr, bci);
+#endif //THUMB2_JVMTI
+
+    switch (opcode) {
+      case opc_nop:
+	break;
+      case opc_aconst_null:
+	len += Thumb2_Imm(jinfo, 0, bci+1);
+	break;
+      case opc_iconst_m1:
+      case opc_iconst_0:
+      case opc_iconst_1:
+      case opc_iconst_2:
+      case opc_iconst_3:
+      case opc_iconst_4:
+      case opc_iconst_5:
+	len += Thumb2_Imm(jinfo, opcode - (unsigned)opc_iconst_0, bci+1);
+	break;
+      case opc_lconst_0:
+      case opc_lconst_1:
+	Thumb2_ImmX2(jinfo, opcode - (unsigned)opc_lconst_0, 0);
+	break;
+      case opc_fconst_0:
+      case opc_fconst_1:
+      case opc_fconst_2: {
+	unsigned v = 0;
+	if (opcode == (unsigned)opc_fconst_1) v = 0x3f800000;
+	if (opcode == (unsigned)opc_fconst_2) v = 0x40000000;
+	len += Thumb2_Imm(jinfo, v, bci+1);
+	break;
+      }
+      case opc_dconst_0:
+      case opc_dconst_1: {
+	unsigned v_hi = 0;
+	if (opcode == (unsigned)opc_dconst_1) v_hi = 0x3ff00000;
+	Thumb2_ImmX2(jinfo, 0, v_hi);
+	break;
+      }
+      case opc_bipush:
+	len += Thumb2_Imm(jinfo, GET_JAVA_S1(code_base+bci+1), bci+2);
+	break;
+      case opc_sipush:
+	len += Thumb2_Imm(jinfo, GET_JAVA_S2(code_base+bci+1), bci+3);
+	break;
+      case opc_ldc:
+      case opc_ldc_w:
+      case opc_ldc2_w: {
+	unsigned index = (opcode == (unsigned)opc_ldc) ?
+				code_base[bci+1] : GET_JAVA_U2(code_base+bci+1);
+	constantPoolOop constants = jinfo->method->constants();
+	unsigned v;
+
+	switch (v = constants->tag_at(index).value()) {
+	  case JVM_CONSTANT_Integer:
+	  case JVM_CONSTANT_Float:
+	    v = (unsigned)constants->int_at(index);
+	    len += Thumb2_Imm(jinfo, v, bci+len);
+	    break;
+	  case JVM_CONSTANT_Long:
+	  case JVM_CONSTANT_Double: {
+	    unsigned long long v;
+	    v = constants->long_at(index);
+	    Thumb2_ImmX2(jinfo, v & 0xffffffff, v >> 32);
+	    break;
+	  }
+	  case JVM_CONSTANT_Class:
+	  case JVM_CONSTANT_String: {
+	    Reg r;
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	    load_istate(jinfo, r, ISTATE_METHOD, stackdepth+1);
+	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTMETHOD);
+	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTANTS);
+	    ldr_imm(jinfo->codebuf, r, r, CONSTANTPOOL_BASE + (index << 2));
+	    if (v == JVM_CONSTANT_Class)
+	      ldr_imm(jinfo->codebuf, r, r, KLASS_PART+KLASS_JAVA_MIRROR);
+	    break;
+	  }
+	  default:
+	    unsigned loc;
+
+	    JASSERT(opcode != opc_ldc2_w, "ldc2_w unresolved?");
+	    Thumb2_Flush(jinfo);
+	    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	    Thumb2_save_local_refs(jinfo, stackdepth);
+//	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
+	    bl(jinfo->codebuf, handlers[opcode == opc_ldc ? H_LDC : H_LDC_W]);
+	    Thumb2_restore_local_refs(jinfo, stackdepth);
+	    ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	    mov_imm(jinfo->codebuf, ARM_R2, 0);
+	    str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	    loc = forward_cb(jinfo->codebuf);
+	    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+	    cbnz_patch(jinfo->codebuf, ARM_R0, loc);
+	    PUSH(jstack, ARM_R0);
+	    break;
+	}
+	break;
+      }
+
+      case opc_iload:
+      case opc_fload:
+      case opc_aload:
+	Thumb2_Load(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_lload:
+      case opc_dload:
+	Thumb2_LoadX2(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_iload_0:
+      case opc_iload_1:
+      case opc_iload_2:
+      case opc_iload_3:
+      case opc_fload_0:
+      case opc_fload_1:
+      case opc_fload_2:
+      case opc_fload_3:
+      case opc_aload_0:
+      case opc_aload_1:
+      case opc_aload_2:
+      case opc_aload_3:
+	Thumb2_Load(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
+	break;
+      case opc_lload_0:
+      case opc_lload_1:
+      case opc_lload_2:
+      case opc_lload_3:
+      case opc_dload_0:
+      case opc_dload_1:
+      case opc_dload_2:
+      case opc_dload_3:
+	Thumb2_LoadX2(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
+	break;
+      case opc_iaload:
+      case opc_faload:
+      case opc_aaload:
+      case opc_baload:
+      case opc_caload:
+      case opc_saload:
+	Thumb2_Xaload(jinfo, opcode);
+	break;
+      case opc_laload:
+      case opc_daload:
+	Thumb2_X2aload(jinfo);
+	break;
+      case opc_istore:
+      case opc_fstore:
+      case opc_astore:
+	Thumb2_Store(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_lstore:
+      case opc_dstore:
+	Thumb2_StoreX2(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_istore_0:
+      case opc_istore_1:
+      case opc_istore_2:
+      case opc_istore_3:
+      case opc_fstore_0:
+      case opc_fstore_1:
+      case opc_fstore_2:
+      case opc_fstore_3:
+      case opc_astore_0:
+      case opc_astore_1:
+      case opc_astore_2:
+      case opc_astore_3:
+	Thumb2_Store(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
+	break;
+      case opc_lstore_0:
+      case opc_lstore_1:
+      case opc_lstore_2:
+      case opc_lstore_3:
+      case opc_dstore_0:
+      case opc_dstore_1:
+      case opc_dstore_2:
+      case opc_dstore_3:
+	Thumb2_StoreX2(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
+	break;
+      case opc_iastore:
+      case opc_fastore:
+      case opc_bastore:
+      case opc_castore:
+      case opc_sastore:
+	Thumb2_Xastore(jinfo, opcode);
+	break;
+      case opc_lastore:
+      case opc_dastore:
+	Thumb2_X2astore(jinfo);
+	break;
+
+      case opc_pop:
+      case opc_pop2:
+	Thumb2_Pop(jinfo, opcode - opc_pop + 1);
+	break;
+
+      case opc_dup:
+      case opc_dup_x1:
+      case opc_dup_x2:
+	Thumb2_Dup(jinfo, opcode - opc_dup);
+	break;
+
+      case opc_dup2:
+      case opc_dup2_x1:
+      case opc_dup2_x2:
+	Thumb2_Dup2(jinfo, opcode - opc_dup2);
+	break;
+
+      case opc_swap:
+	Thumb2_Swap(jinfo);
+	break;
+
+      case opc_iadd:
+      case opc_isub:
+      case opc_imul:
+      case opc_ishl:
+      case opc_ishr:
+      case opc_iushr:
+      case opc_iand:
+      case opc_ior:
+      case opc_ixor:
+	Thumb2_iOp(jinfo, opcode);
+	break;
+
+      case opc_ladd:
+      case opc_lsub:
+      case opc_land:
+      case opc_lor:
+      case opc_lxor:
+	Thumb2_lOp(jinfo, opcode);
+	break;
+
+      case opc_lshl: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
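+	// 64-bit shift left.  Bit 5 of the shift count selects the path:
+	// for shifts of 32..63 the low result word becomes zero and the
+	// high word is the low input word shifted by (shift & 31); for
+	// shifts of 0..31 both words are shifted and the bits crossing the
+	// word boundary are ORed into the high word.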
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res_lo, 0);
+	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_LSL, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSR, ARM_IP, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_hi, res_hi, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lushr: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
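+	// 64-bit logical shift right: mirror image of opc_lshl above, with
+	// the high result word zero-filled for shifts of 32..63.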
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res_hi, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_LSR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lshr: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
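+	// 64-bit arithmetic shift right: like opc_lushr, but the high
+	// result word is filled with the sign (asr #31) for shifts of
+	// 32..63.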
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	asr_imm(jinfo->codebuf, res_hi, lho_hi, 31);
+	dop_reg(jinfo->codebuf, DP_ASR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_ASR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lmul:
+	Thumb2_lmul(jinfo);
+	break;
+
+      case opc_fadd:
+      case opc_fsub:
+      case opc_fmul:
+      case opc_fdiv:
+	Thumb2_fOp(jinfo, opcode);
+	break;
+
+      case opc_dadd:
+      case opc_dsub:
+      case opc_dmul:
+      case opc_ddiv:
+	Thumb2_dOp(jinfo, opcode);
+	break;
+
+      case opc_fcmpl:
+      case opc_fcmpg: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	unsigned rho, lho, res;
+	unsigned loc1, loc2, loc_ne;
+
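+	// fcmpl/fcmpg: compare two floats in VFP and materialise -1, 0 or 1.
+	// The two opcodes differ only in the result produced for an
+	// unordered (NaN) comparison: -1 for fcmpl, +1 for fcmpg.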
+	Thumb2_Fill(jinfo, 2);
+	rho = POP(jstack);
+	lho = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = PUSH(jstack, JSTACK_REG(jstack));
+	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
+	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
+	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 1);
+	mov_imm(jinfo->codebuf, res, opcode == opc_fcmpl ? 1 : -1);
+	vmrs(jinfo->codebuf, ARM_PC);
+	loc1 = forward_short(jinfo->codebuf);
+	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 0);
+	loc_ne = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 0);
+	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_GT : COND_MI, loc1);
+	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_MI : COND_GT, loc2);
+	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
+	break;
+      }
+
+      case opc_dcmpl:
+      case opc_dcmpg: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	unsigned rho_lo, rho_hi, lho_lo, lho_hi, res;
+	unsigned loc1, loc2, loc_ne;
+
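+	// dcmpl/dcmpg: double-precision version of the fcmp sequence above,
+	// with the same NaN handling.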
+	Thumb2_Fill(jinfo, 4);
+	rho_lo = POP(jstack);
+	rho_hi = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = PUSH(jstack, JSTACK_REG(jstack));
+	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S0, lho_lo, lho_hi);
+	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S1, rho_lo, rho_hi);
+	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 1);
+	mov_imm(jinfo->codebuf, res, opcode == opc_dcmpl ? 1 : -1);
+	vmrs(jinfo->codebuf, ARM_PC);
+	loc1 = forward_short(jinfo->codebuf);
+	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 0);
+	loc_ne = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 0);
+	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_GT : COND_MI, loc1);
+	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_MI : COND_GT, loc2);
+	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
+	break;
+      }
+
+      case opc_drem:
+      case opc_lrem:
+      case opc_ldiv: {
+	Reg src[4], dst[4];
+
+	Thumb2_Fill(jinfo, 4);
+	src[2] = POP(jstack);
+	src[3] = POP(jstack);
+	src[0] = POP(jstack);
+	src[1] = POP(jstack);
+	Thumb2_Flush(jinfo);
+	dst[0] = ARM_R0;
+	dst[1] = ARM_R1;
+	dst[2] = ARM_R2;
+	dst[3] = ARM_R3;
+	mov_multiple(jinfo->codebuf, dst, src, 4);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	if (opcode != opc_lrem) {
+	  PUSH(jstack, ARM_R1);
+	  PUSH(jstack, ARM_R0);
+	} else {
+	  PUSH(jstack, ARM_R3);
+	  PUSH(jstack, ARM_R2);
+	}
+	break;
+      }
+
+      case opc_frem:
+      case opc_idiv:
+      case opc_irem: {
+	Reg r_rho, r_lho;
+
+	Thumb2_Fill(jinfo, 2);
+	r_rho = POP(jstack);
+	r_lho = POP(jstack);
+	Thumb2_Flush(jinfo);
+	if (r_rho == ARM_R0) {
+	  if (r_lho == ARM_R1) {
+	    mov_reg(jinfo->codebuf, ARM_IP, r_rho);
+	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
+	  } else {
+	    mov_reg(jinfo->codebuf, ARM_R1, r_rho);
+	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	  }
+	} else {
+	  mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	  mov_reg(jinfo->codebuf, ARM_R1, r_rho);
+	}
+	if (opcode == opc_frem)
+	  bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	else
+	  blx(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_f2i:
+      case opc_i2f: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R0, r);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_f2d:
+      case opc_f2l:
+      case opc_i2d: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R0, r);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R1);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_d2f:
+      case opc_d2i:
+      case opc_l2d:
+      case opc_d2l:
+      case opc_l2f: {
+	Reg lo, hi;
+
+	Thumb2_Fill(jinfo, 2);
+	lo = POP(jstack);
+	hi = POP(jstack);
+	Thumb2_Flush(jinfo);
+	if (hi == ARM_R0) {
+	  if (lo == ARM_R1) {
+	    mov_reg(jinfo->codebuf, ARM_IP, hi);
+	    mov_reg(jinfo->codebuf, ARM_R0, lo);
+	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
+	  } else {
+	    mov_reg(jinfo->codebuf, ARM_R1, hi);
+	    mov_reg(jinfo->codebuf, ARM_R0, lo);
+	  }
+	} else {
+	  mov_reg(jinfo->codebuf, ARM_R0, lo);
+	  mov_reg(jinfo->codebuf, ARM_R1, hi);
+	}
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	if (opcode == opc_l2d || opcode == opc_d2l) PUSH(jstack, ARM_R1);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_ineg:
+	Thumb2_iNeg(jinfo, opcode);
+	break;
+
+      case opc_lneg:
+	Thumb2_lNeg(jinfo, opcode);
+	break;
+
+      case opc_fneg:
+	Thumb2_fNeg(jinfo, opcode);
+	break;
+
+      case opc_dneg:
+	Thumb2_dNeg(jinfo);
+	break;
+
+      case opc_i2l: {
+	unsigned r, r_res_lo, r_res_hi;
+
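+	// i2l: copy the value into the low result word and sign-extend it
+	// into the high word with an arithmetic shift right by 31.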
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Spill(jinfo, 2, 0);
+	r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
+	r_res_lo = PUSH(jstack, JSTACK_REG(jstack));
+	if (r == r_res_hi) {
+	  SWAP(jstack);
+	  r_res_hi = r_res_lo;
+	  r_res_lo = r;
+	}
+	mov_reg(jinfo->codebuf, r_res_lo, r);
+	asr_imm(jinfo->codebuf, r_res_hi, r, 31);
+	break;
+      }
+
+      case opc_l2i: {
+	unsigned r_lo, r_hi;
+	unsigned r;
+
+	Thumb2_Fill(jinfo, 2);
+	r_lo = POP(jstack);
+	r_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r = PUSH(jstack, r_lo);
+	break;
+      }
+
+      case opc_i2b: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	sxtb(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_i2s: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	sxth(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_i2c: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	uxth(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_lcmp: {
+	unsigned lho_lo, lho_hi;
+	unsigned rho_lo, rho_hi;
+	unsigned r_tmp_lo, r_tmp_hi;
+	unsigned res;
+	unsigned loc_lt, loc_eq;
+
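+	// lcmp: 64-bit signed compare.  Subtract rho from lho with SUB/SBC,
+	// then materialise -1 if the result is negative, 0 if it is zero,
+	// and 1 otherwise.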
+	Thumb2_Fill(jinfo, 4);
+	rho_lo = POP(jstack);
+	rho_hi = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = JSTACK_REG(jstack);
+	PUSH(jstack, res);
+	r_tmp_lo = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi));
+	r_tmp_hi = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi)|(1<<r_tmp_lo));
+	dop_reg(jinfo->codebuf, DP_SUB, r_tmp_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_SBC, r_tmp_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
+	mov_imm(jinfo->codebuf, res, (unsigned)-1);
+	loc_lt = forward_short(jinfo->codebuf);
+	dop_reg(jinfo->codebuf, DP_ORR, res, r_tmp_lo, r_tmp_hi, SHIFT_LSL, 0);
+	loc_eq = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 1);
+	bcc_patch(jinfo->codebuf, COND_LT, loc_lt);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
+	break;
+      }
+
+      case opc_iinc: {
+	unsigned local = code_base[bci+1];
+	int constant = GET_JAVA_S1(code_base+bci+2);
+	unsigned r = jinfo->jregs->r_local[local];
+
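+	// If the local lives in a register, update it in place; otherwise
+	// load it from the frame, add the constant and store it back.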
+	if (!r) {
+	  int nlocals = jinfo->method->max_locals();
+	  r = Thumb2_Tmp(jinfo, 0);
+	  stackdepth -= jstack->depth;
+	  load_local(jinfo, r, local, stackdepth);
+	  add_imm(jinfo->codebuf, r, r, constant);
+	  store_local(jinfo, r, local, stackdepth);
+	} else {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  add_imm(jinfo->codebuf, r, r, constant);
+	}
+	break;
+      }
+
+      case opc_getfield: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	Reg r_obj;
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
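+	  // Field not yet resolved: choose a resolution handler from the
+	  // first character of the field signature and call out to it,
+	  // preserving the register-cached local refs across the call.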
+ 	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_GETFIELD_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_GETFIELD_DW;
+	  if (c == 'B' || c == 'Z') handler = H_GETFIELD_SB;
+	  if (c == 'C') handler = H_GETFIELD_H;
+	  if (c == 'S') handler = H_GETFIELD_SH;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 1);
+	  r_obj = POP(jstack);
+	  Thumb2_Spill(jinfo, 2, 0);
+	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+	  Thumb2_load_long(jinfo, r_lo, r_hi, r_obj, field_offset,
+			   cache->is_volatile());
+	} else {
+	  Reg r;
+
+	  Thumb2_Fill(jinfo, 1);
+	  r_obj = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  if (tos_type == btos)
+	    ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos)
+	    ldrh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == stos)
+	    ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else
+	    ldr_imm(jinfo->codebuf, r, r_obj, field_offset);
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_getstatic: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_GETSTATIC_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_GETSTATIC_DW;
+	  if (c == 'B' || c == 'Z') handler = H_GETSTATIC_SB;
+	  if (c == 'C') handler = H_GETSTATIC_H;
+	  if (c == 'S') handler = H_GETSTATIC_SH;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+	JDEBUG_( tty->print("f2_as_index getstatic %d: %s: %s %d\n", index , name->as_C_string(), sig->as_C_string(), field_offset); );
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi, r_addr;
+	  Thumb2_Spill(jinfo, 2, 0);
+	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+	  r_addr = Thumb2_Tmp(jinfo, (1<<r_hi) | (1<<r_lo));
+	  load_istate(jinfo, r_lo, ISTATE_CONSTANTS, stackdepth+2);
+	  ldr_imm(jinfo->codebuf, r_addr, r_lo, CP_OFFSET + (index << 4) + 4);
+	  Thumb2_load_long(jinfo, r_lo, r_hi, r_addr, field_offset,
+			   cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  load_istate(jinfo, r, ISTATE_CONSTANTS, stackdepth+1);
+	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4);
+	  if (tos_type == btos)
+	    ldrsb_imm(jinfo->codebuf, r, r, field_offset);
+	  else if (tos_type == ctos)
+	    ldrh_imm(jinfo->codebuf, r, r, field_offset);
+	  else if (tos_type == stos)
+	    ldrsh_imm(jinfo->codebuf, r, r, field_offset);
+	  else
+	    ldr_imm(jinfo->codebuf, r, r, field_offset);
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_putfield: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	Reg r_obj;
+
+        cache = cp->entry_at(index);
+
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_PUTFIELD_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_PUTFIELD_DW;
+	  if (c == 'B' || c == 'Z') handler = H_PUTFIELD_B;
+	  if (c == 'C' || c == 'S') handler = H_PUTFIELD_H;
+ 	  if (c == '[' || c == 'L') handler = H_PUTFIELD_A;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+
+	  break;
+	}
+
+	if (cache->is_volatile())
+	  storeBarrier(jinfo->codebuf);
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 3);
+	  r_lo = POP(jstack);
+	  r_hi = POP(jstack);
+	  r_obj = POP(jstack);
+	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Fill(jinfo, 2);
+	  r = POP(jstack);
+	  r_obj = POP(jstack);
+	  if (tos_type == btos)
+	    strb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos || tos_type == stos)
+	    strh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else {
+	    str_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    if (tos_type == atos) {
+	      Thumb2_Flush(jinfo);
+	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
+	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
+	    }
+	  }
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_putstatic: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_PUTSTATIC_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_PUTSTATIC_DW;
+	  if (c == 'B' || c == 'Z') handler = H_PUTSTATIC_B;
+	  if (c == 'C' || c == 'S') handler = H_PUTSTATIC_H;
+	  if (c == '[' || c == 'L') handler = H_PUTSTATIC_A;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	if (cache->is_volatile())
+	  storeBarrier(jinfo->codebuf);
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+	Reg r_obj;
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 2);
+	  r_lo = POP(jstack);
+	  r_hi = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, (1<<r_lo)|(1<<r_hi));
+	  r_obj = JSTACK_PREFER(jstack, ~((1<<r_lo)|(1<<r_hi)));
+	  JASSERT(r_obj != r_lo && r_obj != r_hi, "corruption in putstatic");
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-2);
+	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4);
+	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Fill(jinfo, 1);
+	  r = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, (1<<r));
+	  r_obj = JSTACK_PREFER(jstack, ~(1<<r));
+	  JASSERT(r_obj != r, "corruption in putstatic");
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-1);
+	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4);
+	  if (tos_type == btos)
+	    strb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos || tos_type == stos)
+	    strh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else {
+	    str_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    if (tos_type == atos) {
+	      Thumb2_Flush(jinfo);
+	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
+	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
+	    }
+	  }
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_invokevirtual:
+      case opc_invokestatic:
+      case opc_invokespecial: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	unsigned loc;
+	methodOop callee;
+
+	// Call Debug before an invoke when the method being compiled is synchronized.
+#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
+	if (DebugSwitch && jinfo->method->is_synchronized()) {
+	  stm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+	  add_imm(jinfo->codebuf, ARM_R0, ISTATE_REG(jinfo), ISTATE_OFFSET(jinfo, stackdepth, 0));
+	  mov_imm(jinfo->codebuf, ARM_IP, (u32)Debug);
+	  load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
+	  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
+	  store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
+	  blx_reg(jinfo->codebuf, ARM_IP);
+	  ldm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, POP_FD, 1);
+	}
+#undef DEBUG_REGSET
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_all_locals(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf,
+	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC :
+		     opcode == opc_invokespecial ? H_INVOKESPECIAL : H_INVOKEVIRTUAL]);
+	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	callee = opcode == opc_invokevirtual ? (methodOop)cache->f2_as_index() : (methodOop)cache->f1_as_instance();
+
+	if (opcode != opc_invokevirtual || cache->is_vfinal()) {
+	  if (handle_special_method(callee, jinfo, stackdepth))
+	    break;
+	}
+
+	if ((opcode != opc_invokevirtual || cache->is_vfinal()) && callee->is_accessor()) {
+	  u1 *code = callee->code_base();
+	  int index = GET_NATIVE_U2(&code[2]);
+	  constantPoolCacheOop callee_cache = callee->constants()->cache();
+	  ConstantPoolCacheEntry *entry = callee_cache->entry_at(index);
+	  Reg r_obj, r;
+
+	  if (entry->is_resolved(Bytecodes::_getfield)) {
+	    JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
+
+	    TosState tos_type = entry->flag_state();
+	    int field_offset = entry->f2_as_index();
+
+	    JASSERT(tos_type == btos || tos_type == ctos || tos_type == stos || tos_type == atos || tos_type == itos, "unexpected tos type in accessor");
+
+	    Thumb2_Fill(jinfo, 1);
+	    r_obj = POP(jstack);
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	    if (tos_type == btos)
+	      ldrb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else if (tos_type == ctos)
+	      ldrh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else if (tos_type == stos)
+	      ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else
+	      ldr_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    break;
+	  }
+	}
+
+ 	Thumb2_Flush(jinfo);
+	if (OSPACE) {
+	  Thumb2_save_all_locals(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[
+	      opcode == opc_invokestatic ? H_INVOKESTATIC_RESOLVED :
+	      opcode == opc_invokespecial ? H_INVOKESPECIAL_RESOLVED :
+	      cache->is_vfinal() ? H_INVOKEVFINAL : H_INVOKEVIRTUAL_RESOLVED]);
+	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
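+	// Resolved fast path: load the callee methodOop into R0 (from the
+	// CP cache, or via the receiver's vtable for a non-vfinal
+	// invokevirtual), record the bcp, save register-cached locals and
+	// call the callee through its from_interpreted entry.  On return,
+	// reload the stack pointers and locals and check for a pending
+	// exception.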
+	load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
+ 	mov_imm(jinfo->codebuf, ARM_R1, 0);
+	if (opcode != opc_invokestatic)
+ 	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int));
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  load_istate(jinfo, ARM_R0, ISTATE_CONSTANTS, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+	if (opcode != opc_invokestatic)
+	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0,
+		CP_OFFSET + (index << 4) + (opcode == opc_invokevirtual ? 8 : 4));
+	else
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2_as_index() * 4);
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
+ 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP);
+	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP);
+ 	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED);
+	store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
+ 	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+ 	Thumb2_save_all_locals(jinfo, stackdepth);
+	sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
+ 	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0);
+	store_istate(jinfo, Rstack, ISTATE_STACK, stackdepth+1);
+	add_imm(jinfo->codebuf, ARM_R3, ARM_R3, FAST_ENTRY_OFFSET);
+ 	blx_reg(jinfo->codebuf, ARM_R3);
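+	// Back from the callee: reload the Java stack pointer and the cached
+	// locals, reset the last Java frame markers, and dispatch to the
+	// exception handler if the callee left a pending exception.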
+ 	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
+	stackdepth = STACKDEPTH(jinfo, bc_stackinfo[bci+len]);
+	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+	load_istate(jinfo, ARM_R2, ISTATE_STACK_LIMIT, stackdepth);
+ 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, 0);   // set last SP to zero
+					      // before setting FP
+	str_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_LAST_JAVA_SP);
+	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
+	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC);
+	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP);
+	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP);
+	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_LAST_JAVA_SP);
+	cmp_imm(jinfo->codebuf, ARM_R3, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS], COND_NE);
+	break;
+      }
+
+      case opc_invokeinterface: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	unsigned loc, loc_inc_ex;
+
+	// Currently we just call the unresolved invokeinterface entry for resolved /
+	// unresolved alike!
+	Thumb2_Flush(jinfo);
+	Thumb2_save_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, index);
+	blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
+	Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	break;
+      }
+
+    case opc_invokedynamic:
+      {
+	Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+	break;
+      }
+
+    case opc_fast_aldc_w:
+    case opc_fast_aldc:
+      {
+	unsigned index = (opcode == (unsigned)opc_fast_aldc) ?
+				code_base[bci+1] : GET_NATIVE_U2(code_base+bci+1);
+	constantPoolOop constants = jinfo->method->constants();
+	ConstantPoolCacheEntry* cpce = constants->cache()->entry_at(index);
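+	// If the constant is already resolved (f1 non-null), load it straight
+	// from the constant pool cache; otherwise exit to the interpreter to
+	// resolve it.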
+        if (! cpce->is_f1_null()) {
+	  Thumb2_Spill(jinfo, 1, 0);
+	  int r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_CONSTANTS);
+	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4); // offset to cache->f1_as_instance()
+	} else {
+	  Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+	}
+	break;
+      }
+
+      case opc_jsr_w:
+      case opc_jsr: {
+	int offset = opcode == opc_jsr ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Reg r;
+
+	Thumb2_Spill(jinfo, 1, 0);
+	r = JSTACK_REG(jstack);
+	PUSH(jstack, r);
+	mov_imm(jinfo->codebuf, r, bci + ((opcode == opc_jsr) ? 3 : 5));
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len);
+	len = 0;
+	break;
+      }
+
+      case opc_ret: {
+	Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
+	break;
+      }
+
+      case opc_goto:
+      case opc_goto_w: {
+	int offset = opcode == opc_goto ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len, stackdepth);
+	len = 0;
+	break;
+      }
+
+      case opc_athrow:
+	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
+	break;
+
+      case opc_ifeq:
+      case opc_ifne:
+      case opc_iflt:
+      case opc_ifge:
+      case opc_ifgt:
+      case opc_ifle:
+      case opc_ifnull:
+      case opc_ifnonnull: {
+	Reg r;
+	unsigned cond = opcode - opc_ifeq;
+	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
+	if (opcode >= opc_ifnull) cond = opcode - opc_ifnull;
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	cmp_imm(jinfo->codebuf, r, 0);
+	bci = Thumb2_Branch(jinfo, bci, cond);
+	len = 0;
+	break;
+      }
+
+      case opc_if_icmpeq:
+      case opc_if_icmpne:
+      case opc_if_icmplt:
+      case opc_if_icmpge:
+      case opc_if_icmpgt:
+      case opc_if_icmple:
+      case opc_if_acmpeq:
+      case opc_if_acmpne: {
+	Reg r_lho, r_rho;
+	unsigned cond = opcode - opc_if_icmpeq;
+	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
+	if (opcode >= opc_if_acmpeq) cond = opcode - opc_if_acmpeq;
+	Thumb2_Fill(jinfo, 2);
+	r_rho = POP(jstack);
+	r_lho = POP(jstack);
+	Thumb2_Flush(jinfo);
+	cmp_reg(jinfo->codebuf, r_lho, r_rho);
+	bci = Thumb2_Branch(jinfo, bci, cond);
+	len = 0;
+	break;
+      }
+
+      case opc_return:
+      case opc_dreturn:
+      case opc_lreturn:
+      case opc_ireturn:
+      case opc_freturn:
+      case opc_areturn:
+	Thumb2_Return(jinfo, opcode, bci, stackdepth);
+	break;
+
+      case opc_return_register_finalizer: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	Reg r, r_tmp;
+	unsigned loc_eq;
+
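+	// Only call InterpreterRuntime::register_finalizer() if the receiver's
+	// klass has JVM_ACC_HAS_FINALIZER set; otherwise fall through to a
+	// normal return.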
+	Thumb2_Flush(jinfo);
+	Thumb2_Load(jinfo, 0, stackdepth);
+	r = POP(jstack);
+	r_tmp = Thumb2_Tmp(jinfo, (1<<r));
+	ldr_imm(jinfo->codebuf, r_tmp, r, 4);
+	ldr_imm(jinfo->codebuf, r_tmp, r_tmp, KLASS_PART+KLASS_ACCESSFLAGS);
+	tst_imm(jinfo->codebuf, r_tmp, JVM_ACC_HAS_FINALIZER);
+	loc_eq = forward_short(jinfo->codebuf);
+	Thumb2_save_local_refs(jinfo, stackdepth);
+	mov_reg(jinfo->codebuf, ARM_R1, r);
+	load_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+	add_imm(jinfo->codebuf, ARM_R0, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	store_istate(jinfo, ARM_R0, ISTATE_BCP, stackdepth);
+	sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
+	store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);
+
+	mov_reg(jinfo->codebuf, ARM_R0, Rthread);
+	mov_imm(jinfo->codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc);
+	blx_reg(jinfo->codebuf, ARM_R3);
+
+	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC);
+	cmp_imm(jinfo->codebuf, ARM_R3, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
+	Thumb2_Return(jinfo, opc_return, bci, stackdepth);
+	break;
+      }
+
+      case opc_new: {
+	unsigned loc;
+
+	Thumb2_Flush(jinfo);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_NEW]);
+	Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_aastore: {
+	Reg src[3], dst[3];
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 3);
+	src[0] = POP(jstack);	// value
+	src[1] = POP(jstack);	// index
+	src[2] = POP(jstack);	// arrayref
+	Thumb2_Flush(jinfo);
+	dst[0] = ARM_R1;
+	dst[1] = ARM_R2;
+	dst[2] = ARM_R3;
+	mov_multiple(jinfo->codebuf, dst, src, 3);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth - 3);	// 3 args popped above
+	bl(jinfo->codebuf, handlers[H_AASTORE]);
+	Thumb2_restore_local_refs(jinfo, stackdepth - 3);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_instanceof: {
+	unsigned loc;
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth - 1);
+	bl(jinfo->codebuf, handlers[H_INSTANCEOF]);
+	Thumb2_restore_local_refs(jinfo, stackdepth - 1);	// 1 arg popped above
+	cmp_imm(jinfo->codebuf, ARM_R0, (unsigned)-1);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_checkcast: {
+	unsigned loc;
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = TOS(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_CHECKCAST]);
+	Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_monitorenter:
+	Thumb2_Flush(jinfo);
+	Thumb2_save_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	bl(jinfo->codebuf, handlers[H_MONITORENTER]);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	break;
+
+      case opc_monitorexit: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R1, r);
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+        Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MONITOREXIT]);
+        Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_newarray: {
+	Reg r;
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+1]);
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth-1);
+	bl(jinfo->codebuf, handlers[H_NEWARRAY]);
+	Thumb2_restore_local_refs(jinfo, stackdepth-1);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_anewarray: {
+	Reg r;
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R3, r);
+	mov_imm(jinfo->codebuf, ARM_R2, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	Thumb2_save_local_refs(jinfo, stackdepth-1);
+	bl(jinfo->codebuf, handlers[H_ANEWARRAY]);
+	Thumb2_restore_local_refs(jinfo, stackdepth-1);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_multianewarray: {
+	unsigned loc;
+
+	Thumb2_Flush(jinfo);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+3] * 4);
+	Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MULTIANEWARRAY]);
+	Thumb2_restore_local_refs(jinfo, stackdepth - code_base[bci+3]);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_arraylength: {
+	Reg r_obj, r_len;
+
+	Thumb2_Fill(jinfo, 1);
+	r_obj = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_len = JSTACK_REG(jstack);
+	PUSH(jstack, r_len);
+	ldr_imm(jinfo->codebuf, r_len, r_obj, 8);
+	break;
+      }
+
+      case opc_lookupswitch: {
+	unsigned w;
+	unsigned nbci;
+	int def;
+	int npairs;	// The Java spec says signed but must be >= 0??
+	unsigned *table, *tablep;
+	unsigned r;
+	unsigned oldidx;
+	unsigned table_loc;
+	int i;
+
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = bci + (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 8);
+	npairs = (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 12);
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+
+	Thumb2_Flush(jinfo);
+
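+	// Emit a placeholder compare-and-branch for each match value (plus one
+	// for the default), compile the code following the switch, then rewind
+	// codebuf->idx and patch each branch to its now-known target.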
+	table_loc = out_loc(jinfo->codebuf);
+	for (i = 0, tablep = table; i < npairs; i++) {
+	  unsigned match;
+
+	  w = tablep[0];
+	  match = BYTESEX_REVERSE(w);
+	  tablep += 2;
+	  cmp_imm(jinfo->codebuf, r, match);
+	  forward_long(jinfo->codebuf);
+	}
+	forward_long(jinfo->codebuf);
+	Thumb2_codegen(jinfo, bci+len);
+
+	oldidx = codebuf->idx;
+	codebuf->idx = table_loc >> 1;
+	for (i = 0, tablep = table; i < npairs; i++) {
+	  unsigned match;
+	  unsigned dest;
+	  unsigned loc;
+
+	  w = tablep[0];
+	  match = BYTESEX_REVERSE(w);
+	  w = tablep[1];
+	  dest = bci + (int)BYTESEX_REVERSE(w);
+	  tablep += 2;
+	  cmp_imm(jinfo->codebuf, r, match);
+	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
+	  loc = forward_long(jinfo->codebuf);
+	  branch_patch(jinfo->codebuf, COND_EQ, loc, jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK);
+	}
+	JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "default in lookupswitch not compiled");
+	branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	codebuf->idx = oldidx;
+
+	bci = (unsigned)-1;
+	len = 0;
+
+	break;
+      }
+
+      case opc_tableswitch: {
+	int low, high, i;
+	unsigned w;
+	unsigned *table, *tablep;
+	unsigned nbci;
+	int def;
+	unsigned loc, table_loc;
+	unsigned r, rs;
+	unsigned oldidx;
+	unsigned negative_offsets, negative_branch_table;
+
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 8);
+	low = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 12);
+	high = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = bci + (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 16);
+
+	Thumb2_Fill(jinfo, 1);
+	rs = POP(jstack);
+	Thumb2_Flush(jinfo);
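+	// Bias the index by 'low', range check it against the default case and
+	// dispatch through a table of halfword offsets (TBH in Thumb mode).
+	// The offsets are unsigned, so backward branches go via a trampoline
+	// table of unconditional branches emitted after the main table.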
+        r = rs;
+        if (low) {
+	  r = Thumb2_Tmp(jinfo, (1<<rs));
+	  sub_imm(jinfo->codebuf, r, rs, low);
+        }
+	cmp_imm(jinfo->codebuf, r, (high-low)+1);
+	loc = 0;
+	if (jinfo->bc_stackinfo[def] & BC_COMPILED)
+	  branch(jinfo->codebuf, COND_CS, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	else
+	  loc = forward_long(jinfo->codebuf);
+	tbh(jinfo->codebuf, ARM_PC, r);
+	table_loc = out_loc(jinfo->codebuf);
+	negative_offsets = 0;
+	for (i = low, tablep = table; i <= high; i++) {
+	  int offset;
+	  w = *tablep++;
+	  offset = (int)BYTESEX_REVERSE(w);
+	  if (offset < 0) negative_offsets++;
+	  out_16_data(jinfo->codebuf, 0);
+	}
+        if (!Thumb2) out_align(jinfo->codebuf, 4);
+	negative_branch_table = out_loc(jinfo->codebuf);
+	for (i = 0; i < (int)negative_offsets; i++) {
+	  out_32(jinfo->codebuf, 0);
+	}
+
+	Thumb2_codegen(jinfo, bci+len);
+
+	if (loc) {
+	  JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "def not compiled in tableswitch");
+	  branch_patch(jinfo->codebuf, COND_CS, loc, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	}
+
+	oldidx = codebuf->idx;
+	codebuf->idx = table_loc >> 1;
+	for (i = low, tablep = table; i <= high; i++) {
+	  unsigned dest;
+	  int offset;
+
+	  w = *tablep++;
+	  offset = (int)BYTESEX_REVERSE(w);
+	  dest = bci + offset;
+	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
+	  dest = jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK;
+	  if (offset < 0) {
+	    unsigned oldidx;
+            // ECN::FIXME - Is this right?
+	    out_16_data(jinfo->codebuf, (negative_branch_table >> 1) - (table_loc >> 1));
+	    PATCH(negative_branch_table) {
+	      branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), dest);
+	      negative_branch_table = out_loc(jinfo->codebuf);
+	    } HCTAP;
+	  } else {
+	    JASSERT((dest & 1) == 0 && (table_loc & 1) == 0, "unaligned code");
+	    offset = (dest >> 1) - (table_loc >> 1);
+            if (!Thumb2) {
+              offset -= 2;
+              JASSERT(offset >= 0, "negative offset!");
+            }
+	    if (offset >= 65536) {
+	      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+	    }
+	    out_16_data(jinfo->codebuf, offset);
+	  }
+	}
+	codebuf->idx = oldidx;
+	bci = (unsigned)-1;
+	len = 0;
+	break;
+      }
+
+      case opc_wide: {
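+	// The wide prefix gives the modified bytecode a 16-bit local index
+	// (and, for iinc, a 16-bit signed constant).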
+	unsigned local = GET_JAVA_U2(code_base + bci + 2);
+	opcode = code_base[bci+1];
+	if (opcode == opc_iinc) {
+	  int constant = GET_JAVA_S2(code_base + bci + 4);
+	  unsigned r = jinfo->jregs->r_local[local];
+	  
+	  if (!r) {
+	    int nlocals = jinfo->method->max_locals();
+	    r = ARM_IP;
+	    stackdepth -= jstack->depth;
+	    load_local(jinfo, r, local, stackdepth);
+	    add_imm(jinfo->codebuf, r, r, constant);
+	    store_local(jinfo, r, local, stackdepth);
+	  } else {
+	    Thumb2_Corrupt(jinfo, r, 0);
+	    add_imm(jinfo->codebuf, r, r, constant);
+	  }
+	} else if (opcode == opc_ret) {
+	  Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
+	} else {
+	  if (opcode == opc_iload ||
+	  	opcode == opc_fload || opcode == opc_aload)
+	    Thumb2_Load(jinfo, local, stackdepth);
+	  else if (opcode == opc_lload || opcode == opc_dload)
+	    Thumb2_LoadX2(jinfo, local, stackdepth);
+	  else if (opcode == opc_istore ||
+	  	opcode == opc_fstore || opcode == opc_astore)
+	    Thumb2_Store(jinfo, local, stackdepth);
+	  else if (opcode == opc_lstore || opcode == opc_dstore)
+	    Thumb2_StoreX2(jinfo, local, stackdepth);
+	  else fatal(err_msg("Undefined wide opcode %d\n", opcode));
+	}
+	break;
+      }
+
+      default:
+	JASSERT(0, "unknown bytecode");
+	break;
+    }
+    bci += len;
+    if (len == 0) {
+      if (start_idx == jinfo->codebuf->idx) SET_START_BCI(start_idx, -1);
+    } else
+      SET_END_BCI(start_idx, bci);
+  }
+}
+
+#define BEG_BCI_OFFSET		0
+#define END_BCI_OFFSET		1
+#define HANDLER_BCI_OFFSET	2
+#define KLASS_INDEX_OFFSET	3
+#define ENTRY_SIZE		4
+
+extern "C" int Thumb2_lr_to_bci(unsigned lr, methodOop method, Reg *regs, unsigned *locals)
+{
+  Compiled_Method *cmethod = compiled_method_list;
+  ExceptionTable table(method);
+  constantPoolOop pool = method->constants();
+  int length = table.length();
+
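+  // Find the compiled method whose code range contains lr, then scan its
+  // packed exception table: each 32-bit entry holds the begin and end code
+  // offsets (in halfwords) of one protected region.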
+  while (cmethod) {
+    unsigned *exception_table = cmethod->exception_table;
+    if (exception_table) {
+      unsigned code_base = (unsigned)cmethod;
+      if (code_base <= lr && lr <= (unsigned)exception_table) {
+	int exception_index = -1;
+	unsigned exception_found = 0;
+
+	for (int i = 0; i < length; i++) {
+	  unsigned offsets = *exception_table++;
+	  unsigned exc_beg = code_base + ((offsets >> 16) << 1);
+	  unsigned exc_end = code_base + ((offsets & 0xffff) << 1);
+
+	  if (exc_beg <= lr && lr <= exc_end) {
+	    if (exc_beg > exception_found) {
+	      // With nested try catch blocks, choose the most deeply nested
+	      exception_found = exc_beg;
+	      exception_index = i;
+	    }
+	  }
+	}
+
+	// Only act after the whole table has been scanned, so that the most
+	// deeply nested matching handler is the one chosen.
+	if (exception_index >= 0) {
+	  if (regs) {
+	    for (unsigned i = 0; i < PREGS; i++) {
+	      int local = cmethod->regusage[i];
+	      if (local >= 0) {
+		locals[-local] = regs[i];
+	      }
+	    }
+	  }
+	  return table.start_pc(exception_index);
+	}
+      }
+    }
+    cmethod = cmethod->next;
+  }
+  return -1;
+}
+
+void Thumb2_generate_exception_table(Compiled_Method *cmethod, Thumb2_Info *jinfo)
+{
+  methodOop method = jinfo->method;
+  ExceptionTable table(method);
+  constantPoolOop pool = method->constants();
+  int length = table.length();
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+
+  cmethod->exception_table = (unsigned *)out_pos(jinfo->codebuf);
+  for (int i = 0; i < length; i++) {
+    int b_bci = table.start_pc(i);
+    int e_bci = table.end_pc(i);
+    unsigned stackinfo;
+    unsigned beg_offset, end_offset;
+
+    stackinfo = bc_stackinfo[b_bci];
+    beg_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+    stackinfo = bc_stackinfo[e_bci];
+    end_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+    if (!(beg_offset != 0 && end_offset >= beg_offset && end_offset < 65536)) {
+	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+    }
+    out_32(jinfo->codebuf, (beg_offset << 16) | (end_offset));
+  }
+}
+
+void Thumb2_tablegen(Compiled_Method *cmethod, Thumb2_Info *jinfo)
+{
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned bci;
+  unsigned count = 0;
+  unsigned i;
+  CodeBuf *codebuf = jinfo->codebuf;
+
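+  // Build the OSR table: a count followed by one (bci << 16 | code_offset/2)
+  // entry for every backward branch target.  Thumb2_osr_from_bci() searches
+  // this table to map a bytecode index to a compiled code entry point.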
+  cmethod->osr_table = (unsigned *)out_pos(jinfo->codebuf);
+  out_32(codebuf, 0);
+  bc_stackinfo[0] |= BC_BACK_TARGET;
+  for (bci = 0; bci < code_size;) {
+    unsigned stackinfo = bc_stackinfo[bci];
+    unsigned bytecodeinfo;
+    unsigned opcode;
+
+    if (stackinfo & BC_BACK_TARGET) {
+      unsigned code_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+      JASSERT(stackinfo & BC_COMPILED, "back branch target not compiled???");
+      if (code_offset >= 65536) {
+	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+      }
+//      JASSERT(code_offset < (1<<16), "oops, codesize too big");
+      out_32(codebuf, (bci << 16) | code_offset);
+      count++;
+    }
+
+    opcode = code_base[bci];
+    bytecodeinfo = bcinfo[opcode];
+    if (!BCI_SPECIAL(bytecodeinfo)) {
+      bci += BCI_LEN(bytecodeinfo);
+      continue;
+    } else {
+      int len = Bytecodes::length_for((Bytecodes::Code)opcode);
+      if (len <= 0) {
+	Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	len = (Bytecodes::special_length_at
+	       (code,
+		(address)(code_base+bci), (address)(code_base+code_size)));
+      }
+      bci += len;
+    }
+  }
+  *cmethod->osr_table = count;
+  if (jinfo->method->has_exception_handler())
+    Thumb2_generate_exception_table(cmethod, jinfo);
+}
+
+extern "C" void Thumb2_Clear_Cache(char *base, char *limit);
+#define IS_COMPILED(e, cb) ((e) >= (unsigned)(cb) && (e) < (unsigned)(cb) + (cb)->size)
+
+unsigned Thumb2_osr_from_bci(Compiled_Method *cmethod, unsigned bci)
+{
+  unsigned *osr_table;
+  unsigned count;
+  unsigned i;
+
+  osr_table = cmethod->osr_table;
+  if (!osr_table) return 0;
+  count = *osr_table++;
+  for (i = 0; i < count; i++) {
+    unsigned u = *osr_table++;
+
+    if (bci == (u>>16)) return (u & 0xffff) << 1;
+  }
+  return 0;
+}
+
+extern "C" void Debug_Stack(intptr_t *stack)
+{
+  int i;
+  char msg[16];
+
+  tty->print("  Stack:");
+  for (i = 0; i < 6; i++) {
+    tty->print(" [");
+    sprintf(msg, "%d", i);
+    tty->print(msg);
+    tty->print("] = ");
+    sprintf(msg, "%08x", (int)stack[i]);
+    tty->print(msg);
+  }
+  tty->cr();
+}
+
+extern "C" void Debug_MethodEntry(interpreterState istate, intptr_t *stack, methodOop callee)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Entering ");
+    callee->print_short_name(tty);
+    tty->print(" from ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+  }
+ )
+}
+
+extern "C" void Debug_MethodExit(interpreterState istate, intptr_t *stack)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Leaving ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+    if (exc) tty->print_cr("Exception %s", exc->print_value_string());
+  }
+ )
+}
+
+extern "C" void Debug_MethodCall(interpreterState istate, intptr_t *stack, methodOop callee)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Calling ");
+    callee->print_short_name(tty);
+    tty->print(" from ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+  }
+ )
+}
+extern "C" void Thumb2_Install(methodOop mh, u32 entry);
+
+extern "C" unsigned cmpxchg_ptr(unsigned new_value, volatile unsigned *ptr, unsigned cmp_value);
+static volatile unsigned compiling;
+
+static unsigned CompileCount = 1000000;
+static unsigned DisassAfter = 0;
+static char *T2CompileOnly = NULL;
+static unsigned MaxCompile = 10000;
+
+#define COMPILE_ONLY	T2CompileOnly
+#define COMPILE_COUNT	CompileCount
+//#define DISASS_AFTER	DisassAfter
+//#define COMPILE_LIST
+
+#ifdef COMPILE_LIST
+static const char *compile_list[] = {
+	0
+};
+#endif
+
+static unsigned compiled_methods = 0;
+
+#ifdef T2_PRINT_STATISTICS
+static unsigned bytecodes_compiled = 0;
+static unsigned arm_code_generated = 0;
+static clock_t total_compile_time = 0;
+#endif
+
+extern "C" unsigned long long Thumb2_Compile(JavaThread *thread, unsigned branch_pc)
+{
+  HandleMark __hm(thread);
+  frame fr = thread->last_frame();
+  methodOop method = fr.interpreter_frame_method();
+  Symbol *name = method->name();
+  Symbol *sig = method->signature();
+  const jbyte *base = sig->base();
+
+  jubyte *code_base = (jubyte *)method->code_base();
+  int code_size = method->code_size();
+  InvocationCounter* ic = method->invocation_counter();
+  InvocationCounter* bc = method->backedge_counter();
+  Thumb2_Info jinfo_str;
+  CodeBuf codebuf_str;
+  Thumb2_Stack jstack_str;
+  Thumb2_Registers jregs_str;
+  int idx;
+  u32 code_handle, slow_entry;
+  Thumb2_CodeBuf *cb = thumb2_codebuf;
+  int rc;
+  char *saved_hp;
+  Compiled_Method *cmethod;
+  u32 compiled_offset;
+  Thumb2_Entrypoint thumb_entry;
+  int compiled_accessor;
+
+  // Only support arch >= 6 for the moment
+  if (!ARCH_GE_V6(CPUInfo)) UseCompiler = false;  
+
+  {
+    bool ignore;
+    methodHandle mh(thread, method);
+    if (!UseCompiler || method->is_not_compilable()
+	|| CompilerOracle::should_exclude(mh, ignore)) {
+      ic->set(ic->state(), 1);
+      bc->set(ic->state(), 1);
+      return 0;
+    }
+  }
+
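+  // If this method has already been compiled, just return the OSR entry
+  // point (if any) for the requested branch target.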
+  slow_entry = *(unsigned *)method->from_interpreted_entry();
+  if (IS_COMPILED(slow_entry, cb)) {
+    cmethod = (Compiled_Method *)(slow_entry & ~TBIT);
+    compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
+    if (compiled_offset == 0) return 0;
+    thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
+    thumb_entry.regusage = cmethod->regusage;
+    return *(unsigned long long *)&thumb_entry;
+  }
+
+  ic->decay();
+  bc->decay();
+
+  // Don't compile anything with a code size >= 32K:
+  // we rely on the bytecode index fitting in 16 bits.
+  //
+  // Don't compile anything with max stack + max locals > 1K:
+  // the range of an LDR in Thumb-2 is -4092..4092, so larger frames make
+  // the locals hard to reach from the stack pointer.
+  //
+  if (code_size > THUMB2_MAX_BYTECODE_SIZE ||
+		(method->max_locals() + method->max_stack()) >= 1000) {
+        method->set_not_compilable();
+	return 0;
+  }
+
+#ifdef COMPILE_COUNT
+  if (compiled_methods == COMPILE_COUNT) return 0;
+#endif
+
+#ifdef COMPILE_ONLY
+    if (COMPILE_ONLY && strcmp(name->as_C_string(), COMPILE_ONLY) != 0)
+      return 0;
+#endif
+
+#ifdef COMPILE_LIST
+  {
+	const char **argv = compile_list;
+	const char *s;
+	while (s = *argv++) {
+		if (strcmp(s, method->name_and_sig_as_C_string()) == 0)
+			break;
+	}
+	if (!s) {
+		method->set_not_compilable();
+		return 0;
+	}
+  }
+#endif
+
+  saved_hp = cb->hp;
+  if ((rc = setjmp(compiler_error_env)) != 0) {
+    cb->hp = saved_hp;
+    if (rc == COMPILER_RESULT_FAILED)
+        method->set_not_compilable();
+    if (rc == COMPILER_RESULT_FATAL)
+	UseCompiler = false;
+    compiling = 0;
+    return 0;
+  }
+
+  if (cmpxchg_ptr(1, &compiling, 0)) return 0;
+
+#ifdef T2_PRINT_STATISTICS
+  clock_t compile_time = clock();
+#endif
+
+#ifdef T2_PRINT_COMPILATION
+  if (PrintCompilation || PrintAssembly) {
+    fprintf(stderr, "Compiling %d %c%c %s\n",
+	compiled_methods,
+	method->is_synchronized() ? 'S' : ' ',
+	method->has_exception_handler() ? 'E' : ' ',
+	method->name_and_sig_as_C_string());
+  }
+#endif
+
+  memset(bc_stackinfo, 0, code_size * sizeof(unsigned));
+  memset(locals_info, 0, method->max_locals() * sizeof(unsigned));
+#ifdef T2_PRINT_DISASS
+  memset(start_bci, 0xff, sizeof(start_bci));
+  memset(end_bci, 0xff, sizeof(end_bci));
+#endif
+
+#ifdef THUMB2_JVMTI
+  address_bci_map_reset(thread);
+#endif // THUMB2_JVMTI
+
+  jinfo_str.thread = thread;
+  jinfo_str.method = method;
+  jinfo_str.code_base = code_base;
+  jinfo_str.code_size = code_size;
+  jinfo_str.bc_stackinfo = bc_stackinfo;
+  jinfo_str.locals_info = locals_info;
+  jinfo_str.compiled_return = 0;
+  for (int i = 0; i < 12; i++) jinfo_str.compiled_word_return[i] = 0;
+  jinfo_str.is_leaf = 1;
+  // use_istate must be enabled when DebugSwitch is set because several of
+  // the Debug_ methods rely on it.  Note that it may change code generation.
+  jinfo_str.use_istate = DebugSwitch || method->has_monitor_bytecodes();
+
+  Thumb2_local_info_from_sig(&jinfo_str, method, base);
+
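+  // Pass 1 scans the bytecode to fill in bc_stackinfo[] and locals_info[]
+  // (stack depths, branch targets, local variable usage); these drive the
+  // register allocator and the code generator below.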
+  Thumb2_pass1(&jinfo_str, 0, 0);
+
+  codebuf_str.codebuf = (unsigned short *)cb->hp;
+  codebuf_str.idx = 0;
+  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  jstack_str.stack = stack;
+  jstack_str.depth = 0;
+
+  memset(r_local, 0, method->max_locals() * sizeof(unsigned));
+
+  jregs_str.r_local = r_local;
+
+  jinfo_str.codebuf = &codebuf_str;
+  jinfo_str.jstack = &jstack_str;
+  jinfo_str.jregs = &jregs_str;
+
+  jregs_str.pregs[0] = JAZ_V1;
+  jregs_str.pregs[1] = JAZ_V2;
+  jregs_str.pregs[2] = JAZ_V3;
+  jregs_str.pregs[3] = JAZ_V4;
+  jregs_str.pregs[4] = JAZ_V5;
+  jregs_str.pregs[5] = JAZ_V6;
+
+  jregs_str.npregs = PREGS;
+
+  Thumb2_RegAlloc(&jinfo_str);
+
+  slow_entry = out_align_offset(&codebuf_str, CODE_ALIGN, SLOW_ENTRY_OFFSET);
+  cmethod = (Compiled_Method *)slow_entry;
+  if (Thumb2) slow_entry |= TBIT;
+
+  cb->hp += codebuf_str.idx * 2;
+  codebuf_str.codebuf = (unsigned short *)cb->hp;
+  codebuf_str.idx = 0;
+  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  compiled_accessor = 1;
+  if (!method->is_accessor() || !Thumb2_Accessor(&jinfo_str)) {
+    Thumb2_Enter(&jinfo_str);
+    Thumb2_codegen(&jinfo_str, 0);
+    compiled_accessor = 0;
+  }
+
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+#ifdef DISASS_AFTER
+    if (compiled_methods >= DISASS_AFTER) {
+      Thumb2_disass(&jinfo_str);
+    }
+#else
+    Thumb2_disass(&jinfo_str);
+#endif
+  }
+#endif
+
+  for (int i = 0; i < PREGS; i++)
+    cmethod->regusage[i] = jregs_str.mapping[i];
+
+  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf_str.idx * 2);
+
+#ifdef T2_PRINT_STATISTICS
+  compile_time = clock() - compile_time;
+  total_compile_time += compile_time;
+
+  if (t2_print_statistics) {
+    unsigned codegen = codebuf_str.idx * 2;
+    bytecodes_compiled += code_size;
+    arm_code_generated += codegen;
+    fprintf(stderr, "%d bytecodes => %d bytes code in %.2f sec, totals: %d => %d in %.2f sec\n",
+      code_size, codegen, (double)compile_time/(double)CLOCKS_PER_SEC,
+    bytecodes_compiled, arm_code_generated, (double)total_compile_time/(double)CLOCKS_PER_SEC);
+  }
+#endif
+
+  code_handle = out_align(&codebuf_str, sizeof(address));
+
+  out_32(&codebuf_str, slow_entry);
+
+  if (!compiled_accessor)
+    Thumb2_tablegen(cmethod, &jinfo_str);
+
+  cb->hp += codebuf_str.idx * 2;
+
+  //if (!Thumb2) longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+
+  *compiled_method_list_tail_ptr = cmethod;
+  compiled_method_list_tail_ptr = &(cmethod->next);
+
+  Thumb2_Install(method, code_handle);
+
+  compiled_methods++;
+
+  compiling = 0;
+
+  compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
+  if (compiled_offset == 0) return 0;
+  thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
+  thumb_entry.regusage = cmethod->regusage;
+
+#ifdef THUMB2_JVMTI
+  {
+    // we need to dispatch a compiled_method_load event
+    // to all registered Jvmti agents
+
+    // notify the whole generated code region for this Java method
+    // from slow_entry through to the end of the osr table. some
+    // of it is data not code but that's not a problem.
+
+    const void *gen_code_start = (const void *)(slow_entry & ~TBIT);
+    unsigned gen_code_size = codebuf_str.idx * 2;
+
+    // address_bci_map translates start addresses for generated code
+    // sections to bytecode indices and contains address_bci_map_length
+    // entries
+
+    // the final compile_info argument is supposed to contain
+    // information about inlined code. we can supply NULL for now -
+    // oprofile doesn't use it anyway
+
+    void *compile_info = NULL;
+
+    // transition from in Java to in VM before calling into Jvmti
+    ThreadInVMfromJava transition(thread);
+
+    JvmtiExport::post_compiled_method_load(method, gen_code_size,
+		gen_code_start, address_bci_map_length,
+		address_bci_map, NULL);
+  }
+#endif // THUMB2_JVMTI
+
+  return *(unsigned long long *)&thumb_entry;
+}
+
+extern "C" void Thumb2_DivZero_Handler(void);
+extern "C" void Thumb2_ArrayBounds_Handler(void);
+extern "C" void Thumb2_Handle_Exception(void);
+extern "C" void Thumb2_Handle_Exception_NoRegs(void);
+extern "C" void Thumb2_Exit_To_Interpreter(void);
+extern "C" void Thumb2_Stack_Overflow(void);
+extern "C" void Thumb2_monitorenter(void);
+
+extern "C" void __divsi3(void);
+extern "C" void __aeabi_ldivmod(void);
+extern "C" void __aeabi_i2f(void);
+extern "C" void __aeabi_i2d(void);
+extern "C" void __aeabi_l2f(void);
+extern "C" void __aeabi_l2d(void);
+extern "C" void __aeabi_f2d(void);
+extern "C" void __aeabi_d2f(void);
+extern "C" void Helper_new(void);
+extern "C" void Helper_instanceof(void);
+extern "C" void Helper_checkcast(void);
+extern "C" void Helper_monitorexit(void);
+extern "C" void Helper_aastore(void);
+extern "C" void Helper_aputfield(void);
+extern "C" void Helper_synchronized_enter(void);
+extern "C" void Helper_synchronized_exit(void);
+extern "C" void Helper_SafePoint(void);
+
+extern "C" void _ZN13SharedRuntime3f2iEf(void);
+extern "C" void _ZN13SharedRuntime3f2lEf(void);
+extern "C" void _ZN13SharedRuntime3d2iEd(void);
+extern "C" void _ZN13SharedRuntime3d2lEd(void);
+extern "C" void _ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei(void);
+extern "C" void _ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii(void);
+extern "C" void _ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi(void);
+extern "C" void _ZN18InterpreterRuntime3ldcEP10JavaThreadb(void);
+
+extern char Thumb2_stubs[];
+extern char Thumb2_stubs_end[];
+extern char Thumb2_idiv_stub[];
+extern char Thumb2_irem_stub[];
+extern char Thumb2_invokeinterface_stub[];
+extern char Thumb2_invokevirtual_stub[];
+extern char Thumb2_invokestatic_stub[];
+extern char Thumb2_invokespecial_stub[];
+extern char Thumb2_getfield_word_stub[];
+extern char Thumb2_getfield_sh_stub[];
+extern char Thumb2_getfield_h_stub[];
+extern char Thumb2_getfield_sb_stub[];
+extern char Thumb2_getfield_dw_stub[];
+extern char Thumb2_putfield_word_stub[];
+extern char Thumb2_putfield_h_stub[];
+extern char Thumb2_putfield_b_stub[];
+extern char Thumb2_putfield_a_stub[];
+extern char Thumb2_putfield_dw_stub[];
+extern char Thumb2_getstatic_word_stub[];
+extern char Thumb2_getstatic_sh_stub[];
+extern char Thumb2_getstatic_h_stub[];
+extern char Thumb2_getstatic_sb_stub[];
+extern char Thumb2_getstatic_dw_stub[];
+extern char Thumb2_putstatic_word_stub[];
+extern char Thumb2_putstatic_h_stub[];
+extern char Thumb2_putstatic_b_stub[];
+extern char Thumb2_putstatic_a_stub[];
+extern char Thumb2_putstatic_dw_stub[];
+
+extern char Thumb2_invokestaticresolved_stub[];
+extern char Thumb2_invokespecialresolved_stub[];
+extern char Thumb2_invokevirtualresolved_stub[];
+extern char Thumb2_invokevfinalresolved_stub[];
+
+#define STUBS_SIZE	(Thumb2_stubs_end-Thumb2_stubs)
+#define IDIV_STUB		(Thumb2_idiv_stub-Thumb2_stubs)
+#define IREM_STUB		(Thumb2_irem_stub-Thumb2_stubs)
+#define INVOKEINTERFACE_STUB	(Thumb2_invokeinterface_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_STUB	(Thumb2_invokevirtual_stub-Thumb2_stubs)
+#define INVOKESTATIC_STUB	(Thumb2_invokestatic_stub-Thumb2_stubs)
+#define INVOKESPECIAL_STUB	(Thumb2_invokespecial_stub-Thumb2_stubs)
+#define GETFIELD_WORD_STUB	(Thumb2_getfield_word_stub-Thumb2_stubs)
+#define GETFIELD_SH_STUB	(Thumb2_getfield_sh_stub-Thumb2_stubs)
+#define GETFIELD_H_STUB		(Thumb2_getfield_h_stub-Thumb2_stubs)
+#define GETFIELD_SB_STUB	(Thumb2_getfield_sb_stub-Thumb2_stubs)
+#define GETFIELD_DW_STUB	(Thumb2_getfield_dw_stub-Thumb2_stubs)
+#define PUTFIELD_WORD_STUB	(Thumb2_putfield_word_stub-Thumb2_stubs)
+#define PUTFIELD_H_STUB		(Thumb2_putfield_h_stub-Thumb2_stubs)
+#define PUTFIELD_B_STUB		(Thumb2_putfield_b_stub-Thumb2_stubs)
+#define PUTFIELD_A_STUB		(Thumb2_putfield_a_stub-Thumb2_stubs)
+#define PUTFIELD_DW_STUB	(Thumb2_putfield_dw_stub-Thumb2_stubs)
+#define GETSTATIC_WORD_STUB	(Thumb2_getstatic_word_stub-Thumb2_stubs)
+#define GETSTATIC_SH_STUB	(Thumb2_getstatic_sh_stub-Thumb2_stubs)
+#define GETSTATIC_H_STUB	(Thumb2_getstatic_h_stub-Thumb2_stubs)
+#define GETSTATIC_SB_STUB	(Thumb2_getstatic_sb_stub-Thumb2_stubs)
+#define GETSTATIC_DW_STUB	(Thumb2_getstatic_dw_stub-Thumb2_stubs)
+#define PUTSTATIC_WORD_STUB	(Thumb2_putstatic_word_stub-Thumb2_stubs)
+#define PUTSTATIC_H_STUB	(Thumb2_putstatic_h_stub-Thumb2_stubs)
+#define PUTSTATIC_B_STUB	(Thumb2_putstatic_b_stub-Thumb2_stubs)
+#define PUTSTATIC_A_STUB	(Thumb2_putstatic_a_stub-Thumb2_stubs)
+#define PUTSTATIC_DW_STUB	(Thumb2_putstatic_dw_stub-Thumb2_stubs)
+
+#define INVOKESTATIC_RESOLVED_STUB (Thumb2_invokestaticresolved_stub-Thumb2_stubs)
+#define INVOKESPECIAL_RESOLVED_STUB (Thumb2_invokespecialresolved_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_RESOLVED_STUB (Thumb2_invokevirtualresolved_stub-Thumb2_stubs)
+#define INVOKEVFINAL_RESOLVED_STUB (Thumb2_invokevfinalresolved_stub-Thumb2_stubs)
+
+extern "C" void Thumb2_NullPtr_Handler(void);
+
+
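+// If the faulting pc lies within the compiled code buffer, rewrite the saved
+// register state so that execution resumes in Thumb2_NullPtr_Handler (in ARM
+// state) with lr holding the faulting pc.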
+extern "C" int Thumb2_Check_Null(unsigned *regs, unsigned pc)
+{
+  Thumb2_CodeBuf *cb = thumb2_codebuf;
+  // Ignore if < ARMv6
+  if (!ARCH_GE_V6(CPUInfo)) return 0;
+  if (IS_COMPILED(pc, cb)) {
+    regs[ARM_LR] = pc;
+    regs[ARM_PC] = (unsigned)Thumb2_NullPtr_Handler;
+    regs[ARM_CPSR] &= ~CPSR_THUMB_BIT;
+    return 1;
+  }
+  return 0;
+}
+
+extern "C" void Thumb2_Initialize(void)
+{
+  CodeBuf codebuf;
+  Thumb2_CodeBuf *cb;
+  u32 h_divzero;
+  u32 loc_irem, loc_idiv, loc_ldiv;
+  int rc;
+
+  // Only support arch >= 6 for the moment
+  if (!ARCH_GE_V6(CPUInfo)) {
+    UseCompiler = false;
+    return;
+  }
+
+#ifdef T2_PRINT_COMPILATION
+  PrintCompilation |= getenv("T2_PRINT_COMPILATION") != NULL;
+#endif
+#ifdef T2_PRINT_STATISTICS
+  t2_print_statistics = getenv("T2_PRINT_STATISTICS");
+#endif
+#ifdef T2_PRINT_DISASS
+  PrintAssembly |= getenv("T2_PRINT_DISASS") != NULL;
+#endif
+#ifdef T2_PRINT_REGUSAGE
+  t2_print_regusage = getenv("T2_PRINT_REGUSAGE");
+#endif
+#ifdef COMPILE_COUNT
+    char *tmp = getenv("T2_COMPILE_COUNT");
+    if (tmp) CompileCount = atol(tmp);
+#endif
+#ifdef DISASS_AFTER
+  char *tmp = getenv("T2_DISASS_AFTER");
+  if (tmp) DisassAfter = atol(tmp);
+#endif
+#ifdef COMPILE_ONLY
+  T2CompileOnly = getenv("T2_COMPILE_ONLY");
+#endif
+  DebugSwitch = getenv("T2_DEBUG");
+
+  if (getenv("T2_COMPILE_ARM") != NULL) Thumb2 = 0;
+  if (getenv("T2_COMPILE_THUMB") != NULL) Thumb2 = 1;
+  if (!(CPUInfo & ARCH_THUMB2)) Thumb2 = 0;
+
+  //printf("Compiling for %s\n", Thumb2 ? "Thumb" : "ARM");
+
+  cb = (Thumb2_CodeBuf *)mmap(0, THUMB2_CODEBUF_SIZE, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+  if (cb == MAP_FAILED) {
+    UseCompiler = false;
+    return;
+  }
+
+  cb->size = THUMB2_CODEBUF_SIZE;
+  cb->hp = (char *)cb + sizeof(Thumb2_CodeBuf);
+  cb->sp = (char *)cb + THUMB2_CODEBUF_SIZE;
+
+  codebuf.codebuf = (unsigned short *)cb->hp;
+  codebuf.idx = 0;
+  codebuf.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  if ((rc = setjmp(compiler_error_env)) != 0) {
+    UseCompiler = false;
+    return;
+  }
+
+#ifdef THUMB2_JVMTI
+  // cache the start of the generated stub region for notification later
+  stub_gen_code_start = cb->hp;
+#endif // THUMB2_JVMTI
+
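+  // Copy the pre-assembled ARM stubs into the code buffer and record the
+  // address of each stub in the handlers[] table.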
+  memcpy(cb->hp, Thumb2_stubs, STUBS_SIZE);
+
+  // fprintf(stderr, "Thumb2_stubs offset: 0x%x\n",
+  // 	  (char*)(cb->hp) - (char*)Thumb2_stubs);
+
+  handlers[H_IDIV] = (unsigned)(cb->hp + IDIV_STUB);
+  handlers[H_IREM] = (unsigned)(cb->hp + IREM_STUB);
+  handlers[H_INVOKEINTERFACE] = (unsigned)(cb->hp + INVOKEINTERFACE_STUB);
+  handlers[H_INVOKEVIRTUAL] = (unsigned)(cb->hp + INVOKEVIRTUAL_STUB);
+  handlers[H_INVOKESTATIC] = (unsigned)(cb->hp + INVOKESTATIC_STUB);
+  handlers[H_INVOKESPECIAL] = (unsigned)(cb->hp + INVOKESPECIAL_STUB);
+
+  handlers[H_GETFIELD_WORD] = (unsigned)(cb->hp + GETFIELD_WORD_STUB);
+  handlers[H_GETFIELD_SH] = (unsigned)(cb->hp + GETFIELD_SH_STUB);
+  handlers[H_GETFIELD_H] = (unsigned)(cb->hp + GETFIELD_H_STUB);
+  handlers[H_GETFIELD_SB] = (unsigned)(cb->hp + GETFIELD_SB_STUB);
+  handlers[H_GETFIELD_DW] = (unsigned)(cb->hp + GETFIELD_DW_STUB);
+
+  handlers[H_INVOKESTATIC_RESOLVED] = (unsigned)(cb->hp + INVOKESTATIC_RESOLVED_STUB);
+  handlers[H_INVOKESPECIAL_RESOLVED] = (unsigned)(cb->hp + INVOKESPECIAL_RESOLVED_STUB);
+  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKEVIRTUAL_RESOLVED_STUB);
+  handlers[H_INVOKEVFINAL] = (unsigned)(cb->hp + INVOKEVFINAL_RESOLVED_STUB);
+
+  handlers[H_PUTFIELD_WORD] = (unsigned)(cb->hp + PUTFIELD_WORD_STUB);
+  handlers[H_PUTFIELD_H] = (unsigned)(cb->hp + PUTFIELD_H_STUB);
+  handlers[H_PUTFIELD_B] = (unsigned)(cb->hp + PUTFIELD_B_STUB);
+  handlers[H_PUTFIELD_A] = (unsigned)(cb->hp + PUTFIELD_A_STUB);
+  handlers[H_PUTFIELD_DW] = (unsigned)(cb->hp + PUTFIELD_DW_STUB);
+
+  handlers[H_GETSTATIC_WORD] = (unsigned)(cb->hp + GETSTATIC_WORD_STUB);
+  handlers[H_GETSTATIC_SH] = (unsigned)(cb->hp + GETSTATIC_SH_STUB);
+  handlers[H_GETSTATIC_H] = (unsigned)(cb->hp + GETSTATIC_H_STUB);
+  handlers[H_GETSTATIC_SB] = (unsigned)(cb->hp + GETSTATIC_SB_STUB);
+  handlers[H_GETSTATIC_DW] = (unsigned)(cb->hp + GETSTATIC_DW_STUB);
+
+  handlers[H_PUTSTATIC_WORD] = (unsigned)(cb->hp + PUTSTATIC_WORD_STUB);
+  handlers[H_PUTSTATIC_H] = (unsigned)(cb->hp + PUTSTATIC_H_STUB);
+  handlers[H_PUTSTATIC_B] = (unsigned)(cb->hp + PUTSTATIC_B_STUB);
+  handlers[H_PUTSTATIC_A] = (unsigned)(cb->hp + PUTSTATIC_A_STUB);
+  handlers[H_PUTSTATIC_DW] = (unsigned)(cb->hp + PUTSTATIC_DW_STUB);
+
+  codebuf.idx += (Thumb2_stubs_end-Thumb2_stubs) >> 1;
+
+  // Disassemble the codebuf we just created.  For debugging.  This
+  // first part is all ARM code; the part that we're about to create
+  // is Thumb code.
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+    Hsdis hsdis;
+    hsdis.decode_instructions(cb->hp, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
+			      "");
+    fputc('\n', stderr);
+  }
+#endif
+
+  char *begin_thumb_code = cb->hp + codebuf.idx * 2;
+
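+// LDIV/LREM handler
+//   r0:r1 = dividend, r2:r3 = divisor
+// Branches to the divide-by-zero handler if the divisor is zero, otherwise
+// tail-calls __aeabi_ldivmod.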
+  handlers[H_LDIV] = handlers[H_LREM] = out_pos(&codebuf);
+  dop_reg(&codebuf, DP_ORR, ARM_IP, ARM_R2, ARM_R3, 0, 0);
+  loc_ldiv = forward_short(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_ldivmod);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+  bcc_patch(&codebuf, COND_EQ, loc_ldiv);
+  mov_imm(&codebuf, ARM_IP, (u32)Thumb2_DivZero_Handler);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_ARRAYBOUND] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_ArrayBounds_Handler);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_HANDLE_EXCEPTION] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_HANDLE_EXCEPTION_NO_REGS] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_STACK_OVERFLOW] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Stack_Overflow);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_DREM] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_imm(&codebuf, ARM_IP, (u32)fmod);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toVFP(&codebuf, VFP_D0, ARM_R0, ARM_R1);
+  vmov_reg_d_toVFP(&codebuf, VFP_D1, ARM_R2, ARM_R3);
+#endif
+  blx_reg(&codebuf, ARM_IP);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toARM(&codebuf, ARM_R0, ARM_R1, VFP_D0);
+#endif
+  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+  handlers[H_FREM] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_imm(&codebuf, ARM_R3, (u32)fmodf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+  vmov_reg_s_toVFP(&codebuf, VFP_S1, ARM_R1);
+#endif
+  blx_reg(&codebuf, ARM_R3);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toARM(&codebuf, ARM_R0, VFP_S0);
+#endif
+  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+  handlers[H_I2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_I2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_L2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_L2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2I] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2iEf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2L] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2lEf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_f2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2I] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2iEd);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toVFP(&codebuf, VFP_D0, ARM_R0, ARM_R1);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2L] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2lEd);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toVFP(&codebuf, VFP_D0, ARM_R0, ARM_R1);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_d2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// NEW Stub
+//   r1 = index
+//   r3 = bci
+//   result -> R0, == 0 => exception
+  handlers[H_NEW] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_METHOD);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_new);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R3);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  str_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_BCP);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// NEWARRAY Stub
+//   r1 = atype
+//   r2 = tos
+//   r3 = bci
+//   result -> thread->vm_result
+  handlers[H_NEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_R0, ARM_R3);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// ANEWARRAY Stub
+//   r0 = bci
+//   r2 = index
+//   r3 = tos
+//   result -> thread->vm_result
+  handlers[H_ANEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  str_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTANTS);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MULTIANEWARRAY Stub
+//   r0 = bci
+//   r1 = dimensions (*4)
+  handlers[H_MULTIANEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  add_reg(&codebuf, Rstack, Rstack, ARM_R1);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC Stub
+//   r0 = bci
+  handlers[H_LDC] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC_W Stub
+//   r0 = bci
+  handlers[H_LDC_W] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 1);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// INSTANCEOF Stub
+//   r1 = index
+//   r2 = tos
+//   r3 = bci
+//   result -> R0, == -1 => exception
+  handlers[H_INSTANCEOF] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_instanceof);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// CHECKCAST Stub
+//   r1 = index
+//   r2 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_CHECKCAST] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_checkcast);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MONITORENTER
+//   r0 = bci
+  handlers[H_MONITORENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_monitorenter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// MONITOREXIT Stub
+//   r1 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_MONITOREXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_monitorexit);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// AASTORE Stub
+//   r0 = bci
+//   r1 = value
+//   r2 = index
+//   r3 = arrayref
+  handlers[H_AASTORE] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_IP, Rstack, 4);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_aastore);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// APUTFIELD Stub
+//   r0 = obj
+  handlers[H_APUTFIELD] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Helper_aputfield);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// SYNCHRONIZED_ENTER Stub
+//   r0 = bci
+//   Rstack = monitor
+  handlers[H_SYNCHRONIZED_ENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R0);
+  str_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_BCP-ISTATE_NEXT_FRAME);
+
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R1, ISTATE_STACK-ISTATE_NEXT_FRAME);
+
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_enter);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_R1, Rstack);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+//
+// SYNCHRONIZED_EXIT Stub
+//   r0 = bci
+//   r1 = monitor
+  handlers[H_SYNCHRONIZED_EXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+
+  ldr_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_exit);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
+
+// DEBUG_METHDENTRY
+  handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R2, ARM_R0);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodEntry);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// DEBUG_METHODEXIT
+  handlers[H_DEBUG_METHODEXIT] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodExit);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// DEBUG_METHODCALL
+  handlers[H_DEBUG_METHODCALL] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R2, ARM_R0);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodCall);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// EXIT_TO_INTERPRETER
+//   r0 = bci
+  handlers[H_EXIT_TO_INTERPRETER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// H_SAFEPOINT
+  handlers[H_SAFEPOINT] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+
+  // The frame walking code used by the garbage collector
+  // (frame::interpreter_frame_tos_address()) assumes that the stack
+  // pointer points one word below the top item on the stack, so we
+  // have to adjust the SP saved in istate accordingly.  If we don't,
+  // the value on TOS won't be seen by the GC and we will crash later.
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK);
+
+  // Set up BytecodeInterpreter->_bcp for the GC
+  // bci+CONSTMETHOD_CODEOFFSET is passed in ARM_R1
+  // istate is passed in ARM_R2
+  ldr_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_BCP);
+
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_SafePoint);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, (1<<ARM_LR), ARM_SP, POP_FD, 1);
+  cmp_imm(&codebuf, ARM_R0, 0);
+
+  // The sequence here is delicate.  We need to set things up so that
+  // it looks as though Thumb2_Handle_Exception_NoRegs was called
+  // directly from a compiled method.
+  mov_reg(&codebuf, ARM_PC, ARM_LR, COND_EQ);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  // Disassemble the codebuf we just created.  For debugging
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+    Hsdis hsdis;
+    hsdis.decode_instructions(begin_thumb_code, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
+			      Thumb2 ? "force-thumb" : "");
+    fputc('\n', stderr);
+  }
+#endif
+
+  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf.idx * 2);
+  cb->hp += codebuf.idx * 2;
+
+  thumb2_codebuf = cb;
+
+#ifdef THUMB2_JVMTI
+  // cache the end of the generated stub region for notification later
+  stub_gen_code_end = cb->hp;
+#endif // THUMB2_JVMTI
+}
+
+#endif // T2JIT
+
+#endif // __arm__
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/arm_cas.S	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,31 @@
+#ifdef __ARM_ARCH_7A__
+@	jlong
+@	arm_val_compare_and_swap_long(volatile void *ptr,
+@				 jlong oldval,
+@				 jlong newval) {
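+@
+@ The LDREXD/STREXD loop below retries until the exclusive store succeeds,
+@ and the surrounding DMB barriers give the operation full-barrier semantics.
+@ The function returns the value observed at *ptr (in r0:r1), so a caller can
+@ detect success by comparing the result with oldval, e.g. (illustrative
+@ sketch only, not part of this patch):
+@   success = (arm_val_compare_and_swap_long(ptr, oldval, newval) == oldval);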
+	.pushsection .text
+	.global arm_val_compare_and_swap_long
+#ifdef __thumb__
+	.syntax	unified
+	.thumb_func
+#endif
+	.type arm_val_compare_and_swap_long, %function
+arm_val_compare_and_swap_long:
+	stmfd	sp!, {r4, r5, r6, r7}
+	ldrd	r4, [sp, #16]
+	dmb	sy
+0:	ldrexd	r6, [r0]
+	cmp	r6, r2
+	it	eq
+	cmpeq	r7, r3
+	bne	1f
+	strexd	r1, r4, [r0]
+	cmp	r1, #0
+	bne	0b
+	dmb	sy
+1:	mov	r0, r6
+	mov	r1, r7
+	ldmfd	sp!, {r4, r5, r6, r7}
+	bx	lr
+	.popsection
+#endif // __ARM_ARCH_7A__
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/asm_helper.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2009, 2010 Edward Nevill
+ * Copyright 2011, Red Hat
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef __arm__
+
+#define	ARCH_THUMB2	(1<<16)
+#define ARCH_VFP	(1<<17)
+#define ARCH_CLZ	(1<<18)
+
+/* A workaround for private and protected fields */
+#define private   public
+#define protected public
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interp_masm_zero.hpp"
+#include "interpreter/bytecodeInterpreter.hpp"
+#include "interpreter/bytecodeInterpreter.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/methodDataOop.hpp"
+#include "oops/methodOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "oops/klassOop.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+
+
+#ifndef STATIC_OFFSETS
+
+#include <linux/auxvec.h>
+#include <asm/hwcap.h>
+
+#define VECBUFF_SIZE 64
+
+static char valuebuf[128];
+
+// Return the name of the current method.  Not multi-thread safe.
+extern "C" char*
+meth(interpreterState istate) {
+  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf);
+  char *p = valuebuf + strlen(valuebuf);
+  sprintf(p, ": " PTR_FORMAT " (bci %d)",
+	  (intptr_t) istate->bcp(),
+	  istate->method()->bci_from(istate->bcp()));
+  return valuebuf;
+}
+
+// Used for debugging the interpreter.  The macro TRACE in
+// cppInterpreter_arm.S calls this routine, and you can trap on a
+// particular method.
+#define NAME1 "sun.nio.ch.FileChannelImpl$Unmapper.run()V"
+#define EQ(S1, S2) (S1 && (strncmp(S1, S2, strlen(S2)) == 0))
+extern "C" void my_trace(void *jpc, interpreterState istate)
+{
+  JavaThread *jt = istate->thread();
+  if (jt->zero_stack()->sp() && jt->top_zero_frame()) {
+    bool has_last_Java_frame = jt->has_last_Java_frame();
+    if (!has_last_Java_frame)
+      jt->set_last_Java_frame();
+
+    StackFrameStream sfs(jt);
+    for(int i = 0; !sfs.is_done(); sfs.next(), i++) {
+    }
+
+    // Reset the frame anchor if necessary
+    if (!has_last_Java_frame)
+      jt->reset_last_Java_frame();
+  }
+}
+
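+// Probe the kernel's ELF auxiliary vector (/proc/self/auxv) for AT_HWCAP and
+// AT_PLATFORM and return a bitmask of the ARCH_* flags above plus
+// (1 << architecture version), e.g. bit 7 for an ARMv7 platform string.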
+extern "C" unsigned hwcap(void)
+{
+  int fd;
+  unsigned vecs[VECBUFF_SIZE];
+  unsigned *p;
+  int i, n;
+  unsigned rc = 0;
+  unsigned arch = 4;
+ 
+  fd = open("/proc/self/auxv", O_RDONLY);
+  if (fd < 0) return 0;
+  do {
+    n = read(fd, vecs, VECBUFF_SIZE * sizeof(unsigned));
+    p = vecs;
+    i = n/8;
+    while (--i >= 0) {
+      unsigned tag = *p++;
+      unsigned value = *p++;
+      if (tag == 0) goto fini;
+      if (tag == AT_HWCAP) {
+	if (value & HWCAP_THUMBEE) rc |= ARCH_THUMB2;
+	if (value & HWCAP_VFP) rc |= ARCH_VFP;
+      } else if (tag == AT_PLATFORM) {
+	const char *s = (const char *)value;
+	int c;
+
+	if (*s++ == 'v') {
+	  arch = 0;
+	  while ((isdigit)(c = *s++)) arch = arch * 10 + c - '0';
+	}
+      }
+    }
+  } while (n == VECBUFF_SIZE * sizeof(unsigned));
+fini:
+  close(fd);
+//  printf("arch = %d, rc = 0x%08x\n", arch, rc);
+  if (arch >= 5) rc |= ARCH_CLZ;
+  if (arch >= 7) rc |= ARCH_THUMB2;
+  return rc | (1<<arch);
+}
+
+/* These functions allow the ASM interpreter to call C++ virtual functions.
+ * Otherwise the ASM interpreter would have to grub around in the VTABLE, which
+ * is not very portable.
+ */
+extern "C" bool JavaThread_is_lock_owned(JavaThread *r0, address r1)
+{
+	return r0->is_lock_owned(r1);
+}
+
+extern "C" HeapWord **CollectedHeap_top_addr(CollectedHeap *r0)
+{
+	return r0->top_addr();
+}
+
+extern "C" HeapWord **CollectedHeap_end_addr(CollectedHeap *r0)
+{
+	return r0->end_addr();
+}
+
+extern "C" char *SharedRuntime_generate_class_cast_message(const char *name, const char *klass)
+{
+	return SharedRuntime::generate_class_cast_message(name, klass);
+}
+
+#define HELPER_THROW(thread, name, msg) Exceptions::_throw_msg(thread, __FILE__, __LINE__, name, msg)
+
+class VMStructs {
+public:
+  static inline klassOop klass_at_addr(constantPoolOop constants, u2 index) {
+    return (klassOop) *constants->obj_at_addr_raw(index);
+  }
+};
+
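+// Allocation fast path for 'new' called from the ASM interpreter: try the
+// thread-local allocation buffer first, then a CAS bump of the shared eden
+// top, and fall back to InterpreterRuntime::_new() for the slow cases
+// (unresolved class, finalizable class, heap full, ...).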
+extern "C" oop Helper_new(interpreterState istate, unsigned index)
+{
+    JavaThread *thread = istate->thread();
+
+    constantPoolOop constants = istate->method()->constants();
+    oop result = NULL;
+    if (!constants->tag_at(index).is_unresolved_klass()) {
+      // Make sure klass is initialized and doesn't have a finalizer
+      oop entry = VMStructs::klass_at_addr(constants, index);
+      klassOop k_entry = (klassOop) entry;
+      instanceKlass* ik = (instanceKlass*) k_entry->klass_part();
+      if ( ik->is_initialized() && ik->can_be_fastpath_allocated() ) {
+	size_t obj_size = ik->size_helper();
+	// If the TLAB isn't pre-zeroed then we'll have to do it
+	bool need_zero = !ZeroTLAB;
+	if (UseTLAB) {
+	  result = (oop) thread->tlab().allocate(obj_size);
+	}
+	if (result == NULL && !CMSIncrementalMode) {
+	  need_zero = true;
+	  // Try to allocate in shared eden
+    retry:
+	  HeapWord* compare_to = *Universe::heap()->top_addr();
+	  HeapWord* new_top = compare_to + obj_size;
+	  if (new_top <= *Universe::heap()->end_addr()) {
+	    if (Atomic::cmpxchg_ptr(new_top, Universe::heap()->top_addr(), compare_to) != compare_to) {
+	      goto retry;
+	    }
+	    result = (oop) compare_to;
+	  }
+	}
+	if (result != NULL) {
+	  // Initialize the object (if it needs zeroing and has a nonzero size) and then the header
+	  if (need_zero ) {
+	    HeapWord* to_zero = (HeapWord*) result + sizeof(oopDesc) / oopSize;
+	    obj_size -= sizeof(oopDesc) / oopSize;
+	    if (obj_size > 0 ) {
+	      memset(to_zero, 0, obj_size * HeapWordSize);
+	    }
+	  }
+	  if (UseBiasedLocking) {
+	    result->set_mark(ik->prototype_header());
+	  } else {
+	    result->set_mark(markOopDesc::prototype());
+	  }
+	  result->set_klass_gap(0);
+	  result->set_klass(k_entry);
+	  return result;
+	}
+      }
+    }
+    // Slow case allocation
+    InterpreterRuntime::_new(thread, istate->method()->constants(), index);
+    result = thread->vm_result();
+    thread->set_vm_result(NULL);
+    return result;
+}
+
+extern "C" int Helper_instanceof(interpreterState istate, unsigned index, oop tos)
+{
+    if (tos == NULL) return 0;
+
+    // Constant pool may have actual klass or unresolved klass. If it is
+    // unresolved we must resolve it
+    if (istate->method()->constants()->tag_at(index).is_unresolved_klass()) {
+      InterpreterRuntime::quicken_io_cc(istate->thread());
+      if (istate->thread()->has_pending_exception()) return 0;
+    }
+    klassOop klassOf = VMStructs::klass_at_addr(istate->method()->constants(), index);
+    klassOop objKlassOop = tos->klass();
+    //
+    // Check for compatibility. This check must not GC!!
+    // Seems way more expensive now that we must dispatch
+    //
+    return objKlassOop == klassOf || objKlassOop->klass_part()->is_subtype_of(klassOf);
+}
+
+extern "C" oop Helper_checkcast(interpreterState istate, unsigned index, oop tos)
+{
+    if (tos == NULL) return NULL;
+
+    // Constant pool may have actual klass or unresolved klass. If it is
+    // unresolved we must resolve it
+    if (istate->method()->constants()->tag_at(index).is_unresolved_klass()) {
+      oop except_oop;
+      InterpreterRuntime::quicken_io_cc(istate->thread());
+      if ((except_oop = istate->thread()->pending_exception()) != NULL) return except_oop;
+    }
+    klassOop klassOf = VMStructs::klass_at_addr(istate->method()->constants(), index);
+    klassOop objKlassOop = tos->klass(); //ebx
+    //
+    // Check for compatibility. This check must not GC!!
+    // Seems way more expensive now that we must dispatch
+    //
+    if (objKlassOop != klassOf && !objKlassOop->klass_part()->is_subtype_of(klassOf)) {
+      ResourceMark rm(istate->thread());
+      const char* objName = Klass::cast(objKlassOop)->external_name();
+      const char* klassName = Klass::cast(klassOf)->external_name();
+      char* message = SharedRuntime::generate_class_cast_message(objName, klassName);
+      ThreadInVMfromJava trans(istate->thread());
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_ClassCastException(), message);
+    }
+    return istate->thread()->pending_exception();
+}
+
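+// Lightweight monitorenter for the ASM interpreter: find (or make room for) a
+// free BasicObjectLock slot in the frame's monitor area, then try to claim the
+// lock with a CAS on the object's mark word, falling back to
+// InterpreterRuntime::monitorenter() when that fails and it is not a simple
+// recursive lock.  Returns any pending exception.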
+extern "C" oop Helper_monitorenter(interpreterState istate, oop lockee)
+{
+    BasicObjectLock* limit = istate->monitor_base();
+    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+    BasicObjectLock* entry = NULL;
+    markOop displaced;
+    JavaThread *thread = istate->thread();
+
+    if (lockee == NULL) {
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_NullPointerException(), "");
+      goto handle_exception;
+    }
+    while (most_recent != limit ) {
+      if (most_recent->obj() == NULL) entry = most_recent;
+      else if (most_recent->obj() == lockee) break;
+      most_recent++;
+    }
+    if (entry == NULL) {
+      int monitor_words = frame::interpreter_frame_monitor_size();
+      ZeroStack *stack = thread->zero_stack();
+
+      if (monitor_words > stack->available_words()) {
+        InterpreterRuntime::throw_StackOverflowError(thread);
+	goto handle_exception;
+      } else {
+	stack->alloc(monitor_words * wordSize);
+
+	for (intptr_t *p = istate->stack() + 1; p < istate->stack_base(); p++)
+	  *(p - monitor_words) = *p;
+
+	istate->set_stack_limit(istate->stack_limit() - monitor_words);
+	istate->set_stack(istate->stack() - monitor_words);
+	istate->set_stack_base(istate->stack_base() - monitor_words);
+
+	entry = (BasicObjectLock *) istate->stack_base();
+      }
+    }
+    entry->set_obj(lockee);
+    displaced = lockee->mark()->set_unlocked();
+    entry->lock()->set_displaced_header(displaced);
+    if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
+      // Is it simple recursive case?
+      if (thread->is_lock_owned((address) displaced->clear_lock_bits())) {
+	entry->lock()->set_displaced_header(NULL);
+      } else {
+	InterpreterRuntime::monitorenter(thread, entry);
+      }
+    }
+handle_exception:
+    return thread->pending_exception();
+}
+
+extern "C" oop Helper_monitorexit(interpreterState istate, oop lockee)
+{
+    BasicObjectLock* limit = istate->monitor_base();
+    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+    JavaThread *thread = istate->thread();
+
+    if (lockee == NULL) {
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_NullPointerException(), "");
+      goto handle_exception;
+    }
+    while (most_recent != limit ) {
+      if ((most_recent)->obj() == lockee) {
+	BasicLock* lock = most_recent->lock();
+	markOop header = lock->displaced_header();
+	most_recent->set_obj(NULL);
+	if (header != NULL) {
+	  if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
+	    // restore object for the slow case
+	    most_recent->set_obj(lockee);
+	    InterpreterRuntime::monitorexit(thread, most_recent);
+	  }
+	}
+	return thread->pending_exception();
+      }
+      most_recent++;
+    }
+    InterpreterRuntime::throw_illegal_monitor_state_exception(thread);
+handle_exception:
+    return thread->pending_exception();
+}
+
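+// Out-of-line aastore: performs the null check, bounds check and array store
+// (subtype) check, then stores via obj_at_put() so the usual write barrier is
+// applied.  Returns any pending exception.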
+extern "C" oop Helper_aastore(interpreterState istate, oop value, int index, arrayOop arrayref)
+{
+    if (arrayref == NULL) {
+      ThreadInVMfromJava trans(istate->thread());
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_NullPointerException(), "");
+    } else if ((uint32_t)index >= (uint32_t)arrayref->length()) {
+      char message[jintAsStringSize];
+      sprintf(message, "%d", index);
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), message);
+    } else {
+      if (value != NULL) {
+	/* Check assignability of value into arrayref */
+	klassOop rhsKlassOop = value->klass(); // EBX (subclass)
+	klassOop elemKlassOop = ((objArrayKlass*) arrayref->klass()->klass_part())->element_klass();
+	//
+	// Check for compatibility. This check must not GC!!
+	// Seems way more expensive now that we must dispatch
+	//
+	if (rhsKlassOop != elemKlassOop && !rhsKlassOop->klass_part()->is_subtype_of(elemKlassOop)) {
+	  HELPER_THROW(istate->thread(), vmSymbols::java_lang_ArrayStoreException(), "");
+	  goto handle_exception;
+	}
+      }
+      ((objArrayOopDesc *) arrayref)->obj_at_put(index, value);
+    }
+handle_exception:
+    return istate->thread()->pending_exception();
+}
+
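+// Card-marking helper for a reference putfield: dirties the card covering obj
+// in the CardTableModRefBS byte map.  The reference store itself is presumably
+// performed by the assembly caller before invoking this helper.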
+extern "C" void Helper_aputfield(oop obj, oop val, int offset)
+{
+      BarrierSet* bs = Universe::heap()->barrier_set();
+      jbyte* _byte_map_base = (((CardTableModRefBS*)bs)->byte_map_base);
+      OrderAccess::release_store(&_byte_map_base[(uintptr_t)obj >> CardTableModRefBS::card_shift], 0);
+}
+
+extern "C" oop Helper_synchronized_enter(JavaThread *thread, BasicObjectLock *mon)
+{
+    BasicLock *lock = mon->lock();
+    markOop displaced = lock->displaced_header();
+
+    if (thread->is_lock_owned((address)displaced->clear_lock_bits()))
+      lock->set_displaced_header(NULL);
+    else
+      InterpreterRuntime::monitorenter(thread, mon);
+    return thread->pending_exception();
+}
+
+extern "C" oop Helper_synchronized_exit(JavaThread *thread, BasicObjectLock *mon)
+{
+    {
+      HandleMark __hm(thread);
+      if (mon->obj() == NULL)
+	InterpreterRuntime::throw_illegal_monitor_state_exception(thread);
+      else
+        InterpreterRuntime::monitorexit(thread, mon);
+    }
+    return thread->pending_exception();
+}
+
+extern "C" oop Helper_SafePoint(JavaThread *thread)
+{
+    {
+      HandleMarkCleaner __hmc(thread);
+    }
+    SafepointSynchronize::block(thread);
+    return thread->pending_exception();
+}
+
+extern "C" void Helper_RaiseArrayBoundException(JavaThread *thread, int index)
+{
+  char message[jintAsStringSize];
+  sprintf(message, "%d", index);
+  {
+       ThreadInVMfromJava trans(thread);
+       Exceptions::_throw_msg(thread, "[Bytecode Interpreter]", 99,
+			vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), message);
+  }
+}
+
+extern "C" void Helper_Raise(JavaThread *thread, Symbol *name, char const *msg)
+{
+   ThreadInVMfromJava trans(thread);
+   Exceptions::_throw_msg(thread, "[Bytecode Interpreter]", 99, name, msg);
+}
+
+extern "C" void Helper_RaiseIllegalMonitorException(JavaThread *thread)
+{
+    HandleMark __hm(thread);
+    thread->clear_pending_exception();
+    InterpreterRuntime::throw_illegal_monitor_state_exception(thread);
+}
+
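+// Exception dispatch for the ASM interpreter: ask the runtime for a handler in
+// the current method.  If one is found, push the exception oop onto the
+// expression stack and return the handler's pc; otherwise re-install the
+// pending exception and return 0 so the caller unwinds the frame.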
+extern "C" address Helper_HandleException(interpreterState istate, JavaThread *thread)
+{
+    HandleMarkCleaner __hmc(thread);
+    Handle except_oop(thread, thread->pending_exception());
+    HandleMark __hm(thread);
+    intptr_t continuation_bci;
+    intptr_t *topOfStack;
+    address pc;
+
+    thread->clear_pending_exception();
+    continuation_bci = (intptr_t)InterpreterRuntime::exception_handler_for_exception(thread, except_oop());
+    except_oop = (oop) thread->vm_result();
+    thread->set_vm_result(NULL);
+    if (continuation_bci >= 0) {
+      topOfStack = (intptr_t *)istate->stack();
+      *topOfStack-- = (intptr_t)except_oop();
+      istate->set_stack(topOfStack);
+      pc = istate->method()->code_base() + continuation_bci;
+#if 0
+        tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", Klass::cast(except_oop->klass())->external_name(), except_oop());
+        tty->print_cr(" thrown in interpreter method <%s>", istate->method()->name_and_sig_as_C_string());
+        tty->print_cr(" at bci %d, continuing at %d for thread " INTPTR_FORMAT,
+                      pc - (intptr_t)istate->method()->code_base(),
+                      continuation_bci, thread);
+#endif
+      return pc;
+    }
+#if 0
+      tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", Klass::cast(except_oop->klass())->external_name(), except_oop());
+      tty->print_cr(" thrown in interpreter method <%s>", istate->method()->name_and_sig_as_C_string());
+      tty->print_cr(" at bci %d, unwinding for thread " INTPTR_FORMAT,
+                    pc  - (intptr_t) istate->method()->code_base(),
+                    thread);
+#endif
+    thread->set_pending_exception(except_oop(), NULL, 0);
+    return 0;
+}
+
+extern "C" void Helper_report_fatal(char *filename, int line,
+				    char *msg, int opcode, char *name)
+{
+  report_fatal(filename, line,
+	       err_msg(msg, opcode, name));
+}
+
+extern "C" int Helper_target_offset_in_bytes()
+{
+  return java_lang_invoke_CallSite::target_offset_in_bytes();
+}
+
+#endif // STATIC_OFFSETS
+
+#ifdef STATIC_OFFSETS
+
+class VMStructs {
+public:
+	static void print_vm_offsets(void);
+};
+
+#define outfile	stdout
+
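+// When compiled with -DSTATIC_OFFSETS this file becomes a stand-alone
+// generator: main() below prints "#undef NAME" / "#define NAME 0xNN" pairs for
+// the VM offsets and constants, presumably for consumption by the assembly
+// interpreter sources.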
+void print_def(const char *s, int v)
+{
+	fprintf(outfile, "#undef %-40s\n", s);
+	fprintf(outfile, "#define %-40s 0x%02x\n", s, v);
+}
+
+void nl(void)
+{
+	fputc('\n', outfile);
+}
+
+// ZeroFrame does not declare VMStructs as a friend, but it does declare ZeroStackPrinter as one
+class ZeroStackPrinter {
+public:
+  static void print_vm_offsets(void);
+};
+
+void ZeroStackPrinter::print_vm_offsets(void)
+{
+    print_def("INTERPRETER_FRAME", ZeroFrame::INTERPRETER_FRAME);
+}
+
+void VMStructs::print_vm_offsets(void)
+{
+  print_def("ISTATE_THREAD",    offset_of(BytecodeInterpreter, _thread));
+  print_def("ISTATE_BCP",       offset_of(BytecodeInterpreter, _bcp));
+  print_def("ISTATE_LOCALS",    offset_of(BytecodeInterpreter, _locals));
+  print_def("ISTATE_CONSTANTS", offset_of(BytecodeInterpreter, _constants));
+  print_def("ISTATE_METHOD",    offset_of(BytecodeInterpreter, _method));
+  print_def("ISTATE_STACK",     offset_of(BytecodeInterpreter, _stack));
+  print_def("ISTATE_MSG",       offset_of(BytecodeInterpreter, _msg));
+  print_def("ISTATE_OOP_TEMP",	offset_of(BytecodeInterpreter, _oop_temp));
+  print_def("ISTATE_STACK_BASE",offset_of(BytecodeInterpreter, _stack_base));
+  print_def("ISTATE_STACK_LIMIT",offset_of(BytecodeInterpreter, _stack_limit));
+  print_def("ISTATE_MONITOR_BASE",offset_of(BytecodeInterpreter, _monitor_base));
+  print_def("ISTATE_SELF_LINK",	offset_of(BytecodeInterpreter, _self_link));
+  print_def("ISTATE_FRAME_TYPE", sizeof(BytecodeInterpreter) + 0);
+  print_def("ISTATE_NEXT_FRAME", sizeof(BytecodeInterpreter) + 4);
+  print_def("FRAME_SIZE", sizeof(BytecodeInterpreter) + 8);
+  nl();
+  ZeroStackPrinter::print_vm_offsets();
+  nl();
+  print_def("THREAD_PENDING_EXC", offset_of(JavaThread, _pending_exception));
+  print_def("THREAD_SUSPEND_FLAGS", offset_of(JavaThread, _suspend_flags));
+  print_def("THREAD_ACTIVE_HANDLES", offset_of(JavaThread, _active_handles));
+  print_def("THREAD_LAST_HANDLE_MARK", offset_of(JavaThread, _last_handle_mark));
+  print_def("THREAD_TLAB_TOP", offset_of(JavaThread, _tlab) + offset_of(ThreadLocalAllocBuffer, _top));
+  print_def("THREAD_TLAB_END", offset_of(JavaThread, _tlab) + offset_of(ThreadLocalAllocBuffer, _end));
+  print_def("THREAD_RESOURCEAREA", offset_of(JavaThread, _resource_area));
+  print_def("THREAD_HANDLE_AREA", offset_of(JavaThread, _handle_area));
+  print_def("THREAD_STACK_BASE", offset_of(JavaThread, _stack_base));
+  print_def("THREAD_STACK_SIZE", offset_of(JavaThread, _stack_size));
+  print_def("THREAD_LAST_JAVA_SP", offset_of(JavaThread, _anchor)
+	    + offset_of(JavaFrameAnchor, _last_Java_sp));
+  print_def("THREAD_LAST_JAVA_FP", offset_of(JavaThread, _anchor)
+	    + offset_of(JavaFrameAnchor, _last_Java_fp));
+  print_def("THREAD_JNI_ENVIRONMENT", offset_of(JavaThread, _jni_environment));
+  print_def("THREAD_VM_RESULT", offset_of(JavaThread, _vm_result));
+  print_def("THREAD_STATE", offset_of(JavaThread, _thread_state));
+  print_def("THREAD_DO_NOT_UNLOCK", offset_of(JavaThread, _do_not_unlock_if_synchronized));
+
+  print_def("THREAD_JAVA_STACK_BASE", offset_of(JavaThread, _zero_stack) + in_bytes(ZeroStack::base_offset()));
+  print_def("THREAD_JAVA_SP", offset_of(JavaThread, _zero_stack) + in_bytes(ZeroStack::sp_offset()));
+  print_def("THREAD_TOP_ZERO_FRAME", offset_of(JavaThread, _top_zero_frame));
+  print_def("THREAD_SPECIALRUNTIMEEXITCONDITION", offset_of(JavaThread, _special_runtime_exit_condition));
+  nl();
+  print_def("_thread_external_suspend",	Thread::_external_suspend);
+  print_def("_thread_ext_suspended",	Thread::_ext_suspended);
+  print_def("_thread_deopt_suspend",	Thread::_deopt_suspend);
+  nl();
+  print_def("METHOD_CONSTMETHOD", offset_of(methodOopDesc, _constMethod));
+  print_def("METHOD_CONSTANTS", offset_of(constMethodOopDesc, _constants));
+  print_def("METHOD_METHODDATA", offset_of(methodOopDesc, _method_data));
+  print_def("METHOD_INVOKECOUNT", offset_of(methodOopDesc, _interpreter_invocation_count));
+  print_def("METHOD_ACCESSFLAGS", offset_of(methodOopDesc, _access_flags));
+  print_def("METHOD_VTABLEINDEX", offset_of(methodOopDesc, _vtable_index));
+  print_def("METHOD_RESULTINDEX", offset_of(methodOopDesc, _result_index));
+  print_def("METHOD_METHODSIZE", offset_of(methodOopDesc, _method_size));
+  print_def("METHOD_MAXSTACK", offset_of(methodOopDesc, _max_stack));
+  print_def("METHOD_MAXLOCALS", offset_of(methodOopDesc, _max_locals));
+  print_def("METHOD_SIZEOFPARAMETERS", offset_of(methodOopDesc, _size_of_parameters));
+  print_def("METHOD_INVOCATIONCOUNTER", offset_of(methodOopDesc, _invocation_counter));
+  print_def("METHOD_BACKEDGECOUNTER", offset_of(methodOopDesc, _backedge_counter));
+  print_def("METHOD_FROM_INTERPRETED", offset_of(methodOopDesc, _from_interpreted_entry));
+  // ECN: These two appear to be just tagged onto the end of the class
+  print_def("METHOD_NATIVEHANDLER", sizeof(methodOopDesc));
+  print_def("METHOD_SIGNATUREHANDLER", sizeof(methodOopDesc)+4);
+  nl();
+  print_def("CONSTMETHOD_CODESIZE", offset_of(constMethodOopDesc, _code_size));
+  print_def("CONSTMETHOD_CODEOFFSET", sizeof(constMethodOopDesc));
+  nl();
+  print_def("JNIHANDLEBLOCK_TOP", offset_of(JNIHandleBlock, _top));
+  nl();
+  print_def("KLASS_PART", sizeof(klassOopDesc));
+  print_def("KLASS_ACCESSFLAGS", offset_of(Klass, _access_flags));
+  print_def("KLASS_JAVA_MIRROR", offset_of(Klass, _java_mirror));
+  print_def("INSTANCEKLASS_INITSTATE", offset_of(instanceKlass, _init_state));
+  print_def("INSTANCEKLASS_VTABLE_LEN", offset_of(instanceKlass, _vtable_len));
+  print_def("INSTANCEKLASS_ITABLE_LEN", offset_of(instanceKlass, _itable_len));
+  print_def("INSTANCEKLASS_VTABLE_OFFSET", instanceKlass::vtable_start_offset() * sizeof(int *));
+  print_def("OBJARRAYKLASS_ELEMENTKLASS", offset_of(objArrayKlass, _element_klass));
+  nl();
+  print_def("CONSTANTPOOL_TAGS", offset_of(constantPoolOopDesc, _tags));
+  print_def("CONSTANTPOOL_CACHE", offset_of(constantPoolOopDesc, _cache));
+  print_def("CONSTANTPOOL_POOL_HOLDER", offset_of(constantPoolOopDesc, _pool_holder));
+  print_def("CONSTANTPOOL_BASE", sizeof(constantPoolOopDesc));
+  print_def("CP_CACHE_VOLATILE_FIELD_FLAG_BIT", ConstantPoolCacheEntry::is_volatile_shift);
+  print_def("CP_CACHE_FLAGS", offset_of(ConstantPoolCacheEntry, _flags));
+  nl();
+  print_def("CP_OFFSET", in_bytes(constantPoolCacheOopDesc::base_offset()));
+  nl();
+  print_def("BASE_OFFSET_BYTE", arrayOopDesc::base_offset_in_bytes(T_BYTE));
+  print_def("BASE_OFFSET_SHORT", arrayOopDesc::base_offset_in_bytes(T_SHORT));
+  print_def("BASE_OFFSET_WORD", arrayOopDesc::base_offset_in_bytes(T_INT));
+  print_def("BASE_OFFSET_LONG", arrayOopDesc::base_offset_in_bytes(T_LONG));
+  nl();
+  print_def("SIZEOF_HANDLEMARK", sizeof(HandleMark));
+  print_def("SIZEOF_FFI_CIF", sizeof(ffi_cif));
+}
+
+int main(void)
+{
+	print_def("ARCH_VFP",			ARCH_VFP);
+	print_def("ARCH_THUMB2",		ARCH_THUMB2);
+	print_def("ARCH_CLZ",			ARCH_CLZ);
+	nl();
+	print_def("JVM_CONSTANT_Utf8",		JVM_CONSTANT_Utf8);
+	print_def("JVM_CONSTANT_Unicode",	JVM_CONSTANT_Unicode);
+	print_def("JVM_CONSTANT_Integer",	JVM_CONSTANT_Integer);
+	print_def("JVM_CONSTANT_Float",		JVM_CONSTANT_Float);
+	print_def("JVM_CONSTANT_Long",		JVM_CONSTANT_Long);
+	print_def("JVM_CONSTANT_Double",	JVM_CONSTANT_Double);
+	print_def("JVM_CONSTANT_Class",		JVM_CONSTANT_Class);
+	print_def("JVM_CONSTANT_String",	JVM_CONSTANT_String);
+	print_def("JVM_CONSTANT_Fieldref",	JVM_CONSTANT_Fieldref);
+	print_def("JVM_CONSTANT_Methodref",	JVM_CONSTANT_Methodref);
+	print_def("JVM_CONSTANT_InterfaceMethodref", JVM_CONSTANT_InterfaceMethodref);
+	print_def("JVM_CONSTANT_NameAndType",	JVM_CONSTANT_NameAndType);
+	nl();
+	print_def("JVM_CONSTANT_UnresolvedClass",	JVM_CONSTANT_UnresolvedClass);
+	print_def("JVM_CONSTANT_ClassIndex",		JVM_CONSTANT_ClassIndex);
+	print_def("JVM_CONSTANT_UnresolvedString",	JVM_CONSTANT_UnresolvedString);
+	print_def("JVM_CONSTANT_StringIndex",		JVM_CONSTANT_StringIndex);
+	print_def("JVM_CONSTANT_UnresolvedClassInError",JVM_CONSTANT_UnresolvedClassInError);
+	nl();
+	print_def("JVM_ACC_PUBLIC",	JVM_ACC_PUBLIC);
+	print_def("JVM_ACC_PRIVATE",	JVM_ACC_PRIVATE);
+	print_def("JVM_ACC_PROTECTED",	JVM_ACC_PROTECTED);
+	print_def("JVM_ACC_STATIC",	JVM_ACC_STATIC);
+	print_def("JVM_ACC_FINAL",	JVM_ACC_FINAL);
+	print_def("JVM_ACC_SYNCHRONIZED",	JVM_ACC_SYNCHRONIZED);
+	print_def("JVM_ACC_SUPER",	JVM_ACC_SUPER);
+	print_def("JVM_ACC_VOLATILE",	JVM_ACC_VOLATILE);
+	print_def("JVM_ACC_BRIDGE",	JVM_ACC_BRIDGE);
+	print_def("JVM_ACC_TRANSIENT",	JVM_ACC_TRANSIENT);
+	print_def("JVM_ACC_VARARGS",	JVM_ACC_VARARGS);
+	print_def("JVM_ACC_NATIVE",	JVM_ACC_NATIVE);
+	print_def("JVM_ACC_INTERFACE",	JVM_ACC_INTERFACE);
+	print_def("JVM_ACC_ABSTRACT",	JVM_ACC_ABSTRACT);
+	print_def("JVM_ACC_STRICT",	JVM_ACC_STRICT);
+	print_def("JVM_ACC_SYNTHETIC",	JVM_ACC_SYNTHETIC);
+	print_def("JVM_ACC_ANNOTATION",	JVM_ACC_ANNOTATION);
+	print_def("JVM_ACC_ENUM",	JVM_ACC_ENUM);
+	print_def("JVM_ACC_HAS_FINALIZER", JVM_ACC_HAS_FINALIZER);
+	nl();
+	print_def("T_BOOLEAN",	T_BOOLEAN);
+	print_def("T_CHAR",	T_CHAR);
+	print_def("T_FLOAT",	T_FLOAT);
+	print_def("T_DOUBLE",	T_DOUBLE);
+	print_def("T_BYTE",	T_BYTE);
+	print_def("T_SHORT",	T_SHORT);
+	print_def("T_INT",	T_INT);
+	print_def("T_LONG",	T_LONG);
+	print_def("T_OBJECT",	T_OBJECT);
+	print_def("T_ARRAY",	T_ARRAY);
+	print_def("T_VOID",	T_VOID);
+	nl();
+	print_def("tos_btos",	btos);
+	print_def("tos_ctos",	ctos);
+	print_def("tos_stos",	stos);
+	print_def("tos_itos",	itos);
+	print_def("tos_ltos",	ltos);
+	print_def("tos_ftos",	ftos);
+	print_def("tos_dtos",	dtos);
+	print_def("tos_atos",	atos);
+	nl();
+	print_def("_thread_uninitialized",	_thread_uninitialized);
+	print_def("_thread_new",		_thread_new);
+	print_def("_thread_new_trans",		_thread_new_trans);
+	print_def("_thread_in_native",		_thread_in_native);
+	print_def("_thread_in_native_trans",	_thread_in_native_trans);
+	print_def("_thread_in_vm",		_thread_in_vm);
+	print_def("_thread_in_vm_trans",	_thread_in_vm_trans);
+	print_def("_thread_in_Java",		_thread_in_Java);
+	print_def("_thread_in_Java_trans",	_thread_in_Java_trans);
+	print_def("_thread_blocked",		_thread_blocked);
+	print_def("_thread_blocked_trans",	_thread_blocked_trans);
+	print_def("_thread_max_state",		_thread_max_state);
+	nl();
+	print_def("class_unparsable_by_gc",	instanceKlass::unparsable_by_gc);
+	print_def("class_allocated",		instanceKlass::allocated);
+	print_def("class_loaded",		instanceKlass::loaded);
+	print_def("class_linked",		instanceKlass::linked);
+	print_def("class_being_initialized",	instanceKlass::being_initialized);
+	print_def("class_fully_initialized",	instanceKlass::fully_initialized);
+	print_def("class_init_error",		instanceKlass::initialization_error);
+	nl();
+	print_def("flag_is_forced_virtual",	1 << ConstantPoolCacheEntry::is_forced_virtual_shift);
+	print_def("flag_methodInterface",	1 << ConstantPoolCacheEntry::has_method_type_shift);
+	print_def("flag_volatileField",		1 << ConstantPoolCacheEntry::is_volatile_shift);
+	print_def("flag_vfinalMethod",		1 << ConstantPoolCacheEntry::is_vfinal_shift);
+	print_def("flag_finalField",		1 << ConstantPoolCacheEntry::is_final_shift);
+	nl();
+	print_def("INVOCATIONCOUNTER_COUNTINCREMENT", InvocationCounter::count_increment);
+	nl();
+	VMStructs::print_vm_offsets();
+	nl();
+	print_def("VMSYMBOLS_ArithmeticException", vmSymbols::java_lang_ArithmeticException_enum);
+	print_def("VMSYMBOLS_ArrayIndexOutOfBounds", vmSymbols::java_lang_ArrayIndexOutOfBoundsException_enum);
+	print_def("VMSYMBOLS_ArrayStoreException", vmSymbols::java_lang_ArrayStoreException_enum);
+	print_def("VMSYMBOLS_ClassCastException", vmSymbols::java_lang_ClassCastException_enum);
+	print_def("VMSYMBOLS_NullPointerException", vmSymbols::java_lang_NullPointerException_enum);
+	print_def("VMSYMBOLS_AbstractMethodError", vmSymbols::java_lang_AbstractMethodError_enum);
+	print_def("VMSYMBOLS_IncompatibleClassChangeError", vmSymbols::java_lang_IncompatibleClassChangeError_enum);
+	print_def("VMSYMBOLS_InternalError", vmSymbols::java_lang_InternalError_enum);
+
+	print_def("AbstractInterpreter_java_lang_math_sin", AbstractInterpreter::java_lang_math_sin);
+	print_def("AbstractInterpreter_number_of_method_entries", AbstractInterpreter::number_of_method_entries);
+
+	return 0;
+}
+
+#endif // STATIC_OFFSETS
+
+#endif // __arm__
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/bytecodes_arm.def	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,7850 @@
+@ Copyright 2009, 2010 Edward Nevill
+@ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+@
+@ This code is free software; you can redistribute it and/or modify it
+@ under the terms of the GNU General Public License version 2 only, as
+@ published by the Free Software Foundation.
+@
+@ This code is distributed in the hope that it will be useful, but WITHOUT
+@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+@ version 2 for more details (a copy is included in the LICENSE file that
+@ accompanied this code).
+@
+@ You should have received a copy of the GNU General Public License version
+@ 2 along with this work; if not, write to the Free Software Foundation,
+@ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+#ifdef SHARK
+#define DISABLE_NOTICE_SAFEPOINTS
+#endif
+
+#ifndef DISABLE_HW_FP
+#define HW_FP
+#endif
+#ifndef DISABLE_NOTICE_SAFEPOINTS
+#define NOTICE_SAFEPOINTS
+#endif
+#ifndef DISABLE_FAST_BYTECODES
+#define FAST_BYTECODES
+#endif
+
+/*  WARNING: If you change any of these bytecodes, you must also
+    change the safe_dispatch_table in cppInterpreter_arm.S to make it
+    match.  */
+
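+@ Table format: "<bytecode> = <opcode>, <length>", where <length> is the
+@ bytecode's length in bytes, which the handlers reference as \seq_len.
+@ A length of 0 appears to be used for bytecodes (branches, returns, invokes,
+@ switches) whose handlers compute the next jpc themselves.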
+nop                  = 0x00, 1
+aconst_null          = 0x01, 1
+iconst_m1            = 0x02, 1
+iconst_0             = 0x03, 1
+iconst_1             = 0x04, 1
+iconst_2             = 0x05, 1
+iconst_3             = 0x06, 1
+iconst_4             = 0x07, 1
+iconst_5             = 0x08, 1
+lconst_0             = 0x09, 1
+lconst_1             = 0x0a, 1
+fconst_0             = 0x0b, 1
+fconst_1             = 0x0c, 1
+fconst_2             = 0x0d, 1
+dconst_0             = 0x0e, 1
+dconst_1             = 0x0f, 1
+bipush               = 0x10, 2
+sipush               = 0x11, 3
+ldc                  = 0x12, 2
+ldc_w                = 0x13, 3
+ldc2_w               = 0x14, 3
+iload                = 0x15, 2
+lload                = 0x16, 2
+fload                = 0x17, 2
+dload                = 0x18, 2
+aload                = 0x19, 2
+iload_0              = 0x1a, 1
+iload_1              = 0x1b, 1
+iload_2              = 0x1c, 1
+iload_3              = 0x1d, 1
+lload_0              = 0x1e, 1
+lload_1              = 0x1f, 1
+lload_2              = 0x20, 1
+lload_3              = 0x21, 1
+fload_0              = 0x22, 1
+fload_1              = 0x23, 1
+fload_2              = 0x24, 1
+fload_3              = 0x25, 1
+dload_0              = 0x26, 1
+dload_1              = 0x27, 1
+dload_2              = 0x28, 1
+dload_3              = 0x29, 1
+aload_0              = 0x2a, 1
+aload_1              = 0x2b, 1
+aload_2              = 0x2c, 1
+aload_3              = 0x2d, 1
+iaload               = 0x2e, 1
+laload               = 0x2f, 1
+faload               = 0x30, 1
+daload               = 0x31, 1
+aaload               = 0x32, 1
+baload               = 0x33, 1
+caload               = 0x34, 1
+saload               = 0x35, 1
+istore               = 0x36, 2
+lstore               = 0x37, 2
+fstore               = 0x38, 2
+dstore               = 0x39, 2
+astore               = 0x3a, 2
+istore_0             = 0x3b, 1
+istore_1             = 0x3c, 1
+istore_2             = 0x3d, 1
+istore_3             = 0x3e, 1
+lstore_0             = 0x3f, 1
+lstore_1             = 0x40, 1
+lstore_2             = 0x41, 1
+lstore_3             = 0x42, 1
+fstore_0             = 0x43, 1
+fstore_1             = 0x44, 1
+fstore_2             = 0x45, 1
+fstore_3             = 0x46, 1
+dstore_0             = 0x47, 1
+dstore_1             = 0x48, 1
+dstore_2             = 0x49, 1
+dstore_3             = 0x4a, 1
+astore_0             = 0x4b, 1
+astore_1             = 0x4c, 1
+astore_2             = 0x4d, 1
+astore_3             = 0x4e, 1
+iastore              = 0x4f, 1
+lastore              = 0x50, 1
+fastore              = 0x51, 1
+dastore              = 0x52, 1
+aastore              = 0x53, 1
+bastore              = 0x54, 1
+castore              = 0x55, 1
+sastore              = 0x56, 1
+pop                  = 0x57, 1
+pop2                 = 0x58, 1
+dup                  = 0x59, 1
+dup_x1               = 0x5a, 1
+dup_x2               = 0x5b, 1
+dup2                 = 0x5c, 1
+dup2_x1              = 0x5d, 1
+dup2_x2              = 0x5e, 1
+swap                 = 0x5f, 1
+iadd                 = 0x60, 1
+ladd                 = 0x61, 1
+fadd                 = 0x62, 1
+dadd                 = 0x63, 1
+isub                 = 0x64, 1
+lsub                 = 0x65, 1
+fsub                 = 0x66, 1
+dsub                 = 0x67, 1
+imul                 = 0x68, 1
+lmul                 = 0x69, 1
+fmul                 = 0x6a, 1
+dmul                 = 0x6b, 1
+idiv                 = 0x6c, 1
+ldiv                 = 0x6d, 1
+fdiv                 = 0x6e, 1
+ddiv                 = 0x6f, 1
+irem                 = 0x70, 1
+lrem                 = 0x71, 1
+frem                 = 0x72, 1
+drem                 = 0x73, 1
+ineg                 = 0x74, 1
+lneg                 = 0x75, 1
+fneg                 = 0x76, 1
+dneg                 = 0x77, 1
+ishl                 = 0x78, 1
+lshl                 = 0x79, 1
+ishr                 = 0x7a, 1
+lshr                 = 0x7b, 1
+iushr                = 0x7c, 1
+lushr                = 0x7d, 1
+iand                 = 0x7e, 1
+land                 = 0x7f, 1
+ior                  = 0x80, 1
+lor                  = 0x81, 1
+ixor                 = 0x82, 1
+lxor                 = 0x83, 1
+iinc                 = 0x84, 3
+i2l                  = 0x85, 1
+i2f                  = 0x86, 1
+i2d                  = 0x87, 1
+l2i                  = 0x88, 1
+l2f                  = 0x89, 1
+l2d                  = 0x8a, 1
+f2i                  = 0x8b, 1
+f2l                  = 0x8c, 1
+f2d                  = 0x8d, 1
+d2i                  = 0x8e, 1
+d2l                  = 0x8f, 1
+d2f                  = 0x90, 1
+i2b                  = 0x91, 1
+i2c                  = 0x92, 1
+i2s                  = 0x93, 1
+lcmp                 = 0x94, 1
+fcmpl                = 0x95, 1
+fcmpg                = 0x96, 1
+dcmpl                = 0x97, 1
+dcmpg                = 0x98, 1
+ifeq                 = 0x99, 0
+ifne                 = 0x9a, 0
+iflt                 = 0x9b, 0
+ifge                 = 0x9c, 0
+ifgt                 = 0x9d, 0
+ifle                 = 0x9e, 0
+if_icmpeq            = 0x9f, 0
+if_icmpne            = 0xa0, 0
+if_icmplt            = 0xa1, 0
+if_icmpge            = 0xa2, 0
+if_icmpgt            = 0xa3, 0
+if_icmple            = 0xa4, 0
+if_acmpeq            = 0xa5, 0
+if_acmpne            = 0xa6, 0
+goto                 = 0xa7, 0
+jsr                  = 0xa8, 0
+ret                  = 0xa9, 0
+tableswitch          = 0xaa, 0
+lookupswitch         = 0xab, 0
+ireturn              = 0xac, 0
+lreturn              = 0xad, 0
+freturn              = 0xae, 0
+dreturn              = 0xaf, 0
+areturn              = 0xb0, 0
+return               = 0xb1, 0
+getstatic            = 0xb2, 3
+putstatic            = 0xb3, 3
+getfield             = 0xb4, 3
+putfield             = 0xb5, 3
+invokevirtual        = 0xb6, 3
+invokespecial        = 0xb7, 3
+invokestatic         = 0xb8, 3
+invokeinterface      = 0xb9, 0
+invokedynamic        = 0xba, 0
+new                  = 0xbb, 3
+newarray             = 0xbc, 2
+anewarray            = 0xbd, 3
+arraylength          = 0xbe, 1
+athrow               = 0xbf, 0
+checkcast            = 0xc0, 3
+instanceof           = 0xc1, 3
+monitorenter         = 0xc2, 1
+monitorexit          = 0xc3, 1
+wide                 = 0xc4, 1
+multianewarray       = 0xc5, 4
+ifnull               = 0xc6, 0
+ifnonnull            = 0xc7, 0
+goto_w               = 0xc8, 0
+jsr_w                = 0xc9, 0
+breakpoint           = 0xca, 0
+
+#ifdef FAST_BYTECODES
+
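+@ These non-standard opcodes are interpreter-private "fast" forms (resolved
+@ field/invoke variants and fused iload pairs), presumably installed by
+@ run-time bytecode rewriting; entries commented out with a leading @ are not
+@ implemented.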
+@agetfield	= 0xcb, 3
+bgetfield	= 0xcc, 3
+cgetfield	= 0xcd, 3
+@dgetfield	= 0xce, 3
+@fgetfield	= 0xcf, 3
+igetfield	= 0xd0, 3
+lgetfield	= 0xd1, 3
+sgetfield	= 0xd2, 3
+
+aputfield	= 0xd3, 3
+bputfield	= 0xd4, 3
+cputfield	= 0xd5, 3
+@dputfield	= 0xd6, 3
+@fputfield	= 0xd7, 3
+iputfield	= 0xd8, 3
+lputfield	= 0xd9, 3
+@sputfield	= 0xda, 3
+
+iaccess_0	= 0xdb, 4
+iaccess_1	= 0xdc, 4
+iaccess_2	= 0xdd, 4
+iaccess_3	= 0xde, 4
+
+invokeresolved		= 0xdf, 3
+invokespecialresolved	= 0xe0, 3
+invokestaticresolved	= 0xe1, 3
+invokevfinal 		= 0xe2, 3
+
+iload_iload	= 0xe3, 4
+iload_iload_N	= 0xe4, 3
+
+fast_aldc	= 0xe5, 2
+fast_aldc_w	= 0xe6, 3
+@return_register_finalizer = 0xe7, 1
+
+iload_0_iconst_N        = 0xe9, 2
+iload_1_iconst_N        = 0xea, 2
+iload_2_iconst_N        = 0xeb, 2
+iload_3_iconst_N        = 0xec, 2
+iload_iconst_N          = 0xed, 3
+iadd_istore_N           = 0xee, 2
+isub_istore_N           = 0xef, 2
+iand_istore_N           = 0xf0, 2
+ior_istore_N            = 0xf1, 2
+ixor_istore_N           = 0xf2, 2
+iadd_u4store            = 0xf3, 3
+isub_u4store            = 0xf4, 3
+iand_u4store            = 0xf5, 3
+ior_u4store             = 0xf6, 3
+ixor_u4store            = 0xf7, 3
+iload_0_iload           = 0xf8, 3
+iload_1_iload           = 0xf9, 3
+iload_2_iload           = 0xfa, 3
+iload_3_iload           = 0xfb, 3
+iload_0_iload_N         = 0xfc, 2
+iload_1_iload_N         = 0xfd, 2
+iload_2_iload_N         = 0xfe, 2
+iload_3_iload_N         = 0xff, 2
+
+#endif
+
+return_register_finalizer = 0xe7, 1
+
+(nop) {
+	DISPATCH	\seq_len
+}
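+
+@ Each "(bytecodes) name { ... }" block supplies the handler body for the
+@ listed bytecodes.  The DISPATCH_START/DISPATCH_NEXT/DISPATCH_FINISH macros,
+@ presumably provided by cppInterpreter_arm.S, interleave fetching and decoding
+@ of the next bytecode with the work of the current handler.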
+
+(aconst_null,fconst_0) u4const_0 {
+	DISPATCH_START_R2
+        mov     lr, #0
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5) iconst_N {
+	sub	lr, r0, #opc_iconst_0
+	DISPATCH_START_R2
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(lconst_0,dconst_0) u8const_0 {
+	DISPATCH_START_R2
+        mov     tmp1, #0
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(lconst_1) lconst_1 {
+	DISPATCH_START_R2
+        mov     r3, #1
+	DISPATCH_NEXT
+        mov     tmp1, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(fconst_1) fconst_1 {
+	DISPATCH_START_R2
+        mov     tmp1, #0x3f800000
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(fconst_2) fconst_2 {
+	DISPATCH_START_R2
+        mov     r2, #0x40000000
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(dconst_1) dconst_1 {
+	DISPATCH_START_R2
+        mov     tmp1, #0x3f000000
+	DISPATCH_NEXT
+        orr     tmp1, tmp1, #0x00f00000
+	DISPATCH_NEXT
+        mov     r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(bipush) bipush {
+	DISPATCH_START	\seq_len
+	mov	tmp1, r2, lsl #24
+	DISPATCH_NEXT
+	mov	tmp1, tmp1, asr #24
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+@ r1 = [jpc, #2]
+(sipush) sipush {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	mov	r2, r2, lsl #24
+        orr     tmp1, r1, r2, asr #16
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,fload,aload) u4load {
+	DISPATCH_START	\seq_len
+	rsb	tmp1, r2, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(lload,dload) u8load {
+	DISPATCH_START	\seq_len
+	sub	r3, locals, r2, lsl #2
+	DISPATCH_NEXT
+	ldmda	r3, {r3, tmp1}
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(aload_0,aload_1,aload_2,aload_3) {
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3) {
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(fload_0,fload_1,fload_2,fload_3) {
+	rsb	tmp1, r0, #opc_fload_0
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(lload_0,dload_0) u8load_0 {
+	DISPATCH_START_R2
+        ldmda   locals, {r3, tmp1}
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(lload_1,dload_1) u8load_1 {
+	DISPATCH_START_R2
+        ldmdb   locals, {r3, tmp1}
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(lload_2,dload_2) u8load_2 {
+	DISPATCH_START_R2
+	ldr	r3, [locals, #-12]
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, #-8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(lload_3,dload_3) u8load_3 {
+	DISPATCH_START_R2
+	ldr	r3, [locals, #-16]
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, #-12]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload) u4aload {
+	GET_STACK	1, r3
+	DISPATCH_START_R2_R0
+	POP	r2
+	DISPATCH_START_R2_JPC
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry5:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	DISPATCH_NEXT
+	ldr	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUT_STACK	0, tmp1
+	DISPATCH_FINISH
+}
+
+(laload,daload) u8aload {
+	GET_STACK	1, r3
+	DISPATCH_START_R2_R0
+	GET_STACK	0, r2
+	DISPATCH_START_R2_JPC
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry6:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #3
+	ldr	r1, [r3, #BASE_OFFSET_LONG]
+	DISPATCH_NEXT
+	ldr	tmp1, [r3, #20]
+	DISPATCH_NEXT
+	PUT_STACK	1, tmp1
+	PUT_STACK	0, r1
+	DISPATCH_FINISH
+}
+
+(baload) baload {
+	GET_STACK	1, r3
+	DISPATCH_START_R2_R0
+	POP	r2
+	DISPATCH_START_R2_JPC
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry7:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2
+	DISPATCH_NEXT
+	ldrsb	tmp1, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_NEXT
+	PUT_STACK	0, tmp1
+	DISPATCH_FINISH
+}
+
+(caload) caload {
+	GET_STACK	1, r3
+	DISPATCH_START_R2_R0
+	POP	r2
+	DISPATCH_START_R2_JPC
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry8:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #1
+	DISPATCH_NEXT
+	ldrh	tmp1, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUT_STACK	0, tmp1
+	DISPATCH_FINISH
+}
+
+(saload) saload {
+	GET_STACK	1, r3
+	DISPATCH_START_R2_R0
+	POP	r2
+	DISPATCH_START_R2_JPC
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry9:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #1
+	DISPATCH_NEXT
+	ldrsh	tmp1, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUT_STACK	0, tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(astore,istore,fstore) u4store {
+	DISPATCH_START	\seq_len
+	rsb	tmp1, r2, #0
+	DISPATCH_NEXT
+	POP	r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(lstore,dstore) u8store {
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r3, locals, r2, lsl #2
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	stmda	r3, {r1, tmp1}
+	DISPATCH_FINISH
+}
+
+(astore_0,istore_0,fstore_0) u4store_0 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #0]
+	DISPATCH_FINISH
+}
+
+(astore_1,istore_1,fstore_1) u4store_1 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #-4]
+	DISPATCH_FINISH
+}
+
+(astore_2,istore_2,fstore_2) u4store_2 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #-8]
+	DISPATCH_FINISH
+}
+
+(astore_3,istore_3,fstore_3) u4store_3 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #-12]
+	DISPATCH_FINISH
+}
+
+(lstore_0,dstore_0) u8store_0 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        stmda   locals, {r1, tmp1}
+	DISPATCH_FINISH
+}
+
+(lstore_1,dstore_1) u8store_1 {
+	DISPATCH_START_R2
+	DISPATCH_NEXT
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        stmdb   locals, {r1, tmp1}
+	DISPATCH_FINISH
+}
+
+(lstore_2,dstore_2) u8store_2 {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+        str     r1, [locals, #-12]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #-8]
+	DISPATCH_FINISH
+}
+
+(lstore_3,dstore_3) u8store_3 {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+        str     r1, [locals, #-16]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        str     tmp1, [locals, #-12]
+	DISPATCH_FINISH
+}
+
+(iastore,fastore) u4astore {
+	POP	r1, tmp1, lr		@ r1 = value, tmp1 = index, lr = arrayref
+	DISPATCH_START_R2
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry10:
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1, lsl #2
+	DISPATCH_NEXT
+	str	r1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(lastore,dastore) u8astore {
+	POP	r1, r3, tmp1, lr		@ r1,r3 = value, tmp1 = index, lr = arrayref
+	DISPATCH_START_R2
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry11:
+	ldr	ip, [lr, #8]		@ ip = limit
+	cmp	tmp1, ip
+	DISPATCH_NEXT
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	tmp1, lr, tmp1, lsl #3
+	str	r1, [tmp1, #BASE_OFFSET_LONG]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [tmp1, #BASE_OFFSET_LONG+4]
+	DISPATCH_FINISH
+}
+
+(bastore) bastore {
+	POP	r3, tmp1, lr		@ r3 = value, tmp1 = index, lr = arrayref
+	DISPATCH_START_R2
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry12:
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1
+	DISPATCH_NEXT
+	strb	r3, [lr, #BASE_OFFSET_BYTE]
+	DISPATCH_FINISH
+}
+
+(castore,sastore) u2astore {
+	POP	r3, tmp1, lr		@ r3 = value, tmp1 = index, lr = arrayref
+	DISPATCH_START_R2
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry13:
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1, lsl #1
+	DISPATCH_NEXT
+	strh	r3, [lr, #BASE_OFFSET_SHORT]
+	DISPATCH_FINISH
+}
+
+(pop) jpop {
+	DISPATCH_START_R2
+        add     stack, stack, #4
+	DISPATCH_FINISH
+}
+
+(pop2) jpop2 {
+	DISPATCH_START_R2
+        add     stack, stack, #8
+	DISPATCH_FINISH
+}
+
+(dup) dup {
+	DISPATCH_START_R2
+	ldr	lr, [stack, #4]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(dup_x1) dup_x1 {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(dup_x2) dup_x2 {
+	DISPATCH_START_R2
+	POP	r2, r3, lr
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_NEXT
+	PUSH	r2, r3, lr
+	DISPATCH_FINISH
+}
+
+(dup2) dup2 {
+	DISPATCH_START_R2
+	ldmib	stack, {r2, r3}
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(dup2_x1) dup2_x1 {
+	DISPATCH_START_R2
+	POP	r2, r3, lr
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_NEXT
+	PUSH	r2, r3, lr
+	DISPATCH_FINISH
+}
+
+(dup2_x2) dup2_x2 {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	PUSH	r2, r3, tmp1, lr
+	DISPATCH_FINISH
+}
+
+(swap) swap {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+
+(iadd) iadd {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	add	r1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(ladd) ladd {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	adds	r2, r2, tmp1
+	adc	r3, r3, lr
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(isub) isub {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(lsub) lsub {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	subs	r2, tmp1, r2
+	sbc	r3, lr, r3
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(imul) imul {
+	DISPATCH_START_R2
+	POP	r2, tmp1
+	DISPATCH_NEXT
+	mul	r1, r2, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(lmul) lmul {
+	POP	r1, tmp1, ip, lr
+        umull   r3, r0, ip, r1
+        mla     tmp1, ip, tmp1, r0
+	DISPATCH_START_R2
+        mla     tmp1, lr, r1, tmp1
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(ldiv) ldiv {
+	POP	r2, r3
+	POP	r0, r1
+	orrs	lr, r2, r3
+	beq	.ldiv_0
+	bl	__aeabi_ldivmod
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(lrem) lrem {
+	POP	r2, r3
+	POP	r0, r1
+	orrs	lr, r2, r3
+	beq	.lrem_0
+	bl	__aeabi_ldivmod
+	PUSH	r2, r3
+	DISPATCH	\seq_len
+}
+
+(frem) frem {
+	POPF1
+	POPF0
+        bl      fmodf
+	PUSHF0
+	DISPATCH	\seq_len
+}
+
+(drem) drem {
+	POPD1
+        POPD0
+        bl      fmod
+	PUSHD0
+	DISPATCH	\seq_len
+}
+
+(ineg) ineg {
+	DISPATCH_START_R2
+	POP	tmp1
+	DISPATCH_NEXT
+        rsb     tmp1, tmp1, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(lneg) lneg {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	rsbs	r2, r2, #0
+	rsc	r3, r3, #0
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(fneg) fneg {
+	DISPATCH_START_R2
+	POP	r2
+	DISPATCH_NEXT
+	eor	r2, r2, #0x80000000
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(dneg) dneg {
+	DISPATCH_START_R2
+	GET_STACK	1, r2
+	DISPATCH_NEXT
+	eor	r2, r2, #0x80000000
+	DISPATCH_NEXT
+	PUT_STACK	1, r2
+	DISPATCH_FINISH
+}
+
+(ishl) ishl {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	and	r2, r2, #31
+	mov	r2, r3, lsl r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(lshl) lshl {
+	DISPATCH_START_R2
+	POP	r2, r3, lr
+	DISPATCH_NEXT
+	tst	r2, #32
+	and	r2, r2, #31
+	movne	tmp1, #0
+	movne	lr, r3, lsl r2
+	moveq	tmp1, r3, lsl r2
+	moveq	lr, lr, lsl r2
+	rsbeq	r2, r2, #32
+	orreq	lr, lr, r3, lsr r2
+	PUSH	tmp1, lr
+	DISPATCH_FINISH
+}
+
+(ishr) ishr {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	and	r2, r2, #31
+	mov	r2, r3, asr r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(lshr) lshr {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	tst	r2, #32
+	and	r2, r2, #31
+	movne	lr, tmp1, asr #31
+	movne	r3, tmp1, asr r2
+	moveq	lr, tmp1, asr r2
+	moveq	r3, r3, lsr r2
+	rsbeq	r2, r2, #32
+	orreq	r3, r3, tmp1, lsl r2
+	PUSH	r3, lr
+	DISPATCH_FINISH
+}
+
+(iushr) iushr {
+	DISPATCH_START_R2
+	POP	r2, r3
+	DISPATCH_NEXT
+	and	r2, r2, #31
+	mov	r2, r3, lsr r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(lushr) lushr {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	tst	r2, #32
+	and	r2, r2, #31
+	movne	lr, #0
+	movne	r3, tmp1, lsr r2
+	moveq	lr, tmp1, lsr r2
+	moveq	r3, r3, lsr r2
+	rsbeq	r2, r2, #32
+	orreq	r3, r3, tmp1, lsl r2
+	PUSH	r3, lr
+	DISPATCH_FINISH
+}
+
+(iand) iand {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(land) land {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	and	r2, tmp1, r2
+	and	r3, lr, r3
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(ior) ior {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(lor) lor {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	orr	r2, tmp1, r2
+	orr	r3, lr, r3
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(ixor) ixor {
+	DISPATCH_START_R2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(lxor) lxor {
+	DISPATCH_START_R2
+	POP	r2, r3, tmp1, lr
+	DISPATCH_NEXT
+	eor	r2, tmp1, r2
+	eor	r3, lr, r3
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iinc) iinc {
+	ldrsb	lr, [jpc, #2]
+	DISPATCH_START	\seq_len
+	rsb	tmp1, r2, #0
+	DISPATCH_NEXT
+	ldr	r3, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	add	r3, r3, lr
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
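
iinc takes an unsigned local-variable index and a signed 8-bit constant (hence the ldrsb), updates the local in place and pushes nothing. Over a hypothetical locals array the operation is just:

    #include <stdint.h>

    static void do_iinc(int32_t *locals, uint8_t index, int8_t constant) {
        locals[index] += constant;      /* in-place update, no stack traffic */
    }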
+
+(i2l) i2l {
+	DISPATCH_START_R2
+	POP	r2
+	DISPATCH_NEXT
+	mov	r3, r2, asr #31
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(i2f) i2f {
+	POP	r0
+        bl      __aeabi_i2f
+	PUSH	r0
+	DISPATCH	\seq_len
+}
+
+(i2d) i2d {
+	POP	r0
+        bl      __aeabi_i2d
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(l2i) l2i {
+	DISPATCH_START_R2
+	POP	r3
+	DISPATCH_NEXT
+	PUT_STACK	0, r3
+	DISPATCH_FINISH
+}
+
+(l2f) l2f {
+	POP	r0, r1
+        bl      __aeabi_l2f
+	PUSH	r0
+	DISPATCH	\seq_len
+}
+
+(l2d) l2d {
+	POP	r0, r1
+        bl      __aeabi_l2d
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(f2i) f2i {
+	POPF0
+        bl      _ZN13SharedRuntime3f2iEf
+	PUSH	r0
+	DISPATCH	\seq_len
+}
+
+(f2l) f2l {
+	POPF0
+        bl      _ZN13SharedRuntime3f2lEf
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(f2d) f2d {
+	POP	r0
+        bl      __aeabi_f2d
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(d2i) d2i {
+	POPD0
+        bl      _ZN13SharedRuntime3d2iEd
+	PUSH	r0
+	DISPATCH	\seq_len
+}
+
+(d2l) d2l {
+	POPD0
+        bl      _ZN13SharedRuntime3d2lEd
+	PUSH	r0, r1
+	DISPATCH	\seq_len
+}
+
+(d2f) d2f {
+	POP	r0, r1
+        bl      __aeabi_d2f
+	PUSH	r0
+	DISPATCH	\seq_len
+}
+
+(i2b) i2b {
+	DISPATCH_START_R2
+	POP	r3
+	DISPATCH_NEXT
+        mov     r3, r3, asl #24
+        mov     r3, r3, asr #24
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(i2c) i2c {
+	DISPATCH_START_R2
+	POP	r3
+	DISPATCH_NEXT
+        mov     r3, r3, asl #16
+        mov     r3, r3, lsr #16
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(i2s) i2s {
+	DISPATCH_START_R2
+	POP	r3
+	DISPATCH_NEXT
+        mov     r3, r3, asl #16
+        mov     r3, r3, asr #16
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
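
The three narrowing conversions differ only in the re-extension: i2b and i2s shift left then arithmetic-shift right (sign extension), while i2c shifts left then logical-shift right (zero extension). In C:

    #include <stdint.h>

    static int32_t i2b(int32_t v) { return (int8_t)v;   }  /* asl #24, asr #24 */
    static int32_t i2c(int32_t v) { return (uint16_t)v; }  /* asl #16, lsr #16 */
    static int32_t i2s(int32_t v) { return (int16_t)v;  }  /* asl #16, asr #16 */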
+
+(lcmp) lcmp {
+	POP	r1, r3, tmp1, lr
+	DISPATCH_START_R2
+	subs	r1, tmp1, r1
+	movne	r1, #1
+	sbcs	lr, lr, r3
+	DISPATCH_NEXT
+	movne	r1, #1
+	rsblt	r1, r1, #0
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
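
lcmp pushes -1, 0 or 1 for a signed comparison of two longs; the handler derives the three-way result from the flags of a subs/sbcs pair. The intended result, in C:

    #include <stdint.h>

    /* value1 was pushed first, value2 is on top of the stack */
    static int32_t jvm_lcmp(int64_t value1, int64_t value2) {
        return (value1 > value2) - (value1 < value2);   /* 1, 0 or -1 */
    }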
+
+#ifdef NOTICE_SAFEPOINTS
+
+@ r2 = [jpc, #1]
+@ r1 = [jpc, #2]
+(ifeq,ifnull) ifeq_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	beq	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(ifne,ifnonnull) ifne_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	bne	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(iflt) iflt_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	blt	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(ifge) ifge_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	bge	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(ifgt) ifgt_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	bgt	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(ifle) ifle_unsafe {
+	POP	r3
+	ldrb	r1, [jpc, #2]
+        cmp     r3, #0
+	ble	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmpeq,if_acmpeq) if_icmpeq_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	beq	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmpne,if_acmpne) if_icmpne_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	bne	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmplt) if_icmplt_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	blt	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmpge) if_icmpge_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	bge	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmpgt) if_icmpgt_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	bgt	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(if_icmple) if_icmple_unsafe {
+	POP	r3, tmp1
+	ldrb	r1, [jpc, #2]
+        cmp     tmp1, r3
+	ble	branch_taken_unsafe
+	DISPATCH 3
+}
+
+(goto) goto_unsafe {
+	ldrb	r1, [jpc, #2]
+	mov	r2, r2, lsl #24
+        orr     tmp1, r1, r2, asr #16
+        DISPATCH_START_REG	tmp1
+  USEC  cmp     tmp1, #0
+  USEC  ble     do_backedge
+	DISPATCH_BYTECODE
+}
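
Branch bytecodes carry a signed 16-bit offset stored big-endian at jpc+1/jpc+2; goto rebuilds it with lsl #24 followed by asr #16 to sign-extend the high byte before combining it with the low byte. The same reconstruction in C:

    #include <stdint.h>

    static int32_t branch_offset(uint8_t hi, uint8_t lo) {
        return (int16_t)((hi << 8) | lo);   /* sign-extended 16-bit offset */
    }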
+
+#endif // NOTICE_SAFEPOINTS
+
+(jsr) jsr {
+	ldr	r3, [istate, #ISTATE_METHOD]
+	ldr	r1, [r3, #METHOD_CONSTMETHOD]
+	rsb	r2, r1, jpc
+	sub	r2, r2, #CONSTMETHOD_CODEOFFSET - 3
+	PUSH	r2
+	b	do_goto
+}
+
+@ r2 = [jpc, #1]
+@ r1 = [jpc, #2]
+(ret) ret {
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r0, #METHOD_CONSTMETHOD]
+	ldr	r1, [locals, -r2, lsl #2]
+	add	jpc, r3, r1
+	DISPATCH	CONSTMETHOD_CODEOFFSET
+}
+
+@ We don't do safe and unsafe versions of tableswitch and lookupswitch
+(tableswitch) tableswitch {
+	POP	a2
+        bic     a1, jpc, #3
+        ldr     a4,[a1,#8]
+@       BYTESEX_REVERSE a3, a4, a3
+
+        eor     a3, a4, a4, ror #16
+        bic     a3, a3, #0xff0000
+        mov     a4, a4, ror #8
+        eor     a3, a4, a3, lsr #8
+
+        ldr     a4,[a1,#12]
+@       BYTESEX_REVERSE a4, a4, ip
+
+        eor     ip, a4, a4, ror #16
+        bic     ip, ip, #0xff0000
+        mov     a4, a4, ror #8
+        eor     a4, a4, ip, lsr #8
+
+        sub     a2,a2,a3
+        sub     a4,a4,a3
+        cmp     a4,a2
+        ldrcc   a1,[a1,#4]
+        addcs   a1,a1,a2,LSL #2
+        ldrcs   a1,[a1,#16]
+@       BYTESEX_REVERSE a1, a1, a4
+
+        eor     a4, a1, a1, ror #16
+        bic     a4, a4, #0xff0000
+        mov     a1, a1, ror #8
+        eors    ip, a1, a4, lsr #8
+
+        DISPATCH_START_REG	ip
+	DISPATCH_BYTECODE
+}
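
The switch operands are stored big-endian, and on pre-ARMv6 cores there is no rev instruction, so the commented-out BYTESEX_REVERSE macro is expanded inline as the classic eor/bic/ror/eor byte swap. A C rendering of that sequence, with a quick check in main:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    static uint32_t bytesex_reverse(uint32_t x) {
        uint32_t t = x ^ ror32(x, 16);   /* eor  t, x, x, ror #16 */
        t &= ~UINT32_C(0x00FF0000);      /* bic  t, t, #0xff0000  */
        x = ror32(x, 8);                 /* mov  x, x, ror #8     */
        return x ^ (t >> 8);             /* eor  x, x, t, lsr #8  */
    }

    int main(void) {
        printf("%08x\n", (unsigned)bytesex_reverse(0x11223344u));  /* 44332211 */
        return 0;
    }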
+
+(lookupswitch) lookupswitch {
+	POP	a2
+        bic     a1, jpc, #3
+@       BYTESEX_REVERSE a2, a2, ip
+
+        eor     ip, a2, a2, ror #16
+        bic     ip, ip, #0xff0000
+        mov     a2, a2, ror #8
+        eor     a2, a2, ip, lsr #8
+
+        ldr     a3,[a1,#8]
+@       BYTESEX_REVERSE a3, a3, ip
+
+        eor     ip, a3, a3, ror #16
+        bic     ip, ip, #0xff0000
+        mov     a3, a3, ror #8
+        eor     a3, a3, ip, lsr #8
+
+        subs    a4,a3,#1
+        bmi     1f
+        add     a1, a1, #4
+0:
+        ldr      a3,[a1,#8]!
+        cmp     a3,a2
+        beq     2f
+        subs    a4,a4,#1
+        bpl     0b
+1:
+        bic     a1, jpc, #3
+2:
+        ldr      a2,[a1,#4]
+@       BYTESEX_REVERSE a2, a2, ip
+
+        eor     ip, a2, a2, ror #16
+        bic     ip, ip, #0xff0000
+        mov     a2, a2, ror #8
+        eors    ip, a2, ip, lsr #8
+
+        DISPATCH_START_REG	ip
+	DISPATCH_BYTECODE
+}
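
After byte-swapping the key and npairs, lookupswitch does a linear scan of the (match, offset) pairs and falls back to the default offset when nothing matches; a linear search is permitted because the specification only requires the pairs to be sorted, it does not mandate binary search. The control flow, in C:

    #include <stdint.h>

    static int32_t lookup_target(int32_t key, int32_t default_off,
                                 int32_t npairs, const int32_t pairs[][2]) {
        for (int32_t i = 0; i < npairs; i++)
            if (pairs[i][0] == key)
                return pairs[i][1];     /* branch offset of the matching pair */
        return default_off;
    }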
+
+#ifdef FAST_BYTECODES
+(igetfield) igetfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	tmp1
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry78:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry78_v:
+	ldr	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(bgetfield) bgetfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	tmp1
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry79:
+	ldrsb	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry79_v:
+	ldrsb	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(cgetfield) cgetfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	tmp1
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry80:
+	ldrh	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry80_v:
+	ldrh	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(sgetfield) sgetfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	tmp1
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry81:
+	ldrsh	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry81_v:
+	ldrsh	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(lgetfield) lgetfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	tmp1
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+.abortentry82:
+	ldmia	r1, {r1, tmp1}
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	add	r1, tmp1, r1
+#ifndef	__ARM_ARCH_7A__
+.abortentry82_v:
+	ldmia	r1, {r2, r3}
+#else
+.abortentry82_v:
+	ldrexd	r2, r3 , [r1]
+#endif
+	// Be very careful here: you must be certain that
+	// DISPATCH_NEXT does not corrupt R2 or R3.
+	DISPATCH_NEXT
+	FullBarrier
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iputfield) iputfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	r3, tmp1		@ r3 = value, tmp1 = object
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+.abortentry83:
+	str	r3, [tmp1, r1]
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	StoreStoreBarrier
+.abortentry83_v:
+	str	r3, [tmp1, r1]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+}
+
+(cputfield) cputfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	r3, tmp1		@ r3 = value, tmp1 = object
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+.abortentry84:
+	strh	r3, [tmp1, r1]
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	StoreStoreBarrier
+.abortentry84_v:
+	strh	r3, [tmp1, r1]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+}
+
+(bputfield) bputfield {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	3
+	POP	r3, tmp1		@ r3 = value, tmp1 = object
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+.abortentry85:
+	strb	r3, [tmp1, r1]
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	StoreStoreBarrier
+.abortentry85_v:
+	strb	r3, [tmp1, r1]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+}
+
+(aputfield) aputfield {
+	ldrb	r1, [jpc, #2]
+	POP	r3, tmp1		@ r3 = value, tmp1 = object
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+.abortentry113:
+	str	r3, [tmp1, r1]
+	mov	r0, tmp1
+	bl	Helper_aputfield
+	DISPATCH 3
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	StoreStoreBarrier
+.abortentry113_v:
+	str	r3, [tmp1, r1]
+	StoreLoadBarrier
+	mov	r0, tmp1
+	bl	Helper_aputfield
+	DISPATCH 3
+}
+
+(lputfield) lputfield {
+	ldrb	r1, [jpc, #2]
+	POP	r3, tmp1, lr		@ r3, tmp1 = value, lr = object
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE	r2, r1, 3f
+	DISPATCH_START	3
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	add	r1, lr, r1
+	DISPATCH_NEXT
+.abortentry86:
+	stm	r1, {r3, tmp1}
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	ldr	r1, [r1, #CP_OFFSET+8]
+	add	r1, lr, r1
+	StoreStoreBarrier
+#ifndef	__ARM_ARCH_7A__
+.abortentry86_v:
+	stm	r1, {r3, tmp1}
+#else
+	mov	ip, r1
+	mov	r1, r3
+	// Data in tmp1 & r1, address in ip, r2 & r3 scratch
+	mov	r0, r1
+	mov	r1, tmp1
+.abortentry86_v:
+	ldrexd	r2, r3, [ip]
+	strexd	r2, r0, r1, [ip]
+	teq	r2, #0
+	bne	.abortentry86_v
+#endif
+	DISPATCH_START	3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	StoreLoadBarrier
+	DISPATCH_FINISH
+}
+
+#endif // FAST_BYTECODES
+
+@ r2 = [jpc, #1]
+@ r1 = [jpc, #2]
+(getstatic) getstatic {
+	ldrb	r1, [jpc, #2]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+        ldr     r3, [tmp1, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	blne	resolve_get_put
+	GO_IF_VOLATILE	r2, tmp1, 3f
+	ldr	r3, [tmp1, #CP_OFFSET+4]
+	ldr	r2, [tmp1, #CP_OFFSET+12]
+        ldr     lr, [tmp1, #CP_OFFSET+8]
+        movs    r2, r2, lsr #29
+	bhi	getstatic_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
+	bcs	getstatic_h		@ C = 1 => R2 = 1
+	beq	getstatic_sb		@ Z = 1 => R2 = 0
+	tst	r2, #2
+	bne	getstatic_dw
+	b	getstatic_sh
+3:
+	VOLATILE_VERSION
+	ldr	r3, [tmp1, #CP_OFFSET+4]
+	ldr	r2, [tmp1, #CP_OFFSET+12]
+        ldr     lr, [tmp1, #CP_OFFSET+8]
+        movs    r2, r2, lsr #29
+	bhi	getstatic_volatile_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
+	bcs	getstatic_volatile_h		@ C = 1 => R2 = 1
+	beq	getstatic_volatile_sb		@ Z = 1 => R2 = 0
+	tst	r2, #2
+	bne	getstatic_volatile_dw
+	b	getstatic_volatile_sh
+}
+
+@ r2 = [jpc, #1]
+@ r1 = [jpc, #2]
+(putstatic) putstatic {
+	ldrb	r1, [jpc, #2]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+        ldr     r3, [tmp1, #CP_OFFSET]
+        and     r3, r3, #0xff000000
+        cmp     r3, #opc_putstatic << 24
+	blne	resolve_get_put
+	GO_IF_VOLATILE	r2, tmp1, 3f
+	ldr	r3, [tmp1, #CP_OFFSET+4]		@ r3 = object
+        ldr     lr, [tmp1, #CP_OFFSET+12]           @ lr = tos_type
+        ldr     r2, [tmp1, #CP_OFFSET+8]            @ r2 = offset
+	movs	lr, lr, lsr #29
+	bhi	putstatic_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
+	bcs	putstatic_h		@ C = 1 => R2 = 1
+	beq	putstatic_sb		@ Z = 1 => R2 = 0
+	tst	lr, #2
+	bne	putstatic_dw
+	b	putstatic_sh
+3:
+	VOLATILE_VERSION
+	ldr	r3, [tmp1, #CP_OFFSET+4]		@ r3 = object
+        ldr     lr, [tmp1, #CP_OFFSET+12]           @ lr = tos_type
+        ldr     r2, [tmp1, #CP_OFFSET+8]            @ r2 = offset
+	movs	lr, lr, lsr #29
+	bhi	putstatic_volatile_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
+	bcs	putstatic_volatile_h		@ C = 1 => R2 = 1
+	beq	putstatic_volatile_sb		@ Z = 1 => R2 = 0
+	tst	lr, #2
+	bne	putstatic_volatile_dw
+	b	putstatic_volatile_sh
+}
+
+#ifdef NOTICE_SAFEPOINTS
+
+(return) return_unsafe {
+
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r2, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	add	r1, r2, #4
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	add	stack, r1, r0, lsl #2
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	bl	return_check_monitors
+	b	2b
+}
+
+(ireturn,areturn,freturn) ireturn_unsafe {
+
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+
+	POP	r1					@ pop result before we lose stack
+
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [stack, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [stack, r0, lsl #2]!
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	bl	return_check_monitors
+	b	2b
+}
+
+(lreturn,dreturn) lreturn_unsafe {
+
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r2, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	add	r1, r2, #4
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	POP	r2, r3
+
+	add	stack, r1, r0, lsl #2
+	stmdb	stack!, {r2, r3}
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	bl	return_check_monitors
+	b	2b
+}
+
+#endif // NOTICE_SAFEPOINTS
+
+(ldc) ldc {
+	ldr	r3, [istate, #ISTATE_METHOD]			@ method
+	ldrb	lr, [jpc, #1]
+
+	ldr	tmp1, [r3, #METHOD_CONSTMETHOD]
+	ldr	tmp1, [tmp1, #METHOD_CONSTANTS]			@ constants
+
+	DISPATCH_START	\seq_len
+
+	ldr	r3, [tmp1, #8]
+	DISPATCH_NEXT
+	add	r3, r3, #12
+	ldrb	r3, [r3, lr]
+
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+
+	cmp	r3, #JVM_CONSTANT_Integer
+	cmpne	r3, #JVM_CONSTANT_Float
+	cmpne	r3, #JVM_CONSTANT_String
+	bne	1f
+
+	add	r3, tmp1, lr, lsl #2
+	ldr	r3, [r3, #CONSTANTPOOL_BASE]
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+1:
+	cmp	r3, #JVM_CONSTANT_Class
+	bne	2f
+	add	r0, tmp1, #CONSTANTPOOL_BASE
+	ldr	r0, [r0, lr, lsl #2]
+	ldr	r1, [r0, #KLASS_PART + KLASS_JAVA_MIRROR]
+	PUSH	r1
+	DISPATCH	0
+2:
+	sub	jpc, jpc, #\seq_len
+	mov	r0, thread
+        DECACHE_JPC
+        DECACHE_STACK
+	mov	r1, #0
+        bl      _ZN18InterpreterRuntime3ldcEP10JavaThreadb
+	CACHE_CP
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	cmp	r1, #0
+	bne	handle_exception
+	ldr	r3, [thread, #THREAD_VM_RESULT]
+	mov	r2, #0
+	PUSH	r3
+	str	r2, [thread, #THREAD_VM_RESULT]
+	DISPATCH	\seq_len
+}
+
+(ldc_w) ldc_w {
+	ldrb	lr, [jpc, #1]
+
+	ldr	r3, [istate, #ISTATE_METHOD]			@ method
+	ldrb	ip, [jpc, #2]
+	ldr	r2, [r3, #METHOD_CONSTMETHOD]
+	ldr	r2, [r2, #METHOD_CONSTANTS]			@ constants
+
+	DISPATCH_START	\seq_len
+
+	ldr	r3, [r2, #8]
+	orr	lr, ip, lr, lsl #8
+	add	r3, r3, #12
+	ldrb	r3, [r3, lr]
+
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+
+	cmp	r3, #JVM_CONSTANT_Integer
+	cmpne	r3, #JVM_CONSTANT_Float
+	cmpne	r3, #JVM_CONSTANT_String
+	bne	1f
+
+	add	r3, r2, lr, lsl #2
+	ldr	r3, [r3, #CONSTANTPOOL_BASE]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+1:
+	cmp	r3, #JVM_CONSTANT_Class
+	bne	2f
+	add	r0, r2, #CONSTANTPOOL_BASE
+	ldr	r0, [r0, lr, lsl #2]
+	ldr	r1, [r0, #KLASS_PART + KLASS_JAVA_MIRROR]
+	PUSH	r1
+	DISPATCH	0
+2:
+	sub	jpc, jpc, #\seq_len
+	mov	r0, thread
+        DECACHE_JPC
+        DECACHE_STACK
+	mov	r1, #1
+        bl      _ZN18InterpreterRuntime3ldcEP10JavaThreadb
+	CACHE_CP
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	cmp	r1, #0
+	ldr	r3, [thread, #THREAD_VM_RESULT]
+	bne	handle_exception
+	mov	r2, #0
+	PUSH	r3
+	str	r2, [thread, #THREAD_VM_RESULT]
+	DISPATCH	\seq_len
+}
+
+(ldc2_w) ldc2_w {
+	ldrb	r3, [jpc, #1]
+
+	ldr	tmp1, [istate, #ISTATE_METHOD]			@ method
+	ldrb	lr, [jpc, #2]
+	ldr	r2, [tmp1, #METHOD_CONSTMETHOD]
+	ldr	r2, [r2, #METHOD_CONSTANTS]			@ constants
+
+	DISPATCH_START	\seq_len
+
+	ldr	tmp1, [r2, #8]
+	orr	r3, lr, r3, lsl #8
+	add	tmp1, tmp1, #12
+	ldrb	tmp1, [tmp1, r3]
+
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+
+	add	tmp1, r2, r3, lsl #2
+	ldr	r3, [tmp1, #CONSTANTPOOL_BASE]
+	ldr	tmp1, [tmp1, #CONSTANTPOOL_BASE+4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+#ifdef FAST_BYTECODES
+(iadd_u4store) {
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(isub_u4store) {
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iand_u4store) {
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(ior_u4store) {
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(ixor_u4store) {
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iadd_istore_N) {
+	ldrb	lr, [jpc, #1]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	rsb	r3, lr, #opc_istore_0
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(isub_istore_N) {
+	ldrb	lr, [jpc, #1]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	rsb	r3, lr, #opc_istore_0
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iand_istore_N) {
+	ldrb	lr, [jpc, #1]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	rsb	r3, lr, #opc_istore_0
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(ior_istore_N) {
+	ldrb	lr, [jpc, #1]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	rsb	r3, lr, #opc_istore_0
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+(ixor_istore_N) {
+	ldrb	lr, [jpc, #1]
+	DISPATCH_START	\seq_len
+	DISPATCH_NEXT
+	rsb	r3, lr, #opc_istore_0
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+{
+        ldrb    r3, [jpc, #2]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+        DISPATCH_NEXT
+        sub     r3, r3, #opc_iconst_0
+        ldr     tmp1, [locals, r2, lsl #2]
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH   r3, tmp1
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     tmp1, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     r1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH   r1, tmp1
+        DISPATCH_FINISH
+}
+
+@ (aload_N)(getfield)
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+{
+	ldrb	r2, [jpc, #3]
+	rsb	tmp1, r0, #opc_iaccess_0
+	ldrb	r3, [jpc, #2]
+	add	r1, constpool, r2, lsl #12
+	DISPATCH_START	4
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r1, r3, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	GO_IF_VOLATILE r3, r1, 3f
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry87:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
+	VOLATILE_VERSION
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry87_v:
+	ldr	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iload) fast_iload_iload
+{
+	ldrb	r3, [jpc, #3]
+	DISPATCH_START	\seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iload_N) fast_iload_iload_N
+{
+	ldrb	r3, [jpc, #2]
+	DISPATCH_START	\seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload) fast_iload_N_iload
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	DISPATCH_START	\seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N) fast_iload_N_iload_N
+{
+	rsb	r3, r0, #opc_iload_0_iload_N
+	DISPATCH_START	\seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	ldr	r3, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+#endif // FAST_BYTECODES
+
+#ifdef HW_FP
+
+(fadd) fadd_vfp {
+	DISPATCH_START_R2
+	vldr	s15, [stack, #8]
+	vldr	s14, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fadds	s15, s15, s14
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	s15, [stack, #8]
+	add	stack, stack, #4
+	DISPATCH_FINISH
+}
+
+(dadd) dadd_vfp {
+	DISPATCH_START_R2
+	vldr	d7, [stack, #12]
+	vldr	d6, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	faddd	d0, d7, d6
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	d0, [stack, #12]
+	add	stack, stack, #8
+	DISPATCH_FINISH
+}
+
+(fsub) fsub_vfp {
+	DISPATCH_START_R2
+	vldr	s15, [stack, #8]
+	vldr	s14, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fsubs	s15, s15, s14
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	s15, [stack, #8]
+	add	stack, stack, #4
+	DISPATCH_FINISH
+}
+
+(fmul) fmul_vfp {
+	DISPATCH_START_R2
+	vldr	s15, [stack, #8]
+	vldr	s14, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fmuls	s15, s15, s14
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	s15, [stack, #8]
+	add	stack, stack, #4
+	DISPATCH_FINISH
+}
+
+(dmul) dmul_vfp {
+	DISPATCH_START_R2
+	vldr	d7, [stack, #12]
+	vldr	d6, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fmuld	d0, d7, d6
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	d0, [stack, #12]
+	add	stack, stack, #8
+	DISPATCH_FINISH
+}
+
+(fdiv) fdiv_vfp {
+	DISPATCH_START_R2
+	vldr	s15, [stack, #8]
+	vldr	s14, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fdivs	s15, s15, s14
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	s15, [stack, #8]
+	add	stack, stack, #4
+	DISPATCH_FINISH
+}
+
+(ddiv) ddiv_vfp {
+	DISPATCH_START_R2
+	vldr	d7, [stack, #12]
+	vldr	d6, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fdivd	d0, d7, d6
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	vstr	d0, [stack, #12]
+	add	stack, stack, #8
+	DISPATCH_FINISH
+}
+
+(fcmpl) fcmpl_vfp {
+	DISPATCH_START_R2
+	flds	s14, [stack, #8]
+	flds	s15, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fcmpes	s14, s15
+	add	stack, stack, #8
+	fmstat
+	mvnmi	r3, #0
+	bmi	1f
+	movgt	r3, #1
+	bgt	1f
+	fcmps	s14, s15
+	fmstat
+	moveq	r3, #0
+	mvnne	r3, #0
+1:
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(fcmpg) fcmpg_vfp {
+	DISPATCH_START_R2
+	flds	s14, [stack, #8]
+	flds	s15, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fcmpes	s14, s15
+	add	stack, stack, #8
+	fmstat
+	mvnmi	r3, #0
+	bmi	1f
+	movgt	r3, #1
+	bgt	1f
+	fcmps	s14, s15
+	fmstat
+	moveq	r3, #0
+	movne	r3, #1
+1:
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(dcmpl) dcmpl_vfp {
+	DISPATCH_START_R2
+	fldd	d6, [stack, #12]
+	fldd	d7, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fcmped	d6, d7
+	fmstat
+	mvnmi	r3, #0
+	bmi	1f
+	movgt	r3, #1
+	bgt	1f
+	fcmpd	d6, d7
+	fmstat
+	moveq	r3, #0
+	mvnne	r3, #0
+1:
+	add	stack, stack, #16
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(dcmpg) dcmpg_vfp {
+	DISPATCH_START_R2
+	fldd	d6, [stack, #12]
+	fldd	d7, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	fcmped	d6, d7
+	fmstat
+	mvnmi	r3, #0
+	bmi	1f
+	movgt	r3, #1
+	bgt	1f
+	fcmpd	d6, d7
+	fmstat
+	moveq	r3, #0
+	movne	r3, #1
+1:
+	add	stack, stack, #16
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
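
The four VFP compare handlers differ only in how an unordered result (NaN on either side) is folded: the *cmpl forms push -1 and the *cmpg forms push +1, which is why the only change between the l and g variants above is the final mvnne versus movne. In C:

    #include <math.h>

    /* value1 pushed first, value2 on top; result is 1, 0 or -1 */
    static int jvm_fcmpl(float value1, float value2) {
        if (isnan(value1) || isnan(value2)) return -1;
        return (value1 > value2) - (value1 < value2);
    }

    static int jvm_fcmpg(float value1, float value2) {
        if (isnan(value1) || isnan(value2)) return 1;
        return (value1 > value2) - (value1 < value2);
    }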
+
+#endif // HW_FP
+
+#ifdef FAST_BYTECODES
+
+@##############################################################################
+@ Optimised bytecode pairs
+@##############################################################################
+
+@ --- load; iaccess ------------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+{
+	rsb	lr, r0, #opc_iload_0
+	ldrb	r2, [jpc, #4]
+	rsb	tmp1, r1, #opc_iaccess_0
+	ldrb	r3, [jpc, #3]
+	ldr	lr, [locals, lr, lsl #2]
+	add	r1, constpool, r2, lsl #12
+	DISPATCH_START	\seq_len
+	PUSH	lr
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r1, r3, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry111:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+{
+	rsb	lr, r2, #0
+	ldrb	r2, [jpc, #5]
+	rsb	tmp1, r1, #opc_iaccess_0
+	ldrb	r3, [jpc, #4]
+	ldr	lr, [locals, lr, lsl #2]
+	add	r1, constpool, r2, lsl #12
+	DISPATCH_START	\seq_len
+	PUSH	lr
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r1, r3, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry112:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+@ --- load; load ---------------------------------------------------------
+
+(aload_0,aload_1,aload_2,aload_3)
+(aload_0,aload_1,aload_2,aload_3)
+{
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_aload_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(aload_0,aload_1,aload_2,aload_3)
+{
+	rsb	tmp1, r0, #opc_iload_0
+        DISPATCH_START  \seq_len
+	rsb	r1, r1, #opc_aload_0
+        DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	cmp	r0, #opc_igetfield
+	ldr	r1, [locals, r1, lsl #2]
+        beq     1f
+2:
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+        DISPATCH_FINISH
+1:
+	ldrb	lr, [jpc, #-1]
+	add	lr, lr, #opc_iaccess_0-opc_aload_0
+	REWRITE_PAIRS	strb	lr, [jpc, #-1]
+	b	2b
+}
+
+@ 7 cycles
+(iload_0,iload_1,iload_2,iload_3)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	add	r0, r0, #opc_iload_0_iload_N-opc_iload_0
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_fast_iload_N_iload_N
+}
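
This handler does not execute the pair at all: it rewrites the first opcode byte in place (quickening) to the fused iload_N iload_N superinstruction and re-dispatches, so later executions of the same site go straight to the combined handler. A hedged C sketch of the idea; opc_iload_0 is the real JVM opcode (0x1a) but the fused opcode value below is a made-up placeholder:

    #include <stdint.h>

    enum {
        opc_iload_0         = 0x1a,   /* real JVM opcode                   */
        opc_iload_0_iload_N = 0xe0    /* placeholder for the fused opcode  */
    };

    static void quicken_iload_pair(uint8_t *jpc) {
        /* add r0, r0, #opc_iload_0_iload_N - opc_iload_0 ; strb r0, [jpc] */
        *jpc = (uint8_t)(*jpc + (opc_iload_0_iload_N - opc_iload_0));
    }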
+
+(aload_0,aload_1,aload_2,aload_3)
+(iload,aload,fload)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(aload,fload)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(iload)
+{
+	add	r0, r0, #opc_iload_0_iload-opc_iload_0
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_fast_iload_N_iload
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)
+(aload_0,aload_1,aload_2,aload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_aload_0
+	DISPATCH_NEXT
+	rsb	tmp1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	cmp	r0, #opc_igetfield
+	DISPATCH_NEXT
+	beq	1f
+2:
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+1:
+	ldrb	lr, [jpc, #-1]
+	add	lr, lr, #opc_iaccess_0-opc_aload_0
+	REWRITE_PAIRS	strb	lr, [jpc, #-1]
+	b	2b
+}
+
+@ r2 = [jpc, #1]
+(aload,fload)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	tmp1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	mov	r0, #opc_iload_iload_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_fast_iload_iload_N
+}
+
+@ r2 = [jpc, #1]
+(aload,fload)(iload,aload,fload) {
+	ldrb	r1, [jpc, #3]
+	rsb	tmp1, r2, #0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #0
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+}
+
+(iload)(iload) {
+	mov	r0, #opc_iload_iload
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_fast_iload_iload
+}
+
+(iload)(aload,fload) {
+	ldrb	r1, [jpc, #3]
+	rsb	tmp1, r2, #0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #0
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
+	DISPATCH_FINISH
+}
+
+@ --- load; store --------------------------------------------------------
+
+(aload_0,aload_1,aload_2,aload_3)
+(astore_0,astore_1,astore_2,astore_3)
+{
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_astore_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	tmp1, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(istore_0,istore_1,istore_2,istore_3)
+{
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_istore_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	tmp1, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(astore,istore,fstore)
+{
+	ldrb	r1, [jpc, #2]
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	tmp1, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(astore,istore,fstore)
+{
+	ldrb	r1, [jpc, #2]
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	tmp1, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)
+(astore_0,astore_1,astore_2,astore_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	tmp1, r1, #opc_astore_0
+	DISPATCH_NEXT
+	rsb	r1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)
+(istore_0,istore_1,istore_2,istore_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	tmp1, r1, #opc_istore_0
+	DISPATCH_NEXT
+	rsb	r1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(astore,istore,fstore) {
+	ldrb	tmp1, [jpc, #3]
+	rsb	r1, r2, #0
+	DISPATCH_START	\seq_len
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ --- load; const -------------------------------------------------------
+
+(aload_0,aload_1,aload_2,aload_3)
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)
+{
+	rsb	tmp1, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	sub	r1, r1, #opc_iconst_0
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)
+{
+        add     r0, r0, #opc_iload_0_iconst_N-opc_iload_0
+        REWRITE_PAIRS	strb    r0, [jpc]
+	b	do_iload_0_iconst_N
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)
+{
+        cmp     r0, #opc_iload
+        DISPATCH_START  \seq_len
+        sub     r1, r1, #opc_iconst_0
+        DISPATCH_NEXT
+        ldr     r3, [locals, -r2, lsl #2]
+        DISPATCH_NEXT
+        beq     1f
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH   r1, r3
+        DISPATCH_FINISH
+1:
+        mov     tmp1, #opc_iload_iconst_N
+        REWRITE_PAIRS	strb    tmp1, [jpc, #-\seq_len]
+	add	jpc, #-\seq_len
+	b	do_iload_iconst_N
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(bipush)
+{
+	ldrsb	r2, [jpc, #2]
+	rsb	r3, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(bipush)
+{
+	ldrsb	r2, [jpc, #2]
+	rsb	r3, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(sipush)
+{
+        ldrsb   r2, [jpc, #2]   @ sign-extend high byte of sipush operand
+        ldrb    lr, [jpc, #3]   @ zero-extend low byte
+	rsb	r3, r0, #opc_aload_0
+	DISPATCH_START	\seq_len
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        orr     r2, lr, r2, asl #8
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(sipush)
+{
+        ldrsb   r2, [jpc, #2]   @ sign-extend high byte of sipush operand
+        ldrb    lr, [jpc, #3]   @ zero-extend low byte
+	rsb	r3, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        orr     r2, lr, r2, asl #8
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(bipush) {
+	ldrsb	r3, [jpc, #3]
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r3, lr
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(sipush) {
+	ldrsb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #4]
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, -r2, lsl #2]
+        orr     r3, lr, r3, asl #8
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
+	DISPATCH_FINISH
+}
+
+@ --- load; Xaload -------------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(iaload,aaload,faload)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry19:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #2
+	ldr	lr, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(iaload,aaload,faload) {
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry20:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #2
+	ldr	lr, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
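
All of the fused load-then-array-load handlers follow the same pattern: a null check on the array reference, then a single unsigned compare (cmp/bcs) of the index against the length word the handler loads from [array, #8], which also rejects negative indexes. A sketch of the check, with the exception paths stubbed out:

    #include <stdint.h>
    #include <stdlib.h>

    static int32_t array_load(const int32_t *array, uint32_t length, int32_t index) {
        if (array == NULL)
            abort();                    /* handler: null_ptr_exception_jpc_1    */
        if ((uint32_t)index >= length)  /* one unsigned compare, as in cmp/bcs  */
            abort();                    /* handler: array_bound_exception_jpc_1 */
        return array[index];
    }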
+
+(iload_0,iload_1,iload_2,iload_3)
+(baload)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry21:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2
+	ldrsb	lr, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(caload)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry22:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #1
+	ldrh	lr, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(saload)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry23:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #1
+	ldrsh	lr, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(baload) {
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry24:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2
+	ldrsb	lr, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(caload) {
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry25:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #1
+	ldrh	lr, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload,aload,fload)(saload) {
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry26:
+	ldr	lr, [r3, #8]		@ lr = length
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	add	r3, r3, r2, lsl #1
+	ldrsh	lr, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+@ --- load; Xastore -------------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(iastore,fastore)
+{
+	POP	r2, r3
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry27:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(bastore)
+{
+	POP	r2, r3
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry28:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2
+	strb	tmp1, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(castore,sastore)
+{
+	POP	r2, r3
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry29:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #1
+	strh	tmp1, [r3, #BASE_OFFSET_SHORT]
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(iastore,fastore) {
+	POP	r3, tmp1
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry30:
+	ldr	lr, [tmp1, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r3, lr
+	bcs	array_bound_exception_jpc_1_r3
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r3, lsl #2
+	str	r2, [tmp1, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(bastore) {
+	POP	r3, tmp1
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry31:
+	ldr	lr, [tmp1, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r3, lr
+	bcs	array_bound_exception_jpc_1_r3
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r3
+	strb	r2, [tmp1, #BASE_OFFSET_BYTE]
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(castore,sastore) {
+	POP	r3, tmp1
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry32:
+	ldr	lr, [tmp1, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r3, lr
+	bcs	array_bound_exception_jpc_1_r3
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r3, lsl #1
+	strh	r2, [tmp1, #BASE_OFFSET_SHORT]
+	DISPATCH_FINISH
+}
+
+@ --- load; dataop -------------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(iadd)
+{
+	POP	r1
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(iadd) {
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	POP	tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(isub)
+{
+	POP	r1
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	sub	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(isub) {
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	POP	tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(imul)
+{
+	POP	r2
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	mul	r3, r2, lr
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(imul) {
+	DISPATCH_START	\seq_len
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	mul	lr, r3, r2
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ineg)
+{
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	rsb	lr, lr, #0
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(ineg) {
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	rsb	r2, r2, #0
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ishl)
+{
+	POP	r2
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	mov	lr, r2, lsl lr
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(ishl) {
+	DISPATCH_START	\seq_len
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	mov	r2, r3, lsl r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ishr)
+{
+	POP	r2
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	mov	lr, r2, asr lr
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(ishr) {
+	DISPATCH_START	\seq_len
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	mov	r2, r3, asr r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(iushr)
+{
+	POP	r2
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	mov	lr, r2, lsr lr
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(iushr) {
+	ldrb	r2, [jpc, #1]
+	DISPATCH_START	\seq_len
+	POP	r3
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+	mov	r2, r3, lsr r2
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(iand)
+{
+	POP	r1
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	and	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(iand) {
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	POP	tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ior)
+{
+	POP	r1
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	orr	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(ior) {
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	POP	tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ixor)
+{
+	POP	r1
+	rsb	tmp1, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	eor	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(ixor) {
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	POP	tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(i2c)
+{
+	rsb	lr, r0, #opc_iload_0
+	DISPATCH_START	\seq_len
+	ldr	lr, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+        mov     lr, lr, asl #16
+        mov     lr, lr, lsr #16
+	PUSH	lr
+	DISPATCH_FINISH
+}
+
+(iload,aload,fload)(i2c) {
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	DISPATCH_NEXT
+        mov     r2, r2, asl #16
+        mov     r2, r2, lsr #16
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+#ifdef NOTICE_SAFEPOINTS
+
+@ --- load; branch -------------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(ifeq,ifnull)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	beq	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(ifeq,ifnull) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	beq	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ifne,ifnonnull)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	bne	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(ifne,ifnonnull) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	bne	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(iflt)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	blt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(iflt) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	blt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ifge)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	bge	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(ifge) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	bge	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ifgt)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	bgt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(ifgt) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	bgt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(ifle)
+{
+	rsb	r3, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r3, [locals, r3, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, #0
+	ble	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(ifle) {
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r3, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, #0
+	ble	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmpeq,if_acmpeq)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	beq	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmpeq,if_acmpeq) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	beq	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmpne,if_acmpne)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	bne	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmpne,if_acmpne) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	bne	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmplt)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	blt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmplt) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	blt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmpge)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	bge	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmpge) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	bge	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmpgt)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	bgt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmpgt) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	bgt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(if_icmple)
+{
+	POP	r3
+	rsb	r2, r0, #opc_iload_0
+        ldrsb   r1, [jpc, #2]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #3]
+        cmp     r3, r2
+	ble	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iload,aload,fload)(if_icmple) {
+	POP	r3
+	rsb	r2, r2, #0
+        ldrsb   r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+        ldrb    ip, [jpc, #4]
+        cmp     r3, r2
+	ble	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+@ --- load; return/invoke -------------------------------------------------
+
+(iload_0,iload_1,iload_2,iload_3)
+(ireturn,areturn,freturn)
+{
+	rsb	r0, r0, #opc_iload_0
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+	ldr	r1, [locals, r0, lsl #2]
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [stack, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [stack, r0, lsl #2]!
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	PUSH	r1
+	add	jpc, jpc, #1
+	bl	return_check_monitors
+	POP	r1
+	b	2b
+}
+
+(iload,aload,fload)(ireturn,areturn,freturn) {
+
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+	ldr	r1, [locals, -r2, lsl #2]
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [stack, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [stack, r0, lsl #2]!
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	PUSH	r1
+	add	jpc, jpc, #2
+	bl	return_check_monitors
+	POP	r1
+	b	2b
+}
+
+#endif // NOTICE_SAFEPOINTS
+
+(iload_0,iload_1,iload_2,iload_3)
+(invokeresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_iload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeresolved
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(invokeresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_aload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeresolved
+}
+
+(iload,aload,fload)(invokeresolved) {
+	ldr	r0, [locals, -r2, lsl #2]
+	add	jpc, jpc, #2
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeresolved
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(invokevfinal)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_iload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokevfinal
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(invokevfinal)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_aload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokevfinal
+}
+
+(iload,aload,fload)(invokevfinal) {
+	ldr	r0, [locals, -r2, lsl #2]
+	add	jpc, jpc, #2
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokevfinal
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(invokespecialresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_iload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokespecialresolved
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(invokespecialresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_aload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokespecialresolved
+}
+
+(iload,aload,fload)(invokespecialresolved) {
+	ldr	r0, [locals, -r2, lsl #2]
+	add	jpc, jpc, #2
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokespecialresolved
+}
+
+(iload_0,iload_1,iload_2,iload_3)
+(invokestaticresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_iload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokestaticresolved
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(invokestaticresolved)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_aload_0
+	ldr	r0, [locals, r0, lsl #2]
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokestaticresolved
+}
+
+(iload,aload,fload)(invokestaticresolved) {
+	ldr	r0, [locals, -r2, lsl #2]
+	add	jpc, jpc, #2
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokestaticresolved
+}

+
+(iload_0,iload_1,iload_2,iload_3)
+(invokeinterface)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_iload_0
+	ldr	r0, [locals, r0, lsl #2]
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeinterface
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(invokeinterface)
+{
+	add	jpc, jpc, #1
+	rsb	r0, r0, #opc_aload_0
+	ldr	r0, [locals, r0, lsl #2]
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeinterface
+}
+
+(iload,aload,fload)(invokeinterface) {
+	ldr	r0, [locals, -r2, lsl #2]
+	add	jpc, jpc, #2
+        ldrb     r2, [jpc, #1]
+        ldrb     r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeinterface
+}
+
+(aload_0,aload_1,aload_2,aload_3)
+(igetfield)
+{
+	add	r0, r0, #opc_iaccess_0-opc_aload_0
+	REWRITE_PAIRS	strb	r0, [jpc]
+	DISPATCH_BYTECODE
+}
+
+@ 13 cycles
+(iload,aload,fload)(igetfield) {
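+	@ resolved getfield on a local: the index bytes at jpc+3/4 select the constant
+	@ pool cache entry, and the word at CP_OFFSET+8 supplies the field offset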
+	ldrb	ip, [jpc, #4]
+	rsb	tmp1, r2, #0
+	ldrb	r3, [jpc, #3]
+	add	r1, constpool, ip, lsl #12
+	DISPATCH_START	5
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r1, r3, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+	DISPATCH_NEXT
+	GO_IF_VOLATILE r3, r1, 3f
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry88:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+3:
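+	@ volatile variant: same field load, followed by a full memory barrier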
+	VOLATILE_VERSION
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry88_v:
+	ldr	r1, [tmp1, r1]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+@ --- iconst; store -------------------------------------------------
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)
+(istore_0,istore_1,istore_2,istore_3)
+{
+	sub	r3, r0, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	rsb	r2, r1, #opc_istore_0
+	str	r3, [locals, r2, lsl #2]
+	DISPATCH_BYTECODE
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(astore,istore,fstore) {
+	ldrb	r2, [jpc, #2]
+	sub	r3, r0, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	str	r3, [locals, -r2, lsl #2]
+	DISPATCH_BYTECODE
+}
+
+@ --- iconst; dataop -------------------------------------------------
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(iadd) {
+	sub	tmp1, r0, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	POP	r1
+	DISPATCH_NEXT
+	add	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(isub) {
+	sub	tmp1, r0, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	POP	r1
+	DISPATCH_NEXT
+	sub	tmp1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(idiv) {
+	subs	lr, r0, #opc_iconst_2
+	DISPATCH_START	\seq_len
+	POP	tmp1
+	DISPATCH_NEXT
+	beq	5f
+	bcc	3f
+	cmp	lr, #(opc_iconst_4-opc_iconst_2)
+	beq	4f
+	bcc	2f
+@ divide by 5
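+@ (fixed-point reciprocal: 0x66666667 = ceil(2^33/5); quotient = (hi(n*m) >> 1) - (n >> 31);
+@  the divide-by-3 case below uses 0x55555556 = ceil(2^32/3) with quotient = hi(n*m) - (n >> 31))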
+1:
+	mvn	lr, #0x198		@ Form 0x66666667 in lr
+	bic	lr, lr, #0x9800
+	add	lr, lr, lr, lsl #16
+	smull	a3, a4, tmp1, lr
+	mov	a3, tmp1, asr #31
+	rsb	tmp1, a3, a4, asr #1
+	b	6f
+@ divide by 3
+2:
+	mvn	lr, #0xa9		@ Form 0x55555556 in lr
+	bic	lr, lr, #0xaa00
+	add	lr, lr, lr, lsl #16
+        smull   a3, a4, tmp1, lr
+        sub     tmp1, a4, tmp1, asr #31
+	b	6f
+3:
+	cmp	lr, #(opc_iconst_0-opc_iconst_2)
+	beq	div_zero_jpc_1
+	rsbcc	tmp1, tmp1, #0		@ Divide by -1 or 1
+	b	6f
+@ divide by 4
+4:	movs	a4, tmp1
+	addmi	a4, a4, #3
+	mov	tmp1, a4, asr #2
+	b	6f
+@ divide by 2
+5:
+	add	tmp1, tmp1, tmp1, lsr #31
+	mov	tmp1, tmp1, asr #1
+6:
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(ishl) {
+	sub	tmp1, r0, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	POP	r2
+	DISPATCH_NEXT
+	mov	tmp1, r2, lsl tmp1
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ --- iconst; branch -------------------------------------------------
+
+#ifdef NOTICE_SAFEPOINTS
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmpeq) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	beq	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmpne) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	bne	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmplt) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	blt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmpge) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	bge	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmpgt) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	bgt	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(if_icmple) {
+	POP	r3
+	sub	r2, r0, #opc_iconst_0
+        ldrsb   r1, [jpc, #2]
+        cmp     r3, r2
+        ldrb    ip, [jpc, #3]
+	ble	branch_taken_unsafe_1
+	DISPATCH 4
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(ireturn) {
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+	sub	r1, r0, #opc_iconst_0
+	cmp	tmp1, r9
+	bcc	1f
+2:
+	mov	r3, #0
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [stack, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [stack, r0, lsl #2]!
+
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+1:
+	PUSH	r1
+	add	jpc, jpc, #1
+	bl	return_check_monitors
+	POP	r1
+	b	2b
+}
+
+#endif // NOTICE_SAFEPOINTS
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(invokeresolved) {
+	add	jpc, jpc, #1
+	sub	r0, r0, #opc_iconst_0
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokeresolved
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(invokevfinal) {
+	add	jpc, jpc, #1
+	sub	r0, r0, #opc_iconst_0
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokevfinal
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(invokestaticresolved) {
+	add	jpc, jpc, #1
+	sub	r0, r0, #opc_iconst_0
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokestaticresolved
+}
+
+(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(invokespecialresolved) {
+	add	jpc, jpc, #1
+	sub	r0, r0, #opc_iconst_0
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]
+	PUSH	r0
+	b	do_invokespecialresolved
+}
+
+@# --- Bytecode sequences iaload; xxx -----------------------------------------------
+
+(iaload,faload,aaload)(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5) {
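+	@ SW_NPC marks the explicit (software) null check; in the hardware-trap
+	@ configuration the .abortentry label below covers the faulting load instead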
+	sub	r2, r1, #opc_iconst_0
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry38:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #12]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(bipush) {
+	ldrsb	r2, [jpc, #2]
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+.abortentry39:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_3_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #12]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(sipush) {
+	ldrsb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #3]
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	orr	r2, tmp1, r2, lsl #8
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_4
+.abortentry40:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_4_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #12]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(iload,fload,aload) {
+	ldrb	r2, [jpc, #2]
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, -r2, lsl #2]
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+.abortentry41:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_3_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	rsb	r2, r1, #opc_iload_0
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry42:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)
+(aload_0,aload_1,aload_2,aload_3)
+{
+	rsb	r2, r1, #opc_aload_0
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	ldr	r2, [locals, r2, lsl #2]
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry42_1:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	r2, r3
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(iaload,faload,aaload)
+{
+	POP	r2, r3			@ r2 = index, r3 = arrayref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_0
+.abortentry43:
+	ldr	tmp1, [r3, #8]		@ tmp1 = length
+	DISPATCH_START	\seq_len
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_2
+	add	r3, r3, r2, lsl #2
+	POP	lr			@ lr = arrayref for the second iaload; the element loaded next into r2 is its index
+	ldr	r2, [r3, #BASE_OFFSET_WORD]
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry44:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	r2, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	r2
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(astore,istore,fstore) {
+	ldrb	r2, [jpc, #2]
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_3
+.abortentry45:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_3_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	str	r3, [locals, -r2, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)
+(istore_0,istore_1,istore_2,istore_3) {
+	rsb	r2, r1, #opc_istore_0
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry46:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	r3, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	str	r3, [locals, r2, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(iastore,fastore) {
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_0
+.abortentry47:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_START	\seq_len
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+
+	POP	r2, r3		@ tmp1 = value, r2 = index, r3 = arrayref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry48:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(bastore) {
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_0
+.abortentry49:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_START	\seq_len
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+
+	POP	r2, r3		@ tmp1 = value, r2 = index, r3 = arrayref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry50:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2
+	strb	tmp1, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(castore,sastore) {
+	POP	r3, lr			@ r3 = index, lr = arrayref
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_0
+.abortentry51:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_START	\seq_len
+	cmp	r3, tmp1
+	bcs	array_bound_exception_jpc_2_r3
+	add	lr, lr, r3, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+
+	POP	r2, r3		@ tmp1 = value, r2 = index, r3 = arrayref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry52:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #1
+	strh	tmp1, [r3, #BASE_OFFSET_BYTE]
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(iand) {
+	POP	r2, r3			@ r2 = index, r3 = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry58:
+	ldr	tmp1, [r3, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_2
+	add	r3, r3, r2, lsl #2
+	ldr	tmp1, [r3, #BASE_OFFSET_WORD]		@ tmp1 = tos
+	POP	r2			@ r2 = tosm1
+	DISPATCH_NEXT
+	and	tmp1, r2, tmp1		@ tosm1 <dop> tos
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(ior) {
+	POP	r2, r3			@ r2 = index, r3 = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry59:
+	ldr	tmp1, [r3, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_2
+	add	r3, r3, r2, lsl #2
+	ldr	tmp1, [r3, #BASE_OFFSET_WORD]		@ tmp1 = tos
+	POP	r2			@ r2 = tosm1
+	DISPATCH_NEXT
+	orr	tmp1, r2, tmp1		@ tosm1 <dop> tos
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iaload,faload,aaload)(ixor) {
+	POP	r2, r3			@ r2 = index, r3 = arrayref
+	DISPATCH_START	\seq_len
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_2
+.abortentry60:
+	ldr	tmp1, [r3, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_2
+	add	r3, r3, r2, lsl #2
+	ldr	tmp1, [r3, #BASE_OFFSET_WORD]		@ tmp1 = tos
+	POP	r2			@ r2 = tosm1
+	DISPATCH_NEXT
+	eor	tmp1, r2, tmp1		@ tosm1 <dop> tos
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ ---- iadd; xxx ------------------------------------------------------------
+
+(iadd)(iload,fload,aload) {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iadd)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iadd)(iaload,aaload,faload) {
+	POP	r2, r3, lr		@ lr = ref
+	DISPATCH_START	\seq_len
+	add	r2, r3, r2		@ r2 = index
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry73:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iadd)(istore) {
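+	@ rewrite this iadd; istore pair in place to the fused opcode (when pair
+	@ rewriting is enabled) and jump to its handler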
+	mov	r0, #opc_iadd_u4store
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_iadd_u4store
+}
+
+(iadd)
+(istore_0,istore_1,istore_2,istore_3) {
+	mov	r0, #opc_iadd_istore_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_iadd_istore_N
+}
+
+(iadd)(iastore,fastore) {
+	POP	r2, r3
+	DISPATCH_START	\seq_len
+	add	tmp1, r3, r2		@ tmp1 = value
+	POP	r2, r3			@ r2 = index, r3 = ref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry106:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(iadd)(iadd) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	add	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iadd)(isub) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	add	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iadd)(iinc) {
+	POP	tmp1, lr
+	DISPATCH_START	\seq_len
+	add	tmp1, lr, tmp1
+        ldrb    r3, [jpc, #-2]	@ jpc now points to next bc
+        ldrsb   r2, [jpc, #-1]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	ldr	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r2
+	str	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_FINISH
+}
+@ ---- isub; xxx -----------------------------------------------------------
+
+(isub)(iload,fload,aload) {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(isub)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(isub)(iaload,aaload,faload) {
+	POP	r2, r3, lr		@ lr = ref
+	DISPATCH_START	\seq_len
+	sub	r2, r3, r2		@ r2 = index
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry74:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(isub)(istore) {
+	mov	r0, #opc_isub_u4store
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_isub_u4store
+}
+
+(isub)
+(istore_0,istore_1,istore_2,istore_3) {
+	mov	r0, #opc_isub_istore_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_isub_istore_N
+}
+
+(isub)(iastore,fastore) {
+	POP	r2, r3
+	DISPATCH_START	\seq_len
+	sub	tmp1, r3, r2		@ tmp1 = value
+	POP	r2, r3			@ r2 = index, r3 = ref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry105:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(isub)(iadd) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	sub	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(isub)(isub) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	sub	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(isub)(iinc) {
+	POP	tmp1, lr
+	DISPATCH_START	\seq_len
+	sub	tmp1, lr, tmp1
+        ldrb    r3, [jpc, #-2]	@ jpc now points to next bc
+        ldrsb   r2, [jpc, #-1]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	ldr	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r2
+	str	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_FINISH
+}
+@ ---- iand; xxx ------------------------------------------------------------
+
+(iand)(iload,fload,aload) {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iand)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iand)(iaload,aaload,faload) {
+	POP	r2, r3, lr		@ lr = ref
+	DISPATCH_START	\seq_len
+	and	r2, r3, r2		@ r2 = index
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry75:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iand)(istore) {
+	mov	r0, #opc_iand_u4store
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_iand_u4store
+}
+
+(iand)
+(istore_0,istore_1,istore_2,istore_3) {
+	mov	r0, #opc_iand_istore_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_iand_istore_N
+}
+
+(iand)(iastore,fastore) {
+	POP	r2, r3
+	DISPATCH_START	\seq_len
+	and	tmp1, r3, r2		@ tmp1 = value
+	POP	r2, r3			@ r2 = index, r3 = ref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry107:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(iand)(iadd) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	and	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iand)(isub) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	and	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iand)(iinc) {
+	POP	tmp1, lr
+	DISPATCH_START	\seq_len
+	and	tmp1, lr, tmp1
+        ldrb    r3, [jpc, #-2]	@ jpc now points to next bc
+        ldrsb   r2, [jpc, #-1]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	ldr	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r2
+	str	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_FINISH
+}
+@ ---- ior; xxx ------------------------------------------------------------
+
+(ior)(iload,fload,aload) {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(ior)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(ior)(iaload,aaload,faload) {
+	POP	r2, r3, lr		@ lr = ref
+	DISPATCH_START	\seq_len
+	orr	r2, r3, r2		@ r2 = index
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry76:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(ior)(istore) {
+	mov	r0, #opc_ior_u4store
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_ior_u4store
+}
+
+(ior)
+(istore_0,istore_1,istore_2,istore_3) {
+	mov	r0, #opc_ior_istore_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_ior_istore_N
+}
+
+(ior)(iastore,fastore) {
+	POP	r2, r3
+	DISPATCH_START	\seq_len
+	orr	tmp1, r3, r2		@ tmp1 = value
+	POP	r2, r3			@ r2 = index, r3 = ref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry108:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(ior)(iadd) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	orr	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(ior)(isub) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	orr	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(ior)(iinc) {
+	POP	tmp1, lr
+	DISPATCH_START	\seq_len
+	orr	tmp1, lr, tmp1
+        ldrb    r3, [jpc, #-2]	@ jpc now points to next bc
+        ldrsb   r2, [jpc, #-1]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	ldr	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r2
+	str	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ ---- ixor; xxx ------------------------------------------------------------
+
+(ixor)(iload,fload,aload) {
+	ldrb	r1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(ixor)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	DISPATCH_START	\seq_len
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(ixor)(iaload,aaload,faload) {
+	POP	r2, r3, lr		@ lr = ref
+	DISPATCH_START	\seq_len
+	eor	r2, r3, r2		@ r2 = index
+	SW_NPC	cmp	lr, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry77:
+	ldr	tmp1, [lr, #8]		@ tmp1 = length
+	DISPATCH_NEXT
+	cmp	r2, tmp1
+	bcs	array_bound_exception_jpc_1
+	add	lr, lr, r2, lsl #2
+	ldr	tmp1, [lr, #BASE_OFFSET_WORD]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(ixor)(istore) {
+	mov	r0, #opc_ixor_u4store
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_ixor_u4store
+}
+
+(ixor)
+(istore_0,istore_1,istore_2,istore_3) {
+	mov	r0, #opc_ixor_istore_N
+	REWRITE_PAIRS	strb	r0, [jpc]
+	b	do_ixor_istore_N
+}
+
+(ixor)(iastore,fastore) {
+	POP	r2, r3
+	DISPATCH_START	\seq_len
+	eor	tmp1, r3, r2		@ tmp1 = value
+	POP	r2, r3			@ r2 = index, r3 = ref
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry109:
+	ldr	lr, [r3, #8]		@ lr = limit
+	DISPATCH_NEXT
+	cmp	r2, lr
+	bcs	array_bound_exception_jpc_1
+	DISPATCH_NEXT
+	add	r3, r3, r2, lsl #2
+	str	tmp1, [r3, #BASE_OFFSET_WORD]
+	DISPATCH_FINISH
+}
+
+(ixor)(iadd) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	eor	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+        DISPATCH_FINISH
+}
+
+(ixor)(isub) {
+	DISPATCH_START	\seq_len
+	POP	r2, r3, tmp1
+	DISPATCH_NEXT
+	eor	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(ixor)(iinc) {
+	POP	tmp1, lr
+	DISPATCH_START	\seq_len
+	eor	tmp1, lr, tmp1
+        ldrb    r3, [jpc, #-2]	@ jpc now points to next bc
+        ldrsb   r2, [jpc, #-1]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	ldr	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_NEXT
+	add	tmp1, tmp1, r2
+	str	tmp1, [locals, -r3, lsl #2]
+	DISPATCH_FINISH
+}
+
+@ --- iinc; xxx --------------------------------------------------------------
+
+(iinc)(iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5) {
+        ldrsb   tmp1, [jpc, #2]
+	sub	lr, r1, #opc_iconst_0
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	add	r3, r3, tmp1
+	DISPATCH_NEXT
+	PUSH	lr
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iinc)(iload,aload,fload) {
+	ldrb	lr, [jpc, #4]
+        ldrsb   tmp1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	rsb	lr, lr, #0
+	DISPATCH_NEXT
+	add	r3, r3, tmp1
+	DISPATCH_NEXT
+	str	r3, [locals, r1, lsl #2]
+	ldr	tmp1, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iinc)
+(iload_0,iload_1,iload_2,iload_3)
+{
+	rsb	lr, r1, #opc_iload_0
+        ldrsb   tmp1, [jpc, #2]
+	DISPATCH_START	\seq_len
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	add	r3, r3, tmp1
+	DISPATCH_NEXT
+	str	r3, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	ldr	tmp1, [locals, lr, lsl #2]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@###############################################################################
+@# Optimised bytecode triples
+@###############################################################################
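+@ iaccess_0..3 are the rewritten aload_N; igetfield pairs (see the rewrite handler
+@ above), so the entries below effectively handle aload_N; getfield; <op> triples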
+
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+(iload,fload,aload) {
+	ldrb	r2, [jpc, #3]
+	rsb	tmp1, r0, #opc_iaccess_0
+	ldrb	r1, [jpc, #2]
+	add	r3, constpool, r2, lsl #12
+	DISPATCH_START	6
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r3, r1, lsl #4
+	ldrb	r1, [jpc, #-1]
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_5
+	DISPATCH_NEXT
+        ldr     r3, [r3, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
+.abortentry89:
+	ldr	r3, [tmp1, r3]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+(iload_0,iload_1,iload_2,iload_3) {
+	ldrb	r2, [jpc, #3]
+	rsb	tmp1, r0, #opc_iaccess_0
+	ldrb	ip, [jpc, #2]
+	add	r3, constpool, r2, lsl #12
+	DISPATCH_START	5
+	rsb	r1, r1, #opc_iload_0
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r3, ip, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_4
+	DISPATCH_NEXT
+        ldr     r3, [r3, #CP_OFFSET+8]
+	DISPATCH_NEXT
+.abortentry90:
+	ldr	r3, [tmp1, r3]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r1, r3
+	DISPATCH_FINISH
+}
+
+(iaccess_0,iaccess_1,iaccess_2,iaccess_3)
+(iadd) {
+	ldrb	r2, [jpc, #3]
+	rsb	tmp1, r0, #opc_iaccess_0
+	ldrb	ip, [jpc, #2]
+	add	r1, constpool, r2, lsl #12
+	DISPATCH_START	5
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	add	r1, ip, lsl #4
+	DISPATCH_NEXT
+	SW_NPC	cmp	tmp1, #0
+	SW_NPC	beq	null_ptr_exception_jpc_4
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	POP	r3
+.abortentry91:
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	add	r1, r1, r3
+	PUSH	r1
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iadd)
+{
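+        @ DISPATCH_START has already advanced jpc, so the original iconst_N opcode is
+        @ re-read at #1-\seq_len and converted to its constant below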
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     tmp1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        add     r3, r3, tmp1
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(isub)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     tmp1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        sub     r3, r3, tmp1
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iand)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     tmp1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        and     r3, r3, tmp1
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ior)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     tmp1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        orr     r3, r3, tmp1
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ixor)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+        ldrb    r2, [jpc, #1-\seq_len]
+        DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        sub     tmp1, r2, #opc_iconst_0
+        DISPATCH_NEXT
+        eor     r3, r3, tmp1
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(idiv)
+{
+	rsb	tmp1, r0, #opc_iload_0_iconst_N
+	subs	lr, r2, #opc_iconst_2
+	DISPATCH_START	\seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	DISPATCH_NEXT
+	beq	5f
+	bcc	3f
+	cmp	lr, #(opc_iconst_4-opc_iconst_2)
+	beq	4f
+	bcc	2f
+@ divide by 5
+1:
+	mvn	lr, #0x198		@ Form 0x66666667 in lr
+	bic	lr, lr, #0x9800
+	add	lr, lr, lr, lsl #16
+	smull	a3, a4, tmp1, lr
+	mov	a3, tmp1, asr #31
+	rsb	tmp1, a3, a4, asr #1
+	b	6f
+@ divide by 3
+2:
+	mvn	lr, #0xa9		@ Form 0x55555556 in lr
+	bic	lr, lr, #0xaa00
+	add	lr, lr, lr, lsl #16
+        smull   a3, a4, tmp1, lr
+        sub     tmp1, a4, tmp1, asr #31
+	b	6f
+3:
+	cmp	lr, #(opc_iconst_0-opc_iconst_2)
+	beq	div_zero_jpc_1
+	rsbcc	tmp1, tmp1, #0		@ Divide by -1 or 1
+	b	6f
+@ divide by 4
+4:	movs	a4, tmp1
+	addmi	a4, a4, #3
+	mov	tmp1, a4, asr #2
+	b	6f
+@ divide by 2
+5:
+	add	tmp1, tmp1, tmp1, lsr #31
+	mov	tmp1, tmp1, asr #1
+6:
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+(iadd)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        DISPATCH_NEXT
+        ldr     r2, [locals, r2, lsl #2]
+        sub     r3, r3, #opc_iconst_0
+        DISPATCH_NEXT
+        add     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+(isub)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        DISPATCH_NEXT
+        ldr     r2, [locals, r2, lsl #2]
+        sub     r3, r3, #opc_iconst_0
+        DISPATCH_NEXT
+        sub     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+(iand)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        DISPATCH_NEXT
+        ldr     r2, [locals, r2, lsl #2]
+        sub     r3, r3, #opc_iconst_0
+        DISPATCH_NEXT
+        and     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+(ior)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        DISPATCH_NEXT
+        ldr     r2, [locals, r2, lsl #2]
+        sub     r3, r3, #opc_iconst_0
+        DISPATCH_NEXT
+        orr     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+@ r2 = [jpc, #1]
+(iload_iconst_N)
+(ixor)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        DISPATCH_NEXT
+        ldr     r2, [locals, r2, lsl #2]
+        sub     r3, r3, #opc_iconst_0
+        DISPATCH_NEXT
+        eor     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(idiv)
+{
+	ldrb	lr, [jpc, #2]
+	rsb	tmp1, r2, #0
+        DISPATCH_START  \seq_len
+	ldr	tmp1, [locals, tmp1, lsl #2]
+	subs	lr, lr, #opc_iconst_2
+	DISPATCH_NEXT
+	beq	5f
+	bcc	3f
+	cmp	lr, #(opc_iconst_4-opc_iconst_2)
+	beq	4f
+	bcc	2f
+@ divide by 5
+1:
+	mvn	lr, #0x198		@ Form 0x66666667 in lr
+	bic	lr, lr, #0x9800
+	add	lr, lr, lr, lsl #16
+	smull	a3, a4, tmp1, lr
+	mov	a3, tmp1, asr #31
+	rsb	tmp1, a3, a4, asr #1
+	b	6f
+@ divide by 3
+2:
+	mvn	lr, #0xa9		@ Form 0x55555556 in lr
+	bic	lr, lr, #0xaa00
+	add	lr, lr, lr, lsl #16
+        smull   a3, a4, tmp1, lr
+        sub     tmp1, a4, tmp1, asr #31
+	b	6f
+3:
+	cmp	lr, #(opc_iconst_0-opc_iconst_2)
+	beq	div_zero_jpc_1
+	rsbcc	tmp1, tmp1, #0		@ Divide by -1 or 1
+	b	6f
+@ divide by 4
+4:	movs	a4, tmp1
+	addmi	a4, a4, #3
+	mov	tmp1, a4, asr #2
+	b	6f
+@ divide by 2
+5:
+	add	tmp1, tmp1, tmp1, lsr #31
+	mov	tmp1, tmp1, asr #1
+6:
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(iadd)
+{
+        ldrb    r3, [jpc, #3]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        add     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iadd)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #opc_iload_0
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        add     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iadd)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        add     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iadd)
+{
+	rsb	r3, r2, #opc_iload_0
+	rsb	r2, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        add     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(isub)
+{
+        ldrb    r3, [jpc, #3]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        sub     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(isub)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #opc_iload_0
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        sub     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(isub)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        sub     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(isub)
+{
+	rsb	r3, r2, #opc_iload_0
+	rsb	r2, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        sub     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(iand)
+{
+        ldrb    r3, [jpc, #3]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        and     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iand)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #opc_iload_0
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        and     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iand)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        and     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iand)
+{
+	rsb	r3, r2, #opc_iload_0
+	rsb	r2, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        and     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ior)
+{
+        ldrb    r3, [jpc, #3]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        orr     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ior)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #opc_iload_0
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        orr     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ior)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        orr     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ior)
+{
+	rsb	r3, r2, #opc_iload_0
+	rsb	r2, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+        ldr     r2, [locals, r2, lsl #2]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        orr     r3, r2, r3
+	DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ixor)
+{
+        ldrb    r3, [jpc, #3]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        eor     r3, r2, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ixor)
+{
+        ldrb    r3, [jpc, #2]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #opc_iload_0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        eor     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ixor)
+{
+        ldrb    r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+        DISPATCH_START  \seq_len
+        rsb     r3, r3, #0
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        eor     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+        DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ixor)
+{
+	rsb	r3, r2, #opc_iload_0
+	rsb	r2, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+        ldr     r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        eor     r3, r2, r3
+        DISPATCH_NEXT
+        DISPATCH_NEXT
+        PUSH    r3
+	DISPATCH_FINISH
+}
+
+@ Former quads
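+@ (a fused load/constant bytecode followed by a fused op/store bytecode,
+@  i.e. four original bytecodes per handler)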
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iadd_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(iadd_u4store)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #5]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iadd_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(iadd_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #4]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #opc_istore_0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(isub_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(isub_u4store)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #5]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(isub_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(isub_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #4]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #opc_istore_0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iand_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(iand_u4store)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #5]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(iand_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(iand_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #4]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #opc_istore_0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ior_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(ior_u4store)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #5]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ior_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(ior_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #4]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #opc_istore_0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ixor_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(ixor_u4store)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #5]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(ixor_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iconst_N
+        DISPATCH_START  \seq_len
+	sub	r2, r2, #opc_iconst_0
+	DISPATCH_NEXT
+        ldrb    tmp1, [jpc, #-1]
+        ldr     r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iconst_N)
+(ixor_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+        ldrb    lr, [jpc, #4]
+        rsb     r2, r2, #0
+        DISPATCH_START  \seq_len
+	sub	r3, r3, #opc_iconst_0
+	DISPATCH_NEXT
+        rsb     r1, lr, #opc_istore_0
+	DISPATCH_NEXT
+        ldr     tmp1, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+#ifdef NOTICE_SAFEPOINTS
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmpeq,if_acmpeq)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmpeq,if_acmpeq)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmpne,if_acmpne)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmpne,if_acmpne)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmplt)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmplt)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmpge)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmpge)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmpgt)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmpgt)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iconst_N,iload_1_iconst_N,iload_2_iconst_N,iload_3_iconst_N)
+(if_icmple)
+{
+	ldrb	r3, [jpc, #1]
+	rsb	r2, r0, #opc_iload_0_iconst_N
+	ldrsb	r1, [jpc, #3]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iconst_N)
+(if_icmple)
+{
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	ldr	r2, [locals, r2, lsl #2]
+	sub	r3, r3, #opc_iconst_0
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+#endif // NOTICE_SAFEPOINTS
+
+(iload_iload)
+(iadd_istore_N)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iadd_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #4]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iadd_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #4]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iadd_istore_N)
+{
+	ldrb	tmp1, [jpc, #3]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(isub_istore_N)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(isub_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #4]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(isub_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #4]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(isub_istore_N)
+{
+	ldrb	tmp1, [jpc, #3]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(iand_istore_N)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iand_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #4]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iand_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #4]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iand_istore_N)
+{
+	ldrb	tmp1, [jpc, #3]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ior_istore_N)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ior_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #4]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ior_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #4]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ior_istore_N)
+{
+	ldrb	tmp1, [jpc, #3]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ixor_istore_N)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ixor_istore_N)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #4]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #opc_istore_0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ixor_istore_N)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #4]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ixor_istore_N)
+{
+	ldrb	tmp1, [jpc, #3]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #opc_istore_0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(iadd_u4store)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #6]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iadd_u4store)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iadd_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #5]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iadd_u4store)
+{
+	ldrb	tmp1, [jpc, #4]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        add     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(isub_u4store)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #6]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(isub_u4store)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(isub_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #5]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(isub_u4store)
+{
+	ldrb	tmp1, [jpc, #4]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        sub     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(iand_u4store)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #6]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(iand_u4store)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(iand_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #5]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(iand_u4store)
+{
+	ldrb	tmp1, [jpc, #4]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        and     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ior_u4store)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #6]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ior_u4store)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ior_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #5]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ior_u4store)
+{
+	ldrb	tmp1, [jpc, #4]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        orr     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload)
+(ixor_u4store)
+{
+	ldrb	r3, [jpc, #3]
+	ldrb	lr, [jpc, #6]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_iload_N)
+(ixor_u4store)
+{
+	ldrb	r3, [jpc, #2]
+	ldrb	lr, [jpc, #5]
+	rsb	r2, r2, #0
+        DISPATCH_START  \seq_len
+	rsb	r3, r3, #opc_iload_0
+	DISPATCH_NEXT
+	rsb	r1, lr, #0
+	ldr	tmp1, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+        str     r3, [locals, r1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(ixor_u4store)
+{
+        rsb     r3, r0, #opc_iload_0_iload
+	ldrb	r2, [jpc, #2]
+	ldrb	tmp1, [jpc, #5]
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(ixor_u4store)
+{
+	ldrb	tmp1, [jpc, #4]
+	rsb	r3, r0, #opc_iload_0_iload_N
+        DISPATCH_START  \seq_len
+	rsb	r2, r2, #opc_iload_0
+	DISPATCH_NEXT
+        ldr     r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	DISPATCH_NEXT
+        eor     r3, r3, r2
+	DISPATCH_NEXT
+	rsb	tmp1, tmp1, #0
+	DISPATCH_NEXT
+        str     r3, [locals, tmp1, lsl #2]
+	DISPATCH_FINISH
+}
+
+#ifdef NOTICE_SAFEPOINTS
+
+(iload_iload)
+(if_icmpeq,if_acmpeq) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmpeq,if_acmpeq) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmpeq,if_acmpeq) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmpeq,if_acmpeq) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	beq	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iload)
+(if_icmpne,if_acmpne) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmpne,if_acmpne) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmpne,if_acmpne) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmpne,if_acmpne) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bne	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iload)
+(if_icmplt) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmplt) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmplt) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmplt) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	blt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iload)
+(if_icmpge) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmpge) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmpge) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmpge) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bge	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iload)
+(if_icmpgt) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmpgt) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmpgt) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmpgt) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	bgt	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+(iload_iload)
+(if_icmple) {
+	ldrb	r3, [jpc, #3]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #5]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #6]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_4
+	DISPATCH 7
+}
+
+(iload_iload_N)
+(if_icmple) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r2, #0
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #opc_iload_0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload,iload_1_iload,iload_2_iload,iload_3_iload)
+(if_icmple) {
+	ldrb	r3, [jpc, #2]
+	rsb	r2, r0, #opc_iload_0_iload
+	ldrsb	r1, [jpc, #4]
+	rsb	r3, r3, #0
+	ldr	r2, [locals, r2, lsl #2]
+	ldr	r3, [locals, r3, lsl #2]
+	ldrb	ip, [jpc, #5]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_3
+	DISPATCH 6
+}
+
+(iload_0_iload_N,iload_1_iload_N,iload_2_iload_N,iload_3_iload_N)
+(if_icmple) {
+	rsb	r3, r2, #opc_iload_0
+	ldrsb	r1, [jpc, #3]
+	rsb	r2, r0, #opc_iload_0_iload_N
+	ldr	r3, [locals, r3, lsl #2]
+	ldr	r2, [locals, r2, lsl #2]
+	ldrb	ip, [jpc, #4]
+	cmp	r2, r3
+	ble	branch_taken_unsafe_2
+	DISPATCH 5
+}
+
+#endif
+
+#endif // FAST_BYTECODES
--- a/src/cpu/zero/vm/bytecodes_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/bytecodes_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007 Red Hat, Inc.
+ * Copyright 2009 Edward Nevill
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,5 +28,54 @@
 #include "interpreter/bytecodes.hpp"
 
 void Bytecodes::pd_initialize() {
-  // No zero specific initialization
+#ifdef HOTSPOT_ASM
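+  // The def() calls below use the standard Bytecodes::def() argument order:
+  // code, name, format string, wide format, result type, stack-depth change,
+  // can_trap, and (for the fast bytecodes) the plain Java bytecode each one
+  // is derived from.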
+  // Because iaccess_N can trap, we must say aload_N can trap, otherwise
+  // we get an assertion failure
+  def(_aload_1, "aload_1", "b", NULL, T_OBJECT ,  1, true);
+  def(_aload_2, "aload_2", "b", NULL, T_OBJECT ,  1, true);
+  def(_aload_3, "aload_3", "b", NULL, T_OBJECT ,  1, true);
+
+  def(_iaccess_0, "_iaccess_0", "b_jj", NULL, T_INT,  1, true, _aload_0);
+  def(_iaccess_1, "_iaccess_1", "b_jj", NULL, T_INT,  1, true, _aload_1);
+  def(_iaccess_2, "_iaccess_2", "b_jj", NULL, T_INT,  1, true, _aload_2);
+  def(_iaccess_3, "_iaccess_3", "b_jj", NULL, T_INT,  1, true, _aload_3);
+
+  def(_invokeresolved,   "invokeresolved",   "bjj", NULL, T_ILLEGAL, -1, true, _invokevirtual);
+  def(_invokespecialresolved, "invokespecialresolved", "bjj", NULL, T_ILLEGAL, -1, true, _invokespecial);
+  def(_invokestaticresolved,  "invokestaticresolved",  "bjj", NULL, T_ILLEGAL,  0, true, _invokestatic);
+
+  def(_dmac,            "dmac",      "b_",  NULL, T_DOUBLE, -16, false, _dmul);
+
+  def(_iload_iload,      "iload_iload",      "bi_i",NULL, T_INT, 2, false, _iload);
+  def(_iload_iload_N,    "iload_iload_N",    "bi_", NULL, T_INT, 2, false, _iload);
+
+  def(_iload_0_iconst_N, "iload_0_iconst_N", "b_",  NULL, T_INT, 2, false, _iload_0);
+  def(_iload_1_iconst_N, "iload_1_iconst_N", "b_",  NULL, T_INT, 2, false, _iload_1);
+  def(_iload_2_iconst_N, "iload_2_iconst_N", "b_",  NULL, T_INT, 2, false, _iload_2);
+  def(_iload_3_iconst_N, "iload_3_iconst_N", "b_",  NULL, T_INT, 2, false, _iload_3);
+  def(_iload_iconst_N,   "iload_iconst_N",   "bi_", NULL, T_INT, 2, false, _iload);
+
+  def(_iadd_istore_N,    "iadd_istore_N",    "b_",  NULL, T_VOID, -2, false, _iadd);
+  def(_isub_istore_N,    "isub_istore_N",    "b_",  NULL, T_VOID, -2, false, _isub);
+  def(_iand_istore_N,    "iand_istore_N",    "b_",  NULL, T_VOID, -2, false, _iand);
+  def(_ior_istore_N,     "ior_istore_N",     "b_",  NULL, T_VOID, -2, false, _ior);
+  def(_ixor_istore_N,    "ixor_istore_N",    "b_",  NULL, T_VOID, -2, false, _ixor);
+
+  def(_iadd_u4store,     "iadd_u4store",     "b_i", NULL, T_VOID, -2, false, _iadd);
+  def(_isub_u4store,     "isub_u4store",     "b_i", NULL, T_VOID, -2, false, _isub);
+  def(_iand_u4store,     "iand_u4store",     "b_i", NULL, T_VOID, -2, false, _iand);
+  def(_ior_u4store,      "ior_u4store",      "b_i", NULL, T_VOID, -2, false, _ior);
+  def(_ixor_u4store,     "ixor_u4store",     "b_i", NULL, T_VOID, -2, false, _ixor);
+
+  def(_iload_0_iload,    "iload_0_iload",    "b_i", NULL, T_INT, 2, false, _iload_0);
+  def(_iload_1_iload,    "iload_1_iload",    "b_i", NULL, T_INT, 2, false, _iload_1);
+  def(_iload_2_iload,    "iload_2_iload",    "b_i", NULL, T_INT, 2, false, _iload_2);
+  def(_iload_3_iload,    "iload_3_iload",    "b_i", NULL, T_INT, 2, false, _iload_3);
+
+  def(_iload_0_iload_N,  "iload_0_iload_N",  "b_",  NULL, T_INT, 2, false, _iload_0);
+  def(_iload_1_iload_N,  "iload_1_iload_N",  "b_",  NULL, T_INT, 2, false, _iload_1);
+  def(_iload_2_iload_N,  "iload_2_iload_N",  "b_",  NULL, T_INT, 2, false, _iload_2);
+  def(_iload_3_iload_N,  "iload_3_iload_N",  "b_",  NULL, T_INT, 2, false, _iload_3);
+
+#endif // HOTSPOT_ASM
 }
--- a/src/cpu/zero/vm/bytecodes_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/bytecodes_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2009 Red Hat, Inc.
+ * Copyright 2009 Edward Nevill
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +27,44 @@
 #ifndef CPU_ZERO_VM_BYTECODES_ZERO_HPP
 #define CPU_ZERO_VM_BYTECODES_ZERO_HPP
 
-// This file is intentionally empty
+#ifdef HOTSPOT_ASM
+#define _iaccess_0      ((Bytecodes::Code)0xdb)
+#define _iaccess_1      ((Bytecodes::Code)0xdc)
+#define _iaccess_2      ((Bytecodes::Code)0xdd)
+#define _iaccess_3      ((Bytecodes::Code)0xde)
+
+#define _invokeresolved         ((Bytecodes::Code)0xdf)
+#define _invokespecialresolved  ((Bytecodes::Code)0xe0)
+#define _invokestaticresolved   ((Bytecodes::Code)0xe1)
+
+#define _iload_iload    ((Bytecodes::Code)0xe3)
+#define _iload_iload_N  ((Bytecodes::Code)0xe4)
+
+#define _dmac           ((Bytecodes::Code)0xe8)
+
+      _iload_0_iconst_N   , // 233 0xe9
+      _iload_1_iconst_N   , // 234 0xea
+      _iload_2_iconst_N   , // 235 0xeb
+      _iload_3_iconst_N   , // 236 0xec
+      _iload_iconst_N     , // 237 0xed
+      _iadd_istore_N      , // 238 0xee
+      _isub_istore_N      , // 239 0xef
+      _iand_istore_N      , // 240 0xf0
+      _ior_istore_N       , // 241 0xf1
+      _ixor_istore_N      , // 242 0xf2
+      _iadd_u4store       , // 243 0xf3
+      _isub_u4store       , // 244 0xf4
+      _iand_u4store       , // 245 0xf5
+      _ior_u4store        , // 246 0xf6
+      _ixor_u4store       , // 247 0xf7
+      _iload_0_iload      , // 248 0xf8
+      _iload_1_iload      , // 249 0xf9
+      _iload_2_iload      , // 250 0xfa
+      _iload_3_iload      , // 251 0xfb
+      _iload_0_iload_N    , // 252 0xfc
+      _iload_1_iload_N    , // 253 0xfd
+      _iload_2_iload_N    , // 254 0xfe
+      _iload_3_iload_N    , // 255 0xff
+#endif // HOTSPOT_ASM
 
 #endif // CPU_ZERO_VM_BYTECODES_ZERO_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/cppInterpreter_arm.S	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,7390 @@
+#ifdef __arm__
+
+@ Copyright 2009, 2010 Edward Nevill
+@ Copyright 2012, Red Hat
+@ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+@
+@ This code is free software; you can redistribute it and/or modify it
+@ under the terms of the GNU General Public License version 2 only, as
+@ published by the Free Software Foundation.
+@
+@ This code is distributed in the hope that it will be useful, but WITHOUT
+@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+@ version 2 for more details (a copy is included in the LICENSE file that
+@ accompanied this code).
+@
+@ You should have received a copy of the GNU General Public License version
+@ 2 along with this work; if not, write to the Free Software Foundation,
+@ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+#undef T2JIT
+#if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
+#define T2JIT
+#endif
+
+#ifdef HOTSPOT_ASM
+
+#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+#define ARMv4
+#endif
+
+#if defined(SHARK) || defined(T2JIT)
+
+#define USE_COMPILER
+
+#endif
+
+#ifdef USE_COMPILER
+
+#ifdef SHARK
+#define MP_COMPILE_THRESHOLD    0x10000         // 65536 - must be a single MOV constant
+#define UP_COMPILE_THRESHOLD    0x30000         // 196608 - must be a single MOV constant
+#else
+#define MP_COMPILE_THRESHOLD    0x1380		// ~ 5000 - must be a single MOV constant
+#define UP_COMPILE_THRESHOLD    0x1380		// ~ 5000 - must be a single MOV constant
+#endif
+
+#define MAX_FG_METHOD_SIZE      500
+
+#ifndef DISABLE_ON_STACK_REPLACEMENT
+#define ON_STACK_REPLACEMENT
+#endif
+#ifndef ENABLE_BG_COMP_ON_NON_MP
+#define DISABLE_BG_COMP_ON_NON_MP
+#endif
+
+#ifdef T2JIT
+#define FREQ_COUNT_OVERFLOW Thumb2_Compile
+#else
+#define FREQ_COUNT_OVERFLOW _ZN18InterpreterRuntime26frequency_counter_overflowEP10JavaThreadPh
+#endif
+
+#endif // USE_COMPILER
+
+#ifndef DISABLE_NOTICE_SAFEPOINTS
+#define NOTICE_SAFEPOINTS
+#endif
+#ifndef DISABLE_HW_NULL_PTR_CHECK
+#define HW_NULL_PTR_CHECK
+#endif
+#ifndef DISABLE_FAST_BYTECODES
+#define FAST_BYTECODES
+#endif
+#ifndef DISABLE_HW_FP
+#define HW_FP
+#endif
+
+#define LEAF_STACK_SIZE	200
+#define STACK_SPARE	40
+
+#define TBIT 1
+	
+#define stack		r4
+#define	jpc		r5
+#define dispatch	r6
+#define locals		r7
+#define istate		r8
+#define constpool	r9
+#define thread		r10
+#define arm_sp		r13
+
+#define tmp_xxx		r7
+#define tmp_yyy		r5
+#define tmp_vvv		r9
+
+#define tmp1		r11
+
+#define regset		r4,r5,r6,r7,r9,r10,r11
+#define fast_regset	r8
+
+#define Rframe	r7
+
+#define FRAME_METHOD		(ISTATE_METHOD-ISTATE_NEXT_FRAME)
+#define FRAME_CONSTANTS		(ISTATE_CONSTANTS-ISTATE_NEXT_FRAME)
+#define FRAME_BCP		(ISTATE_BCP-ISTATE_NEXT_FRAME)
+#define FRAME_STACK_LIMIT	(ISTATE_STACK_LIMIT-ISTATE_NEXT_FRAME)
+#define FRAME_LOCALS		(ISTATE_LOCALS-ISTATE_NEXT_FRAME)
+#define FRAME_STACK		(ISTATE_STACK-ISTATE_NEXT_FRAME)
+
+#include "offsets_arm.s"
+
+#define last_implemented_bytecode 201
+
+	.macro	ALIGN_CODE
+	.align	6
+	.endm
+
+	.macro	ALIGN_DATA
+	.align	6
+	.endm
+
+	.macro	ALIGN_OPCODE
+	.align	6
+	.endm
+
+	.macro	ALIGN_WORD
+	.align	2
+	.endm
+
+#define SLOW_ENTRY_OFFSET 24
+#define FAST_ENTRY_OFFSET 40
+
+	.macro	SLOW_ENTRY
+	ALIGN_CODE
+	.word	0, 0, 0, 0, 0, 0
+	.endm
+
+	.macro	FAST_ENTRY
+	ALIGN_CODE
+	.endm
+
+@------------------------------------------------
+@ Software NULL Pointer check macro.
+@ Usage:
+@	SW_NPC	cmp	obj, #0
+@	SW_NPC	beq	null_ptr_exception
+@------------------------------------------------
+	.macro	SW_NPC	p1, p2, p3, p4
+#ifndef HW_NULL_PTR_CHECK
+  .ifnes "\p4", ""
+	\p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+	\p1 \p2, \p3
+    .else
+	\p1 \p2
+    .endif
+  .endif
+#endif // HW_NULL_PTR_CHECK
+	.endm
+
+	.macro	HW_NPC	p1, p2, p3, p4
+#ifdef HW_NULL_PTR_CHECK
+  .ifnes "\p4", ""
+	\p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+	\p1 \p2, \p3
+    .else
+	\p1 \p2
+    .endif
+  .endif
+#endif // HW_NULL_PTR_CHECK
+	.endm
+
+@------------------------------------------------
+@ Fast Bytecode Macros FBC and NFBC
+@ Use these to conditionalise code on fast bytecodes being enabled
+@ EG:
+@	FBC	mov	r0, #opc_invokeresolved
+@	FBC	b	rewrite_bytecode
+@	NFBC	code to handle slow case
+@	NFBC	...
+@------------------------------------------------
+	.macro	FBC	p1, p2, p3, p4, p5
+#ifdef FAST_BYTECODES
+  .ifnes "\p5", ""
+	  \p1 \p2, \p3, \p4, \p5
+  .else
+    .ifnes "\p4", ""
+	  \p1 \p2, \p3, \p4
+    .else
+      .ifnes "\p3", ""
+	  \p1 \p2, \p3
+      .else
+	  \p1 \p2
+      .endif
+    .endif
+  .endif
+#endif
+	.endm
+
+	.macro	NFBC	p1, p2, p3, p4
+#ifndef FAST_BYTECODES
+  .ifnes "\p4", ""
+	\p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+	\p1 \p2, \p3
+    .else
+	\p1 \p2
+    .endif
+  .endif
+#endif
+	.endm
+
+@------------------------------------------------
+@ Notice Safepoints macro
+@ Usage:
+@	NSP	<notice safepoint specific code>
+@------------------------------------------------
+	.macro	NSP	p1, p2, p3, p4, p5
+#ifdef NOTICE_SAFEPOINTS
+  .ifnes "\p5", ""
+	  \p1 \p2, \p3, \p4, \p5
+  .else
+    .ifnes "\p4", ""
+	  \p1 \p2, \p3, \p4
+    .else
+      .ifnes "\p3", ""
+	  \p1 \p2, \p3
+      .else
+	  \p1 \p2
+      .endif
+    .endif
+  .endif
+#endif
+	.endm
+
+@------------------------------------------------
+@ Use Compiler macro
+@ Usage:
+@	USEC	<compiler specific code>
+@------------------------------------------------
+	.macro	USEC	p1, p2, p3, p4
+#ifdef USE_COMPILER
+  .ifnes "\p4", ""
+	\p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+	\p1 \p2, \p3
+    .else
+	\p1 \p2
+    .endif
+  .endif
+#endif
+	.endm
+
+@------------------------------------------------
+@ On stack replacement macro
+@ Usage:
+@       OSR     <compiler specific code>
+@------------------------------------------------
+        .macro  OSR     p1, p2, p3, p4
+#ifdef ON_STACK_REPLACEMENT
+  .ifnes "\p4", ""
+        \p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+        \p1 \p2, \p3
+    .else
+        \p1 \p2
+    .endif
+  .endif
+#endif
+        .endm
+@------------------------------------------------
+@ THUMB2 specific code macro
+@ Usage:
+@	T2	<thumb2 specific code>
+@------------------------------------------------
+	.macro	T2	p1, p2, p3, p4
+#ifdef T2JIT
+  .ifnes "\p4", ""
+        \p1 \p2, \p3, \p4
+  .else
+    .ifnes "\p3", ""
+        \p1 \p2, \p3
+    .else
+        \p1 \p2
+    .endif
+  .endif
+#endif
+        .endm
+
+@------------------------------------------------
+@ Rewrite pairs of bytecodes
+@
+@ The fast bytecodes that replace pairs of codes improve performance,
+@ but they cause races between threads and incorrect operation in some
+@ other cases too.  REWRITE_PAIRS disables rewriting bytecode pairs.
+@	
+@ Usage:
+@	REWRITE_PAIRS	<instruction>
+@------------------------------------------------
+	.macro	REWRITE_PAIRS	p1, p2, p3, p4
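+	@ Deliberately empty: the wrapped instruction is discarded, which is
+	@ how rewriting of bytecode pairs is disabled.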
+        .endm
+
+	.macro	Opcode	label
+	ALIGN_OPCODE
+do_\label:
+	.endm
+
+	.macro	GET_STACK	offset, reg
+	ldr	\reg, [stack, #(\offset+1) * 4]
+	.endm
+
+	.macro	PUT_STACK	offset, reg
+	str	\reg, [stack, #(\offset+1) * 4]
+	.endm
+
+#define PUSH	java_push
+	.macro	PUSH	reg1, reg2, reg3, reg4
+  .ifnes "\reg4", ""
+	stmda	stack!, {\reg1, \reg2, \reg3, \reg4}
+  .else
+    .ifnes "\reg3", ""
+	stmda	stack!, {\reg1, \reg2, \reg3}
+    .else
+      .ifnes "\reg2", ""
+	stmda	stack!, {\reg1, \reg2}
+      .else
+	str	\reg1, [stack], #-4
+      .endif
+    .endif
+  .endif
+	.endm
+
+#define POP	java_pop
+	.macro	POP	reg1, reg2, reg3, reg4
+  .ifnes "\reg4", ""
+	ldmib	stack!, {\reg1, \reg2, \reg3, \reg4}
+  .else
+    .ifnes "\reg3", ""
+	ldmib	stack!, {\reg1, \reg2, \reg3}
+    .else
+      .ifnes "\reg2", ""
+	ldmib	stack!, {\reg1, \reg2}
+      .else
+	ldr	\reg1, [stack, #4]!
+      .endif
+    .endif
+  .endif
+	.endm
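+
+@ Stack convention implied by the macros above: the Java expression stack
+@ grows downwards and stack points at the first free slot, so the top of
+@ stack lives at [stack, #4] (hence the +1 in GET_STACK/PUT_STACK).  PUSH
+@ stores and then decrements (str ..., #-4 / stmda); POP pre-increments and
+@ loads (ldr [stack, #4]! / ldmib).  A minimal sketch:
+@	PUSH	r0		@ r0 becomes the new top of stack
+@	GET_STACK 0, r2		@ peek it back into r2
+@	POP	r3		@ pop it into r3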
+
+	.macro POPF0
+#ifdef __ARM_PCS_VFP
+	flds s0, [stack, #4]
+	add stack, #4
+#else
+	POP r0
+#endif
+	.endm
+	
+	.macro POPF1
+#ifdef __ARM_PCS_VFP
+	flds s1, [stack, #4]
+	add stack, #4
+#else
+	POP r1
+#endif
+	.endm
+	
+	.macro POPD0
+#ifdef __ARM_PCS_VFP
+	flds s0, [stack, #4]
+	flds s1, [stack, #8]
+	add stack, #8
+#else
+	POP r0, r1
+#endif
+	.endm
+	
+	.macro POPD1
+#ifdef __ARM_PCS_VFP
+	flds s2, [stack, #4]
+	flds s3, [stack, #8]
+	add stack, #8
+#else
+	POP r2, r3
+#endif
+	.endm
+	
+	.macro PUSHF0
+#ifdef __ARM_PCS_VFP
+	add stack, #-4
+	fsts s0, [stack, #4]
+#else
+	PUSH r0
+#endif
+	.endm
+	
+	.macro PUSHD0
+#ifdef __ARM_PCS_VFP
+	add stack, #-8
+	fsts s0, [stack, #4]
+	fsts s1, [stack, #8]
+#else
+	PUSH r0, r1
+#endif
+	.endm
+	
+	.macro	LOAD_ISTATE
+	ldr	istate, [thread, #THREAD_TOP_ZERO_FRAME]
+	sub	istate, istate, #ISTATE_NEXT_FRAME
+	.endm
+
+	.macro	CACHE_JPC
+	ldr	jpc, [istate, #ISTATE_BCP]
+	.endm
+
+	.macro	CACHE_LOCALS
+	ldr	locals, [istate, #ISTATE_LOCALS]
+	.endm
+
+	.macro	CACHE_STACK
+	ldr	stack, [istate, #ISTATE_STACK]
+	.endm
+
+	.macro	CACHE_CP
+	ldr	constpool, [istate, #ISTATE_CONSTANTS]
+	.endm
+
+	.macro	DECACHE_STACK_USING_FRAME
+	str	stack, [Rframe, #FRAME_STACK]
+	.endm
+
+	.macro	DECACHE_STACK
+	str	stack, [istate, #ISTATE_STACK]
+	.endm
+
+	.macro	DECACHE_JPC_USING_FRAME
+	str	jpc, [Rframe, #FRAME_BCP]
+	.endm
+
+	.macro	DECACHE_JPC
+	str	jpc, [istate, #ISTATE_BCP]
+	.endm
+
+	.macro	BREAK_DISPATCH
+	ldr	r1, [dispatch, #DispatchBreakPoint-XXX]
+	cmp	r1, jpc
+	bleq	do_dispatch_break
+	.endm
+
+	.set	dispatch_state, 0
+
+	.macro	DISPATCH_STATE	state
+	.set	dispatch_state, \state
+	.endm
+
+	.macro	DISPATCH_START	step=0
+	.set	dispatch_state, 1
+	ldrb	r0, [jpc, #\step]!
+	.endm
+
+	.macro	DISPATCH_START_REG	reg
+	.set	dispatch_state, 1
+	ldrb	r0, [jpc, \reg]!
+	.endm
+
+	.macro	DISPATCH_START_R2_R0
+	.set	dispatch_state, 1
+	mov	r0, r2
+	.endm
+
+	.macro	DISPATCH_START_R2_JPC
+	.set	dispatch_state, 1
+	add	jpc, jpc, #1
+	.endm
+
+	.macro	DISPATCH_START_R2
+	.set	dispatch_state, 1
+	add	jpc, jpc, #1
+	mov	r0, r2
+	.endm
+
+	.macro	DISPATCH_1
+@        ldrb    r1, [jpc, #2]
+	.endm
+
+	.macro	DISPATCH_2
+        ldr     ip, [dispatch, r0, lsl #2]
+	.endm
+
+	.macro	DISPATCH_3
+        ldrb    r2, [jpc, #1]
+	.endm
+
+	.macro	DISPATCH_4
+        ands    lr, ip, #7
+	.endm
+
+	.macro	DISPATCH_NEXT
+    .if dispatch_state == 0
+	.error	"DISPATCH_FINISH without a DISPATCH_START or DISPATCH_STATE"
+    .elseif dispatch_state == 1
+	DISPATCH_1
+    .elseif dispatch_state == 2
+	DISPATCH_2
+    .elseif dispatch_state == 3
+	DISPATCH_3
+    .elseif dispatch_state == 4
+	DISPATCH_4
+    .else
+	.error "Too many DISPATCH_NEXTs"
+    .endif
+	.set	dispatch_state, dispatch_state + 1
+	.endm
+
+	@ This macro calls a user-supplied my_trace routine.  It
+	@ passes the current JPC as argument zero.  It can be safely
+	@ inserted at any point in the interpreter.
+ 	.macro TRACE
+	stmfd	sp!, {r0, r1, r2, r3, r4, ip, lr}
+	mrs	r4, cpsr
+	mov	r0, jpc
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        cmp        r1, #0
+	sub	r1, r1, #ISTATE_NEXT_FRAME
+        beq        0f
+        DECACHE_JPC
+	ldr	r2, =my_trace
+	blx	r2
+0:        msr        cpsr, r4
+	ldmfd	sp!, {r0, r1, r2, r3, r4, ip, lr}	
+	.endm
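+
+	@ Hypothetical use of TRACE (illustrative sketch only; my_trace is a
+	@ user-supplied routine that must be linked in):
+	@
+	@	Opcode	nop
+	@		TRACE		@ passes the current jpc to my_trace
+	@		DISPATCH 1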
+	
+	.macro	DISPATCH_FINISH
+    .if dispatch_state == 0
+	.error	"DISPATCH_FINISH without a DISPATCH_START or DISPATCH_STATE"
+    .elseif dispatch_state == 1
+	DISPATCH_1
+	DISPATCH_2
+	DISPATCH_3
+	DISPATCH_4
+    .elseif dispatch_state == 2
+	DISPATCH_2
+	DISPATCH_3
+	DISPATCH_4
+    .elseif dispatch_state == 3
+	DISPATCH_3
+	DISPATCH_4
+    .elseif dispatch_state == 4
+	DISPATCH_4
+    .endif
+@        TRACE
+        moveq   pc, ip
+	ldrb	r1, [jpc, lr]
+        bic     ip, ip, #7
+        ldr     pc, [ip, r1, lsl #2]
+	.set	dispatch_state, 0
+	.ltorg
+	.endm
+
+	.macro	DISPATCH_BYTECODE
+@        TRACE
+@        ldrb    r1, [jpc, #2]
+        ldr     ip, [dispatch, r0, lsl #2]
+        ldrb    r2, [jpc, #1]
+        ands    lr, ip, #7
+        moveq   pc, ip
+	ldrb	r1, [jpc, lr]
+        bic     ip, ip, #7
+        ldr     pc, [ip, r1, lsl #2]
+	.set	dispatch_state, 0
+	.endm
+
+	.macro	DISPATCH step=0
+@        TRACE
+	ldrb	r0, [jpc, #\step]!
+@        ldrb    r1, [jpc, #2]
+        ldr     ip, [dispatch, r0, lsl #2]
+        ldrb    r2, [jpc, #1]
+        ands    lr, ip, #7
+        moveq   pc, ip
+	ldrb	r1, [jpc, lr]
+        bic     ip, ip, #7
+        ldr     pc, [ip, r1, lsl #2]
+	.ltorg
+	.endm
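+
+@ How the staged dispatch above works: DISPATCH_START fetches the next opcode
+@ into r0; the DISPATCH_NEXT steps then load the handler word from the
+@ dispatch table into ip, fetch the byte after the opcode into r2, and test
+@ the low three bits of the table entry (lr).  DISPATCH_FINISH branches
+@ straight to the handler when those bits are zero; otherwise they give the
+@ offset of a further bytecode byte which indexes a second-level table at ip
+@ with the low bits masked off.  Spreading the steps through a handler body
+@ lets the table and operand loads overlap with useful work.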
+
+#define FFI_TYPE_VOID		0
+#define FFI_TYPE_FLOAT		2
+#define	FFI_TYPE_DOUBLE		3
+#define FFI_TYPE_BOOL		5
+#define	FFI_TYPE_SINT8		6
+#define FFI_TYPE_UINT16		7
+#define FFI_TYPE_SINT16		8
+#define FFI_TYPE_SINT32		10
+#define FFI_TYPE_SINT64		12
+#define FFI_TYPE_POINTER	14
+
+	.macro	_BLX	reg
+	mov	lr, pc
+	mov	pc, \reg
+	.endm
+
+	.macro	_BX	reg
+	mov	pc, \reg
+	.endm
+
+	.macro	_BXEQ	reg
+	moveq	pc, \reg
+	.endm
+
+	.macro	_BXNE	reg
+	movne	pc, \reg
+	.endm
+
+#ifdef ARMv4
+
+#define blx _BLX
+#define bx _BX
+#define bxeq _BXEQ
+#define bxne _BXNE
+	.arch armv4
+
+#else
+	.arch armv7-a
+#endif
+
+#ifdef HW_FP
+
+#ifdef __ARM_PCS_VFP
+ 	.fpu vfpv3-d16
+	.eabi_attribute Tag_ABI_HardFP_use, 3
+	.eabi_attribute Tag_ABI_VFP_args, 1
+#else // __ARM_PCS_VFP
+	.fpu vfp
+#endif // __ARM_PCS_VFP
+
+#else // HW_FP
+	.fpu softvfp
+#endif // HW_FP
+
+#ifndef	__ARM_ARCH_7A__
+#	define dmb VOLATILE_BARRIER
+#	define dmb_st VOLATILE_BARRIER
+#else
+#	define	dmb_st .inst   0xf57ff05e
+#endif
+
+#define StoreStoreBarrier dmb_st
+#define StoreLoadBarrier dmb
+#define FullBarrier dmb
+	
+	.macro	VOLATILE_BARRIER arg
+	stmfd	sp!, {r2, lr}
+	ldr	r2, =0xffff0fa0 @ kernel_dmb
+	blx	r2
+	ldmfd	sp!, {r2, lr}
+	.endm
+	
+	.macro	GO_IF_VOLATILE reg, cp_cache, label
+	ldr	\reg, [\cp_cache, #CP_OFFSET+CP_CACHE_FLAGS]
+	tst	\reg, #(1<<CP_CACHE_VOLATILE_FIELD_FLAG_BIT)
+	bne	\label
+	.set	dispatch_saved, dispatch_state
+	.endm
+
+	@ We have to save and restore the dispatch_state because
+	@ dispatching is done twice, once each for volatile and
+	@ non-volatile versions.  It's essential that dispatch_state
+	@ be correct at the entry to the volatile version of the
+	@ handler.
+
+	.macro VOLATILE_VERSION
+	.if dispatch_state == 0
+	.set	dispatch_state, dispatch_saved
+	.else
+	.error "VOLATILE_VERSION macro used before non-volatile DISPATCH_FINISH."
+	.endif
+	.endm
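+
+	@ Illustrative sketch (not a real handler) of how GO_IF_VOLATILE and
+	@ VOLATILE_VERSION are meant to pair up:
+	@
+	@	GO_IF_VOLATILE	r3, tmp1, 3f	@ volatile field? use barrier copy
+	@	... non-volatile fast path ...
+	@	DISPATCH_FINISH
+	@ 3:	VOLATILE_VERSION
+	@	... same handler with memory barriers ...
+	@	DISPATCH_FINISH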
+	
+	.eabi_attribute 20, 1 @ Tag_ABI_FP_denormal
+	.eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions
+	.eabi_attribute 23, 3 @ Tag_ABI_FP_number_model
+	.eabi_attribute 24, 1 @ Tag_ABI_align8_needed
+	.eabi_attribute 25, 1 @ Tag_ABI_align8_preserved
+	.eabi_attribute 26, 2 @ Tag_ABI_enum_size
+	.eabi_attribute 30, 2 @ Tag_ABI_optimization_goals
+	.eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t
+
+	.text
+
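+@ cmpxchg_ptr(new_value, dest, compare_value): atomic compare-and-swap of a
+@ word through the Linux kuser helper at 0xffff0fc0 (formed below as
+@ 0xffffffc0 with bits 12-15 cleared).  Returns compare_value when the swap
+@ succeeds, otherwise the value actually found at *dest.
+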
+	.global	cmpxchg_ptr
+	.type cmpxchg_ptr, %function
+cmpxchg_ptr:
+	stmfd	sp!, {r4, r5, r6, r7, r8, lr}
+	mov	r6, #0xffffffc0
+	mov	r4, r2
+	mov	r7, r0
+	mov	r5, r1
+	bic	r6, r6, #0xf000
+	mov	r8, r2
+1:
+	ldr	r3, [r5, #0]
+	mov	r0, r4
+	mov	r1, r7
+	mov	r2, r5
+	cmp	r4, r3
+	bne	2f
+	blx	r6
+	cmp	r0, #0
+	bne	1b
+	mov	r0, r8
+	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
+2:
+	mov	r8, r3
+	mov	r0, r8
+	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
+
+build_frame:
+	mov	r3, r0
+	ldr	r0, [r1, #METHOD_ACCESSFLAGS]
+	stmfd	arm_sp!, {r4, r5, r6, r7, r8}
+	ands	r7, r0, #JVM_ACC_SYNCHRONIZED
+	movne	r7, #2
+	tst	r0, #JVM_ACC_NATIVE
+	mov	r4, #0
+	movne	r5, #0
+	ldreqh	r6, [r1, #METHOD_MAXLOCALS]
+	ldrneh	r6, [r1, #METHOD_SIZEOFPARAMETERS]
+	ldreq	r0, [r3, #8]
+	subeq	r6, r6, #1
+	ldrne	r0, [r3, #8]
+	subne	r6, r6, #1
+	ldreqh	r5, [r1, #METHOD_MAXSTACK]
+	addeq	r6, r0, r6, asl #2
+	addne	r6, r0, r6, asl #2
+	sub	ip, r0, #4
+	str	ip, [r3, #8]
+	mov     ip, #INTERPRETER_FRAME
+	str	r4, [r0, #-4]
+	ldr	r0, [r3, #8]
+	sub	r8, r0, #4
+	str	r8, [r3, #8]
+	str	ip, [r0, #-4]
+	ldr	r8, [r3, #8]
+	sub	ip, r8, #68
+	str	ip, [r3, #8]
+	str	r2, [r8, #-68]
+	mov	r8, #0
+	str	r4, [ip, #44]
+	str	r6, [ip, #8]
+	str	r1, [ip, #16]
+	str	ip, [ip, #64]
+	ldr	r2, [r1, #METHOD_ACCESSFLAGS]
+	tst	r2, #JVM_ACC_NATIVE
+	mov	r2, #0
+	ldreq	r4, [r1, #METHOD_CONSTMETHOD]
+	addeq	r4, r4, #CONSTMETHOD_CODEOFFSET
+	str	r4, [ip, #4]
+	ldr	r4, [r1, #METHOD_CONSTMETHOD]
+	ldr	r4, [r4, #METHOD_CONSTANTS]
+	ldr	r4, [r4, #CONSTANTPOOL_CACHE]
+	str	r8, [ip, #28]
+	str	r2, [ip, #32]
+	str	r4, [ip, #12]
+	str	r2, [ip, #48]
+	str	r2, [ip, #20]
+	ldr	r2, [r3, #8]
+	str	r2, [ip, #60]
+	ldr	r2, [r1, #METHOD_ACCESSFLAGS]
+	tst	r2, #JVM_ACC_SYNCHRONIZED
+	beq	.L10
+	ldr	r2, [r3, #8]
+	sub	r7, r2, r7, asl #2
+	str	r7, [r3, #8]
+	ldr	r2, [r1, #METHOD_ACCESSFLAGS]
+	ldr	r4, [r1, #METHOD_CONSTMETHOD]
+	tst	r2, #JVM_ACC_STATIC
+	ldrne	r2, [r4, #METHOD_CONSTANTS]
+	ldreq	r2, [r6, #0]
+	ldrne	r2, [r2, #CONSTANTPOOL_POOL_HOLDER]
+	ldrne	r2, [r2, #KLASS_PART + KLASS_JAVA_MIRROR]
+	str	r2, [r7, #4]
+.L10:
+	ldr	r2, [r3, #8]
+	cmp	r5, #0
+	str	r2, [ip, #52]
+	ldr	r2, [r3, #8]
+	sub	r2, r2, #4
+	str	r2, [ip, #24]
+	ldrne	r2, [r3, #8]
+	ldreq	r5, [r3, #8]
+	subne	r5, r2, r5, asl #2
+	strne	r5, [r3, #8]
+	sub	r5, r5, #4
+	str	r5, [ip, #56]
+	ldmfd	arm_sp!, {r4, r5, r6, r7, r8}
+	bx	lr
+
+	ALIGN_CODE
+	.global	asm_generate_method_entry
+	.type asm_generate_method_entry, %function
+asm_generate_method_entry:
+	mov	r3, #AbstractInterpreter_number_of_method_entries
+	cmp	r3, #((3f-2f)/4) // i.e. sizeof asm_method_table
+	bne	vm_fatal_error
+	mov	r3, r0
+	mov	r0, #0
+#ifdef PRODUCT
+	// These entry points cannot be used when PRODUCT is
+	// undefined because the BytecodeInterpreter class has
+	// virtual methods, and so carries an extra word (the
+	// vtable pointer) at its beginning.
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r1, r2}
+	add	r1, r1, ip
+	add	r1, r1, r2		@ r1->dispatch
+
+	ldr	r2, [r1, #can_post_interpreter_events-XXX]
+	ldrb	r2, [r2]
+	cmp	r2, #0
+	bne	1f
+
+	ldr	r2, [r1, #PrintCommandLineFlags_Address-XXX]
+	ldrb	r2, [r2]
+	cmp	r2, #0
+	bne	1f
+
+	cmp	r3, #AbstractInterpreter_number_of_method_entries
+	adrcc	ip, asm_method_table
+	ldrcc	r0, [ip, r3, lsl #2]
+#endif // PRODUCT
+1:
+	bx	lr
+
+// This table must be kept in sync with
+// AbstractInterpreter::MethodKind.  Note that every entry must have a
+// corresponding fast entry point at addr + CODE_ALIGN_SIZE.
+asm_method_table:
+2:
+        .word   normal_entry                    // method needs locals initialization
+        .word   normal_entry_synchronized       // method needs locals initialization & is synchronized
+        .word   native_entry                    // native method
+        .word   native_entry_synchronized       // native method & is synchronized
+        .word   empty_entry                     // empty method (code: _return)
+        .word   accessor_entry                  // accessor method (code: _aload_0, _getfield, _(a|i)return)
+        .word   normal_entry                    // abstract method (throws an AbstractMethodException)
+	.org	(AbstractInterpreter_java_lang_math_sin*4)+asm_method_table
+        .word   normal_entry                    // implementation of java.lang.Math.sin   (x)
+        .word   normal_entry                    // implementation of java.lang.Math.cos   (x)
+        .word   normal_entry                    // implementation of java.lang.Math.tan   (x)
+        .word   normal_entry                    // implementation of java.lang.Math.abs   (x)
+        .word   normal_entry                    // implementation of java.lang.Math.sqrt  (x)
+        .word   normal_entry                    // implementation of java.lang.Math.log   (x)
+        .word   normal_entry                    // implementation of java.lang.Math.log10 (x)
+        .word   normal_entry                    // implementation of java.lang.Math.pow (x)
+        .word   normal_entry                    // implementation of java.lang.Math.exp (x)
+        .word   accessor_entry                  // implementation of java.lang.ref.Reference.get()
+3:
+	
+	SLOW_ENTRY
+native_entry_synchronized:
+	mov	r2, thread
+	b	_ZN14CppInterpreter12native_entryEP13methodOopDesciP6Thread
+
+	FAST_ENTRY
+fast_native_entry_synchronized:
+	mov	r2, thread
+	b	_ZN14CppInterpreter12native_entryEP13methodOopDesciP6Thread
+
+	SLOW_ENTRY
+empty_entry:
+        ldrh        r3, [r0, #METHOD_SIZEOFPARAMETERS]
+	ldr	r1, [r2, #THREAD_JAVA_SP]
+	add	r1, r1, r3, lsl #2
+	str	r1, [r2, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	bx	lr
+
+	FAST_ENTRY
+fast_empty_entry:
+        ldrh        r3, [r0, #METHOD_SIZEOFPARAMETERS]
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	add	r1, r1, r3, lsl #2
+	str	r1, [thread, #THREAD_JAVA_SP]
+	bx	lr
+
+@ ---- START execute.s ---------------------------------------------------------------------
+
+	.global	asm_check_null_ptr
+	.type asm_check_null_ptr, %function
+asm_check_null_ptr:
+
+#ifdef HW_NULL_PTR_CHECK
+
+#define uc_mcontext		20
+#define arm_registers_offset	12
+#define arm_cpsr_offset		16*4
+
+	add	r0, r0, #uc_mcontext + arm_registers_offset
+	ldr	r1, [r0, #15*4]
+	adr	ip, abort_table
+abort_loop:
+	ldr	r2, [ip], #8
+	cmp	r2, #0
+	beq	2f
+	cmp	r2, r1
+	bne	abort_loop
+
+	ldr	r3, [ip, #-4]
+	cmp	r3, #8
+	bcs	1f
+
+	ldr	ip, [r0, #5*4]
+	sub	ip, ip, r3
+	str	ip, [r0, #5*4]
+
+	adrl	r3, null_ptr_exception
+1:
+	str	r3, [r0, #15*4]
+do_setcontext:
+	mov	r0, #1
+	bx	lr
+#endif // HW_NULL_PTR_CHECK
+2:
+#ifdef T2JIT
+	b	Thumb2_Check_Null
+#else
+	mov	r0, #0
+	bx	lr
+#endif
+
+#ifdef HW_NULL_PTR_CHECK
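+@ Pairs of (faulting instruction address, recovery info) consulted by
+@ asm_check_null_ptr above.  A second word below 8 is the number of bytes to
+@ wind jpc back before resuming at null_ptr_exception; a larger value is used
+@ directly as the address to resume at.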
+abort_table:
+			.word	.abortentry5, 1
+			.word	.abortentry6, 1
+			.word	.abortentry7, 1
+			.word	.abortentry8, 1
+			.word	.abortentry9, 1
+			.word	.abortentry10, 1
+			.word	.abortentry11, 1
+			.word	.abortentry12, 1
+			.word	.abortentry13, 1
+
+		FBC	.word	.abortentry19, 1
+		FBC	.word	.abortentry20, 1
+		FBC	.word	.abortentry21, 1
+		FBC	.word	.abortentry22, 1
+		FBC	.word	.abortentry23, 1
+		FBC	.word	.abortentry24, 1
+		FBC	.word	.abortentry25, 1
+		FBC	.word	.abortentry26, 1
+		FBC	.word	.abortentry27, 1
+		FBC	.word	.abortentry28, 1
+		FBC	.word	.abortentry29, 1
+		FBC	.word	.abortentry30, 1
+		FBC	.word	.abortentry31, 1
+		FBC	.word	.abortentry32, 1
+
+		FBC	.word	.abortentry38, 2
+		FBC	.word	.abortentry39, 3
+		FBC	.word	.abortentry40, 4
+		FBC	.word	.abortentry41, 3
+		FBC	.word	.abortentry42, 2
+		FBC	.word	.abortentry42_1, 2
+		FBC	.word	.abortentry43, 0
+		FBC	.word	.abortentry44, 1
+		FBC	.word	.abortentry45, 3
+		FBC	.word	.abortentry46, 2
+		FBC	.word	.abortentry47, 0
+		FBC	.word	.abortentry48, 1
+		FBC	.word	.abortentry49, 0
+		FBC	.word	.abortentry50, 1
+		FBC	.word	.abortentry51, 0
+		FBC	.word	.abortentry52, 1
+
+		FBC	.word	.abortentry58, 2
+		FBC	.word	.abortentry59, 2
+		FBC	.word	.abortentry60, 2
+
+		FBC	.word	.abortentry73, 1
+		FBC	.word	.abortentry74, 1
+		FBC	.word	.abortentry75, 1
+		FBC	.word	.abortentry76, 1
+		FBC	.word	.abortentry77, 1
+
+	    FBC		.word	.abortentry78, 3
+	    FBC		.word	.abortentry78_v, 3
+	    FBC		.word	.abortentry79, 3
+	    FBC		.word	.abortentry79_v, 3
+	    FBC		.word	.abortentry80, 3
+	    FBC		.word	.abortentry80_v, 3
+	    FBC		.word	.abortentry81, 3
+	    FBC		.word	.abortentry81_v, 3
+	    FBC		.word	.abortentry82, 3
+	    FBC		.word	.abortentry82_v, 3
+	    FBC		.word	.abortentry83, 3
+	    FBC		.word	.abortentry83_v, 3
+	    FBC		.word	.abortentry84, 3
+	    FBC		.word	.abortentry84_v, 3
+	    FBC		.word	.abortentry85, 3
+	    FBC		.word	.abortentry85_v, 3
+	    FBC		.word	.abortentry86, 3
+	    FBC		.word	.abortentry86_v, 3
+	    FBC		.word	.abortentry87, 3
+	    FBC		.word	.abortentry87_v, 3
+
+	    FBC    	.word	.abortentry88, 3
+	    FBC    	.word	.abortentry88_v, 3
+	    FBC	   	.word	.abortentry89, 5
+	    FBC	     	.word	.abortentry90, 4
+	    FBC	     	.word	.abortentry91, 4
+	    FBC		.word	.abortentry104, 0
+		FBC	.word	.abortentry105, 1
+		FBC	.word	.abortentry106, 1
+		FBC	.word	.abortentry107, 1
+		FBC	.word	.abortentry108, 1
+		FBC	.word	.abortentry109, 1
+			.word	.abortentry110, 0
+
+		FBC	.word	.abortentry111, 3
+		FBC	.word	.abortentry112, 3
+
+		FBC	.word	.abortentry113, 0
+		FBC	.word	.abortentry113_v, 0
+			.word	.abortentry114, 1
+		FBC	.word	.abortentry117, 0
+			.word	.abortentry118, 0
+			.word	.abortentry119, 1
+	.word	0
+
+#endif
+
+
+	SLOW_ENTRY
+native_entry:
+	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2
+	bl	fast_native_entry	
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmia	sp!, {regset, pc}
+
+	FAST_ENTRY
+fast_native_entry:
+	adrl	ip, dispatch_init_adcon
+	mov	r11, r0
+	ldm	ip, {dispatch, r7}
+	stmdb	sp!, {fast_regset, lr}
+	add	dispatch, dispatch, ip
+	add	dispatch, dispatch, r7
+	ldrh	r1, [r11, #METHOD_SIZEOFPARAMETERS]
+	ldr	r4, [thread, #THREAD_JAVA_SP]
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	mov	r0, #0
+	mov	ip, #INTERPRETER_FRAME
+	sub	r9, r4, #FRAME_SIZE
+	str	r9, [thread, #THREAD_JAVA_SP]	@ drop stack
+	sub	r5, r9, #4		@ stack limit = r9 - 4
+	str	r3, [r9, #ISTATE_NEXT_FRAME]
+	str	ip, [r9, #ISTATE_FRAME_TYPE]
+	str	r9, [r9, #ISTATE_MONITOR_BASE]
+	str	r5, [r9, #ISTATE_STACK_LIMIT]
+	str	r9, [r9, #ISTATE_STACK_BASE]
+	str	r0, [r9, #ISTATE_OOP_TEMP]
+	str	r0, [r9, #ISTATE_MSG]
+
+        ldr     ip, [r11, #METHOD_CONSTMETHOD]
+	ldr	ip, [ip, #METHOD_CONSTANTS]
+	sub	r7, r4, #4
+	mov	r5, #0
+	add	r7, r7, r1, lsl #2
+
+	ldr	ip, [ip, #CONSTANTPOOL_CACHE]
+
+	str	thread, [r9, #ISTATE_THREAD]
+	str	r5, [r9, #ISTATE_BCP]
+	str	r7, [r9, #ISTATE_LOCALS]
+	str	ip, [r9, #ISTATE_CONSTANTS]
+	str	r11, [r9, #ISTATE_METHOD]
+	str     r9, [r9, #ISTATE_SELF_LINK]
+
+	ldr	r1, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+        add        r0, r9, #ISTATE_NEXT_FRAME
+
+	rsb	r3, r1, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #4096
+	str	r0, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r5, [r11, #METHOD_SIGNATUREHANDLER]
+	blt	.fast_native_entry_throw_stack_overflow
+	cmp	r5, #0
+	bne	.fast_native_entry_got_handleraddr
+	str	r5, [thread, #THREAD_LAST_JAVA_SP] @ r5 is zero at this point
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r0, [thread, #THREAD_JAVA_SP]
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	mov	r0, thread
+	mov	r1, r11
+	bl	_ZN18InterpreterRuntime19prepare_native_callEP10JavaThreadP13methodOopDesc
+	ldr	r11, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	str	r5, [thread, #THREAD_LAST_JAVA_SP]  @ r5 is zero at this point
+	str	r5, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r5, [thread, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_LAST_JAVA_SP]
+        ldr        r11, [r11, #-ISTATE_NEXT_FRAME + ISTATE_METHOD]
+	cmp	r1, #0
+	bne	.fast_native_entry_exception
+	ldr	r5, [r11, #METHOD_SIGNATUREHANDLER]
+.fast_native_entry_got_handleraddr:
+	ldr	r2, [dispatch, #InterpreterRuntime_slow_signature_handler_Address-XXX]
+	cmp	r5, r2
+	bne	.fast_native_entry_get_handler
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	stmfd	sp!, {r2}
+	mov	r2, #0
+        str     r2, [thread, #THREAD_LAST_JAVA_SP]
+	ldmfd	sp!, {r2}
+	mov	r0, thread
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r3, [thread, #THREAD_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	mov	r3, r2
+	mov	r1, r11
+	bl	_ZN18InterpreterRuntime22slow_signature_handlerEP10JavaThreadP13methodOopDescPiS4_
+	ldr	r11, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	mov	r3, #0
+        ldr        r11, [r11, #-ISTATE_NEXT_FRAME + ISTATE_METHOD]
+	cmp	r1, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	mov	r5, r0
+	bne	.fast_native_entry_exception
+.fast_native_entry_get_handler:
+
+	sub	ip, r7, r4
+	add	r3, r4, #ISTATE_OOP_TEMP-76
+
+	mov	ip, ip, asr #2
+	mov	r4, arm_sp
+
+	add	lr, ip, #4
+	sub	arm_sp, arm_sp, #16
+
+	bic	lr, lr, #1
+	add	r1, r5, #SIZEOF_FFI_CIF
+
+	sub	arm_sp, arm_sp, lr, lsl #2
+	add	r2, thread, #THREAD_JNI_ENVIRONMENT
+
+	mov	lr, arm_sp
+	str	r2, [lr], #4
+
+#ifdef __ARM_PCS_VFP
+	mov	thread, #0xff	@ bitmap for floating-point register set
+	orr	thread, #0xff00
+#endif	
+	ldr	r2, [r11, #METHOD_ACCESSFLAGS]
+	add	r1, r1, #4
+	tst	r2, #JVM_ACC_STATIC
+	beq	.do_fast_copy_args
+        ldr     r2, [r11, #METHOD_CONSTMETHOD]
+	ldr	r2, [r2, #METHOD_CONSTANTS]
+	ldr	r2, [r2, #CONSTANTPOOL_POOL_HOLDER]
+	str	r3, [lr], #4
+	ldr	r2, [r2, #KLASS_PART + KLASS_JAVA_MIRROR]
+	add	r1, r1, #4
+	str	r2, [r3]
+
+.do_fast_copy_args:
+	cmp	ip, #0
+	blt	.fast_no_args
+
+.fast_copy_args:
+	ldr	r0, [r1], #4
+	ldrh	r3, [r0, #6]
+	cmp	r3, #FFI_TYPE_DOUBLE
+	beq	.fast_copy_double
+	cmp	r3, #FFI_TYPE_FLOAT
+	beq	.fast_copy_float
+	ldr	r2, [r7], #-4
+	cmp	r3, #FFI_TYPE_SINT64
+	beq	.fast_copy_long
+
+	cmp	r3, #FFI_TYPE_POINTER
+	beq	.fast_copy_ptr
+
+	subs	ip, ip, #1
+	str	r2, [lr], #4
+	bge	.fast_copy_args
+	b	.fast_no_args
+
+#ifdef __ARM_PCS_VFP
+	// FIXME: These macros are very inefficient
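+	// FIND_LOWEST_BIT returns in \rd the index of the lowest set bit of
+	// \rs (the bitmap of free VFP argument registers) and clears that bit;
+	// FIND_LOWEST_BIT_PAIR does the same for an even-aligned pair of bits,
+	// allocating an s<2n>/s<2n+1> register pair for a double argument.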
+	.macro	FIND_LOWEST_BIT	rd, rs
+	mov	\rd, #0
+0:	tst	\rs, #1
+	lsr	\rs, #1
+	addeq	\rd, #1
+	beq	0b
+	lsl	\rs, \rd
+	lsl	\rs, #1
+	.endm
+	
+	.macro	FIND_LOWEST_BIT_PAIR rd, rs
+	stmfd	sp!, {r1}
+	stmfd	sp!, {\rs}
+	mov	\rd, #0
+0:	tst	\rs, #1
+	lsr	\rs, #2
+	addeq	\rd, #2
+	beq	0b
+	ldmfd	sp!, {\rs}
+	mov	r1, #3
+	lsl	r1, \rd
+	bic	\rs, r1
+	ldmfd	sp!, {r1}
+	.endm
+	
+.fast_copy_double:
+	orrs	thread, thread
+	ldreq	r2, [r7], #-4	
+	beq	vm_fatal_error
+	FIND_LOWEST_BIT_PAIR r0, thread
+	adrl	r2, .copy_double_table
+	add	pc, r2, r0, asl#5
+
+.fast_copy_float:
+	orrs	thread, thread
+	ldreq	r2, [r7], #-4	
+	beq	vm_fatal_error
+	FIND_LOWEST_BIT r0, thread
+	adr	r2, .copy_float_table
+	add	pc, r2, r0, asl#6
+#else
+
+.fast_copy_double:
+	ldr	r2, [r7], #-4
+	tst	lr, #4
+	ldr	r3, [r7], #-4
+	addne	lr, lr, #4
+	str	r2, [lr, #4]
+	subs	ip, ip, #2
+	str	r3, [lr], #8
+	bge	.fast_copy_args
+	b	.fast_no_args
+	
+.fast_copy_float:
+	ldr	r2, [r7], #-4
+	subs	ip, ip, #1
+	str	r2, [lr], #4
+	bge	.fast_copy_args
+
+#endif
+
+.fast_copy_long:
+	tst	lr, #4
+	ldr	r3, [r7], #-4
+	addne	lr, lr, #4
+	str	r2, [lr, #4]
+	subs	ip, ip, #2
+	str	r3, [lr], #8
+	bge	.fast_copy_args
+	b	.fast_no_args
+
+.fast_copy_ptr:
+	cmp	r2, #0
+	addne	r2, r7, #4
+	subs	ip, ip, #1
+	str	r2, [lr], #4
+	bge	.fast_copy_args
+
+.fast_no_args:
+	ldr     thread, [r9, #ISTATE_THREAD]
+	ldr	r0, [thread, #THREAD_TOP_ZERO_FRAME]
+	mov	r2, #_thread_in_native
+
+	mov	ip, #0
+	str	ip, [thread, #THREAD_LAST_JAVA_SP]
+
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_STATE]
+
+	ldr	r2, [thread, #THREAD_JAVA_SP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+
+	ldr	ip, [r11, #METHOD_NATIVEHANDLER]
+	ldrh	r11, [r11, #METHOD_SIZEOFPARAMETERS]
+
+	ldmia	arm_sp!, {r0, r1, r2, r3}
+	blx	ip
+
+	mov	ip, #_thread_in_native_trans
+	mov	arm_sp, r4
+
+	ldr	r3, [dispatch, #SafePointSynchronize_state_Address-XXX]
+	str	ip, [thread, #THREAD_STATE]
+
+	ldr	r3, [r3, #0]
+	cmp	r3, #0
+	ldreq	r3, [thread, #THREAD_SUSPEND_FLAGS]
+	cmpeq	r3, #0
+	bne	.fast_native_entry_do_special
+
+.fast_native_entry_do_return:
+	mov	r3, #_thread_in_Java
+	mov	r2, #0
+
+	str	r3, [thread, #THREAD_STATE]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_LAST_JAVA_FP]
+
+	add	r2, r5, #SIZEOF_FFI_CIF
+	ldr	r3, [r5, #4]
+
+	ldr	r5, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	ldr	lr, [r5], #4
+
+	add	r5, r5, r11, lsl #2
+
+	ldr	ip, [r2, r3, asl #2]
+	adr	r3, .return_type_table
+
+	ldrh	r2, [ip, #6]
+	ldr	ip, [thread, #THREAD_ACTIVE_HANDLES]
+
+	mov	tmp1, #0
+	ldr	pc, [r3, r2, lsl #2]
+
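+@ Jump table indexed by the libffi type code of the native method's return
+@ value.  Each target stores the C result back onto the caller's expression
+@ stack with the appropriate conversion (dereferencing the JNI handle for
+@ object results), then unwinds the frame and returns.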
+.return_type_table:
+	.word	.fast_native_return_void	@ FFI_TYPE_VOID	== 0
+	.word	0
+#ifdef __ARM_PCS_VFP
+	.word	.fast_native_return_float	@ FFI_TYPE_FLOAT == 2
+	.word	.fast_native_return_double	@ FFI_TYPE_DOUBLE == 3
+#else
+	.word	.fast_native_return_w		@ FFI_TYPE_FLOAT == 2
+	.word	.fast_native_return_dw		@ FFI_TYPE_DOUBLE == 3
+#endif
+	.word	0
+	.word	.fast_native_return_bool	@ FFI_TYPE_BOOL == 5
+	.word	.fast_native_return_byte	@ FFI_TYPE_SINT8 == 6
+	.word	.fast_native_return_char	@ FFI_TYPE_UINT16 == 7
+	.word	.fast_native_return_short	@ FFI_TYPE_SINT16 == 8
+	.word	0
+	.word	.fast_native_return_w		@ FFI_TYPE_SINT32 == 10
+	.word	0
+	.word	.fast_native_return_dw		@ FFI_TYPE_SINT64 == 12
+	.word	0
+	.word	.fast_native_return_obj		@ FFI_TYPE_POINTER == 14
+
+#ifdef __ARM_PCS_VFP
+.fast_native_return_double:
+	fsts	s0, [r5, #-8]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	fsts	s1, [r5, #-4]
+	add	r5, #-8
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_float:
+	fsts	s0, [r5, #-4]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	add	r5, #-4
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+#endif
+.fast_native_return_dw:
+	str	r0, [r5, #-8]!
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	str	r1, [r5, #4]
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_byte:
+	mov	r0, r0, lsl #24
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	mov	r0, r0, asr #24
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	str	r0, [r5, #-4]!
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_char:
+	mov	r0, r0, lsl #16
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	mov	r0, r0, lsr #16
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	str	r0, [r5, #-4]!
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_bool:
+	ands	r0, r0, #255
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	movne	r0, #1
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	str	r0, [r5, #-4]!
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_obj:
+	cmp	r0, #0
+	ldrne	r0, [r0]
+	str	r0, [r5, #-4]!
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+.fast_native_return_short:
+	mov	r0, r0, lsl #16
+	mov	r0, r0, asr #16
+.fast_native_return_w:
+	str	r0, [r5, #-4]!
+.fast_native_return_void:
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
+.fast_native_exit:
+	str	r5, [thread, #THREAD_JAVA_SP]
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+
+.fast_native_entry_throw_stack_overflow:
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	mov	r0, thread
+	bl	_ZN18InterpreterRuntime24throw_StackOverflowErrorEP10JavaThread
+	mov	r3, #0
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+.fast_native_entry_exception:
+	ldr	r5, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r3, [r5], #4
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	ldrh	r3, [r11, #METHOD_SIZEOFPARAMETERS]
+	add	r5, r5, r3, lsl #2
+	b	.fast_native_exit
+
+.fast_native_entry_do_special:
+	stmdb	arm_sp!, {r0, r1}
+	mov	r0, thread
+	bl	_ZN10JavaThread40check_special_condition_for_native_transEPS_
+	ldmia	arm_sp!, {r0, r1}
+	b	.fast_native_entry_do_return
+
+
+	
+#ifdef __ARM_PCS_VFP
+
+	.macro	COPY_FLOAT rs, rd, rcount
+	.align	6
+	flds	\rd, [\rs]
+	add	\rs, #-4
+	subs	\rcount, #1
+	bge	.fast_copy_args
+	b	.fast_no_args
+	.endm
+
+	.align	6
+.copy_float_table:
+	COPY_FLOAT r7, s0, ip
+	COPY_FLOAT r7, s1, ip
+	COPY_FLOAT r7, s2, ip
+	COPY_FLOAT r7, s3, ip
+	COPY_FLOAT r7, s4, ip
+	COPY_FLOAT r7, s5, ip
+	COPY_FLOAT r7, s6, ip
+	COPY_FLOAT r7, s7, ip
+	COPY_FLOAT r7, s8, ip
+	COPY_FLOAT r7, s9, ip
+	COPY_FLOAT r7, s10, ip
+	COPY_FLOAT r7, s11, ip
+	COPY_FLOAT r7, s12, ip
+	COPY_FLOAT r7, s13, ip
+	COPY_FLOAT r7, s14, ip
+	COPY_FLOAT r7, s15, ip
+	COPY_FLOAT r7, s16, ip
+	COPY_FLOAT r7, s17, ip
+	COPY_FLOAT r7, s18, ip
+	COPY_FLOAT r7, s19, ip
+	COPY_FLOAT r7, s20, ip
+	COPY_FLOAT r7, s21, ip
+	COPY_FLOAT r7, s22, ip
+	COPY_FLOAT r7, s23, ip
+	COPY_FLOAT r7, s24, ip
+	COPY_FLOAT r7, s25, ip
+	COPY_FLOAT r7, s26, ip
+	COPY_FLOAT r7, s27, ip
+	COPY_FLOAT r7, s28, ip
+	COPY_FLOAT r7, s29, ip
+	COPY_FLOAT r7, s30, ip
+	COPY_FLOAT r7, s31, ip
+
+	.macro	COPY_DOUBLE rs, rdlo, rdhi, rcount
+	.align	6
+	flds	\rdhi, [\rs]
+	flds	\rdlo, [\rs, #-4]
+	add	\rs, #-8
+	subs	\rcount, #2
+	bge	.fast_copy_args
+	b	.fast_no_args
+	.endm
+
+	.align	6
+.copy_double_table:
+	COPY_DOUBLE r7, s0, s1, ip
+	COPY_DOUBLE r7, s2, s3, ip
+	COPY_DOUBLE r7, s4, s5, ip
+	COPY_DOUBLE r7, s6, s7, ip
+	COPY_DOUBLE r7, s8, s9, ip
+	COPY_DOUBLE r7, s10, s11, ip
+	COPY_DOUBLE r7, s12, s13, ip
+	COPY_DOUBLE r7, s14, s15, ip
+	COPY_DOUBLE r7, s16, s17, ip
+	COPY_DOUBLE r7, s18, s19, ip
+	COPY_DOUBLE r7, s20, s21, ip
+	COPY_DOUBLE r7, s22, s23, ip
+	COPY_DOUBLE r7, s24, s25, ip
+	COPY_DOUBLE r7, s26, s27, ip
+	COPY_DOUBLE r7, s28, s29, ip
+	COPY_DOUBLE r7, s30, s31, ip
+#endif
+
+#include "bytecodes_arm.s"
+
+	Opcode	idiv
+
+	POP	r1
+	POP	r0
+	cmp	r1, #0
+	beq	divide_by_zero_exception
+	bl	__aeabi_idiv
+	PUSH	r0
+	DISPATCH 1
+
+	Opcode	idiv_clz
+
+	POP	r1
+	POP	r0
+	bl	int_div
+idiv_clz_ret:
+	PUSH	r0
+	DISPATCH 1
+
+	Opcode	irem
+
+	POP	r1
+	POP	r0
+	cmp	r1, #0
+	beq	divide_by_zero_exception
+	bl	__aeabi_idivmod
+	PUSH	r1
+	DISPATCH 1
+
+	Opcode	irem_clz
+
+	POP	r1
+	POP	r0
+	bl	int_rem
+irem_clz_ret:
+	PUSH	r0
+	DISPATCH 1
+
+	Opcode	goto
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+branch_taken:
+        orr     r2, r2, r1, lsl #8
+        DISPATCH_START_REG	r2
+	cmp	r2, #0
+	ble	do_backedge
+	DISPATCH_FINISH
+
+branch_taken_unsafe:
+	mov	r2, r2, lsl #24
+	orr	r2, r1, r2, asr #16
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
+  USEC	ble	do_backedge
+	DISPATCH_FINISH
+
+branch_taken_unsafe_1:
+	add	jpc, jpc, #1
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
+  USEC	ble	do_backedge
+	DISPATCH_FINISH
+
+branch_taken_unsafe_2:
+	add	jpc, jpc, #2
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
+  USEC	ble	do_backedge
+	DISPATCH_FINISH
+
+branch_taken_unsafe_3:
+	add	jpc, jpc, #3
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
+  USEC	ble	do_backedge
+	DISPATCH_FINISH
+
+branch_taken_unsafe_4:
+	add	jpc, jpc, #4
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
+  USEC	ble	do_backedge
+	DISPATCH_FINISH
+
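+@ Taken backward branches land here.  The USEC/OSR-guarded instructions bump
+@ the method's backedge and invocation counters and compare the backedge
+@ count against InterpreterInvocationLimit, falling into the on-stack
+@ replacement path (do_osr) on overflow.  A pending safepoint is honoured
+@ before the branch target is dispatched.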
+do_backedge:
+  USEC	ldr	tmp1, [istate, #ISTATE_METHOD]
+  OSR	ldr	lr, [dispatch, #InterpreterInvocationLimit_Address-XXX]
+  USEC	ldr	r1, [tmp1, #METHOD_BACKEDGECOUNTER]
+  USEC	ldr	ip, [tmp1, #METHOD_INVOCATIONCOUNTER]
+  USEC	add	r1, r1, #INVOCATIONCOUNTER_COUNTINCREMENT
+  OSR	ldr	lr, [lr]
+  USEC	add	ip, ip, #INVOCATIONCOUNTER_COUNTINCREMENT
+  USEC	str	r1, [tmp1, #METHOD_BACKEDGECOUNTER]
+#ifdef T2JIT
+  OSR	cmp	r1, lr
+#else
+  OSR	cmp	r1, lr, lsl #2
+#endif
+  USEC	str	ip, [tmp1, #METHOD_INVOCATIONCOUNTER]
+  OSR	bcs	do_osr
+
+osr_continue:
+	ldr	ip, [dispatch, #SafePointSynchronize_state_Address-XXX]
+	ldr	r1, [ip]
+	cmp	r1, #1
+	beq	do_synchronize
+	DISPATCH_STATE	1
+	DISPATCH_FINISH
+
+
+do_synchronize:
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, thread
+	bl	Helper_SafePoint
+	CACHE_CP
+	CACHE_JPC
+	cmp	r0, #0
+	bne	handle_exception
+	DISPATCH	0
+
+#ifdef ON_STACK_REPLACEMENT
+
+#ifdef T2JIT
+do_osr:
+	ldr	r3, [tmp1, #METHOD_CONSTMETHOD]
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, thread
+	sub	r1, jpc, r3
+	sub	r1, r1, #CONSTMETHOD_CODEOFFSET
+	bl	FREQ_COUNT_OVERFLOW
+1:
+	cmp	r0, #0
+	bne	call_thumb2
+	CACHE_CP
+	CACHE_JPC
+	DISPATCH_START	0
+	b	osr_continue
+
+#else
+
+do_osr:
+	ldr	ip, [dispatch, #UseOnStackReplacement_Address-XXX]
+	ldrb	ip, [ip]
+	cmp	ip, #0
+	beq	osr_continue
+
+	ldr	r3, [tmp1, #METHOD_CONSTMETHOD]
+	DECACHE_JPC
+	ldrh	r3, [r3, #CONSTMETHOD_CODESIZE]
+	DECACHE_STACK
+	mov	r0, thread
+	sub	r1, jpc, r2
+	cmp	r3, #MAX_FG_METHOD_SIZE
+	bcc	1f
+	ldr	tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
+	mov	r3, #1
+	ldr	r5, [tmp1]
+	str	r3, [tmp1]
+	bl	FREQ_COUNT_OVERFLOW
+	str	r5, [tmp1]
+	b	2f
+1:
+	bl	FREQ_COUNT_OVERFLOW
+2:
+	CACHE_CP
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	cmp	r1, #0
+	bne	handle_exception
+	cmp	r0, #0
+	beq	1f
+	ldr	r1, [r0, #56]
+	cmn	r1, #2
+	bne	osr_migrate
+1:
+	DISPATCH_START	0
+	b	osr_continue
+
+osr_migrate:
+	ldr	tmp1, [r0, #128]	@ osr_method->osr_entry()
+	mov	r0, thread
+	bl	_ZN13SharedRuntime19OSR_migration_beginEP10JavaThread
+	mov	r1, r0
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldrh	lr, [r0, #METHOD_MAXLOCALS]
+	ldrh	ip, [r0, #METHOD_SIZEOFPARAMETERS]
+	sub	lr, lr, ip
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	ip, r2, #4
+	ldr	r2, [r2]
+	add	ip, ip, lr, lsl #2
+	str	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	ip, [thread, #THREAD_JAVA_SP]
+	mov	r2, tmp1
+@ r0 = method
+@ r1 = osr_buf
+@ r2 = osr_entry
+	mov	lr, pc
+	ldr	pc, [tmp1]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+
+#endif // T2JIT
+
+#endif // ON_STACK_REPLACEMENT
+
+	Opcode	ifeq
+	Opcode	ifnull
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	beq	branch_taken
+	DISPATCH	3
+
+	Opcode	ifne
+	Opcode	ifnonnull
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	bne	branch_taken
+	DISPATCH	3
+
+	Opcode	iflt
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	blt	branch_taken
+	DISPATCH	3
+
+	Opcode	ifge
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	bge	branch_taken
+	DISPATCH	3
+
+	Opcode	ifgt
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	bgt	branch_taken
+	DISPATCH	3
+
+	Opcode	ifle
+        POP     r3
+        ldrsb   r1, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        cmp     r3, #0
+	ble	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmpeq
+	Opcode	if_acmpeq
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	beq	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmpne
+	Opcode	if_acmpne
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	bne	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmplt
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	blt	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmpge
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	bge	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmpgt
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	bgt	branch_taken
+	DISPATCH	3
+
+	Opcode	if_icmple
+        POP    r2, r3
+        ldrsb   r1, [jpc, #1]
+        cmp     r3, r2
+        ldrb    r2, [jpc, #2]
+	ble	branch_taken
+	DISPATCH	3
+
+	Opcode	ireturn
+	Opcode	freturn
+	Opcode	lreturn
+	Opcode	dreturn
+	Opcode	areturn
+	Opcode	return
+	ldr	r3, [dispatch, #SafePointSynchronize_state_Address-XXX]
+	ldr	r1, [r3]
+	cmp	r1, #1
+	bne	handle_return
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, thread
+	bl	Helper_SafePoint
+	CACHE_JPC
+	cmp	r0, #0
+	beq	handle_return
+	b	handle_exception
+
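+@ Out-of-line resolution of an unresolved field access.  The caller's return
+@ address is kept in tmp1 across the call to
+@ InterpreterRuntime::resolve_get_put; after checking for a pending
+@ exception the constant-pool cache entry is recomputed so the
+@ getfield/putfield handler can be restarted.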
+resolve_get_put:
+	mov	r1, r0
+	mov	tmp1, lr
+	@ stmfd	arm_sp!, {lr}
+	mov	r0, thread
+	DECACHE_JPC
+        DECACHE_STACK
+       	bl      _ZN18InterpreterRuntime15resolve_get_putEP10JavaThreadN9Bytecodes4CodeE
+        ldr     r3, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	CACHE_CP
+        cmp     r3, #0
+	mov	lr, tmp1
+	@ ldmfd	arm_sp!, {lr}
+        bne     getfield_exception
+@ Now restart the getfield
+        ldrb    r3, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+        orr     r3, r3, r2, lsl #8      @ r3 = index
+	add	tmp1, constpool, r3, lsl #4	@ tmp1 = cache
+	bx	lr
+
+accessor_non_w:
+	bcs	accessor_h
+	beq	accessor_sb
+	tst	r0, #2
+	bne	accessor_dw
+accessor_sh:
+	ldrsh	r0, [r3, r1]
+	str	r0, [ip, #0]
+	mov	r0, #0	@ deoptimized_frames = 0
+	bx	lr
+accessor_h:
+	ldrh	r0, [r3, r1]
+	str	r0, [ip, #0]
+	mov	r0, #0	@ deoptimized_frames = 0
+	bx	lr
+accessor_sb:
+	ldrsb	r0, [r3, r1]
+	str	r0, [ip, #0]
+	mov	r0, #0	@ deoptimized_frames = 0
+	bx	lr
+accessor_dw:
+	add	r0, r3, r1
+	ldm	r0, {r0, r1}
+	sub	ip, ip, #4
+	str	ip, [thread, #THREAD_JAVA_SP]
+	stmia	ip, {r0, r1}
+	mov	r0, #0	@ deoptimized_frames = 0
+	bx	lr
+
+	Opcode	getfield
+	ldrb	r1, [jpc, #2]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+	ldr	r3, [tmp1, #CP_OFFSET]
+        and     r3, r3, #0x00ff0000
+        cmp     r3, #opc_getfield << 16
+        blne    resolve_get_put
+  NFBC	POP	r3
+        ldr     r2, [tmp1, #CP_OFFSET+12]
+  NFBC	cmp	r3, #0
+  NFBC	beq	null_ptr_exception
+  NFBC	ldr	tmp1, [tmp1, #CP_OFFSET+8]
+        movs    r2, r2, lsr #29
+  FBC	movhi	r0, #opc_igetfield
+        bls     getfield_non_w
+  NFBC	ldr	tmp1, [r3, tmp1]
+  NFBC	PUSH	tmp1
+  NFBC	DISPATCH 3
+
+#ifdef FAST_BYTECODES
+rewrite_bytecode:
+	strb	r0, [jpc]
+	DISPATCH_BYTECODE
+#endif
+
+getfield_non_w:
+        bcs     getfield_h              @ C = 1 => R2 = 1
+        beq     getfield_sb             @ Z = 1 => R2 = 0
+        tst     r2, #2
+        bne     getfield_dw
+
+#ifdef FAST_BYTECODES
+getfield_sh:
+	mov	r0, #opc_sgetfield
+	b	rewrite_bytecode
+getfield_h:
+	mov	r0, #opc_cgetfield
+	b	rewrite_bytecode
+getfield_sb:
+	mov	r0, #opc_bgetfield
+	b	rewrite_bytecode
+getfield_dw:
+	mov	r0, #opc_lgetfield
+	b	rewrite_bytecode
+#else
+getfield_sh:
+	ldrsh	tmp1, [r3, tmp1]
+	PUSH	tmp1
+	DISPATCH 3
+getfield_h:
+	ldrh	tmp1, [r3, tmp1]
+	PUSH	tmp1
+	DISPATCH 3
+getfield_sb:
+	ldrsb	tmp1, [r3, tmp1]
+	PUSH	tmp1
+	DISPATCH 3
+getfield_dw:
+	add	r3, r3, tmp1
+	ldm	r3, {r2, tmp1}
+	PUSH	r2, tmp1
+	DISPATCH 3
+#endif
+
+	Opcode	putfield
+	ldrb	r1, [jpc, #2]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+	ldr	r3, [tmp1, #CP_OFFSET]
+        and     r3, r3, #0xff000000
+        cmp     r3, #opc_putfield << 24
+        blne    resolve_get_put
+        ldr     r2, [tmp1, #CP_OFFSET+12]
+  NFBC	ldr	tmp1, [tmp1, #CP_OFFSET+8]
+        movs    r2, r2, lsr #29
+	bls	putfield_non_w
+  FBC   mov	r0, #opc_iputfield
+	cmp	r2, #tos_atos >> 1
+  FBC	moveq	r0, #opc_aputfield
+  FBC	b	rewrite_bytecode
+  NFBC	beq	putfield_a
+  NFBC	POP	r2, r3
+  NFBC	cmp	r3, #0
+  NFBC	beq	null_ptr_exception
+  NFBC	str	r2, [r3, tmp1]
+  NFBC	DISPATCH 3
+
+putfield_non_w:
+	bcs	putfield_h
+	beq	putfield_sb
+	tst	r2, #2
+	bne	putfield_dw
+
+#ifdef FAST_BYTECODES
+putfield_sh:
+putfield_h:
+	mov	r0, #opc_cputfield
+	b	rewrite_bytecode
+putfield_sb:
+	mov	r0, #opc_bputfield
+	b	rewrite_bytecode
+putfield_dw:
+	mov	r0, #opc_lputfield
+	b	rewrite_bytecode
+#else
+putfield_sh:
+putfield_h:
+	POP	r2, r3
+	cmp	r3, #0
+	beq	null_ptr_exception
+	strh	r2, [r3, tmp1]
+	DISPATCH 3
+putfield_sb:
+	POP	r2, r3
+	cmp	r3, #0
+	beq	null_ptr_exception
+	strb	r2, [r3, tmp1]
+	DISPATCH 3
+putfield_dw:
+	POP	r2, r3, lr
+	cmp	lr, #0
+	beq	null_ptr_exception
+	add	tmp1, lr, tmp1
+	stm	tmp1, {r2, r3}
+	DISPATCH 3
+putfield_a:
+	POP	r2, r3
+	cmp	r3, #0
+	beq	null_ptr_exception
+	str	r2, [r3, tmp1]
+	mov	r0, r3
+	bl	Helper_aputfield
+	DISPATCH 3
+#endif
+
+getstatic_sh:
+	DISPATCH_START	3
+	ldrsh	tmp1, [r3, lr]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_h:
+	DISPATCH_START	3
+	ldrh	tmp1, [r3, lr]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_sb:
+	DISPATCH_START	3
+	ldrsb	tmp1, [r3, lr]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_dw:
+	DISPATCH_START	3
+	add	r3, r3, lr
+	ldm	r3, {r2, tmp1}
+	DISPATCH_NEXT
+	PUSH	r2, tmp1
+	DISPATCH_FINISH
+getstatic_w:
+	DISPATCH_START	3
+	ldr	tmp1, [r3, lr]
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+
+putstatic_sh:
+putstatic_h:
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	strh	tmp1, [r3, r2]
+	DISPATCH_FINISH
+putstatic_w:
+	cmp	lr, #tos_atos >> 1	@ >> 1 due to lsr #29 above
+	beq	putstatic_a
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	str	tmp1, [r3, r2]
+	DISPATCH_FINISH
+putstatic_sb:
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	strb	tmp1, [r3, r2]
+	DISPATCH_FINISH
+putstatic_dw:
+	DISPATCH_START	3
+	add	r2, r2, r3
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	stm	r2, {r3, tmp1}
+	DISPATCH_FINISH
+putstatic_a:
+	POP	tmp1
+	str	tmp1, [r3, r2]
+	mov	r0, r3
+	bl	Helper_aputfield
+	DISPATCH 3
+
+getstatic_volatile_sh:
+	DISPATCH_START	3
+	ldrsh	tmp1, [r3, lr]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_volatile_h:
+	DISPATCH_START	3
+	ldrh	tmp1, [r3, lr]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_volatile_sb:
+	DISPATCH_START	3
+	ldrsb	tmp1, [r3, lr]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_volatile_dw:
+	add	r3, r3, lr
+#ifndef	__ARM_ARCH_7A__
+	ldm	r3, {r2, tmp1}
+	FullBarrier
+	PUSH	r2, tmp1
+#else
+	ldrexd	r0, r1, [r3]
+	FullBarrier
+	PUSH	r0, r1
+#endif
+	DISPATCH	3
+getstatic_volatile_w:
+	DISPATCH_START	3
+	ldr	tmp1, [r3, lr]
+	FullBarrier
+	DISPATCH_NEXT
+	PUSH	tmp1
+	DISPATCH_FINISH
+
+putstatic_volatile_sh:
+putstatic_volatile_h:
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	StoreStoreBarrier
+	strh	tmp1, [r3, r2]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+putstatic_volatile_w:
+	cmp	lr, #tos_atos >> 1	@ >> 1 due to lsr #29 above
+	beq	putstatic_volatile_a
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	StoreStoreBarrier
+	str	tmp1, [r3, r2]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+putstatic_volatile_sb:
+	DISPATCH_START	3
+	POP	tmp1
+	DISPATCH_NEXT
+	StoreStoreBarrier
+	strb	tmp1, [r3, r2]
+	StoreLoadBarrier
+	DISPATCH_FINISH
+putstatic_volatile_dw:
+	add	ip, r2, r3
+	POP	r0, r1
+	StoreStoreBarrier
+#ifndef	__ARM_ARCH_7A__
+	stm	ip, {r0, r1}
+#else
+	// Data in r0 & r1, address in ip, r2 & r3 scratch
+0:	ldrexd	r2, r3, [ip]
+	strexd	r2, r0, r1, [ip]
+	teq	r2, #0
+	bne	0b
+#endif
+	DISPATCH_START	3
+	StoreLoadBarrier
+	DISPATCH_FINISH
+putstatic_volatile_a:
+	POP	tmp1
+	StoreStoreBarrier
+	str	tmp1, [r3, r2]
+	mov	r0, r3
+	bl	Helper_aputfield
+	DISPATCH 3
+
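+@ Shared out-of-line resolution for the invoke* bytecodes: r1 carries the
+@ bytecode being resolved into InterpreterRuntime::resolve_invoke, after
+@ which the index bytes are re-read and the refreshed constant-pool cache
+@ entry is returned in r0.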
+resolve_invokeinterface:
+	mov	r1, #opc_invokeinterface
+	b	resolve_invoke
+resolve_invokevirtual:
+	mov	r1, #opc_invokevirtual
+	b	resolve_invoke
+resolve_invokespecial:
+	mov	r1, #opc_invokespecial
+	b	resolve_invoke
+resolve_invokestatic:
+	mov	r1, #opc_invokestatic
+resolve_invoke:
+	mov	tmp1, lr
+	mov	r0, thread
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	_ZN18InterpreterRuntime14resolve_invokeEP10JavaThreadN9Bytecodes4CodeE
+	CACHE_JPC
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	CACHE_CP
+	cmp	r3, #0
+        ldrb    r3, [jpc, #1]
+        ldrb    r2, [jpc, #2]
+	bne	resolve_exception
+        orr     r3, r3, r2, lsl #8      @ r3 = index
+	add	r0, constpool, r3, lsl #4	@ r0 = cache
+	bx	tmp1
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode	new
+	ldrb	r1, [jpc, #2]
+	DECACHE_JPC
+	DECACHE_STACK
+	orr	r1, r1, r2, lsl #8
+	mov	r0, r8
+	bl	Helper_new
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #0
+	beq	handle_exception
+	PUSH	r0
+	DISPATCH 3
+
+bytecode_interpreter_str:
+	.ascii  __FILE__
+	.byte 0
+	ALIGN_WORD
+
+	Opcode	newarray
+	ldrb	r1, [jpc, #1]	@ zero_extendqisi2
+	ldr	r2, [stack, #4]
+	mov	r0, thread
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei
+	ldr	ip, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	CACHE_CP
+	cmp	ip, #0
+	ldr	r2, [thread, #THREAD_VM_RESULT]
+	bne	handle_exception
+	str	r2, [stack, #4]
+	str	ip, [thread, #THREAD_VM_RESULT]
+	DISPATCH	2
+
+	Opcode	anewarray
+	ldrb	r0, [jpc, #1]	@ zero_extendqisi2
+	ldr	r3, [stack, #4]
+	ldr	lr, [istate, #ISTATE_METHOD]
+	ldrb	r2, [jpc, #2]	@ zero_extendqisi2
+	orr	r2, r2, r0, asl #8
+	DECACHE_JPC
+	DECACHE_STACK
+	
+	ldr	r1, [lr, #METHOD_CONSTMETHOD]
+	ldr	r1, [r1, #METHOD_CONSTANTS]
+	mov	r0, thread
+	bl	_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii
+	ldr	ip, [thread, #THREAD_PENDING_EXC]
+	CACHE_JPC
+	CACHE_CP
+	cmp	ip, #0
+	ldr	r2, [thread, #THREAD_VM_RESULT]
+	bne	handle_exception
+	str	r2, [stack, #4]
+	str	ip, [thread, #THREAD_VM_RESULT]
+	DISPATCH	3
+
+	Opcode	arraylength
+	DISPATCH_START	1
+	ldr	r3, [stack, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	SW_NPC	cmp	r3, #0
+	SW_NPC	beq	null_ptr_exception_jpc_1
+.abortentry114:
+	ldr	r3, [r3, #8]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [stack, #4]
+	DISPATCH_FINISH
+
+	Opcode	athrow
+	ldr	r1, [stack, #4]
+	cmp	r1, #0
+	beq	null_ptr_exception
+	mov	r2, #0
+	mov	r0, thread
+	mov	r3, r2
+	bl	_ZN12ThreadShadow21set_pending_exceptionEP7oopDescPKci
+	b	handle_exception
+
+#define secondary_super_cache_offset_in_bytes	20
+#define tmp_chunk	locals
+#define tmp_hwm		stack
+#define	tmp_max		constpool
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode	checkcast
+	ldrb	r1, [jpc, #2]
+	DECACHE_JPC
+	DECACHE_STACK
+	orr	r1, r1, r2, lsl #8
+	mov	r0, r8
+	GET_STACK	0, r2
+	bl	Helper_checkcast
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #0
+	bne	handle_exception
+	DISPATCH 3
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode	instanceof
+	ldrb	r1, [jpc, #2]
+	DECACHE_JPC
+	DECACHE_STACK
+	orr	r1, r1, r2, lsl #8
+	mov	r0, r8
+	POP	r2
+	bl	Helper_instanceof
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #-1
+	beq	handle_exception
+	PUSH	r0
+	DISPATCH 3
+
+	Opcode	monitorenter
+	mov	r0, r8
+	POP	r1
+ 	DECACHE_JPC
+ 	DECACHE_STACK
+	bl	Helper_monitorenter
+	CACHE_JPC
+	CACHE_CP
+	CACHE_STACK		@ monitorenter may expand stack!!!	
+	cmp	r0, #0
+	bne	handle_exception
+	DISPATCH 1
+
+	Opcode	monitorexit
+	mov	r0, r8
+	POP	r1
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	Helper_monitorexit
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #0
+	bne	handle_exception
+	DISPATCH 1
+
+	ALIGN_CODE
+vm_fatal_error:
+	adr	r0, .fatal_filename
+	mov	r1, #99
+	bl	_Z28report_should_not_reach_herePKci
+	b	breakpoint
+.fatal_filename:
+	.ascii	"[Optimised Assembler Interpreter Loop]\000"
+
+// This extra entry point for vm_fatal_error (at vm_fatal_error +
+// CODE_ALIGN_SIZE) allows vm_fatal_error to be used as an entry point
+// in the asm_method_table.
+	ALIGN_CODE	
+	b	vm_fatal_error
+
+	ALIGN_WORD
+
+	Opcode	aastore
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, r8
+	POP	r1, r2, r3
+	bl	Helper_aastore
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #0
+	bne	handle_exception
+	DISPATCH 1
+
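+@ The wide prefix widens the local-variable index of the following
+@ load/store bytecode to 16 bits and is also legal before ret and iinc;
+@ any other following opcode raises InternalError.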
+	Opcode	wide
+	ldrb	r2, [jpc, #1]
+	ldrb	r1, [jpc, #2]	@ zero_extendqisi2
+	ldrb	r3, [jpc, #3]	@ zero_extendqisi2
+
+	sub	lr, r2, #opc_aload+1
+	cmp	lr, #opc_istore - (opc_aload+1)
+	bcc	wide_undef_opc_exception
+
+	sub	lr, r2, #opc_iload
+	cmp	r2, #opc_istore
+	subcs	lr, lr, #opc_istore - (opc_aload+1)
+	cmp	r2, #opc_astore+1
+
+	orr	r1, r3, r1, asl #8
+	adr	r3, wide_case_table
+	ldrcc	pc, [r3, lr, lsl #2]
+	
+	cmp	r2, #opc_ret
+	beq	do_wide_ret
+	cmp	r2, #opc_iinc
+	beq	do_wide_iinc
+wide_undef_opc_exception:
+	mov	r0, #VMSYMBOLS_InternalError
+	adr	r1, undef_opcode_msg
+	b	raise_exception_with_msg
+undef_opcode_msg:
+	.ascii  "undefined opcode\000"
+	ALIGN_WORD
+
+wide_case_table:
+        .word	case_wide_iload
+        .word	case_wide_lload
+        .word	case_wide_fload
+        .word	case_wide_dload
+        .word	case_wide_aload
+        .word	case_wide_istore
+        .word	case_wide_lstore
+        .word	case_wide_fstore
+        .word	case_wide_dstore
+        .word	case_wide_astore
+
+case_wide_iload:
+case_wide_fload:
+case_wide_aload:
+	ldr	r2, [locals, -r1, lsl #2]
+	PUSH	r2
+	DISPATCH	4
+case_wide_istore:
+case_wide_fstore:
+case_wide_astore:
+	POP	r2
+	str	r2, [locals, -r1, lsl #2]
+	DISPATCH	4
+case_wide_dload:
+case_wide_lload:
+	sub	r1, locals, r1, lsl #2
+	ldmda	r1, {r1, r2}
+	PUSH	r1, r2
+	DISPATCH	4
+case_wide_dstore:
+case_wide_lstore:
+	POP	r2, r3
+	sub	r1, locals, r1, lsl #2
+	stmda	r1, {r2, r3}
+	DISPATCH	4
+do_wide_ret:
+	ldr	r2, [istate, #ISTATE_METHOD]
+	ldr	r2, [r2, #METHOD_CONSTMETHOD]
+	ldr	r1, [locals, -r1, lsl #2]
+	add	jpc, r2, r1
+	DISPATCH	CONSTMETHOD_CODEOFFSET
+do_wide_iinc:
+	ldrsb	r2, [jpc, #4]
+	ldrb	r3, [jpc, #5]
+	orr	r2, r3, r2, lsl #8
+	ldr	r3, [locals, -r1, lsl #2]
+	add	r3, r3, r2
+	str	r3, [locals, -r1, lsl #2]
+	DISPATCH	6
+
+	Opcode	multianewarray
+	ldrb	tmp1, [jpc, #3]	@ zero_extendqisi2
+	mov	r0, thread
+	add	r1, stack, tmp1, lsl #2
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi
+	CACHE_JPC
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	CACHE_CP
+	cmp	r1, #0
+	ldr	r3, [thread, #THREAD_VM_RESULT]
+	bne	handle_exception
+	str	r3, [stack, tmp1, asl #2]!
+	str	r1, [thread, #THREAD_VM_RESULT]
+	sub	stack, stack, #4
+	DISPATCH	4
+
+	Opcode	jsr_w
+	ldr	r3, [istate, #ISTATE_METHOD]
+	ldr	r1, [r3, #METHOD_CONSTMETHOD]
+	rsb	r2, r1, jpc
+	sub	r2, r2, #CONSTMETHOD_CODEOFFSET - 5
+	str	r2, [stack], #-4
+	b	do_goto_w
+
+	Opcode	goto_w
+	add	r2, jpc, #1
+	ldrb	tmp1, [jpc, #1]	@ zero_extendqisi2
+	ldrb	r3, [r2, #3]	@ zero_extendqisi2
+	ldrb	r0, [r2, #1]	@ zero_extendqisi2
+	ldrb	ip, [r2, #2]	@ zero_extendqisi2
+	orr	r3, r3, tmp1, asl #24
+	orr	r3, r3, r0, asl #16
+	orr	r3, r3, ip, asl #8
+	cmp	r3, #0
+	add	jpc, jpc, r3
+	bgt	1f
+
+	ldr	r3, [dispatch, #SafePointSynchronize_state_Address-XXX]
+	ldr	r1, [r3]
+	cmp	r1, #1
+	bne	1f
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, thread
+	bl	Helper_SafePoint
+	CACHE_JPC
+	CACHE_CP
+	cmp	r0, #0
+	bne	handle_exception
+1:
+	DISPATCH	0
+
+	Opcode	breakpoint
+	mov	r2, jpc
+	DECACHE_STACK
+	DECACHE_JPC
+	mov	r0, thread
+	ldr	r1, [istate, #ISTATE_METHOD]
+	bl	_ZN18InterpreterRuntime24get_original_bytecode_atEP10JavaThreadP13methodOopDescPh
+	mov	tmp1, r0
+	mov	r0, thread
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	cmp	r3, #0
+	bne	handle_exception
+	ldr	r2, [istate, #ISTATE_BCP]
+	ldr	r1, [istate, #ISTATE_METHOD]
+	bl	_ZN18InterpreterRuntime11_breakpointEP10JavaThreadP13methodOopDescPh
+	CACHE_JPC
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	CACHE_CP
+	cmp	r3, #0
+	and	r0, tmp1, #255
+	bne	handle_exception
+	DISPATCH_BYTECODE
+
+#ifndef FAST_BYTECODES
+	Opcode	bgetfield
+	Opcode	cgetfield
+	Opcode	igetfield
+	Opcode	lgetfield
+	Opcode	sgetfield
+	Opcode	aputfield
+	Opcode	bputfield
+	Opcode	cputfield
+	Opcode	iputfield
+	Opcode	lputfield
+	Opcode	invokevfinal
+	Opcode	invokeresolved
+	Opcode	invokespecialresolved
+	Opcode	invokestaticresolved
+	Opcode	iaccess_0
+	Opcode	iload_0_iconst_N
+	Opcode	iload_iconst_N
+	Opcode	iadd_istore_N
+	Opcode	isub_istore_N
+	Opcode	iand_istore_N
+	Opcode	ior_istore_N
+	Opcode	ixor_istore_N
+	Opcode	iadd_u4store
+	Opcode	isub_u4store
+	Opcode	iand_u4store
+	Opcode	ior_u4store
+	Opcode	ixor_u4store
+	Opcode	fast_iload_iload
+	Opcode	fast_iload_iload_N
+	Opcode	fast_iload_N_iload
+	Opcode	fast_iload_N_iload_N
+#endif
+	Opcode	undefined
+	// Decache to get better diagnostic info
+	DECACHE_JPC
+	DECACHE_STACK
+	ldr	r2, [dispatch, #Bytecodes_name_Address-XXX]
+	ldrb	r3, [jpc, #0]	@ zero_extendqisi2
+	adrl	r0, bytecode_interpreter_str
+	cmp	r3, #last_implemented_bytecode+1
+	adrcs	ip, unknown_bytecode
+	ldrcc	ip, [r2, r3, asl #2]
+	adr	r2, unimplemented_opcode_msg
+	ldr	r1, =__LINE__
+	str	ip, [arm_sp, #-8]!
+ 	bl	Helper_report_fatal
+	b	breakpoint
+	.ltorg
+unimplemented_opcode_msg:
+	.ascii  "\011*** Unimplemented opcode: %d = %s\012\000"
+unknown_bytecode:
+	.ascii	"<unknown>\000"
+	ALIGN_WORD
+
+	Opcode	return_register_finalizer
+	ldr	r1, [locals, #0]
+	ldr	r3, [r1, #4]
+	ldr	r2, [r3, #KLASS_PART+KLASS_ACCESSFLAGS]
+	tst	r2, #JVM_ACC_HAS_FINALIZER
+	beq	handle_return
+	DECACHE_JPC
+	DECACHE_STACK
+	mov	r0, thread
+	bl	_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc
+	CACHE_JPC
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+@ CACHE_LOCALS & CACHE_CP not required for handle_return / handle_exception
+	cmp	r3, #0
+	beq	handle_return
+	b	handle_exception
+
+// This code is logically part of normal_entry_synchronized, but it's
+// been moved out because there is only a FAST_ENTRY_OFFSET sized gap
+// here.
+
+.normal_entry_return_synchronized:
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {regset, pc}
+	SLOW_ENTRY
+normal_entry_synchronized:
+	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+	rsb	r3, r7, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #32768
+	blt	stack_overflow_no_frame
+	add	lr, pc, #(.normal_entry_return_synchronized-(.fast_normal_entry1+4))
+.fast_normal_entry1:
+
+	FAST_ENTRY
+fast_normal_entry_synchronized:
+	stmfd	arm_sp!, {fast_regset, lr}
+
+	mov	tmp1, r0
+
+	ldrh	r2, [tmp1, #METHOD_MAXLOCALS]
+	ldrh	r3, [tmp1, #METHOD_SIZEOFPARAMETERS]
+	rsb	r8, r3, r2
+
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	ldrh	r0, [tmp1, #METHOD_MAXSTACK]
+	ldr	r3, [thread, #THREAD_JAVA_STACK_BASE]
+
+	sub	r5, r1, r8, lsl #2
+	sub	r5, r5, #FRAME_SIZE+STACK_SPARE+LEAF_STACK_SIZE
+	sub	r5, r5, r0, lsl #2
+	cmp	r3, r5
+	bcs	stack_overflow_before_frame
+
+	cmp	r8, #0
+	ble	.normal_entry_synchronized_no_locals
+
+	mov	r2, #0
+.zero_locals_synchronized:
+	subs	r8, r8, #1
+	str	r2, [r1, #-4]!
+	bgt	.zero_locals_synchronized
+	str	r1, [thread, #THREAD_JAVA_SP]
+
+.normal_entry_synchronized_no_locals:
+	mov	r2, thread
+	mov	r1, tmp1
+	add	r0, thread, #THREAD_JAVA_STACK_BASE
+	bl	build_frame
+	ldr	ip, [thread, #THREAD_TOP_ZERO_FRAME]
+	sub	istate, r0, #ISTATE_NEXT_FRAME
+	mov	r2, #0  @ set SP to zero before setting FP
+	str	r0, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r3, [thread, #THREAD_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	ip, [istate, #ISTATE_NEXT_FRAME]
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	add	dispatch, r1, r0
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r0, #METHOD_ACCESSFLAGS]
+	tst	r3, #JVM_ACC_SYNCHRONIZED
+	beq	1f
+
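+@ Lock the method's monitor: the lock object is local 0 for an instance
+@ method and the class mirror for a static method.  The displaced header is
+@ stored in the frame's monitor slot and the object header is claimed with a
+@ compare-and-swap (r3 is built as 0xffff0fc0, which looks like the ARM
+@ Linux __kernel_cmpxchg user helper); if the header has already been
+@ claimed we check for a recursive lock owned by this thread and otherwise
+@ fall back to InterpreterRuntime::monitorenter.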
+@ Do Synchronisation
+	CACHE_STACK
+	CACHE_LOCALS
+	ldr	r2, [r0, #METHOD_CONSTMETHOD]
+	tst	r3, #JVM_ACC_STATIC
+	ldrne	r3, [r2, #METHOD_CONSTANTS]
+	ldreq	tmp1, [locals, #0]
+	ldrne	r2, [r3, #CONSTANTPOOL_POOL_HOLDER]
+	ldrne	tmp1, [r2, #KLASS_PART+KLASS_JAVA_MIRROR]
+	ldr	r3, [tmp1, #0]
+	orr	tmp_xxx, r3, #1
+	ldr	ip, [istate, #ISTATE_MONITOR_BASE]
+	str	tmp_xxx, [ip, #-8]
+.normal_do_synchronisation_2:
+	ldr	tmp_vvv, [tmp1, #0]
+	cmp	tmp_xxx, tmp_vvv
+	bne	.normal_do_synchronisation_3
+	mov	r0, tmp_xxx
+	ldr	r1, [istate, #ISTATE_MONITOR_BASE]
+	sub	r1, r1, #8
+	mov	r2, tmp1
+	mov	r3, #0xffffffc0
+	bic	r3, r3, #0xf000
+	blx	r3
+	cmp	r0, #0
+	bne	.normal_do_synchronisation_2
+	b	1f
+.normal_do_synchronisation_3:
+	mov	r0, thread
+	bic	r1, tmp_xxx, #3
+	bl	JavaThread_is_lock_owned
+	cmp	r0, #0
+	beq	.normal_do_synchronisation_4
+	ldr	ip, [istate, #ISTATE_MONITOR_BASE]
+	mov	r3, #0
+	str	r3, [ip, #-8]
+	b	1f
+.normal_do_synchronisation_4:
+	ldr	r1, [istate, #ISTATE_MONITOR_BASE]
+	sub	r1, r1, #8
+	DECACHE_STACK
+	mov	r0, thread
+	bl	_ZN18InterpreterRuntime12monitorenterEP10JavaThreadP15BasicObjectLock
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	cmp	r3, #0
+	mov	r2, r0
+	bne	handle_exception_do_not_unlock
+1:
+  USEC ldr	r0, [istate, #ISTATE_METHOD]
+  USEC	ldr	r2, [r0, #METHOD_INVOCATIONCOUNTER]
+  USEC	ldr	lr, [dispatch, #InterpreterInvocationLimit_Address-XXX]
+  USEC	add	r2, r2, #INVOCATIONCOUNTER_COUNTINCREMENT
+  USEC	ldr	lr, [lr]
+  USEC	str	r2, [r0, #METHOD_INVOCATIONCOUNTER]
+  USEC	cmp	r2, lr
+  USEC	bcs	sync_method_entry_freq_count_overflow
+	CACHE_JPC
+	CACHE_LOCALS
+	CACHE_CP
+	DISPATCH	0
+
+#ifdef USE_COMPILER
+sync_method_entry_freq_count_overflow:
+        ldr     r3, [r0, #METHOD_CONSTMETHOD]
+        ldrh    r3, [r3, #CONSTMETHOD_CODESIZE]
+	mov	r1, #0
+	mov	r0, thread
+        cmp     r3, #MAX_FG_METHOD_SIZE
+        bcc     1f
+        ldr     tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
+        mov     r3, #1
+        ldr     r5, [tmp1]
+        str     r3, [tmp1]
+        bl      FREQ_COUNT_OVERFLOW
+        str     r5, [tmp1]
+        b       2f
+1:
+	bl	FREQ_COUNT_OVERFLOW
+2:
+  T2	cmp	r0, #0
+	CACHE_LOCALS
+  T2	bne	call_thumb2
+	CACHE_JPC
+	CACHE_CP
+	DISPATCH	0
+#endif
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode	invokeinterface
+	ldrb	r1, [jpc, #2]
+	DECACHE_STACK
+        add     r0, constpool, r1, lsl #12
+	add	r0, r0, r2, asl #4
+	DECACHE_JPC
+        ldr     r2, [r0, #CP_OFFSET]
+        and     r2, r2, #0x00ff0000
+        cmp     r2, #opc_invokeinterface << 16
+        blne    resolve_invokeinterface
+
+	ldr	r3, [r0, #CP_OFFSET+12]
+	and	r2, r3, #255
+	ldr	r2, [stack, r2, lsl #2]
+	SW_NPC	cmp	r2, #0
+	SW_NPC	beq	null_ptr_exception
+.abortentry110:
+	ldr	tmp1, [r2, #4]				@ rcvr->klass()
+	tst	r3, #flag_is_forced_virtual
+	bne	.invokeinterface_methodInterface
+
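+	@ Scan the receiver klass's itable for the interface named by the
+	@ cache entry; the vtable length is rounded up to an even word count
+	@ to find the start of the itable, and each itable header entry is a
+	@ {klass, offset} pair.  No match raises IncompatibleClassChangeError;
+	@ a null method in the matched block raises AbstractMethodError.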
+	ldr	lr, [r0, #CP_OFFSET+4]			@ lr = iclass
+
+	add	r1, tmp1, #INSTANCEKLASS_VTABLE_OFFSET
+	ldr	r2, [tmp1, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
+	ldr	ip, [tmp1, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
+	add	r2, r2, #1
+	bic	r2, r2, #1
+
+	add	r1, r1, r2, lsl #2
+
+	mov	r2, #0
+1:
+	cmp	r2, ip
+	beq	incompatibleclass_exception
+	ldr	r3, [r1], #8
+	add	r2, r2, #1
+	cmp	lr, r3
+	bne	1b
+
+	ldr	r3, [r0, #CP_OFFSET+8]
+	ldr	r2, [r1, #-4]
+	add	r3, tmp1, r3, lsl #2
+	ldr	tmp1, [r3, r2]
+	cmp	tmp1, #0
+	beq	abstractmethod_exception
+.invokeinterface_invoke:
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	mov	r1, #0
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip]
+
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
+#endif
+	blx	r3
+
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	add	dispatch, r1, r0
+
+	CACHE_LOCALS
+
+	CACHE_JPC
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
+	sub	stack, stack, #4
+
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	stmfd	arm_sp!, {r1}
+	mov	r1, #0
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	r2, r2, #4
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	ldmfd	arm_sp!, {r1}
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	DISPATCH_START	5
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	cmp	r3, #0
+	DISPATCH_NEXT
+	bne	invokeinterface_exception_fix
+	DISPATCH_NEXT
+	CACHE_CP
+	DISPATCH_FINISH
+
+.invokeinterface_methodInterface:
+	tst	r3, #flag_vfinalMethod
+	ldrne	tmp1, [r0, #CP_OFFSET+8]
+	bne	.invokeinterface_invoke
+	ldr	r1, [r0, #CP_OFFSET+8]
+	add	r3, tmp1, r1, lsl #2
+	ldr	tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+	b	.invokeinterface_invoke
+
+# r2 = [jpc, #1]
+	Opcode	invokedynamic
+	DECACHE_STACK
+	// Fetch index bytes from bytecode
+	ldrb	r0, [jpc, #2]
+	ldrb	r1, [jpc, #3]
+	ldrb	r3, [jpc, #4]
+	orr	r2, r2, r0, lsl #8
+	orr	r2, r2, r1, lsl #16
+	orr	r1, r2, r3, lsl #24
+	// The index is inverted, so we invert it back with MVN
+        mvn     r1, r1
+	// The pool entry is in R0
+        add     r0, constpool, r1, lsl #4
+
+	// Save the pool entry
+	stmfd	arm_sp!, {r0}
+
+	DECACHE_JPC
+	ldr     r1, [r0, #CP_OFFSET+4]  // Pointer to call site
+	// Already resolved?
+	cmp     r1, #0 
+	bleq    resolve_invokedynamic
+
+	// Get the offset from a call site to the corresponding target
+	// method handle
+	bl	Helper_target_offset_in_bytes
+	mov	lr, r0
+
+	// Restore the pool entry
+	ldmfd	arm_sp!, {r0}
+
+	ldr	r0, [r0, #CP_OFFSET+4]	// Call site
+.abortentry119:
+	ldr	r0, [r0, lr]		// Method handle
+	mov	r1, thread
+
+	// Call the target method
+	bl	_ZN14CppInterpreter21process_method_handleEP7oopDescP6Thread
+
+	// Load up the interpreter registers.  Probably not necessary
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	add	dispatch, r1, r0
+
+	CACHE_LOCALS
+
+	CACHE_JPC
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
+	sub	stack, stack, #4
+
+	// Fix up everything in the thread state to point to the
+	// current frame
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	stmfd	arm_sp!, {r1}
+	mov	r1, #0
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	r2, r2, #4
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	ldmfd	arm_sp!, {r1}
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	DISPATCH_START	5
+	// Test for an exception
+	ldr	r3, [thread, #4]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	cmp	r3, #0
+	DISPATCH_NEXT
+	bne	invokedynamic_exception_fix
+	DISPATCH_NEXT
+	CACHE_CP
+	DISPATCH_FINISH
+
+resolve_invokedynamic:
+	stmfd	arm_sp!, {lr}
+	ldr	r0, [istate, #ISTATE_THREAD]
+	bl	_ZN18InterpreterRuntime21resolve_invokedynamicEP10JavaThread
+	ldmfd	arm_sp!, {pc}
+
+	// Handler for java.lang.invoke.MethodHandles::invoke
+	ALIGN_CODE
+method_handle_entry:
+	stmfd   arm_sp!, {thread, lr}
+	mov     thread, r2
+	bl      fast_method_handle_entry
+	ldmfd   arm_sp!, {thread, pc}
+	ALIGN_CODE
+fast_method_handle_entry:
+	stmfd	arm_sp!, {regset, lr}
+	mov	r2, thread
+	bl	_ZN14CppInterpreter19method_handle_entryEP13methodOopDesciP6Thread
+	ldmia	sp!, {regset, pc}
+
+#ifdef FAST_BYTECODES
+	// Common code for fast_aldc and fast_aldc_w
+# r0 = constpool cache entry
+	.macro	aldc	opc, seq_len
+	// Save the pool entry
+	stmfd	arm_sp!, {r0}
+
+	DECACHE_JPC
+	ldr     r1, [r0, #CP_OFFSET+4]  // Pointer to call site
+	// Already resolved?
+	cmp     r1, #0 
+	mov	r0, thread
+	mov	r1, #\opc
+	bleq    _ZN18InterpreterRuntime11resolve_ldcEP10JavaThreadN9Bytecodes4CodeE
+
+	// Restore the pool entry
+	ldmfd	arm_sp!, {r0}
+
+	ldr     r1, [r0, #CP_OFFSET+4]  // Pointer to MethodHandle
+	PUSH	r1
+
+	// Test for an exception
+	ldr	r3, [thread, #4]
+	cmp	r3, #0
+	bne	handle_exception
+	
+	DISPATCH	\seq_len
+	.endm
+
+	// Handler for ldc MethodHandle
+# r2 = [jpc, #1]
+	Opcode fast_aldc
+	DECACHE_STACK
+        add     r0, constpool, r2, lsl #4
+	aldc opc_fast_aldc, 2
+
+	// Handler for ldc_w MethodHandle
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode fast_aldc_w
+	// Fetch index bytes from bytecode
+	ldrb	r1, [jpc, #2]
+	DECACHE_STACK
+	orr	r1, r2, r1, lsl #8
+        add     r0, constpool, r1, lsl #4
+	aldc opc_fast_aldc_w, 3
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+	Opcode	invokevfinal
+	ldrb	r1, [jpc, #2]
+	DECACHE_STACK
+	add	r0, constpool, r1, lsl #12
+	DECACHE_JPC
+	add	r0, r2, asl #4
+	ldr	r3, [r0, #CP_OFFSET+12]
+	and	r1, r3, #255
+	ldr	r2, [stack, r1, asl #2]
+	mov	r1, #0
+	SW_NPC	cmp	r2, #0
+	SW_NPC	beq	null_ptr_exception
+.abortentry117:
+	HW_NPC	ldr	r3, [r2]		@ Only to provoke abort
+
+	ldr	tmp1, [r0, #CP_OFFSET+8]
+
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip, #0]
+	b	normal_dispatch_and_return
+#endif // FAST_BYTECODES
+
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+        Opcode  invokevirtual
+	ldrb	r1, [jpc, #2]
+        add     r0, constpool, r1, lsl #12
+	add	r0, r0, r2, asl #4
+        ldr     r2, [r0, #CP_OFFSET]
+        and     r2, r2, #0xff000000
+        cmp     r2, #opc_invokevirtual << 24
+        blne    resolve_invokevirtual
+        ldr     r3, [r0, #CP_OFFSET+12]
+#ifdef FAST_BYTECODES
+        mov     r0, #opc_invokeresolved
+        tst     r3, #flag_vfinalMethod
+        movne   r0, #opc_invokevfinal
+        b       rewrite_bytecode
+#else
+        DECACHE_STACK
+	DECACHE_JPC
+        and     r1, r3, #255
+        ldr     r2, [stack, r1, asl #2]
+        mov     r1, #0
+        cmp     r2, #0
+        beq     null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]
+        tst     r3, #flag_vfinalMethod
+        bne     1f
+
+        ldr     r3, [r2, #4]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+1:
+#endif // FAST_BYTECODES
+
+#ifdef FAST_BYTECODES
+# r2 = [jpc, #1]
+# r1 = [jpc, #2]
+        Opcode  invokeresolved
+	ldrb	r1, [jpc, #2]
+        DECACHE_STACK
+        add     r0, constpool, r1, lsl #12
+	DECACHE_JPC
+        add     r0, r0, r2, asl #4
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r1, r3, #255
+        ldr     r2, [stack, r1, asl #2]
+        mov     r1, #0
+        SW_NPC	cmp     r2, #0
+        SW_NPC	beq     null_ptr_exception_jpc_0
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]
+.abortentry104:
+        ldr     r3, [r2, #4]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+#endif // FAST_BYTECODES
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+normal_dispatch_and_return:
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
+#endif
+	blx	r3
+
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	add	dispatch, r1, r0
+
+	CACHE_LOCALS
+
+	CACHE_JPC
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
+	sub	stack, stack, #4
+
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	DISPATCH_START	3
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	cmp	r3, #0
+	DISPATCH_NEXT
+
+	bne	invoke_exception_fix
+	DISPATCH_NEXT
+	CACHE_CP
+	DISPATCH_FINISH
+
+	Opcode	invokestatic
+	ldrb	r1, [jpc, #2]
+        add     r0, constpool, r1, lsl #12
+	add	r0, r0, r2, asl #4
+        ldr     r2, [r0, #CP_OFFSET]
+	and	r2, r2, #0x00ff0000
+	cmp	r2, #opc_invokestatic << 16
+	blne	resolve_invokestatic
+  FBC	mov	r0, #opc_invokestaticresolved
+  FBC	b	rewrite_bytecode
+
+  FBC	Opcode	invokestaticresolved
+  FBC	ldrb	r1, [jpc, #2]
+        DECACHE_STACK
+  FBC   add     r0, constpool, r1, lsl #12
+	DECACHE_JPC
+  FBC	add	r0, r2, asl #4
+
+	ldr	tmp1, [r0, #CP_OFFSET+4]
+	mov	r1, #0
+	ldr	r3, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [r3, #0]
+	b	normal_dispatch_and_return
+
+
+	Opcode	invokespecial
+     	ldrb	r1, [jpc, #2]
+        add     r0, constpool, r1, lsl #12
+	add	r0, r0, r2, asl #4
+        ldr     r2, [r0, #CP_OFFSET]
+        and     r2, r2, #0x00ff0000
+        cmp     r2, #opc_invokespecial << 16
+        blne     resolve_invokespecial
+  FBC	mov	r0, #opc_invokespecialresolved
+  FBC	b	rewrite_bytecode
+
+  FBC	Opcode	invokespecialresolved
+  FBC	ldrb	r1, [jpc, #2]
+        DECACHE_STACK
+  FBC   add     r0, constpool, r1, lsl #12
+	DECACHE_JPC
+  FBC	add	r0, r2, asl #4
+
+	ldr	r3, [r0, #CP_OFFSET+12]
+	and	r3, r3, #255
+	ldr	r2, [stack, r3, asl #2]
+	mov	r1, #0
+	SW_NPC	cmp	r2, #0
+	SW_NPC	beq	null_ptr_exception
+.abortentry118:
+	HW_NPC	ldr	r3, [r2]		@ Only to provoke abort
+
+	ldr	tmp1, [r0, #CP_OFFSET+4]
+
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip, #0]
+	b	normal_dispatch_and_return
+
+// This code is logically part of normal_entry, but it's been moved
+// out because there is only a FAST_ENTRY_OFFSET sized gap here.
+
+.normal_entry_return:
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {regset, pc}
+	SLOW_ENTRY
+normal_entry:
+	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+	rsb	r3, r7, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #32768
+	blt	stack_overflow_no_frame
+	add	lr, pc, #(.normal_entry_return-(.normal_entry1+4))
+.normal_entry1:
+
+	FAST_ENTRY
+fast_normal_entry:
+@	TRACE
+	adrl	ip, dispatch_init_adcon
+	mov	tmp1, r0
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	add	dispatch, r1, r0
+
+	stmdb	arm_sp!, {fast_regset, lr}
+
+	ldrh	r0, [tmp1, #METHOD_MAXLOCALS]
+	mov	r1, #0
+	ldrh	r3, [tmp1, #METHOD_SIZEOFPARAMETERS]
+        mov     ip, #INTERPRETER_FRAME
+	ldrh	r2, [tmp1, #METHOD_MAXSTACK]
+        sub     r7, r0, r3
+
+	ldr	r3, [thread, #THREAD_JAVA_STACK_BASE]
+	sub	r5, stack, r7, lsl #2
+	sub	r5, r5, #FRAME_SIZE+STACK_SPARE+LEAF_STACK_SIZE
+	sub	r5, r5, r2, lsl #2
+	cmp	r3, r5
+	bcs	stack_overflow_before_frame
+
+        subs    r5, r7, #2
+        tst     r7, #1
+        strne   r1, [stack, #-4]!  // stack->push(0);
+        bcc     3f
+1:
+        str     r1, [stack, #-4]
+        str     r1, [stack, #-8]!
+        subs    r5, r5, #2
+        bcs     1b
+3:
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+	mov	lr, #0
+        sub     istate, stack, #FRAME_SIZE     // stack->push(INTERPRETER_FRAME);
+        sub     r2, istate, r2, lsl #2
+        str     lr, [istate, #ISTATE_MSG]
+	str	r2, [thread, #THREAD_JAVA_SP]
+        sub     r5, r2, #4                      @ stack limit = istate - stackwords - 4
+	str	r3, [istate, #ISTATE_NEXT_FRAME]
+	str	ip, [istate, #ISTATE_FRAME_TYPE]
+	str	istate, [istate, #ISTATE_MONITOR_BASE]
+	str	r5, [istate, #ISTATE_STACK_LIMIT]
+	str	istate, [istate, #ISTATE_STACK_BASE]
+        sub     locals, stack, #4
+	str	r1, [istate, #ISTATE_OOP_TEMP]
+        add     locals, locals, r0, lsl #2
+        sub     stack, istate, #4
+        ldr     jpc, [tmp1, #METHOD_CONSTMETHOD]
+        ldr     constpool, [jpc, #METHOD_CONSTANTS]
+        add     ip, istate, #ISTATE_NEXT_FRAME
+	DISPATCH_START	CONSTMETHOD_CODEOFFSET
+        ldr     constpool, [constpool, #CONSTANTPOOL_CACHE]
+        str     ip, [thread, #THREAD_TOP_ZERO_FRAME]
+  USEC	ldr	r3, [tmp1, #METHOD_INVOCATIONCOUNTER]
+	mov	r1, #0
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+        str     ip, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	ip, [thread, #THREAD_JAVA_SP]
+        str     ip, [thread, #THREAD_LAST_JAVA_SP]
+	DISPATCH_NEXT
+  USEC	ldr	lr, [dispatch, #InterpreterInvocationLimit_Address-XXX]
+  USEC	add	r3, r3, #INVOCATIONCOUNTER_COUNTINCREMENT
+        str     thread, [istate, #ISTATE_THREAD]
+  USEC	ldr	lr, [lr]
+  USEC	str	r3, [tmp1, #METHOD_INVOCATIONCOUNTER]
+	str	locals, [istate, #ISTATE_LOCALS]
+  USEC	cmp	r3, lr
+	str	constpool, [istate, #ISTATE_CONSTANTS]
+        str        tmp1, [istate, #ISTATE_METHOD]
+        str        istate, [istate, #ISTATE_SELF_LINK]
+  USEC	bcs	method_entry_freq_count_overflow
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+@	mov	lr, #0
+@        str     lr, [istate, #ISTATE_PREV_LINK]
+@	str	lr, [istate, #ISTATE_CALLEE]
+	DISPATCH_FINISH
+#ifdef USE_COMPILER
+method_entry_freq_count_overflow:
+        ldr     r3, [tmp1, #METHOD_CONSTMETHOD]
+	DECACHE_JPC
+        ldrh    r3, [r3, #CONSTMETHOD_CODESIZE]
+	str	tmp1, [istate, #ISTATE_METHOD]
+	mov	r1, #0
+	mov	r0, thread
+        cmp     r3, #MAX_FG_METHOD_SIZE
+        bcc     1f
+        ldr     tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
+        mov     r3, #1
+        ldr     r5, [tmp1]
+        str     r3, [tmp1]
+        bl      FREQ_COUNT_OVERFLOW
+        str     r5, [tmp1]
+        b       2f
+1:
+	bl	FREQ_COUNT_OVERFLOW
+2:
+ T2	cmp	r0, #0
+ T2	bne	call_thumb2
+	CACHE_JPC
+	CACHE_CP
+	DISPATCH	0
+
+#ifdef T2JIT
+
+#define JAZ_V1	r5
+#define JAZ_V2	r6
+#define JAZ_V3	r7
+#define JAZ_V4	r8
+#define	JAZ_V5	r9
+#define	JAZ_V6	r11
+
+#define JAZ_REGSET	JAZ_V1,JAZ_V2,JAZ_V3,JAZ_V4,JAZ_V5,JAZ_V6
+#define JAZ_REGSET_LEN	6
+
+call_thumb2:
+	str	istate, [istate, #ISTATE_SELF_LINK]
+	stmdb	sp!, {JAZ_REGSET}
+	mov	ip, #0
+3:
+	ldrsh	r3, [r1], #2
+	cmp	r3, #-1
+	ldrne	r3, [locals, -r3, lsl #2]
+	strne	r3, [sp, ip, lsl #2]
+	add	ip, ip, #1
+	cmp	ip, #JAZ_REGSET_LEN
+	bne	3b
+
+	ldmia	sp!, {JAZ_REGSET}
+1:
+	add	stack, stack, #4
+	bx r0
+#endif // T2JIT
+
+#endif // USE_COMPILER
+	.global	Thumb2_Install
+	.type Thumb2_Install, %function
+Thumb2_Install:
+@	ldr	r0, [r0]
+	str	r1, [r0, #METHOD_FROM_INTERPRETED]
+	bx	lr
+
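+@ Common return path: check the frame's monitors via return_check_monitors
+@ (unlocking a synchronized method's monitor, raising an illegal-monitor
+@ exception for any other monitor still held), pop the zero frame, and for
+@ the value-returning bytecodes copy the result from the expression stack
+@ to the caller's new top-of-stack slot before resetting THREAD_JAVA_SP.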
+handle_return:
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+
+	cmp	tmp1, r9
+	blcc	return_check_monitors
+
+	mov	r3, #0
+	ldrb	lr, [jpc, #0]
+
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+@	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r2, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	add	r1, r2, #4
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	add	r1, r1, r0, lsl #2
+
+	cmp	lr, #opc_lreturn
+	cmpne	lr, #opc_dreturn
+	ldreq	r0, [stack, #8]
+	streq	r0, [r1, #-4]!
+	cmpne	lr, #opc_ireturn
+	cmpne	lr, #opc_freturn
+	cmpne	lr, #opc_areturn
+	ldreq	r0, [stack, #4]
+	streq	r0, [r1, #-4]!
+
+	str	r1, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+
+@ ----------------------------------------------------------------------------------------
+stack_overflow_no_frame:
+	mov	r0, thread
+	mov	ip, #0
+	str	ip, [r0, #THREAD_LAST_JAVA_SP]
+	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
+	str	ip, [r0, #THREAD_LAST_JAVA_FP]
+	ldr	ip, [r0, #THREAD_JAVA_SP]
+	str	ip, [r0, #THREAD_LAST_JAVA_SP]
+	bl	_ZN18InterpreterRuntime24throw_StackOverflowErrorEP10JavaThread
+	ldmfd	arm_sp!, {regset, pc}
+
+stack_overflow_before_frame:
+	mov	r0, thread
+	mov	ip, #0
+	str	ip, [r0, #THREAD_LAST_JAVA_SP]
+	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
+	str	ip, [r0, #THREAD_LAST_JAVA_FP]
+	ldr	ip, [r0, #THREAD_JAVA_SP]
+	str	ip, [r0, #THREAD_LAST_JAVA_SP]
+	bl	_ZN18InterpreterRuntime24throw_StackOverflowErrorEP10JavaThread
+	ldmfd	arm_sp!, {fast_regset, pc}
+
+handle_exception_do_not_unlock:
+	mov	r3, #1
+	strb	r3, [thread, #THREAD_DO_NOT_UNLOCK]
+	b	handle_exception_with_bcp
+
+abstractmethod_exception:
+	mov	r0, #VMSYMBOLS_AbstractMethodError
+	b	raise_exception
+incompatibleclass_exception:
+	mov	r0, #VMSYMBOLS_IncompatibleClassChangeError
+raise_exception:
+	adr	r1, null_str
+raise_exception_with_msg:
+	stmdb	sp!, {r0, r1}
+	bl	load_dispatch
+	ldmia	sp!, {r0, r1}
+	DECACHE_JPC
+        DECACHE_STACK
+	mov	r2, r1
+	ldr	r1, [dispatch, #VmSymbols_symbols_Address-XXX]
+	ldr	r1, [r1, r0, lsl #2]
+	mov	r0, thread
+	bl	Helper_Raise
+        b       handle_exception_with_bcp
+null_str:
+	.byte	0
+	ALIGN_WORD
+
+invokedynamic_exception_fix:
+invokeinterface_exception_fix:
+	sub	jpc, jpc, #2
+invoke_exception_fix:
+invokenative_exception:
+return_exception:
+	sub	jpc, jpc, #3
+resolve_exception:
+putfield_exception:
+getfield_exception:
+handle_exception:
+@ jpc = Exception PC
+@ stack = garbage
+@ locals = garbage
+@ constpool = garbage
+	DECACHE_JPC
+handle_exception_with_bcp:
+	bl	load_dispatch
+	CACHE_JPC
+	ldr	stack, [istate, #ISTATE_STACK_BASE]
+	sub	stack, stack, #4
+	DECACHE_STACK
+	cmp	jpc, #0
+	beq	1f
+
+	mov	r0, istate
+	mov	r1, thread
+	bl	Helper_HandleException
+	cmp	r0, #0
+	beq	1f
+
+	mov	jpc, r0
+	CACHE_STACK
+	CACHE_LOCALS
+	CACHE_CP
+	DISPATCH 0
+1:
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
+
+	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
+
+	mov	r3, #0
+	ldrb	r0, [thread, #THREAD_DO_NOT_UNLOCK]
+	strb	r3, [thread, #THREAD_DO_NOT_UNLOCK]
+	cmp	r0, #0
+	bne	2f
+
+	cmp	tmp1, r9
+	blcc	return_check_monitors
+
+2:
+	mov	r3, #0
+
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r0, [istate, #ISTATE_METHOD]
+	ldr	r3, [r2, #0]
+	ldrh	r0, [r0, #METHOD_MAXLOCALS]
+	add	r1, r2, #4
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
+
+	add	r1, r1, r0, lsl #2
+
+	str	r1, [thread, #THREAD_JAVA_SP]
+
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {fast_regset, pc}
+
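+@ return_check_monitors: scan the monitor block between the stack base
+@ and the monitor base; any slot still holding an object raises an
+@ illegal-monitor-state exception.  For ACC_SYNCHRONIZED methods
+@ (access flag bit 5) the method's own monitor is excluded from the
+@ scan and unlocked here instead, calling Helper_synchronized_exit if
+@ the inline cmpxchg unlock does not succeed.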
+return_check_monitors:
+	stmdb	arm_sp!, {r4, lr}
+
+	DECACHE_JPC	// FIXME: May not be needed.
+	ldr	r2, [istate, #ISTATE_METHOD]
+	ldr	r4, [r2, #METHOD_ACCESSFLAGS]
+	tst	r4, #1<<5
+	subne	r9, r9, #8
+	cmp	tmp1, r9
+	bcs	2f
+1:
+	ldr	r3, [tmp1, #4]
+	cmp	r3, #0
+	bne	3f
+	add	tmp1, tmp1, #8
+	cmp	tmp1, r9
+	bcc	1b
+
+2:
+	tst	r4, #1<<5
+
+	ldmeqia	arm_sp!, {r4, pc}
+
+	ldr	tmp1, [r9, #4]		@ base->obj == NULL
+	cmp	tmp1, #0
+	beq	4f
+
+	ldr	r0, [r9, #0]			@ r0 = header
+	mov	r3, #0
+	cmp	r0, #0
+	str	r3, [r9, #4]			@ base->obj = NULL
+
+	ldmeqia	arm_sp!, {r4, pc}
+
+	mov	r1, tmp1
+	mov	r2, r9
+	bl	cmpxchg_ptr
+	cmp	r9, r0
+
+	ldmeqia	arm_sp!, {r4, pc}
+
+	str	tmp1, [r9, #4]
+
+	mov	r1, r9
+	mov	r0, thread
+	bl	Helper_synchronized_exit
+
+	ldmeqia	arm_sp!, {r4, pc}
+
+3:
+	mov	r0, thread
+	bl	Helper_RaiseIllegalMonitorException
+	b	2b
+
+4:
+	mov	r0, thread
+	bl	Helper_RaiseIllegalMonitorException
+	ldmia	arm_sp!, {r4, pc}
+
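+@ accessor_entry: entry point for accessor methods.  After a stack
+@ headroom check it drops into fast_accessor_entry, which (when
+@ invocation counting is compiled in via USEC) bumps the invocation
+@ counter and bails to fast_normal_entry once the interpreter limit is
+@ hit, then tries to satisfy a resolved, word-sized getfield on the
+@ receiver in place.  Unresolved entries, null receivers and non-word
+@ fields fall back to fast_normal_entry / accessor_non_w.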
+	SLOW_ENTRY
+accessor_entry:
+	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+	rsb	r3, r7, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #32768
+	blt	stack_overflow_no_frame
+	bl	fast_accessor_entry
+	ldmia	sp!, {regset, pc}
+
+	FAST_ENTRY
+fast_accessor_entry:
+  USEC	adrl	ip, dispatch_init_adcon
+  USEC	ldr	r3, [ip]
+  USEC	add	r3, r3, ip
+  USEC	ldr	ip, [ip, #invocationlimit_adcon-dispatch_init_adcon]
+  USEC	ldr	ip, [r3, ip]
+
+  USEC	ldr	r3, [r0, #METHOD_INVOCATIONCOUNTER]
+  USEC	ldr	ip, [ip, #0]
+  USEC	add	r3, r3, #INVOCATIONCOUNTER_COUNTINCREMENT
+  USEC	str	r3, [r0, #METHOD_INVOCATIONCOUNTER]
+  USEC	cmp	r3, ip
+  USEC	bcs	fast_normal_entry
+
+	ldr	r1, [r0, #METHOD_CONSTMETHOD]
+	ldrb	r3, [r1, #CONSTMETHOD_CODEOFFSET+2]
+
+	ldr	ip, [r1, #METHOD_CONSTANTS]
+	ldrb	r1, [r1, #CONSTMETHOD_CODEOFFSET+3]
+	ldr	ip, [ip, #CONSTANTPOOL_CACHE]
+	orr	r3, r3, r1, lsl #8		@ r3 = index
+
+	add	r1, ip, #CP_OFFSET
+	ldr	r3, [r1, r3, lsl #4]!		@ r1 = cache entry, r3 = indices
+	ldr	ip, [thread, #THREAD_JAVA_SP]			@ ip == stack
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	ldr	r3, [ip, #0]
+	bne	fast_normal_entry
+
+	cmp	r3, #0
+	beq	fast_normal_entry
+
+	ldr	r0, [r1, #12]
+	ldr	r1, [r1, #8]
+	movs	r0, r0, lsr #29
+	bls	accessor_non_w
+
+	ldr	r0, [r3, r1]
+	str	r0, [ip, #0]
+	mov	r0, #0
+	bx	lr
+
+div_zero_jpc_1:
+	sub	jpc, jpc, #1
+.lrem_0:
+.ldiv_0:
+divide_by_zero_exception:
+	mov	r0, #VMSYMBOLS_ArithmeticException
+	adr	r1, div_zero_msg
+	b	raise_exception_with_msg
+div_zero_msg:
+	.ascii  "/ by int zero\000"
+	ALIGN_WORD
+
+array_bound_exception_jpc_4_r3:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_3_r3:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_2_r3:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_1_r3:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_0_r3:
+	mov	r2, r3
+	b	array_bounds_exception
+array_bound_exception_jpc_1_tmp1:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_0_tmp1:
+	mov	r2, tmp1
+	b	array_bounds_exception
+array_bound_exception_jpc_3:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_2:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_1:
+	sub	jpc, jpc, #1
+array_bound_exception_jpc_0:
+array_bounds_exception:
+	DECACHE_JPC
+        DECACHE_STACK
+	mov	r1, r2
+	mov	r0, thread
+	bl	Helper_RaiseArrayBoundException
+        b       handle_exception_with_bcp
+
+#ifndef HW_NULL_PTR_CHECK
+null_ptr_exception_jpc_5:
+	sub	jpc, jpc, #1
+null_ptr_exception_jpc_4:
+	sub	jpc, jpc, #1
+null_ptr_exception_jpc_3:
+	sub	jpc, jpc, #1
+null_ptr_exception_jpc_2:
+	sub	jpc, jpc, #1
+null_ptr_exception_jpc_1:
+	sub	jpc, jpc, #1
+null_ptr_exception_jpc_0:
+#endif
+null_ptr_exception:
+	mov	r0, #VMSYMBOLS_NullPointerException
+	b	raise_exception
+
+@ ==== SW FP ==============================================================================
+
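+@ Software floating-point bytecodes for CPUs without VFP.  These call
+@ the ARM EABI runtime helpers (__aeabi_fadd, __aeabi_ddiv, ...)
+@ provided by the toolchain's libgcc.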
+	Opcode	fadd
+	POP	r0, r1
+        bl      __aeabi_fadd
+	PUSH	r0
+	DISPATCH	1
+
+	Opcode	fsub
+	POP	r1
+	POP	r0
+        bl      __aeabi_fsub
+	PUSH	r0
+	DISPATCH	1
+
+	Opcode	fmul
+	POP	r0, r1
+        bl      __aeabi_fmul
+	PUSH	r0
+	DISPATCH	1
+
+	Opcode	fdiv
+	POP	r1
+	POP	r0
+        bl      __aeabi_fdiv
+	PUSH	r0
+	DISPATCH	1
+
+	Opcode	ddiv
+	POP	r2, r3
+	POP	r0, r1
+        bl      __aeabi_ddiv
+	PUSH	r0, r1
+	DISPATCH	1
+
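+@ fcmpl/fcmpg differ only in how an unordered compare (either operand
+@ NaN) is treated: fcmpl pushes -1, fcmpg pushes +1.  With v1 below v2
+@ on the stack this computes, roughly:
+@     r3 = (v1 < v2) ? -1 : (v1 > v2) ? 1 : (v1 == v2) ? 0 : unordered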
+	Opcode	fcmpl
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmpgt
+        cmp     r0, #0
+        movne   r3, #-1
+        bne     3f
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmplt
+        cmp     r0, #0
+        movne   r3, #1
+        bne     3f
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmpeq
+        cmp     r0, #0
+        movne   r3, #0
+        moveq   r3, #-1
+3:
+	DISPATCH_START	1
+	add	stack, stack, #8
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+
+	Opcode	fcmpg
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmpgt
+        cmp     r0, #0
+        movne   r3, #-1
+        bne     4f
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmplt
+        cmp     r0, #0
+        movne   r3, #1
+        bne     4f
+        ldmib   stack, {r0, r1}
+        bl      __aeabi_fcmpeq
+        cmp     r0, #0
+        movne   r3, #0
+        moveq   r3, #1
+4:
+	DISPATCH_START	1
+	add	stack, stack, #8
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+
+	Opcode	dcmpl
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmpgt
+        cmp     r0, #0
+        movne   r3, #-1
+        bne     5f
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmplt
+        cmp     r0, #0
+        movne   r3, #1
+        bne     5f
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmpeq
+        cmp     r0, #0
+        movne   r3, #0
+        moveq   r3, #-1
+5:
+	DISPATCH_START	1
+	add	stack, stack, #16
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+
+	Opcode	dcmpg
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmpgt
+        cmp     r0, #0
+        movne   r3, #-1
+        bne     6f
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmplt
+        cmp     r0, #0
+        movne   r3, #1
+        bne     6f
+        ldmib   stack, {r0, r1, r2, r3}
+        bl      __aeabi_dcmpeq
+        cmp     r0, #0
+        movne   r3, #0
+        moveq   r3, #1
+6:
+	DISPATCH_START	1
+	add	stack, stack, #16
+	DISPATCH_NEXT
+	PUSH	r3
+	DISPATCH_FINISH
+
+@ ==== Fast SW FP emulation ===============================================================
+
+#define al		r0
+#define ah		r1
+#define bl		r2
+#define bh		r3
+#define tmp		tmp1
+#define sh		r12
+#define ex_add		r14
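+@ Register aliases for the inlined soft-float add/sub below: al:ah and
+@ bl:bh are the low/high words of the two operands, tmp holds the
+@ exponent mask, sh the shift count and ex_add the exponent.  Note that
+@ no "bl" call instructions can appear while these #defines are live,
+@ as the preprocessor would rewrite the mnemonic as r2.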
+
+@ TOS = TOSM1 + TOS
+@ What we actually do is TOS = TOS + TOSM1
+@ --- do_dadd_vtos -------------------------------------------------
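+@ Outline: unpack both operands, branching out for NaN/Inf
+@ (.dadd_naninf) and zero/denormal (.dadd_uncommon) encodings; a
+@ mixed-sign add is routed to the subtract path (._dsub1).  Otherwise
+@ the operands are ordered by magnitude, the smaller mantissa is
+@ aligned to the larger exponent, and the sum is renormalised and
+@ rounded to nearest-even before being pushed.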
+	Opcode	dadd
+	POP	al, ah, bl, bh
+	mov	tmp, #0xff000000
+	orr	tmp, tmp, #0x00e00000
+        bics    ex_add, tmp, ah, LSL #1
+        bicnes  ex_add, tmp, bh, LSL #1
+        beq     .dadd_naninf
+        teq     ah, bh
+        eormi   bh, bh, #1 << 31
+        bmi     ._dsub1
+._dadd1:
+        subs    ex_add, al, bl
+        sbcs    sh, ah, bh
+        bhs     .dadd_swapped
+        adds    bl, bl, ex_add
+        adc     bh, bh, sh
+        subs    al, al, ex_add
+        sbc     ah, ah, sh
+.dadd_swapped:
+        mov     ex_add, ah, LSR #20
+        sub     sh, ex_add, bh, LSR #20
+        tst     tmp, bh, LSL #1
+        beq     .dadd_uncommon
+        bic     ah, ah, ex_add, LSL #20
+        bic     bh, bh, tmp
+        orr     bh, bh, #1 << 20
+.dadd_doadd:
+        rsbs    tmp, sh, #32
+        blo     .dadd_bigshift
+.dadd_smallshift:
+        adds    al, al, bl, LSR sh
+        adc     ah, ah, bh, LSR sh
+        adds    al, al, bh, LSL tmp
+        adcs    ah, ah, #0
+        cmp     ah, #1 << 20
+        bhs     .dadd_carry
+        add     ah, ah, ex_add, LSL #20     @ add exponent
+.dadd_nocarry:
+        movs    bl, bl, LSL tmp         @ round and sticky bits
+	bpl	.dadd_exit
+        adds    al, al, #1
+        movccs  bl, bl, LSL #1          @ is sticky bit zero?
+	bne	.dadd_exit
+.dadd_roundeven:
+        cmp     al, #0
+        bicne   al, al, #1              @ RTE if carry didn't occur
+        adceq   ah, ah, #0              @ increment high word if it did
+.dadd_check_overflow_inx:
+        mov     bh, ah, LSL #1
+        cmp     bh, #0xFFE00000
+	blo	.dadd_exit
+        subhs   ah, ah, #3<<29          @ bias exponent
+	b	.return_double_Inf
+.dadd_bigshift:
+        cmp     bl, #1
+        adc     bl, bh, bh
+        sub     sh, sh, #32             @ nonzero
+        rsbs    tmp, sh, #31            @ sets C if within a word
+        movlo   tmp, #0                 @ C clear if sh > 31
+        addhss  al, al, bh, LSR sh
+        adc     ah, ah, ex_add, LSL #20
+        cmp     ex_add, ah, LSR #20
+        beq     .dadd_nocarry
+        sub     ah, ah, ex_add, LSL #20
+.dadd_carry:
+        add     ah, ah, #1 << 20
+        movs    ah, ah, LSR #1
+        add     ah, ah, ex_add, LSL #20
+        movs    al, al, RRX
+        bcc     .dadd_check_overflow_exact
+        adcs    al, al, #0
+        movccs  tmp, bl, LSL tmp    @EQ = round to even
+        bne     .dadd_check_overflow_exact
+        b       .dadd_roundeven
+.dadd_rnearup_carry:
+        adds    al, al, #1
+        movccs  bl, bl, LSL #1          @ is sticky bit zero?
+        bne     .dadd_check_overflow_inx
+        b       .dadd_roundeven
+.dadd_check_overflow_exact:
+        mov     bh, ah, LSL #1
+        cmp     bh, #0xFFE00000
+	blo	.dadd_exit
+        sub     ah, ah, #3<<29          @ bias exponent
+
+.return_double_Inf:
+	and	a3, ah, #0x80000000
+.return_double_Inf_1:
+	mov	al, #0
+	mov	ah, #0x7f000000
+	orr	ah, ah, #0x00f00000
+	orr	ah,ah,a3
+.dadd_exit:
+	PUSH	al, ah
+	DISPATCH	1
+
+.dadd_uncommon:
+        orrs    tmp, bl, bh, LSL #1     @ Is b zero or denormal?
+        beq     .dadd_bzero
+        movs    tmp, ex_add, LSL #21
+        bic     ah, ah, ex_add, LSL #20
+        bicne   bh, bh, #1 << 31
+        subne   sh, sh, #1              @ adjust exponent to fake exp_b = 1
+        bne     .dadd_doadd    
+        adds    al, al, bl
+        adc     ah, ah, bh
+        b       .daddsub_denorm
+.dadd_bzero:
+        movs    tmp, ex_add, LSL #21        @ is a denormal?
+	bne	.dadd_exit
+        orrs    tmp, al, ah, LSL #1     @ a zero?
+	beq	.dadd_exit
+        b       .daddsub_denorm
+
+.dadd_naninf:
+        cmp     al, #1                  @ sets C if al!=0
+        adc     ex_add, ah, ah
+        cmp     bl, #1
+        adc     sh, bh, bh
+        cmp     ex_add, tmp                 @ HI if a is NaN
+        cmpls   sh, tmp                 @ HI if either is NaN
+        bhi     .return_double_NaN
+        cmp     ex_add, sh
+        beq     .dadd_twoinf
+        cmp     ex_add, tmp                 @ EQ if a is Inf
+        movne   ah, bh
+        movne   al, bl
+	b	.dadd_exit
+.dadd_twoinf:
+        teq     ah, bh
+	bpl	.dadd_exit
+	b	.return_double_NaN
+
+@ --- do_dsub_itos -------------------------------------------------
+	Opcode	dsub
+	POP	al, ah, bl, bh
+        mov     tmp, #0xff000000
+        orr     tmp, tmp, #0x00e00000
+        bics    ex_add, tmp, ah, LSL #1
+        bicnes  ex_add, tmp, bh, LSL #1
+        beq     .drsb_naninf
+	teq	ah, bh
+	eor	ah, ah, #1 << 31
+	bmi	._dadd1
+	eor	bh, bh, #1 << 31
+._dsub1:
+        subs    ex_add, al, bl
+        sbcs    sh, ah, bh
+        bhs     .dsub_swapped
+.dsub_do_swap:
+        eor     sh, sh, #1 << 31        @ negate a and b as a - b == -b - -a
+        adds    bl, bl, ex_add
+        adc     bh, bh, sh
+        subs    al, al, ex_add
+        sbc     ah, ah, sh
+.dsub_swapped:
+        mov     ex_add, ah, LSR #20
+        sub     sh, ex_add, bh, LSR #20
+        tst     tmp, bh, LSL #1
+        beq     .dsub_uncommon
+        bic     ah, ah, ex_add, LSL #20
+        bic     bh, bh, tmp, ASR #1
+        rsbs    bl, bl, #0
+        rsc     bh, bh, tmp, ASR #1     @ 0xffe00000 >> 1 = -(1 << 20)
+.dsub_dosub:
+        rsbs    tmp, sh, #32
+        blo     .dsub_bigshift
+.dsub_smallshift:
+        adds    al, al, bl, LSR sh
+        adc     ah, ah, bh, ASR sh
+        adds    al, al, bh, LSL tmp
+        adcs    ah, ah, #0
+        bmi     .dsub_borrow
+.dsub_noborrow:
+        add     ah, ah, ex_add, LSL #20
+        movs    bl, bl, LSL tmp
+.dsub_dorounding:
+	bpl	.dsub_exit
+        adds    al, al, #1              @ Z flag set if carry to high word
+        cmpne   bl, #0x80000000         @ check we don't have to round to even
+	bne	.dsub_exit
+        cmp     al, #0
+        addeq   ah, ah, #1
+        bicne   al, al, #1
+	b	.dsub_exit
+.dsub_bigshift:
+        cmp     bl, #1
+        adc     bl, bh, bh 
+        sub     sh, sh, #32
+        rsbs    tmp, sh, #31
+        blo     .dsub_hugeshift
+        adds    al, al, bh, ASR sh
+        adcs    ah, ah, #-1
+        bpl     .dsub_noborrow
+.dsub_borrow:
+        add     tmp, tmp, #1
+        movs    tmp, bl, LSL tmp
+        adcs    al, al, al              @ shift al,ah left including guard bit
+        adc     ah, ah, ah
+        add     sh, ah, ex_add, LSL #21     @ ah = 0xFFE00000 + fraction. Adding
+        movs    sh, sh, LSR #21         @ C-bit is clear if bit 20 of ah
+        bls     .dsub_renormalize        @   clear, so 2 bits or more
+        add     ah, ah, ex_add, LSL #20
+        adds    al, al, tmp, LSR #31    @ C and Z flag are set if carry over
+        cmpcc   tmp, #0x80000000        @ check that we don't have to round
+	bne	.dsub_exit
+        cmp     al, #0
+        addeq   ah, ah, #1
+        bicne   al, al, #1
+	b	.dsub_exit
+.dsub_renormalize:
+        bcs     .dsub_ex_one
+        adds    ah, ah, #1 << 21
+        cmpeq   al, #0
+        beq     .dsub_retzero            @ go and deal with it, if so
+        mov     sh, ex_add, LSR #11
+        bic     ex_add, ex_add, #1 << 11
+        sub     ex_add, ex_add, #2        @ for leading bit
+.dsub_renormloop:                 @ TODO: add loop for 8 bit per cycle renorm
+        adds    al, al, al
+        adc     ah, ah, ah
+        sub     ex_add, ex_add, #1
+        tst     ah, #1 << 20
+        beq     .dsub_renormloop
+        add     ah, ah, sh, LSL #31 
+        add     ah, ah, ex_add, LSL #20
+        cmp     ex_add, #0
+	bgt	.dsub_exit
+        add     ah, ah, #3 << 29        @ bias exponent
+        @ Rounding direction indicator is zero (denormal results are exact)
+	mov	ip, #0
+        b       .__dunder
+.dsub_hugeshift:
+.dsub_return:
+        add     ah, ah, ex_add, LSL #20
+.dsub_return1:
+.dsub_exit:
+	PUSH	al, ah
+	DISPATCH	1
+.dsub_ex_one:    @ underflow when ex = 1 - shift back to denorm
+        movs    ah, ah, ASR #1
+        mov     al, al, RRX
+        add     ah, ah, ex_add, LSL #20
+        b       .dsub_denorm
+.dsub_uncommon:
+        orrs    tmp, bl, bh, LSL #12    @ is b zero or denorm?
+        beq     .dsub_bzero
+        movs    tmp, ex_add, LSL #21
+        bic     ah, ah, ex_add, LSL #20
+        beq     .dsub_both_denorm
+        bic     bh, bh, #1 << 31
+        sub     sh, sh, #1
+        rsbs    bl, bl,#0
+        rsc     bh, bh,#0
+        b       .dsub_dosub    
+.dsub_both_denorm:
+        subs    al, al, bl
+        sbc     ah, ah, bh
+        b       .dsub_denorm
+.dsub_bzero:
+        orrs    tmp, al, ah, LSL #1
+        bne     .dsub_denorm             @ return a, but it might be denormal
+.dsub_retzero:
+        mov     ah, #0                  @ clear sign bit (al is already 0)
+	b	.dsub_exit
+.dsub_denorm:
+.daddsub_denorm:
+        movs    bl, ah, LSL #1          @ discard sign bit
+        tsteq   al, al                  @ do we have a zero?
+        beq     .dsub_retzero            @ yes: go and ensure the right sign
+	b	.dsub_exit
+.drsb_naninf:
+        @ Handle NaNs and infinities in reverse subtraction. We
+        @ just swap the operands and go to dsub_naninf.
+        eor     ah, ah, bh
+        eor     al, al, bl
+        eor     bh, bh, ah
+        eor     bl, bl, al
+        eor     ah, ah, bh
+        eor     al, al, bl
+.dsub_naninf:
+        cmp     al, #1                  @ sets C if al!=0
+        adc     ex_add, ah, ah
+        cmp     bl, #1
+        adc     sh, bh, bh
+        cmp     ex_add, tmp                 @ HI if a is NaN
+        cmpls   sh, tmp                 @ HI if either is NaN
+        bhi     .return_double_NaN
+        cmp     ex_add, sh
+        beq     .dsub_twoinf
+        cmp     ex_add, tmp                 @ EQ if a is Inf
+        eorne   ah, bh, #0x80000000
+        movne   al, bl
+	b	.dsub_exit
+.dsub_twoinf:
+        teq     ah, bh
+	bmi	.dsub_exit
+
+.return_double_NaN:
+	and	a3, ah, #0x80000000
+	mov	al, #0
+	mov	ah, #0x7f000000
+	orr	ah, ah, #0x00f80000
+	orr	ah,ah,a3
+	b	.dsub_exit
+
+@ === underflow handler ================================================
+
+#define	INX_pos	30
+#define INX_bit (1<<30)
+
+#define exp	r2
+#define temp	r3
+
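+@ .__dunder: denormalise an underflowed result.  ip carries the sign
+@ (bit 16) and the rounding-direction indicator (INX bits); the biased
+@ mantissa is shifted right by the exponent deficit, the bits shifted
+@ out are collected into a round/sticky word in exp, and the result is
+@ re-rounded to nearest-even before being pushed.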
+.__dunder:
+        tst     ah, ah
+        orrmi   ip, ip, #1<<16
+        mov     temp, #0x600
+        mov     exp, ah, LSR #20      @ find the exponent
+        add     temp, temp, #1
+        bic     ah, ah, exp, LSL #20 @ remove exponent from mantissa
+        bic     exp, exp, #0x800        @ lose the sign bit
+        sub     exp, temp, exp
+        orr     ah, ah, #1<<20      @ put on mantissa leading bit
+        cmp     exp, #53
+        bhi     .dunder_stickyonly
+        beq     .dunder_roundbit
+        cmp     exp, #21
+        blo     .dunder_hiword
+        subs    exp, exp, #32
+        bls     .dunder_hiloword
+.dunder_loloword:
+        rsb     temp, exp, #32
+        cmp     al, #0
+        mov     al, ah, LSR exp
+        mov     exp, ah, LSL temp
+        orrne   exp, exp, #1
+        mov     ah, #0
+        b       .dunder_round
+.dunder_hiloword:
+        rsb     temp, exp, #0
+        add     exp, exp, #32
+        mov     ah, ah, LSL temp
+        orr     ah, ah, al, LSR exp
+        mov     exp, al, LSL temp
+        mov     al, ah
+        mov     ah, #0
+        b       .dunder_round
+.dunder_hiword:
+        rsb     temp, exp, #32
+        mov     tmp, al, LSL temp
+        mov     temp, ah, LSL temp
+        orr     al, temp, al, LSR exp
+        mov     ah, ah, LSR exp
+        mov     exp, tmp
+        b       .dunder_round
+.dunder_roundbit:
+        orrs    exp, al, ah, LSL #12
+        mov     al, #0
+        mov     ah, #0
+        mov     exp, #0x80000000
+        addne   exp, exp, #1
+        b       .dunder_round
+.dunder_stickyonly:
+        mov     exp, #1
+        mov     ah, #0
+        mov     al, #0
+.dunder_round:
+        tst     ip, #1<<16
+        bic     ip, ip, #1<<16
+        orrne   ah, ah, #0x80000000
+        tst     exp, exp
+	beq	.dsub_exit
+        movs    exp, exp, LSL #1        @ round bit in C, sticky in ~Z
+        bcc     .dunder_rerounded        @ if no round bit, we're done
+        beq     .dunder_roundeven        @ RTE is tricky due to rerounding
+.dunder_roundup:
+        adds    al, al, #1          @ round up
+        adc     ah, ah, #0
+.dunder_rerounded:
+	b	.dsub_exit
+.dunder_roundeven:
+        movs    exp, ip, ASR #INX_pos   @ get -1, 0, +1 from direction bits
+        bmi     .dunder_roundup          @ if -1, round up unconditionally
+        bne     .dunder_rerounded        @ if +1, round down unconditionally
+        adds    al, al, #1          @ round up ...
+        adc     ah, ah, #0
+        bic     al, al, #1          @ ... and then to even
+        b       .dunder_rerounded
+
+@ === MULTIPLY Double ===================================================
+
+#define ex_m		r14
+#define uh		r12
+#define ul		r4
+#define rs		r4
+#define th		r5
+#define tl		r11
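+@ Register aliases for the soft-float multiply: ex_m carries the result
+@ exponent with the sign saved in its low bit, and uh:ul / th:tl hold
+@ the partial products from the four 32x32 umulls.  r4 and r5 are saved
+@ around the opcode because they are not otherwise free here.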
+
+@ --- do_dmul_itos -------------------------------------------------
+	Opcode	dmul
+	POP	al, ah, bl, bh
+	stmdb	arm_sp!, {r4, r5}
+	mov	tmp, #0x7f00000
+	orr	tmp, tmp, #0x00f0000
+        bics    ex_m, tmp, ah, LSR #4     @ test for Infs or NaNs
+        bicnes  ex_m, tmp, bh, LSR #4
+        beq     .dmul_naninf
+        and     ex_m, tmp, ah, LSR #4     @ get exponent of a
+        eor     th, ah, bh              @ compute sign of result
+        orr     ex_m, ex_m, th, LSR #31     @   and save it at bottom of ex
+        ands    th, tmp, bh, LSR #4     @ get exponent of b, and test
+        tstne   ah, tmp, LSL #4         @   for zeros/denorms ...
+        beq     .dmul_zerodenorm         @   go and renormalise if we got any
+.dmul_normalised:
+        add     ex_m, ex_m, th              @ calculate exponent of result
+        sub     ex_m, ex_m, #0x3FC0000      @ rebias exponent mostly
+        bic     ah, ah, tmp, LSL #5     @ clear sign and all but lo bit of exp
+        bic     bh, bh, tmp, LSL #5
+        orr     ah, ah, #1<<20          @ set leading bit on mantissa
+        orr     bh, bh, #1<<20          @ set leading bit on mantissa
+.dmul_mantmul:
+        umull   ul, uh, ah, bl
+        umull   tl, th, al, bh
+        adds    tl, tl, ul
+        adcs    th, th, uh              @ carry from here is used below
+        umull   ul, uh, ah, bh          @ uh:ul is top part
+        adc     bh, uh, #0              @ get carry from above addition
+        umull   ah, uh, al, bl          @ uh:ah is bottom part
+        adds    tl, tl, uh
+        adcs    th, th, ul
+        adcs    bh, bh, #0
+        tst     ah, ah
+        orrne   tl, tl, #1              @ don't lose sticky bit
+        mov     bl, #-4
+        mov     uh, #32-12              @ uh will be corresponding right shift
+        cmp     bh, #0x200              @ C set if it's 11 bits
+        adc     uh, uh, #0
+        adc     bl, bl, ex_m, ASR #16     @ increment exponent correspondingly
+        rsb     ul, uh, #32             @ ul is left shift
+        mov     ah, bh, LSL ul
+        mov     al, th, LSL ul
+        orr     ah, ah, th, LSR uh
+        orr     al, al, tl, LSR uh
+        add     ah, ah, bl, LSL #20 @ put exponent back on (may wrap
+        eor     ah, ah, ex_m, LSL #31 @ put sign back on (with EOR so that
+        movs    rs, tl, LSL ul          @ compute the round word
+        beq     .dmul_exact
+        movs    uh, rs, LSL #1          @ rs is already in place
+        movcc   rs, #-INX_bit           @ direction indicator: rounded down
+        bcc     .dmul_rounded
+        orreq   bh, bh, #1<<31          @ save the round-to-even bit
+        adcs    al, al, #0          @ round up if necessary
+        adc     ah, ah, #0
+        mov     rs, #INX_bit            @ direction indicator: rounded up
+        tst     al, bh, LSR #31       @ does RTE do anything?
+        bic     al, al, bh, LSR #31 @ perform RTE
+        movne   rs, #-INX_bit           @ if RTE had effect, we rounded down
+.dmul_exact:
+.dmul_rounded:
+        teq     ah, ex_m, LSL #31       @ do the signs agree?
+	mov	uh, #0x70000000         @ even if so, need to test exp 0/7FF
+	orr	uh, uh, #0x0ff00000
+        bmi     .dmul_outflow            @ if not, UFL or OFL
+        tst     ah, uh
+        bicnes  uh, uh, ah            @ is exp 0 or 7FF?
+        beq     .dmul_outflow
+.dmul_exit:
+	ldmia	arm_sp!, {r4, r5}
+	PUSH	al, ah
+	DISPATCH	1
+
+.dmul_rdirect:
+        movs    ul, ex_m, LSL #31         @ put sign bit in N
+        tstpl   uh, #2 << 22        @ if +ve: EQ iff round up. Keeps PL
+        tstmi   uh, #1 << 22         @ if -ve: EQ iff round up.
+        moveq   rs, #INX_bit            @ rounded up
+        movne   rs, #-INX_bit           @ rounded down
+        addeqs  al, al, #1          @ may set C
+        adc     ah, ah, #0
+        b       .dmul_rounded
+
+.dmul_outflow:
+        cmp     ex_m, #0x400<<16          @ Which ballpark are we in?
+        addle   ah, ah, #0x60000000 @ Bias up if underflow
+        subge   ah, ah, #0x60000000 @ Bias down if overflow
+	mov	ip, rs
+	ldmia	arm_sp!, {r4, r5}
+        ble     .__dunder                @ underflow
+	b	.return_double_Inf
+
+.dmul_zerodenorm:
+        orrs    ul, al, ah, LSL #1      @ is a zero?
+        orrnes  ul, bl, bh, LSL #1      @ or is b zero?
+        beq     .dmul_zero               @ Return zero if so
+
+        tst     th, th                  @ is b denormal?
+        beq     .dmul_renorm_op2
+.dmul_done_op2:
+        bics    ul, ex_m, #1              @ is a denormal? (careful of sign bit)
+        beq     .dmul_renorm_op1
+        b       .dmul_normalised
+.dmul_zero:
+        mov     al, #0                  @ clear low word
+        mov     ah, ex_m, LSL #31         @ get sign of result and clear hi word
+	b	.dmul_exit
+
+.dmul_renorm_op1:
+        add     ex_m, ex_m, #1<<16          @ correct the exponent
+        bic     ah, ah, #0x80000000       @ this will get in our way
+        orrs    tl, ah, al, LSR #12    @ is highest set bit in low twelve?
+        moveq   al, al, LSL #20         @ if so, move it up
+        subeq   ex_m, ex_m, #20<<16       @ and adjust exponent
+        tst     ah, ah                  @ is highest set bit in low word?
+        moveq   ah, al, LSR #12         @ if so, move up by 20
+        moveq   al, al, LSL #20
+        subeq   ex_m, ex_m, #20<<16       @ and adjust exponent
+        mov     ul, #0                 @ shift of top word
+        movs    tl, ah, LSR #(21-16)   @ is highest set bit within 16 of top?
+        moveq   ah, ah, LSL #16         @ if not, move up
+        addeq   ul, ul, #16           @ and adjust exponent
+        movs    tl, ah, LSR #(21-8)   @ is highest set bit within 8 of top?
+        moveq   ah, ah, LSL #8         @ if not, move up
+        addeq   ul, ul, #8           @ and adjust exponent
+        movs    tl, ah, LSR #(21-4)   @ is highest set bit within 4 of top?
+        moveq   ah, ah, LSL #4         @ if not, move up
+        addeq   ul, ul, #4           @ and adjust exponent
+        movs    tl, ah, LSR #(21-2)   @ is highest set bit within 2 of top?
+        moveq   ah, ah, LSL #2         @ if not, move up
+        addeq   ul, ul, #2           @ and adjust exponent
+        movs    tl, ah, LSR #(21-1)   @ is highest set bit within 1 of top?
+        moveq   ah, ah, LSL #1         @ if not, move up
+        addeq   ul, ul, #1           @ and adjust exponent
+
+        sub     ex_m, ex_m, ul, LSL #16  @ calculate final pseudo exponent
+        mov     tl, al, LSL ul        @ shift low word up by same amount
+        rsb     ul, ul, #32           @ compute reverse shift for al
+        orr     ah, ah, al, LSR ul     @ put in high end of low word
+        mov     al, tl
+
+	mov	tmp, #0x7f00000
+	orr	tmp, tmp, #0x00f0000
+
+        b       .dmul_normalised
+
+.dmul_renorm_op2:
+        add     th, th, #1<<16          @ correct the exponent
+        bic     bh, bh, #0x80000000       @ this will get in our way
+        orrs    tl, bh, bl, LSR #12    @ is highest set bit in low twelve?
+        moveq   bl, bl, LSL #20         @ if so, move it up
+        subeq   th, th, #20<<16       @ and adjust exponent
+        tst     bh, bh                  @ is highest set bit in low word?
+        moveq   bh, bl, LSR #12         @ if so, move up by 20
+        moveq   bl, bl, LSL #20
+        subeq   th, th, #20<<16       @ and adjust exponent
+        mov     ul, #0                 @ shift of top word
+        movs    tl, bh, LSR #(21-16)   @ is highest set bit within 16 of top?
+        moveq   bh, bh, LSL #16         @ if not, move up
+        addeq   ul, ul, #16           @ and adjust exponent
+        movs    tl, bh, LSR #(21-8)   @ is highest set bit within 8 of top?
+        moveq   bh, bh, LSL #8         @ if not, move up
+        addeq   ul, ul, #8           @ and adjust exponent
+        movs    tl, bh, LSR #(21-4)   @ is highest set bit within 4 of top?
+        moveq   bh, bh, LSL #4         @ if not, move up
+        addeq   ul, ul, #4           @ and adjust exponent
+        movs    tl, bh, LSR #(21-2)   @ is highest set bit within 2 of top?
+        moveq   bh, bh, LSL #2         @ if not, move up
+        addeq   ul, ul, #2           @ and adjust exponent
+        movs    tl, bh, LSR #(21-1)   @ is highest set bit within 1 of top?
+        moveq   bh, bh, LSL #1         @ if not, move up
+        addeq   ul, ul, #1           @ and adjust exponent
+
+        sub     th, th, ul, LSL #16  @ calculate final pseudo exponent
+        mov     tl, bl, LSL ul        @ shift low word up by same amount
+        rsb     ul, ul, #32           @ compute reverse shift for bl
+        orr     bh, bh, bl, LSR ul     @ put in high end of low word
+        mov     bl, tl
+
+	mov	tmp, #0x7f00000
+	orr	tmp, tmp, #0x00f0000
+
+        b       .dmul_done_op2
+
+.dmul_naninf:
+        mov     uh, ah, LSL #1          @ discard sign bit on a
+        mov     ul, bh, LSL #1          @ and on b
+        cmp     uh, tmp, LSL #5         @ HI if ah shows a to be NaN
+        cmpeq   al, #0                  @ now HI if a is NaN
+        cmpls   ul, tmp, LSL #5         @ another chance to set HI ...
+        cmpeq   bl, #0                  @  ... if b is NaN
+        bhi     .dmul_ivo
+        orrs    ul, al, ah, LSL #1      @ is a zero?
+        orrnes  ul, bl, bh, LSL #1      @ or is b zero?
+        beq     .dmul_ivo
+        eor     ah, ah, bh
+        mov     al, #0
+        and     ah, ah, #0x80000000
+        orr     ah, ah, tmp, LSL #4
+	b	.dmul_exit
+
+.dmul_ivo:
+	ldmia	arm_sp!, {r4, r5}
+	b	.return_double_NaN
+
+#undef al
+#undef ah
+#undef bl
+#undef bh
+#undef tmp
+#undef sh
+#undef ex_add
+
+#undef	INX_pos
+#undef INX_bit
+
+#undef exp
+#undef temp
+
+#undef ex_m
+#undef uh
+#undef ul
+#undef rs
+#undef th
+#undef tl
+
+@ --- ignore_safepoints ---------------------------------------------------------------------------
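+@ CppInterpreter::ignore_safepoints(): if notice_safepoints is set,
+@ clear it and copy main_dispatch_table back over the live dispatch
+@ table.  On HW_FP builds running on a CPU without VFP the copy is
+@ merged with the vfp_table replacements in a single pass (see below),
+@ and the CLZ-based idiv/irem handlers are re-installed when the CPU
+@ supports CLZ.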
+	.global	_ZN14CppInterpreter17ignore_safepointsEv
+	.type	_ZN14CppInterpreter17ignore_safepointsEv, %function
+_ZN14CppInterpreter17ignore_safepointsEv:
+#ifdef NOTICE_SAFEPOINTS
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r2, r3}
+	add	r2, r2, ip
+	add	ip, r3, r2
+	ldr	r2, [ip, #AbstractInterpreter_notice_safepoints-XXX]
+	ldrb	r1, [r2, #0]	@ zero_extendqisi2
+	cmp	r1, #0
+	bxeq	lr
+	mov	r3, #0
+	strb	r3, [r2, #0]
+	adrl	r3, main_dispatch_table
+#ifdef HW_FP
+	ldr	r0, [ip, #CPUInfo-XXX]
+	tst	r0, #ARCH_VFP
+	beq	2f
+#endif
+	mov	r2, #256
+1:
+	ldr	r1, [r3], #4
+	str	r1, [ip], #4
+	subs	r2, r2, #1
+	bne	1b
+	sub	ip, ip, #4 * 256
+	b	4f
+
+@ No HW FP - must update the table from a combination of
+@ main_dispatch_table and vfp_table. Previously this updated from
+@ main_dispatch_table first, and then overwrote the updated entries with
+@ those from vfp_table. However, this creates a window where the jump
+@ table has vfp entries, so in a multithreaded world we can get undefined
+@ VFP instructions.
+@ The code below updates from both tables simultaneously. Note: this
+@ relies on the entries in vfp_table being in opcode order.
+#ifdef HW_FP
+2:
+	stmdb	arm_sp!, {r4, lr}
+	mov	r2, #0
+	adrl	r0, vfp_table
+	ldr	r4, [r0], #4
+3:
+	ldr	r1, [r3], #4
+	cmp	r2, r4
+	ldreq	r1, [r0], #4
+	ldreq	r4, [r0], #4
+	str	r1, [ip], #4
+	add	r2, r2, #1
+	cmp	r2, #256
+	bcc	3b
+	sub	ip, ip, #4 * 256
+	ldmia	arm_sp!, {r4, lr}
+#endif // HW_FP
+
+4:
+	ldr	r0, [ip, #CPUInfo-XXX]
+	tst	r0, #ARCH_CLZ
+	beq	5f
+
+	adrl	r0, do_idiv_clz
+	str	r0, [ip, #opc_idiv * 4]
+	adrl	r0, do_irem_clz
+	str	r0, [ip, #opc_irem * 4]
+
+5:
+#endif // NOTICE_SAFEPOINTS
+
+	bx	lr
+
+@ --- notice_safepoints ---------------------------------------------------------------------------
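+@ CppInterpreter::notice_safepoints(): set notice_safepoints (if not
+@ already set) and switch dispatch to safe_dispatch_table so that
+@ pending safepoints are noticed on bytecode dispatch.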
+	.global	_ZN14CppInterpreter17notice_safepointsEv
+	.type	_ZN14CppInterpreter17notice_safepointsEv, %function
+_ZN14CppInterpreter17notice_safepointsEv:
+#ifdef NOTICE_SAFEPOINTS
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r2, r3}
+	add	r2, r2, ip
+	add	ip, r3, r2
+	ldr	r2, [ip, #AbstractInterpreter_notice_safepoints-XXX]
+	ldrb	r1, [r2, #0]	@ zero_extendqisi2
+	cmp	r1, #0
+	bxne	lr
+	mov	r3, #1
+	strb	r3, [r2, #0]
+	adrl	r3, safe_dispatch_table
+	mov	r2, #256
+1:
+	ldr	r1, [r3], #4
+	str	r1, [ip], #4
+	subs	r2, r2, #1
+	bne	1b
+#endif
+	bx	lr
+
+@ --- END execute.s ----------------------------------------------------------------------------
+
+	ALIGN_CODE
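+@ bci_init: one-off interpreter initialisation.  Resolves the GOT
+@ entries listed in adcon_init_table into absolute addresses stored
+@ just below the dispatch table, copies main_dispatch_table into the
+@ live table, records the CPU capabilities returned by hwcap, tunes
+@ CompileThreshold/BackgroundCompilation by processor count when the
+@ compiler is built in, and patches the SW-FP and CLZ-dependent
+@ entries as required.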
+bci_init:
+        stmfd   sp!, {r4, lr}
+
+	adrl	r3, dispatch_init_adcon
+	ldm	r3, {r0, r1}
+	add	r0, r0, r3
+        add     r4, r1, r0
+	adrl	r2, adcon_init_table
+        mov     r1, r4
+1:
+	ldr	ip, [r2], #4
+	cmp	ip, #0
+	ldrne	ip, [r0, ip]
+	strne	ip, [r1, #-4]!
+	bne	1b
+	adrl	r2, main_dispatch_table
+	mov	r1, #256
+        mov     r3, r4
+2:
+	ldr	ip, [r2], #4
+	str	ip, [r3], #4
+	subs	r1, r1, #1
+	bne	2b
+
+	bl	hwcap
+	str	r0, [r4, #CPUInfo-XXX]
+
+#ifdef USE_COMPILER
+
+#define NPROCESSORS_CONF        83
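+@ 83 is _SC_NPROCESSORS_CONF (glibc), so this calls
+@ sysconf(_SC_NPROCESSORS_CONF) to count configured processors.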
+
+        mov     r0, #NPROCESSORS_CONF
+        bl      sysconf
+        cmp     r0, #2
+
+#ifdef DISABLE_BG_COMP_ON_NON_MP
+        movcc   r0, #0
+        ldrcc   r1, [r4, #BackgroundCompilation_Address-XXX]
+        strccb  r0, [r1]
+#endif
+
+        movcs   r0, #MP_COMPILE_THRESHOLD
+        movcc   r0, #UP_COMPILE_THRESHOLD
+        ldr     r1, [r4, #CompileThreshold_Address-XXX]
+        str     r0, [r1]
+
+#endif // USE_COMPILER
+
+#ifdef T2JIT
+	bl	Thumb2_Initialize
+#endif
+
+#ifdef HW_FP
+	ldr	r0, [r4, #CPUInfo-XXX]
+	tst	r0, #ARCH_VFP
+	bne	4f
+
+@ No HW FP - replace the HW FP entries with SW entries
+update_vfp_table:
+	adr	r0, vfp_table
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r2, r3}
+	add	r2, r2, ip
+	add	ip, r3, r2
+.update_vfp_loop:
+	ldr	r1, [r0], #4
+	cmp	r1, #0
+	ldrne	r2, [r0], #4
+	strne	r2, [ip, r1, lsl #2]
+	bne	.update_vfp_loop
+4:
+#endif // HW_FP
+
+	ldr	r0, [r4, #CPUInfo-XXX]
+	tst	r0, #ARCH_CLZ
+	beq	5f
+
+	adrl	r0, do_idiv_clz
+	str	r0, [r4, #opc_idiv * 4]
+	adrl	r0, do_irem_clz
+	str	r0, [r4, #opc_irem * 4]
+
+5:
+	ldmia	sp!, {r4, pc}
+
+#ifdef HW_FP
+vfp_table:
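+@ Zero-terminated (bytecode, replacement handler) pairs used to patch
+@ the dispatch table when the CPU has no VFP.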
+	.word	opc_fadd,	do_fadd
+	.word	opc_dadd,	do_dadd
+	.word	opc_fsub,	do_fsub
+	.word	opc_dsub,	do_dsub
+	.word	opc_fmul,	do_fmul
+	.word	opc_dmul,	do_dmul
+	.word	opc_fdiv,	do_fdiv
+	.word	opc_ddiv,	do_ddiv
+	.word	opc_fcmpl,	do_fcmpl
+	.word	opc_fcmpg,	do_fcmpg
+	.word	opc_dcmpl,	do_dcmpl
+	.word	opc_dcmpg,	do_dcmpg
+	.word	0
+#endif // HW_FP
+
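+@ load_dispatch: recompute the dispatch register from the PIC adcon
+@ block below (GOT base plus the GOT offset of opclabels_data).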
+load_dispatch:
+	adrl	ip, dispatch_init_adcon
+	ldm	ip, {r0, r1}
+	add	r0, r0, ip
+	add	dispatch, r1, r0
+	mov	pc, lr
+
+	ALIGN_DATA
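+@ dispatch_init_adcon holds the PC-relative offset of the GOT and the
+@ GOT offset of opclabels_data.  adcon_init_table lists further GOT
+@ entries that bci_init resolves and stores, in reverse order, just
+@ below the dispatch table; the list is terminated by a zero word.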
+dispatch_init_adcon:
+	.word	_GLOBAL_OFFSET_TABLE_-dispatch_init_adcon, opclabels_data(GOTOFF)
+adcon_init_table:
+	.word	_ZN18InterpreterRuntime22slow_signature_handlerEP10JavaThreadP13methodOopDescPiS4_(GOT)
+	.word	_ZN20SafepointSynchronize6_stateE(GOT)
+	.word	_ZN9vmSymbols8_symbolsE(GOT)
+	.word	always_do_update_barrier(GOT)
+	.word	_ZN8Universe14_collectedHeapE(GOT)
+	.word	_ZN9Bytecodes5_nameE(GOT)
+	.word	_ZN19AbstractInterpreter18_notice_safepointsE(GOT)
+	.word	_ZN18ThreadLocalStorage13_thread_indexE(GOT)
+	.word	_ZN7oopDesc3_bsE(GOT)
+	.word	PrintCommandLineFlags(GOT)
+	.word	_ZN11JvmtiExport28_can_post_interpreter_eventsE(GOT)
+	.word	UseCompiler(GOT)
+invocationlimit_adcon:
+	.word	_ZN17InvocationCounter26InterpreterInvocationLimitE(GOT)
+        .word   CompileThreshold(GOT)
+        .word   BackgroundCompilation(GOT)
+        .word   UseOnStackReplacement(GOT)
+	.word	0
+
+	ALIGN_DATA
+main_dispatch_table:
+	MAIN_DISPATCH_TABLE
+
+#ifdef NOTICE_SAFEPOINTS
+safe_dispatch_table:
+
+/*  WARNING: If you change any of these bytecodes, you must also
+    change the table in bytecodes_arm.def to make it match.  */
+
+	.word	do_nop	@ 0 0x00
+	.word	do_u4const_0	@ 1 0x01
+	.word	do_iconst_N	@ 2 0x02
+	.word	do_iconst_N	@ 3 0x03
+	.word	do_iconst_N	@ 4 0x04
+	.word	do_iconst_N	@ 5 0x05
+	.word	do_iconst_N	@ 6 0x06
+	.word	do_iconst_N	@ 7 0x07
+	.word	do_iconst_N	@ 8 0x08
+	.word	do_u8const_0	@ 9 0x09
+	.word	do_lconst_1	@ 10 0x0a
+	.word	do_u4const_0	@ 11 0x0b
+	.word	do_fconst_1	@ 12 0x0c
+	.word	do_fconst_2	@ 13 0x0d
+	.word	do_u8const_0	@ 14 0x0e
+	.word	do_dconst_1	@ 15 0x0f
+	.word	do_bipush	@ 16 0x10
+	.word	do_sipush	@ 17 0x11
+	.word	do_ldc	@ 18 0x12
+	.word	do_ldc_w	@ 19 0x13
+	.word	do_ldc2_w	@ 20 0x14
+	.word	do_u4load	@ 21 0x15
+	.word	do_u8load	@ 22 0x16
+	.word	do_u4load	@ 23 0x17
+	.word	do_u8load	@ 24 0x18
+	.word	do_u4load	@ 25 0x19
+	.word	do_iload_0	@ 26 0x1a
+	.word	do_iload_0	@ 27 0x1b
+	.word	do_iload_0	@ 28 0x1c
+	.word	do_iload_0	@ 29 0x1d
+	.word	do_u8load_0	@ 30 0x1e
+	.word	do_u8load_1	@ 31 0x1f
+	.word	do_u8load_2	@ 32 0x20
+	.word	do_u8load_3	@ 33 0x21
+	.word	do_fload_0	@ 34 0x22
+	.word	do_fload_0	@ 35 0x23
+	.word	do_fload_0	@ 36 0x24
+	.word	do_fload_0	@ 37 0x25
+	.word	do_u8load_0	@ 38 0x26
+	.word	do_u8load_1	@ 39 0x27
+	.word	do_u8load_2	@ 40 0x28
+	.word	do_u8load_3	@ 41 0x29
+	.word	do_aload_0	@ 42 0x2a
+	.word	do_aload_0	@ 43 0x2b
+	.word	do_aload_0	@ 44 0x2c
+	.word	do_aload_0	@ 45 0x2d
+	.word	do_u4aload	@ 46 0x2e
+	.word	do_u8aload	@ 47 0x2f
+	.word	do_u4aload	@ 48 0x30
+	.word	do_u8aload	@ 49 0x31
+	.word	do_u4aload	@ 50 0x32
+	.word	do_baload	@ 51 0x33
+	.word	do_caload	@ 52 0x34
+	.word	do_saload	@ 53 0x35
+	.word	do_u4store	@ 54 0x36
+	.word	do_u8store	@ 55 0x37
+	.word	do_u4store	@ 56 0x38
+	.word	do_u8store	@ 57 0x39
+	.word	do_u4store	@ 58 0x3a
+	.word	do_u4store_0	@ 59 0x3b
+	.word	do_u4store_1	@ 60 0x3c
+	.word	do_u4store_2	@ 61 0x3d
+	.word	do_u4store_3	@ 62 0x3e
+	.word	do_u8store_0	@ 63 0x3f
+	.word	do_u8store_1	@ 64 0x40
+	.word	do_u8store_2	@ 65 0x41
+	.word	do_u8store_3	@ 66 0x42
+	.word	do_u4store_0	@ 67 0x43
+	.word	do_u4store_1	@ 68 0x44
+	.word	do_u4store_2	@ 69 0x45
+	.word	do_u4store_3	@ 70 0x46
+	.word	do_u8store_0	@ 71 0x47
+	.word	do_u8store_1	@ 72 0x48
+	.word	do_u8store_2	@ 73 0x49
+	.word	do_u8store_3	@ 74 0x4a
+	.word	do_u4store_0	@ 75 0x4b
+	.word	do_u4store_1	@ 76 0x4c
+	.word	do_u4store_2	@ 77 0x4d
+	.word	do_u4store_3	@ 78 0x4e
+	.word	do_u4astore	@ 79 0x4f
+	.word	do_u8astore	@ 80 0x50
+	.word	do_u4astore	@ 81 0x51
+	.word	do_u8astore	@ 82 0x52
+	.word	do_aastore	@ 83 0x53
+	.word	do_bastore	@ 84 0x54
+	.word	do_u2astore	@ 85 0x55
+	.word	do_u2astore	@ 86 0x56
+	.word	do_jpop	@ 87 0x57
+	.word	do_jpop2	@ 88 0x58
+	.word	do_dup	@ 89 0x59
+	.word	do_dup_x1	@ 90 0x5a
+	.word	do_dup_x2	@ 91 0x5b
+	.word	do_dup2	@ 92 0x5c
+	.word	do_dup2_x1	@ 93 0x5d
+	.word	do_dup2_x2	@ 94 0x5e
+	.word	do_swap	@ 95 0x5f
+	.word	do_iadd	@ 96 0x60
+	.word	do_ladd	@ 97 0x61
+	.word	do_fadd	@ 98 0x62
+	.word	do_dadd	@ 99 0x63
+	.word	do_isub	@ 100 0x64
+	.word	do_lsub	@ 101 0x65
+	.word	do_fsub	@ 102 0x66
+	.word	do_dsub	@ 103 0x67
+	.word	do_imul	@ 104 0x68
+	.word	do_lmul	@ 105 0x69
+	.word	do_fmul	@ 106 0x6a
+	.word	do_dmul	@ 107 0x6b
+	.word	do_idiv	@ 108 0x6c
+	.word	do_ldiv	@ 109 0x6d
+	.word	do_fdiv	@ 110 0x6e
+	.word	do_ddiv	@ 111 0x6f
+	.word	do_irem	@ 112 0x70
+	.word	do_lrem	@ 113 0x71
+	.word	do_frem	@ 114 0x72
+	.word	do_drem	@ 115 0x73
+	.word	do_ineg	@ 116 0x74
+	.word	do_lneg	@ 117 0x75
+	.word	do_fneg	@ 118 0x76
+	.word	do_dneg	@ 119 0x77
+	.word	do_ishl	@ 120 0x78
+	.word	do_lshl	@ 121 0x79
+	.word	do_ishr	@ 122 0x7a
+	.word	do_lshr	@ 123 0x7b
+	.word	do_iushr	@ 124 0x7c
+	.word	do_lushr	@ 125 0x7d
+	.word	do_iand	@ 126 0x7e
+	.word	do_land	@ 127 0x7f
+	.word	do_ior	@ 128 0x80
+	.word	do_lor	@ 129 0x81
+	.word	do_ixor	@ 130 0x82
+	.word	do_lxor	@ 131 0x83
+	.word	do_iinc	@ 132 0x84
+	.word	do_i2l	@ 133 0x85
+	.word	do_i2f	@ 134 0x86
+	.word	do_i2d	@ 135 0x87
+	.word	do_l2i	@ 136 0x88
+	.word	do_l2f	@ 137 0x89
+	.word	do_l2d	@ 138 0x8a
+	.word	do_f2i	@ 139 0x8b
+	.word	do_f2l	@ 140 0x8c
+	.word	do_f2d	@ 141 0x8d
+	.word	do_d2i	@ 142 0x8e
+	.word	do_d2l	@ 143 0x8f
+	.word	do_d2f	@ 144 0x90
+	.word	do_i2b	@ 145 0x91
+	.word	do_i2c	@ 146 0x92
+	.word	do_i2s	@ 147 0x93
+	.word	do_lcmp	@ 148 0x94
+	.word	do_fcmpl	@ 149 0x95
+	.word	do_fcmpg	@ 150 0x96
+	.word	do_dcmpl	@ 151 0x97
+	.word	do_dcmpg	@ 152 0x98
+	.word	do_ifeq	@ 153 0x99
+	.word	do_ifne	@ 154 0x9a
+	.word	do_iflt	@ 155 0x9b
+	.word	do_ifge	@ 156 0x9c
+	.word	do_ifgt	@ 157 0x9d
+	.word	do_ifle	@ 158 0x9e
+	.word	do_if_icmpeq	@ 159 0x9f
+	.word	do_if_icmpne	@ 160 0xa0
+	.word	do_if_icmplt	@ 161 0xa1
+	.word	do_if_icmpge	@ 162 0xa2
+	.word	do_if_icmpgt	@ 163 0xa3
+	.word	do_if_icmple	@ 164 0xa4
+	.word	do_if_icmpeq	@ 165 0xa5
+	.word	do_if_icmpne	@ 166 0xa6
+	.word	do_goto	@ 167 0xa7
+	.word	do_jsr	@ 168 0xa8
+	.word	do_ret	@ 169 0xa9
+	.word	do_tableswitch	@ 170 0xaa
+	.word	do_lookupswitch	@ 171 0xab
+	.word	do_ireturn	@ 172 0xac
+	.word	do_lreturn	@ 173 0xad
+	.word	do_ireturn	@ 174 0xae
+	.word	do_lreturn	@ 175 0xaf
+	.word	do_ireturn	@ 176 0xb0
+	.word	do_return	@ 177 0xb1
+	.word	do_getstatic	@ 178 0xb2
+	.word	do_putstatic	@ 179 0xb3
+	.word	do_getfield	@ 180 0xb4
+	.word	do_putfield	@ 181 0xb5
+	.word	do_invokevirtual	@ 182 0xb6
+	.word	do_invokespecial	@ 183 0xb7
+	.word	do_invokestatic	@ 184 0xb8
+	.word	do_invokeinterface	@ 185 0xb9
+	.word	do_invokedynamic	@ 186 0xba
+	.word	do_new	@ 187 0xbb
+	.word	do_newarray	@ 188 0xbc
+	.word	do_anewarray	@ 189 0xbd
+	.word	do_arraylength	@ 190 0xbe
+	.word	do_athrow	@ 191 0xbf
+	.word	do_checkcast	@ 192 0xc0
+	.word	do_instanceof	@ 193 0xc1
+	.word	do_monitorenter	@ 194 0xc2
+	.word	do_monitorexit	@ 195 0xc3
+	.word	do_wide	@ 196 0xc4
+	.word	do_multianewarray	@ 197 0xc5
+	.word	do_ifeq	@ 198 0xc6
+	.word	do_ifne	@ 199 0xc7
+	.word	do_goto_w	@ 200 0xc8
+	.word	do_jsr_w	@ 201 0xc9
+	.word	do_breakpoint	@ 202 0xca
+	.word	do_undefined	@ 203 0xcb
+	.word	do_bgetfield	@ 204 0xcc
+	.word	do_cgetfield	@ 205 0xcd
+	.word	do_undefined	@ 206 0xce
+	.word	do_undefined	@ 207 0xcf
+	.word	do_igetfield	@ 208 0xd0
+	.word	do_lgetfield	@ 209 0xd1
+	.word	do_sgetfield	@ 210 0xd2
+	.word	do_aputfield	@ 211 0xd3
+	.word	do_bputfield	@ 212 0xd4
+	.word	do_cputfield	@ 213 0xd5
+	.word	do_undefined	@ 214 0xd6
+	.word	do_undefined	@ 215 0xd7
+	.word	do_iputfield	@ 216 0xd8
+	.word	do_lputfield	@ 217 0xd9
+	.word	do_undefined	@ 218 0xda
+	.word	do_iaccess_0	@ 219 0xdb
+	.word	do_iaccess_0	@ 220 0xdc
+	.word	do_iaccess_0	@ 221 0xdd
+	.word	do_iaccess_0	@ 222 0xde
+	.word	do_invokeresolved	@ 223 0xdf
+	.word	do_invokespecialresolved	@ 224 0xe0
+	.word	do_invokestaticresolved	@ 225 0xe1
+	.word	do_invokevfinal	@ 226 0xe2
+	.word	do_fast_iload_iload	@ 227 0xe3
+	.word	do_fast_iload_iload_N	@ 228 0xe4
+	.word	do_fast_aldc		@ 229 0xe5
+	.word	do_fast_aldc_w	@ 230 0xe6
+	.word	do_return_register_finalizer	@ 231 0xe7
+	.word	do_undefined	@ 232 0xe8
+	.word	do_iload_0_iconst_N	@ 233 0xe9
+	.word	do_iload_0_iconst_N	@ 234 0xea
+	.word	do_iload_0_iconst_N	@ 235 0xeb
+	.word	do_iload_0_iconst_N	@ 236 0xec
+	.word	do_iload_iconst_N	@ 237 0xed
+	.word	do_iadd_istore_N	@ 238 0xee
+	.word	do_isub_istore_N	@ 239 0xef
+	.word	do_iand_istore_N	@ 240 0xf0
+	.word	do_ior_istore_N	@ 241 0xf1
+	.word	do_ixor_istore_N	@ 242 0xf2
+	.word	do_iadd_u4store	@ 243 0xf3
+	.word	do_isub_u4store	@ 244 0xf4
+	.word	do_iand_u4store	@ 245 0xf5
+	.word	do_ior_u4store	@ 246 0xf6
+	.word	do_ixor_u4store	@ 247 0xf7
+	.word	do_fast_iload_N_iload	@ 248 0xf8
+	.word	do_fast_iload_N_iload	@ 249 0xf9
+	.word	do_fast_iload_N_iload	@ 250 0xfa
+	.word	do_fast_iload_N_iload	@ 251 0xfb
+	.word	do_fast_iload_N_iload_N	@ 252 0xfc
+	.word	do_fast_iload_N_iload_N	@ 253 0xfd
+	.word	do_fast_iload_N_iload_N	@ 254 0xfe
+	.word	do_fast_iload_N_iload_N	@ 255 0xff
+#endif
+
+	SUB_DISPATCH_TABLES
+
+	.arch	armv7-a
+
+	ALIGN_CODE
+	.global	Thumb2_stubs
+	.type Thumb2_stubs, %function
+Thumb2_stubs:
+	.global	Thumb2_idiv_stub
+	.type Thumb2_idiv_stub, %function
+Thumb2_idiv_stub:
+int_div:
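+@ Integer divide stub used from JIT code: r0 = dividend, r1 = divisor,
+@ quotient returned in r0.  Divisors 0..32 dispatch through the jdiv_N
+@ table below (0 lands in Thumb2_DivZero_Handler), power-of-two
+@ divisors reduce to a shift, and the general case jumps into a
+@ CLZ-indexed unrolled shift-and-subtract loop, fixing up the sign
+@ from r12 = dividend ^ divisor at the end.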
+	cmp     r1, #0x21
+	adr	r3, 1f
+	eor     r12, r0, r1
+	ldrcc	pc, [r3, r1, lsl #2]
+	rsblt   r1, r1, #0
+	subs    r2, r1, #1
+	beq     2f
+	movs    r3, r0
+	rsbmi   r3, r0, #0
+	cmp     r3, r1
+	bls     3f
+	tst     r1, r2
+	beq     4f
+	clz     r2, r3
+	clz     r0, r1
+	sub     r2, r0, r2
+	rsbs    r2, r2, #31
+	add     r2, r2, r2, lsl #1
+	mov     r0, #0
+	add     pc, pc, r2, lsl #2
+	mov	r0, #0
+	cmp     r3, r1, lsl #31
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #31
+	cmp     r3, r1, lsl #30
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #30
+	cmp     r3, r1, lsl #29
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #29
+	cmp     r3, r1, lsl #28
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #28
+	cmp     r3, r1, lsl #27
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #27
+	cmp     r3, r1, lsl #26
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #26
+	cmp     r3, r1, lsl #25
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #25
+	cmp     r3, r1, lsl #24
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #24
+	cmp     r3, r1, lsl #23
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #23
+	cmp     r3, r1, lsl #22
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #22
+	cmp     r3, r1, lsl #21
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #21
+	cmp     r3, r1, lsl #20
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #20
+	cmp     r3, r1, lsl #19
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #19
+	cmp     r3, r1, lsl #18
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #18
+	cmp     r3, r1, lsl #17
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #17
+	cmp     r3, r1, lsl #16
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #16
+	cmp     r3, r1, lsl #15
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #15
+	cmp     r3, r1, lsl #14
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #14
+	cmp     r3, r1, lsl #13
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #13
+	cmp     r3, r1, lsl #12
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #12
+	cmp     r3, r1, lsl #11
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #11
+	cmp     r3, r1, lsl #10
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #10
+	cmp     r3, r1, lsl #9
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #9
+	cmp     r3, r1, lsl #8
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #8
+	cmp     r3, r1, lsl #7
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #7
+	cmp     r3, r1, lsl #6
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #6
+	cmp     r3, r1, lsl #5
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #5
+	cmp     r3, r1, lsl #4
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #4
+	cmp     r3, r1, lsl #3
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #3
+	cmp     r3, r1, lsl #2
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #2
+	cmp     r3, r1, lsl #1
+	adc     r0, r0, r0
+	subcs   r3, r3, r1, lsl #1
+	cmp     r3, r1
+	adc     r0, r0, r0
+	subcs   r3, r3, r1
+	cmp     r12, #0
+	rsbmi   r0, r0, #0
+	bx      lr
+2:
+	teq     r12, r0
+	rsbmi   r0, r0, #0
+	bx      lr
+3:
+	movcc   r0, #0
+	asreq   r0, r12, #31
+	orreq   r0, r0, #1
+	bx      lr
+4:
+	clz     r2, r1
+	rsb     r2, r2, #31
+	cmp     r12, #0
+	lsr     r0, r3, r2
+	rsbmi   r0, r0, #0
+	bx      lr
+1:
+	.word	Thumb2_DivZero_Handler
+	.word	jdiv_1
+	.word	jdiv_2
+	.word	jdiv_3
+	.word	jdiv_4
+	.word	jdiv_5
+	.word	jdiv_6
+	.word	jdiv_7
+	.word	jdiv_8
+	.word	jdiv_9
+	.word	jdiv_10
+	.word	jdiv_11
+	.word	jdiv_12
+	.word	jdiv_13
+	.word	jdiv_14
+	.word	jdiv_15
+	.word	jdiv_16
+	.word	jdiv_17
+	.word	jdiv_18
+	.word	jdiv_19
+	.word	jdiv_20
+	.word	jdiv_21
+	.word	jdiv_22
+	.word	jdiv_23
+	.word	jdiv_24
+	.word	jdiv_25
+	.word	jdiv_26
+	.word	jdiv_27
+	.word	jdiv_28
+	.word	jdiv_29
+	.word	jdiv_30
+	.word	jdiv_31
+	.word	jdiv_32
+	ALIGN_CODE
+	.global	Thumb2_irem_stub
+	.type Thumb2_irem_stub, %function
+Thumb2_irem_stub:
+int_rem:
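+@ Integer remainder stub: r0 = dividend, r1 = divisor, remainder
+@ returned in r0 with the sign of the dividend (Java irem semantics).
+@ The structure mirrors int_div above, with jrem_N handling divisors
+@ 1..32.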
+	cmp     r1, #0x21
+	adr	r3, 1f
+	ldrcc	pc, [r3, r1, lsl #2]
+	rsblt   r1, r1, #0
+	subs    r2, r1, #1
+	beq     2f
+	movs    r12, r0
+	rsbmi   r0, r0, #0
+	cmp     r0, r1
+	bls     3f
+	tst     r1, r2
+	beq     4f
+	clz     r2, r0
+	clz     r3, r1
+	sub     r2, r3, r2
+	rsbs    r2, r2, #31
+	add     pc, pc, r2, lsl #3
+	mov	r3, #0
+	cmp     r0, r1, lsl #31
+	subcs   r0, r0, r1, lsl #31
+	cmp     r0, r1, lsl #30
+	subcs   r0, r0, r1, lsl #30
+	cmp     r0, r1, lsl #29
+	subcs   r0, r0, r1, lsl #29
+	cmp     r0, r1, lsl #28
+	subcs   r0, r0, r1, lsl #28
+	cmp     r0, r1, lsl #27
+	subcs   r0, r0, r1, lsl #27
+	cmp     r0, r1, lsl #26
+	subcs   r0, r0, r1, lsl #26
+	cmp     r0, r1, lsl #25
+	subcs   r0, r0, r1, lsl #25
+	cmp     r0, r1, lsl #24
+	subcs   r0, r0, r1, lsl #24
+	cmp     r0, r1, lsl #23
+	subcs   r0, r0, r1, lsl #23
+	cmp     r0, r1, lsl #22
+	subcs   r0, r0, r1, lsl #22
+	cmp     r0, r1, lsl #21
+	subcs   r0, r0, r1, lsl #21
+	cmp     r0, r1, lsl #20
+	subcs   r0, r0, r1, lsl #20
+	cmp     r0, r1, lsl #19
+	subcs   r0, r0, r1, lsl #19
+	cmp     r0, r1, lsl #18
+	subcs   r0, r0, r1, lsl #18
+	cmp     r0, r1, lsl #17
+	subcs   r0, r0, r1, lsl #17
+	cmp     r0, r1, lsl #16
+	subcs   r0, r0, r1, lsl #16
+	cmp     r0, r1, lsl #15
+	subcs   r0, r0, r1, lsl #15
+	cmp     r0, r1, lsl #14
+	subcs   r0, r0, r1, lsl #14
+	cmp     r0, r1, lsl #13
+	subcs   r0, r0, r1, lsl #13
+	cmp     r0, r1, lsl #12
+	subcs   r0, r0, r1, lsl #12
+	cmp     r0, r1, lsl #11
+	subcs   r0, r0, r1, lsl #11
+	cmp     r0, r1, lsl #10
+	subcs   r0, r0, r1, lsl #10
+	cmp     r0, r1, lsl #9
+	subcs   r0, r0, r1, lsl #9
+	cmp     r0, r1, lsl #8
+	subcs   r0, r0, r1, lsl #8
+	cmp     r0, r1, lsl #7
+	subcs   r0, r0, r1, lsl #7
+	cmp     r0, r1, lsl #6
+	subcs   r0, r0, r1, lsl #6
+	cmp     r0, r1, lsl #5
+	subcs   r0, r0, r1, lsl #5
+	cmp     r0, r1, lsl #4
+	subcs   r0, r0, r1, lsl #4
+	cmp     r0, r1, lsl #3
+	subcs   r0, r0, r1, lsl #3
+	cmp     r0, r1, lsl #2
+	subcs   r0, r0, r1, lsl #2
+	cmp     r0, r1, lsl #1
+	subcs   r0, r0, r1, lsl #1
+	cmp     r0, r1
+	subcs   r0, r0, r1
+	cmp     r12, #0
+	rsbmi   r0, r0, #0
+	bx      lr
+2:
+	mov	r0, #0
+	bx      lr
+3:
+	moveq	r0, #0
+	cmp	r12, #0
+	rsbmi	r0, r0, #0
+	bx	lr
+4:
+	and	r0, r0, r2
+	cmp	r12, #0
+	rsbmi	r0, r0, #0
+	bx      lr
+1:
+	.word	Thumb2_DivZero_Handler
+	.word	jrem_1
+	.word	jrem_2
+	.word	jrem_3
+	.word	jrem_4
+	.word	jrem_5
+	.word	jrem_6
+	.word	jrem_7
+	.word	jrem_8
+	.word	jrem_9
+	.word	jrem_10
+	.word	jrem_11
+	.word	jrem_12
+	.word	jrem_13
+	.word	jrem_14
+	.word	jrem_15
+	.word	jrem_16
+	.word	jrem_17
+	.word	jrem_18
+	.word	jrem_19
+	.word	jrem_20
+	.word	jrem_21
+	.word	jrem_22
+	.word	jrem_23
+	.word	jrem_24
+	.word	jrem_25
+	.word	jrem_26
+	.word	jrem_27
+	.word	jrem_28
+	.word	jrem_29
+	.word	jrem_30
+	.word	jrem_31
+	.word	jrem_32
+
+#ifdef T2JIT
+
+	.macro	LOAD_FRAME
+	ldr	Rframe, [thread, #THREAD_TOP_ZERO_FRAME]
+	.endm
+
+@ R0 = BCI
+@ R1 = index
+
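+@ Thumb2_invokeinterface_stub: called from JIT code with the BCI in r0
+@ and the constant pool cache index in r1.  Resolves the call site if
+@ necessary, null-checks the receiver, looks the method up in the
+@ receiver klass's itable (or treats it as a virtual call when the
+@ forced-virtual flag is set), then calls the target through its
+@ from_interpreted entry.  On return the frame and Java SP are
+@ restored; a pending exception diverts to istub_exception.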
+	.global	Thumb2_invokeinterface_stub
+	.type Thumb2_invokeinterface_stub, %function
+Thumb2_invokeinterface_stub:
+	LOAD_FRAME
+	stmdb	sp!, {ip, lr}
+	ldr	ip, [Rframe, #FRAME_METHOD]
+	sub	stack, stack, #4
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]
+	ldr	ip, [ip, #METHOD_CONSTMETHOD]
+	str	stack, [Rframe, #FRAME_STACK]
+	add	jpc, ip, r0
+        add     r0, r2, r1, lsl #4
+	str	jpc, [Rframe, #FRAME_BCP]
+
+        ldr     r2, [r0, #CP_OFFSET]
+        and     r2, r2, #0x00ff0000
+        cmp     r2, #opc_invokeinterface << 16
+        bne     istub_resolve
+2:
+	ldr	r3, [r0, #CP_OFFSET+12]
+	and	r2, r3, #255
+	ldr	r2, [stack, r2, lsl #2]
+	cmp	r2, #0
+	beq	istub_null_ptr_exception
+	ldr	tmp1, [r2, #4]				@ rcvr->klass()
+	tst	r3, #flag_is_forced_virtual
+	bne	istub_methodInterface
+
+	ldr	lr, [r0, #CP_OFFSET+4]			@ lr = iclass
+
+	add	r1, tmp1, #INSTANCEKLASS_VTABLE_OFFSET
+	ldr	r2, [tmp1, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
+	ldr	ip, [tmp1, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
+	add	r2, r2, #1
+	bic	r2, r2, #1
+
+	add	r1, r1, r2, lsl #2
+
+	mov	r2, #0
+1:
+	cmp	r2, ip
+	beq	istub_incompatibleclass_exception
+	ldr	r3, [r1], #8
+	add	r2, r2, #1
+	cmp	lr, r3
+	bne	1b
+
+	ldr	r3, [r0, #CP_OFFSET+8]
+	ldr	r2, [r1, #-4]
+	add	r3, tmp1, r3, lsl #2
+	ldr	tmp1, [r3, r2]
+	cmp	tmp1, #0
+	beq	istub_abstractmethod_exception
+istub_invoke:
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	mov	r1, #0
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip]
+
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
+#endif
+	blx	r3
+
+	LOAD_FRAME
+
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [Rframe, #FRAME_STACK_LIMIT]
+
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	r2, r2, #4
+	mov	r0, #0
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r3, [thread, #4]
+	cmp	r3, #0
+	bne	istub_exception
+	ldmia	sp!, {ip, pc}
+
+istub_methodInterface:
+	tst	r3, #flag_vfinalMethod
+	ldrne	tmp1, [r0, #CP_OFFSET+8]
+	bne	istub_invoke
+	ldr	r1, [r0, #CP_OFFSET+8]
+	add	r3, tmp1, r1, lsl #2
+	ldr	tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+	b	istub_invoke
+
+istub_resolve:
+	mov	tmp1, r1
+	mov	r1, #opc_invokeinterface
+	mov	r0, thread
+	ldr	ip, resolve_invoke_adcon
+	blx	ip
+	ldr	r3, [thread, #4]
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]
+	cmp	r3, #0
+	bne	istub_exception
+	add	r0, r2, tmp1, lsl #4	@ r0 = cache entry
+	b	2b
+
+istub_exception:
+	ldmia	sp!, {ip, lr}
+	ldr	ip, handle_exception_adcon
+	LOAD_ISTATE
+	bx	ip
+
+istub_null_ptr_exception:
+	mov	r0, #VMSYMBOLS_NullPointerException
+	b	3f
+istub_abstractmethod_exception:
+	mov	r0, #VMSYMBOLS_AbstractMethodError
+	b	3f
+istub_incompatibleclass_exception:
+	mov	r0, #VMSYMBOLS_IncompatibleClassChangeError
+3:
+	ldr	jpc, [Rframe, #FRAME_BCP]
+	ldmia	sp!, {ip, lr}
+	ldr	ip, raise_exception_adcon
+	LOAD_ISTATE
+	bx	ip
+
+resolve_invoke_adcon:
+	.word	_ZN18InterpreterRuntime14resolve_invokeEP10JavaThreadN9Bytecodes4CodeE
+resolve_get_put_adcon:
+       	.word   _ZN18InterpreterRuntime15resolve_get_putEP10JavaThreadN9Bytecodes4CodeE
+handle_exception_adcon:
+	.word	handle_exception_with_bcp
+raise_exception_adcon:
+	.word	raise_exception
+helper_aputfield_adcon:
+	.word	Helper_aputfield
+lr_to_bci_adcon:
+	.word	Thumb2_lr_to_bci
+
+@ R0 = BCI
+@ R1 = index
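+@ Thumb2_invokevirtual_stub: as above, but dispatches through the
+@ receiver's vtable (or calls the cached method directly when the
+@ entry is flagged vfinal), resolving the call site on first use.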
+	.global	Thumb2_invokevirtual_stub
+	.type Thumb2_invokevirtual_stub, %function
+Thumb2_invokevirtual_stub:
+	LOAD_FRAME
+	stmdb	sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+	str	stack, [Rframe, #FRAME_STACK]
+        add     jpc, ip, r0
+        add     r0, r2, r1, lsl #4
+	str	jpc, [Rframe, #FRAME_BCP]
+
+        ldr     r2, [r0, #CP_OFFSET]
+        and     r2, r2, #0xff000000
+        cmp     r2, #opc_invokevirtual << 24
+        bne     ivstub_resolve
+2:
+
+	ldr	r3, [r0, #CP_OFFSET+12]
+        and     r2, r3, #255
+        ldr     r2, [stack, r2, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]
+        tst     r3, #flag_vfinalMethod
+        bne     1f
+
+        ldr     r3, [r2, #4]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+1:
+	mov	r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
+#endif
+	blx	r3
+	LOAD_FRAME
+
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [Rframe, #FRAME_STACK_LIMIT]
+
+	mov	r0, #0
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	add	r2, r2, #4
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r3, [thread, #4]
+	cmp	r3, #0
+	bne	istub_exception
+	ldmia	sp!, {ip, pc}
+
+ivstub_resolve:
+	mov	tmp1, r1
+	mov	r1, #opc_invokevirtual
+	mov	r0, thread
+	ldr	ip, resolve_invoke_adcon
+	blx	ip
+	ldr	r3, [thread, #4]
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]
+	cmp	r3, #0
+	bne	istub_exception
+	add	r0, r2, tmp1, lsl #4	@ r0 = cache entry
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokevfinalresolved_stub
+Thumb2_invokevfinalresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r2, r3, #255
+        ldr     r2, [stack, r2, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        add     r2, r2, #4
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP]
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokevirtualresolved_stub
+Thumb2_invokevirtualresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r2, r3, #255
+        ldr     r2, [stack, r2, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]
+        ldr     r3, [r2, #4]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        add     r2, r2, #4
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP]
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokestatic_stub
+	.type Thumb2_invokestatic_stub, %function
+Thumb2_invokestatic_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+
+        ldr     r2, [r0, #CP_OFFSET]
+	and	r2, r2, #0x00ff0000
+	cmp	r2, #opc_invokestatic << 16
+	bne	isstub_resolve
+2:
+	ldr	tmp1, [r0, #CP_OFFSET+4]
+	mov	r1, #0
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+isstub_resolve:
+        mov     tmp1, r1
+        mov     r1, #opc_invokestatic
+        mov	r0, thread
+        ldr     ip, resolve_invoke_adcon
+        blx     ip
+        ldr     r3, [thread, #4]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        cmp     r3, #0
+        bne     istub_exception
+        add     r0, r2, tmp1, lsl #4    @ r0 = cache
+        b       2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokestaticresolved_stub
+Thumb2_invokestaticresolved_stub:
+	LOAD_FRAME
+	stmdb	sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+        ldr     tmp1, [r0, #CP_OFFSET+4]
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokespecial_stub
+	.type Thumb2_invokespecial_stub, %function
+Thumb2_invokespecial_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+ 
+        ldr     r2, [r0, #CP_OFFSET]
+ 	and	r2, r2, #0x00ff0000
+	cmp	r2, #opc_invokespecial << 16
+	bne	ispstub_resolve
+2:
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r3, r3, #255
+        ldr     r2, [stack, r3, asl #2]
+	cmp	r2, #0
+	beq	istub_null_ptr_exception
+
+	ldr	tmp1, [r0, #CP_OFFSET+4]
+	mov	r1, #0
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+
+	add	stack, stack, #4
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        mov     r2, thread
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+ispstub_resolve:
+        mov     tmp1, r1
+        mov     r1, #opc_invokespecial
+        mov	r0, thread
+        ldr     ip, resolve_invoke_adcon
+        blx     ip
+        ldr     r3, [thread, #4]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        cmp     r3, #0
+        bne     istub_exception
+        add     r0, r2, tmp1, lsl #4    @ r0 = cache
+        b       2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokespecialresolved_stub
+Thumb2_invokespecialresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r3, r3, #255
+        ldr     r2, [stack, r3, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+4]
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getfield_word_stub
+	.type Thumb2_getfield_word_stub, %function
+Thumb2_getfield_word_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	bne	1f
+2:
+	ldr	r3, [stack], #4		@ POP r3
+	ldr	ip, [r2, #CP_OFFSET+8]
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	GO_IF_VOLATILE	r2, r2, 3f
+
+	ldr	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	
+	ldr	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
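+
+@ The GO_IF_VOLATILE branch above performs the load and then a FullBarrier
+@ before pushing the result, which is the usual ARMv7 lowering of a Java
+@ volatile read (load followed by a full dmb). A minimal C++ sketch of the
+@ equivalent ordering, assuming a 32-bit field (illustration only):
+@
+@   #include <atomic>
+@   #include <cstdint>
+@
+@   // A volatile int read maps to a seq_cst load, which GCC/Clang compile
+@   // on ARMv7 as a plain load followed by "dmb ish", the same shape as
+@   // the stub's volatile path.
+@   int32_t read_volatile_field(const std::atomic<int32_t>& field) {
+@     return field.load(std::memory_order_seq_cst);
+@   }
+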
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getfield_sh_stub
+	.type Thumb2_getfield_sh_stub, %function
+Thumb2_getfield_sh_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	bne	1f
+2:
+	ldr	r3, [stack], #4		@ POP r3
+	ldr	ip, [r2, #CP_OFFSET+8]
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	GO_IF_VOLATILE	r2, r2, 3f
+
+	ldrsh	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldrsh	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getfield_h_stub
+	.type Thumb2_getfield_h_stub, %function
+Thumb2_getfield_h_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	bne	1f
+2:
+	ldr	r3, [stack], #4		@ POP r3
+	ldr	ip, [r2, #CP_OFFSET+8]
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	GO_IF_VOLATILE	r2, r2, 3f
+
+	ldrh	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldrh	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getfield_sb_stub
+	.type Thumb2_getfield_sb_stub, %function
+Thumb2_getfield_sb_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	bne	1f
+2:
+	ldr	r3, [stack], #4		@ POP r3
+	ldr	ip, [r2, #CP_OFFSET+8]
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	GO_IF_VOLATILE	r2, r2, 3f
+
+	ldrsb	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldrsb	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getfield_dw_stub
+	.type Thumb2_getfield_dw_stub, %function
+Thumb2_getfield_dw_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getfield << 16
+	bne	1f
+2:
+	ldr	r3, [stack], #4		@ POP r3
+	ldr	ip, [r2, #CP_OFFSET+8]
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	GO_IF_VOLATILE	r2, r2, 3f
+
+	ldrd	r2, r3, [r3, ip]
+	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
+	bx	lr
+3:
+	ldrd	r2, r3, [r3, ip]	// FIXME: Should be ldrexd
+	FullBarrier
+	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+	.ltorg
+	
+@ R0 = BCI
+@ R1 = index
+putstatic_stub_unresolved:
+	mov	r2, #opc_putstatic
+	b	field_stub_unresolved
+getstatic_stub_unresolved:
+	mov	r2, #opc_getstatic
+	b	field_stub_unresolved
+putfield_stub_unresolved:
+	mov	r2, #opc_putfield
+	b	field_stub_unresolved
+getfield_stub_unresolved:
+	mov	r2, #opc_getfield
+field_stub_unresolved:
+	stmdb	sp!, {r0, r1, ip, lr}
+	ldr	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+        ldr     ip, [lr, #FRAME_METHOD]
+	sub	r3, stack, #4
+	ldr	ip, [ip, #METHOD_CONSTMETHOD]
+	str	r3, [lr, #FRAME_STACK]	@ DECACHE_STACK
+	add	r3, ip, r0
+	str	r3, [lr, #FRAME_BCP]	@ DECACHE_JPC
+	ldr	ip, resolve_get_put_adcon
+	mov	r1, r2
+	mov	r0, thread
+	blx	ip
+	ldmia	sp!, {r0, r1, ip, lr}
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r3, [thread, #4]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	cmp	r3, #0
+	bne	field_exception
+	add	r2, r2, r1, lsl #4
+	bx	lr
+
+field_null_ptr_exception:
+	stmdb	sp!, {JAZ_REGSET}
+	ldr	ip, [thread, #THREAD_TOP_ZERO_FRAME]
+        ldr     r1, [ip, #FRAME_METHOD]
+	ldr	r3, [ip, #FRAME_LOCALS]
+        ldr     ip, [r1, #METHOD_CONSTMETHOD]
+        add     jpc, ip, r0
+ 	mov	r0, #VMSYMBOLS_NullPointerException
+	bic	r0, lr, #TBIT
+	mov	r2, sp
+
+@ We already have BCI, so just call lr_to_bci to save the locals
+@ The result value is ignored
+	ldr	ip, lr_to_bci_adcon
+	blx	ip
+
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+ 	ldr	ip, raise_exception_adcon
+	LOAD_ISTATE
+ 	bx	ip
+ 
+ field_exception:
+ 	ldr	ip, handle_exception_adcon
+	LOAD_ISTATE
+ 	bx	ip
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putfield_word_stub
+	.type Thumb2_putfield_word_stub, %function
+Thumb2_putfield_word_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putfield << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	str	r2, [r3, ip]
+	bx	lr
+3:	
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	StoreStoreBarrier
+	str	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
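+
+@ The volatile branch of the putfield stubs brackets the store with
+@ StoreStoreBarrier before and StoreLoadBarrier after, matching the usual
+@ lowering of a Java volatile write. An equivalent C++ sketch (illustration
+@ only, not the stub's actual interface):
+@
+@   #include <atomic>
+@   #include <cstdint>
+@
+@   // A volatile int write is a seq_cst store; on ARMv7 this compiles to
+@   // barrier, store, barrier, i.e. the StoreStore + str + StoreLoad
+@   // bracket used above.
+@   void write_volatile_field(std::atomic<int32_t>& field, int32_t value) {
+@     field.store(value, std::memory_order_seq_cst);
+@   }
+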
+	.global	Thumb2_putfield_h_stub
+	.type Thumb2_putfield_h_stub, %function
+Thumb2_putfield_h_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putfield << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	strh	r2, [r3, ip]
+	bx	lr
+3:
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	StoreStoreBarrier
+	strh	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_putfield_b_stub
+	.type Thumb2_putfield_b_stub, %function
+Thumb2_putfield_b_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putfield << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	strb	r2, [r3, ip]
+	bx	lr
+3:
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	StoreStoreBarrier
+	strb	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_putfield_a_stub
+	.type Thumb2_putfield_a_stub, %function
+Thumb2_putfield_a_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putfield << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	str	r2, [r3, ip]
+	ldr	ip, helper_aputfield_adcon
+	mov	r0, r3
+	bx	ip
+3:
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}	@ r2 = value, r3 = obj
+	cmp	r3, #0
+	beq	field_null_ptr_exception
+
+	StoreStoreBarrier
+	str	r2, [r3, ip]
+	StoreLoadBarrier
+	ldr	ip, helper_aputfield_adcon
+	mov	r0, r3
+	bx	ip
+1:
+	mov	ip, lr
+	bl	putfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_putfield_dw_stub
+	.type Thumb2_putfield_dw_stub, %function
+Thumb2_putfield_dw_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putfield << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r1, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3, ip}	@ r2,r3 = value, ip = obj
+	cmp	ip, #0
+	beq	field_null_ptr_exception
+
+	strd	r2,r3, [ip, r1]
+	bx	lr
+3:
+	ldr	r1, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3, ip}	@ r2,r3 = value, ip = obj
+	cmp	ip, #0
+	beq	field_null_ptr_exception
+
+	StoreStoreBarrier
+	// FIXME: This should use strexd on an MP system
+	strd	r2,r3, [ip, r1]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putfield_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_getstatic_word_stub
+	.type Thumb2_getstatic_word_stub, %function
+Thumb2_getstatic_word_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldr	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldr	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_getstatic_h_stub
+	.type Thumb2_getstatic_h_stub, %function
+Thumb2_getstatic_h_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrh	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrh	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_getstatic_sh_stub
+	.type Thumb2_getstatic_sh_stub, %function
+Thumb2_getstatic_sh_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrsh	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrsh	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_getstatic_sb_stub
+	.type Thumb2_getstatic_sb_stub, %function
+Thumb2_getstatic_sb_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrsb	r3, [r3, ip]
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrsb	r3, [r3, ip]
+	FullBarrier
+	str	r3, [stack, #-4]!	@ PUSH r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+	.global	Thumb2_getstatic_dw_stub
+	.type Thumb2_getstatic_dw_stub, %function
+Thumb2_getstatic_dw_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0x00ff0000
+	cmp	r3, #opc_getstatic << 16
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrd	r2, r3, [r3, ip]
+	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+
+	ldrd	r2, r3, [r3, ip]
+	FullBarrier
+	// FIXME: The ldrd above should be ldrexd on an MP system
+	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
+	bx	lr
+1:
+	mov	ip, lr
+	bl	getstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putstatic_word_stub
+	.type Thumb2_putstatic_word_stub, %function
+Thumb2_putstatic_word_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putstatic << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	str	r2, [r3, ip]
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	StoreStoreBarrier
+	str	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putstatic_h_stub
+	.type Thumb2_putstatic_h_stub, %function
+Thumb2_putstatic_h_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putstatic << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	strh	r2, [r3, ip]
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	StoreStoreBarrier
+	strh	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putstatic_b_stub
+	.type Thumb2_putstatic_b_stub, %function
+Thumb2_putstatic_b_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putstatic << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	strb	r2, [r3, ip]
+	bx	lr
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	StoreStoreBarrier
+	strb	r2, [r3, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putstatic_dw_stub
+	.type Thumb2_putstatic_dw_stub, %function
+Thumb2_putstatic_dw_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putstatic << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r1, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}
+
+	strd	r2,r3, [r1, ip]
+	bx	lr
+3:
+	ldr	r1, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldmia	stack!, {r2, r3}
+
+	StoreStoreBarrier
+	strd	r2,r3, [r1, ip]
+	StoreLoadBarrier
+	bx	lr
+1:
+	mov	ip, lr
+	bl	putstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_putstatic_a_stub
+	.type Thumb2_putstatic_a_stub, %function
+Thumb2_putstatic_a_stub:
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
+	add	r2, r2, r1, lsl #4
+	ldr	r3, [r2, #CP_OFFSET]
+	and	r3, r3, #0xff000000
+	cmp	r3, #opc_putstatic << 24
+	bne	1f
+2:
+	GO_IF_VOLATILE	r3, r2, 3f
+	
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	str	r2, [r3, ip]
+	ldr	ip, helper_aputfield_adcon
+	mov	r0, r3
+	bx	ip
+3:
+	ldr	r3, [r2, #CP_OFFSET+4]
+	ldr	ip, [r2, #CP_OFFSET+8]
+	ldr	r2, [stack], #4		@ POP r2
+
+	StoreStoreBarrier
+	str	r2, [r3, ip]
+	StoreLoadBarrier
+	ldr	ip, helper_aputfield_adcon
+	mov	r0, r3
+	bx	ip
+1:
+	mov	ip, lr
+	bl	putstatic_stub_unresolved
+	mov	lr, ip
+	b	2b
+
+#endif // T2JIT
+
+	// Ensure that any literals generated in the stubs are output here
+	// as this code is copied to the bottom of the code buffer
+	.ltorg
+
+	.global	Thumb2_stubs_end
+	.type Thumb2_stubs_end, %function
+Thumb2_stubs_end:
+
+	ALIGN_CODE
+jdiv_1:
+	bx	lr
+jdiv_2:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+	bx	lr
+jdiv_24:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_12:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_6:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_3:
+	ldr	r1, dc_3
+        smull	r3, r2, r0, r1
+        sub	r0, r2, r0, asr #31
+	bx	lr
+jdiv_4:
+	mov	r1, r0, asr #31
+	add	r0, r0, r1, lsr #30
+	mov	r0, r0, asr #2
+	bx	lr
+jdiv_20:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_10:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_5:
+	ldr	r1, dc_5
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #1
+	bx	lr
+jdiv_28:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_14:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_7:
+	ldr	r1, dc_7
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r0, r1, r3, asr #2
+	bx	lr
+jdiv_8:
+	mov	r1, r0, asr #31
+	add	r0, r0, r1, lsr #29
+	mov	r0, r0, asr #3
+	bx	lr
+jdiv_18:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_9:
+	ldr	r1, dc_9
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #1
+	bx	lr
+jdiv_22:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_11:
+	ldr	r1, dc_11
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #1
+	bx	lr
+jdiv_26:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_13:
+	ldr	r1, dc_13
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #2
+	bx	lr
+jdiv_30:
+        add     r0, r0, r0, lsr #31
+        mov     r0, r0, asr #1
+jdiv_15:
+	ldr	r1, dc_15
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r0, r1, r3, asr #3
+	bx	lr
+jdiv_16:
+	mov	r1, r0, asr #31
+	add	r0, r0, r1, lsr #28
+	mov	r0, r0, asr #4
+	bx	lr
+jdiv_17:
+	ldr	r1, dc_17
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #3
+	bx	lr
+jdiv_19:
+	ldr	r1, dc_19
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #3
+	bx	lr
+jdiv_21:
+	ldr	r1, dc_21
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #2
+	bx	lr
+jdiv_23:
+	ldr	r1, dc_23
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r0, r1, r3, asr #4
+	bx	lr
+jdiv_25:
+	ldr	r1, dc_25
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #3
+	bx	lr
+jdiv_27:
+	ldr	r1, dc_27
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r0, r3, r2, asr #3
+	bx	lr
+jdiv_29:
+	ldr	r1, dc_29
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r0, r1, r3, asr #4
+	bx	lr
+jdiv_31:
+	ldr	r1, dc_31
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r0, r1, r3, asr #4
+	bx	lr
+jdiv_32:
+	mov	r1, r0, asr #31
+	add	r0, r0, r1, lsr #27
+	mov	r0, r0, asr #5
+	bx	lr
+jrem_1:
+	mov	r0, #0
+	bx	lr
+jrem_2:
+	add	r3, r0, r0, lsr #31
+        mov	r1, r3, asr #1
+	sub	r0, r0, r1, lsl #1
+	bx	lr
+jrem_3:
+	ldr	r1, dc_3
+        smull	r3, r2, r0, r1
+        sub	r1, r2, r0, asr #31
+	add	r3, r1, r1, lsl #1
+	sub	r0, r0, r3
+	bx	lr
+jrem_4:
+	movs	r3, r0
+        addmi	r3, r3, #3
+        mov	r1, r3, asr #2
+	sub	r0, r0, r1, lsl #2
+	bx	lr
+jrem_5:
+	ldr	r1, dc_5
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #1
+	add	r3, r1, r1, lsl #2
+	sub	r0, r0, r3
+	bx	lr
+jrem_6:
+	ldr	r1, dc_6
+        smull	r3, r2, r0, r1
+        sub	r1, r2, r0, asr #31
+	add	r3, r1, r1, lsl #1
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_7:
+	ldr	r1, dc_7
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #2
+	rsb	r3, r1, r1, lsl #3
+	sub	r0, r0, r3
+	bx	lr
+jrem_8:
+	movs	r3, r0
+        addmi	r3, r3, #7
+        mov	r1, r3, asr #3
+	sub	r0, r0, r1, lsl #3
+	bx	lr
+jrem_9:
+	ldr	r1, dc_9
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #1
+	add	r3, r1, r1, lsl #3
+	sub	r0, r0, r3
+	bx	lr
+jrem_10:
+	ldr	r1, dc_10
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #2
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_11:
+	ldr	r1, dc_11
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #1
+	add	r3, r1, r1, lsl #2
+	add	r3, r1, r3, lsl #1
+	sub	r0, r0, r3
+	bx	lr
+jrem_12:
+	ldr	r1, dc_12
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #1
+	add	r3, r1, r1, lsl #1
+	sub	r0, r0, r3, lsl #2
+	bx	lr
+jrem_13:
+	ldr	r1, dc_13
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #1
+	add	r3, r1, r3, lsl #2
+	sub	r0, r0, r3
+	bx	lr
+jrem_14:
+	ldr	r1, dc_14
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #3
+	rsb	r3, r1, r1, lsl #3
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_15:
+	ldr	r1, dc_15
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #3
+	rsb	r3, r1, r1, lsl #4
+	sub	r0, r0, r3
+	bx	lr
+jrem_16:
+	movs	r3, r0
+        addmi	r3, r3, #15
+        mov	r1, r3, asr #4
+	sub	r0, r0, r1, lsl #4
+	bx	lr
+jrem_17:
+	ldr	r1, dc_17
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #4
+	sub	r0, r0, r3
+	bx	lr
+jrem_18:
+	ldr	r1, dc_18
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #3
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_19:
+	ldr	r1, dc_19
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #3
+	add	r3, r1, r3, lsl #1
+	sub	r0, r0, r3
+	bx	lr
+jrem_20:
+	ldr	r1, dc_20
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #2
+	sub	r0, r0, r3, lsl #2
+	bx	lr
+jrem_21:
+	ldr	r1, dc_21
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #1
+	rsb	r3, r3, r3, lsl #3
+	sub	r0, r0, r3
+	bx	lr
+jrem_22:
+	ldr	r1, dc_22
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #2
+	add	r3, r1, r3, lsl #1
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_23:
+	ldr	r1, dc_23
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #4
+	add	r3, r1, r1, lsl #1
+	rsb	r3, r1, r3, lsl #3
+	sub	r0, r0, r3
+	bx	lr
+jrem_24:
+	ldr	r1, dc_24
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #2
+	add	r3, r1, r1, lsl #1
+	sub	r0, r0, r3, lsl #3
+	bx	lr
+jrem_25:
+	ldr	r1, dc_25
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #2
+	add	r3, r3, r3, lsl #2
+	sub	r0, r0, r3
+	bx	lr
+jrem_26:
+	ldr	r1, dc_26
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #1
+	add	r3, r1, r3, lsl #2
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_27:
+	ldr	r1, dc_27
+        smull	r3, r2, r0, r1
+        mov	r3, r0, asr #31
+        rsb	r1, r3, r2, asr #3
+	add	r3, r1, r1, lsl #1
+	add	r3, r3, r3, lsl #3
+	sub	r0, r0, r3
+	bx	lr
+jrem_28:
+	ldr	r1, dc_28
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #4
+	rsb	r3, r1, r1, lsl #3
+	sub	r0, r0, r3, lsl #2
+	bx	lr
+jrem_29:
+	ldr	r1, dc_29
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #4
+	rsb	r3, r1, r1, lsl #3
+	add	r3, r1, r3, lsl #2
+	sub	r0, r0, r3
+	bx	lr
+jrem_30:
+	ldr	r1, dc_30
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #4
+	rsb	r3, r1, r1, lsl #4
+	sub	r0, r0, r3, lsl #1
+	bx	lr
+jrem_31:
+	ldr	r1, dc_31
+        smull	r3, r2, r0, r1
+        mov	r1, r0, asr #31
+        add	r3, r0, r2
+        rsb	r1, r1, r3, asr #4
+	rsb	r3, r1, r1, lsl #5
+	sub	r0, r0, r3
+	bx	lr
+jrem_32:
+	movs	r3, r0
+        addmi	r3, r3, #31
+        mov	r1, r3, asr #5
+	sub	r0, r0, r1, lsl #5
+	bx	lr
+	ALIGN_DATA
+dc_7:
+dc_14:
+	.word     0x92492493
+dc_15:
+dc_30:
+	.word     0x88888889
+dc_23:
+	.word     0xb21642c9
+dc_28:
+	.word     0x92492493
+dc_29:
+	.word     0x8d3dcb09
+dc_31:
+	.word     0x84210843
+dc_6:
+dc_12:
+dc_24:
+	.word     0x2aaaaaab
+dc_19:
+	.word     0x6bca1af3
+dc_5:
+dc_10:
+dc_20:
+	.word     0x66666667
+dc_21:
+	.word     0x30c30c31
+dc_11:
+dc_22:
+	.word     0x2e8ba2e9
+dc_26:
+dc_13:
+	.word     0x4ec4ec4f
+dc_25:
+	.word     0x51eb851f
+dc_27:
+	.word     0x4bda12f7
+dc_3:
+	.word     0x55555556
+dc_17:
+	.word     0x78787879
+dc_9:
+dc_18:
+	.word     0x38e38e39
+
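+
+@ The jdiv_N/jrem_N entry points above replace signed division by small
+@ constants with a multiply-high against the precomputed reciprocals in the
+@ dc_* table (dc_3 = 0x55555556, roughly 2^32/3, and so on). A minimal C++
+@ sketch of the trick used by jdiv_3/jrem_3, assuming 32-bit signed operands:
+@
+@   #include <cstdint>
+@
+@   // q = hi32(x * 0x55555556) - (x >> 31) equals x / 3 (truncated toward
+@   // zero) for every 32-bit signed x; the smull in jdiv_3 computes the
+@   // high half of the 64-bit product.
+@   static int32_t div3(int32_t x) {
+@     int32_t hi = (int32_t)(((int64_t)x * 0x55555556LL) >> 32);
+@     return hi - (x >> 31);   // adds 1 for negative x, as "sub r0, r2, r0, asr #31"
+@   }
+@
+@   static int32_t rem3(int32_t x) { return x - 3 * div3(x); }  // the jrem_3 shape
+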
+	.global	Thumb2_DivZero_Handler
+	.type Thumb2_DivZero_Handler, %function
+Thumb2_DivZero_Handler:
+#ifdef T2JIT
+
+	adrl	r0, idiv_clz_ret
+	cmp	r0, lr
+	addne	r0, r0, #irem_clz_ret - idiv_clz_ret
+	cmpne	r0, lr
+	beq	divide_by_zero_exception
+	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
+	bic	r0, lr, #TBIT
+	ldr	r1, [Rframe, #FRAME_METHOD]
+        ldr     jpc, [r1, #METHOD_CONSTMETHOD]
+	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
+	mov	r2, sp
+	ldr	r3, [Rframe, #FRAME_LOCALS]
+	bl	Thumb2_lr_to_bci
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+	cmp	r0, #-1
+	moveq	jpc, #0
+	addne	jpc, jpc, r0
+	bl	load_dispatch
+	LOAD_ISTATE
+#endif // T2JIT
+	b	divide_by_zero_exception
+
+#ifdef T2JIT
+
+	.global	Thumb2_Handle_Exception
+	.type Thumb2_Handle_Exception, %function
+	.global	Thumb2_Handle_Exception_NoRegs
+	.type Thumb2_Handle_Exception_NoRegs, %function
+	.global Thumb2_ArrayBounds_Handler
+	.type Thumb2_ArrayBounds_Handler, %function
+	.global Thumb2_NullPtr_Handler
+	.type Thumb2_NullPtr_Handler, %function
+	.global Thumb2_Stack_Overflow
+	.type Thumb2_Stack_Overflow, %function
+Thumb2_ArrayBounds_Handler:
+	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
+	bic	r0, lr, #TBIT
+	ldr	r1, [Rframe, #FRAME_METHOD]
+        ldr     jpc, [r1, #METHOD_CONSTMETHOD]
+	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
+	mov	r2, sp
+	ldr	r3, [Rframe, #FRAME_LOCALS]
+	bl	Thumb2_lr_to_bci
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+	cmp	r0, #-1
+	moveq	jpc, #0
+	addne	jpc, jpc, r0
+	bl	load_dispatch
+	mov	r0, #VMSYMBOLS_ArrayIndexOutOfBounds
+	LOAD_ISTATE
+	b	raise_exception
+Thumb2_Handle_Exception:
+	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
+	bic	r0, lr, #TBIT
+	ldr	r1, [Rframe, #FRAME_METHOD]
+        ldr     jpc, [r1, #METHOD_CONSTMETHOD]
+	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
+	mov	r2, sp
+	ldr	r3, [Rframe, #FRAME_LOCALS]
+	bl	Thumb2_lr_to_bci
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+	cmp	r0, #-1
+	moveq	jpc, #0
+	addne	jpc, jpc, r0
+	bl	load_dispatch
+	LOAD_ISTATE
+	b	handle_exception
+Thumb2_Handle_Exception_NoRegs:
+	LOAD_FRAME
+	ldr	r0, [Rframe, #FRAME_STACK_LIMIT]
+	add	r0, r0, #4
+	str	r0, [thread, #THREAD_JAVA_SP]
+	bic	r0, lr, #TBIT
+	ldr	r1, [Rframe, #FRAME_METHOD]
+        ldr     jpc, [r1, #METHOD_CONSTMETHOD]
+	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
+	mov	r2, #0
+	bl	Thumb2_lr_to_bci
+	cmp	r0, #-1
+	moveq	jpc, #0
+	addne	jpc, jpc, r0
+	bl	load_dispatch
+	LOAD_ISTATE
+	b	handle_exception
+Thumb2_NullPtr_Handler:
+	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
+	bic	r0, lr, #TBIT
+	ldr	r1, [Rframe, #FRAME_METHOD]
+        ldr     jpc, [r1, #METHOD_CONSTMETHOD]
+	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
+	mov	r2, sp
+	ldr	r3, [Rframe, #FRAME_LOCALS]
+	bl	Thumb2_lr_to_bci
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+	cmp	r0, #-1
+	moveq	jpc, #0
+	addne	jpc, jpc, r0
+	bl	load_dispatch
+	LOAD_ISTATE
+	b	null_ptr_exception
+
+Thumb2_Stack_Overflow:
+	mov	r0, thread
+	mov	r2, #0
+	str	r2, [r0, #THREAD_LAST_JAVA_SP]
+	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r0, #THREAD_JAVA_SP]
+	str	ip, [r0, #THREAD_LAST_JAVA_FP]
+	str	r2, [r0, #THREAD_LAST_JAVA_SP]
+	bl	_ZN18InterpreterRuntime24throw_StackOverflowErrorEP10JavaThread
+	mov	r0, #0
+	ldmfd	arm_sp!, {fast_regset, pc}
+
+	.global	Thumb2_Exit_To_Interpreter
+	.type Thumb2_Exit_To_Interpreter, %function
+Thumb2_Exit_To_Interpreter:
+	LOAD_ISTATE
+	bl	load_dispatch
+	sub	stack, stack, #4
+	CACHE_CP
+	CACHE_LOCALS
+	DISPATCH	0
+
+	.global	Thumb2_monitorenter
+Thumb2_monitorenter:
+	stmdb	sp!, {ip, lr}
+	sub	stack, stack, #4
+	mov	r0, r8
+	POP	r1
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	Helper_monitorenter
+	CACHE_STACK		@ monitorenter may expand stack!!!
+	ldmia	sp!, {ip, lr}
+	cmp	r0, #0
+	bne	handle_exception
+	add	stack, stack, #4
+	bx	lr
+
+	.global	Thumb2_Clear_Cache
+	.type Thumb2_Clear_Cache, %function
+Thumb2_Clear_Cache:
+	stmdb	sp!, {r7}
+	mov	r2, #0
+	mov	r7, #2
+	orr	r7, r7, #0xf0000
+	svc	0
+	ldmia	sp!, {r7}
+	bx	lr
+
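+
+@ Thumb2_Clear_Cache issues the Linux ARM-private cacheflush system call
+@ (r7 = 0x0f0002) over the range passed in r0/r1 so newly written code is
+@ visible to the instruction fetcher. From C++ the portable way to get the
+@ same effect is the compiler builtin; a minimal sketch, assuming GCC or
+@ Clang on ARM Linux (hypothetical helper, not part of this patch):
+@
+@   #include <cstddef>
+@
+@   // Make [buf, buf + len) coherent between D-cache and I-cache after
+@   // emitting code. __builtin___clear_cache wraps the same cacheflush
+@   // syscall the stub issues directly.
+@   void flush_code_cache(char* buf, size_t len) {
+@     __builtin___clear_cache(buf, buf + len);
+@   }
+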
+#endif // T2JIT
+
+	.section	.init_array,"aw",%init_array
+	.word	bci_init(target1)
+
+	.data
+	.global	CPUInfo
+	ALIGN_DATA
+        .word   0, 0, 0, 0, 0, 0, 0, 0
+        .word   0, 0, 0, 0, 0
+DispatchBreakPoint:					.word	0
+CPUInfo:						.word	0
+CodeTrace_Idx:						.word	0
+UseOnStackReplacement_Address:                          .word   0
+BackgroundCompilation_Address:                          .word   0
+CompileThreshold_Address:                               .word   0
+InterpreterInvocationLimit_Address:			.word	0
+UseCompiler_Address:					.word	0
+can_post_interpreter_events:				.word	0 	
+PrintCommandLineFlags_Address:				.word	0
+oopDesc_Address:					.word	0
+ThreadLocalStorage_thread_index:			.word	0
+AbstractInterpreter_notice_safepoints:			.word	0
+Bytecodes_name_Address:					.word	0
+Universe_collectedHeap_Address:				.word	0
+always_do_update_barrier_Address:			.word	0
+VmSymbols_symbols_Address:				.word	0
+SafePointSynchronize_state_Address:			.word	0
+InterpreterRuntime_slow_signature_handler_Address:	.word	0
+XXX:
+opclabels_data:
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+
+#endif // __arm__
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -36,6 +36,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
@@ -65,6 +66,14 @@
   CALL_VM_NOCHECK_NOFIX(func)                   \
   fixup_after_potential_safepoint()
 
+//#define CPPIDEBUG 1
+#ifdef CPPIDEBUG
+#define CPPINT_DEBUG( Z_code_ ) Z_code_
+CPPINT_DEBUG ( static const char *FFng_Zero_Flag = "CPPINT_DEBUG_ON\n"; ) 
+#else
+#define CPPINT_DEBUG( Z_code_ )
+#endif
+
 int CppInterpreter::normal_entry(methodOop method, intptr_t UNUSED, TRAPS) {
   JavaThread *thread = (JavaThread *) THREAD;
 
@@ -649,6 +658,25 @@
   return 0;
 }
 
+int CppInterpreter::method_handle_entry(methodOop method,
+                                        intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  CPPINT_DEBUG( tty->print_cr( "method_handle : 0x%x , thread: 0x%x , stack: 0x%x.", \
+						method, thread, stack ); )
+
+  return MethodHandles::method_handle_entry_invokeBasic(method, UNUSED,  THREAD);
+}
+
+void CppInterpreter::process_method_handle(oop method_handle, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  CPPINT_DEBUG( tty->print_cr( "process_method_handle : 0x%x , thread: 0x%x , stack: 0x%x.", \
+						method_handle, thread, stack ); )
+  methodOop method = (methodOop) java_lang_invoke_MemberName::vmtarget(method_handle);
+  MethodHandles::invoke_target(method, THREAD);
+}
+
 // The new slots will be inserted before slot insert_before.
 // Slots < insert_before will have the same slot number after the insert.
 // Slots >= insert_before will become old_slot + num_slots.
@@ -849,10 +877,30 @@
   return generate_entry((address) CppInterpreter::normal_entry);
 }
 
+#ifdef HOTSPOT_ASM
+extern "C" address asm_generate_method_entry(
+  AbstractInterpreter::MethodKind kind);
+#endif // HOTSPOT_ASM
+
 address AbstractInterpreterGenerator::generate_method_entry(
     AbstractInterpreter::MethodKind kind) {
   address entry_point = NULL;
 
+  CPPINT_DEBUG( tty->print_cr( "generate_method_entry : kind 0x%x ", \
+						kind ); )
+
+#ifdef HOTSPOT_ASM
+    address asm_entry = asm_generate_method_entry(kind);
+    if (asm_entry) {
+      CPPINT_DEBUG( tty->print_cr( "asm_generate_method_entry : kind 0x%x asm_entry: 0x%x ", \
+						kind, asm_entry ); )
+      entry_point = ((InterpreterGenerator*) this)->generate_entry(asm_entry);
+      CPPINT_DEBUG( tty->print_cr( "asm_generate_method_entry return : kind 0x%x entry_point : 0x%x ", \
+						kind, entry_point ); )
+      return(entry_point);
+    }
+#endif // HOTSPOT_ASM
+
   switch (kind) {
   case Interpreter::zerolocals:
   case Interpreter::zerolocals_synchronized:
@@ -1077,3 +1125,4 @@
 }
 
 #endif // CC_INTERP
+
--- a/src/cpu/zero/vm/cppInterpreter_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/cppInterpreter_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -36,6 +36,8 @@
   static int native_entry(methodOop method, intptr_t UNUSED, TRAPS);
   static int accessor_entry(methodOop method, intptr_t UNUSED, TRAPS);
   static int empty_entry(methodOop method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry(methodOop method, intptr_t UNUSED, TRAPS);
+  static void process_method_handle(oop method_handle, TRAPS);
 
  public:
   // Main loop of normal_entry
--- a/src/cpu/zero/vm/methodHandles_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/methodHandles_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,10 +31,12 @@
   return sizeof(ZeroEntry) * (Interpreter::method_handle_invoke_LAST - Interpreter::method_handle_invoke_FIRST + 1);
 }
 
+public:
+  static int method_handle_entry_invokeBasic(methodOop method, intptr_t UNUSED, TRAPS);
+  static void invoke_target(methodOop method, TRAPS);
+
 private:
   static oop popFromStack(TRAPS);
-  static void invoke_target(methodOop method, TRAPS);
-  static int method_handle_entry_invokeBasic(methodOop method, intptr_t UNUSED, TRAPS);
   static int method_handle_entry_linkToStaticOrSpecial(methodOop method, intptr_t UNUSED, TRAPS);
   static int method_handle_entry_linkToVirtual(methodOop method, intptr_t UNUSED, TRAPS);
   static int method_handle_entry_linkToInterface(methodOop method, intptr_t UNUSED, TRAPS);
--- a/src/cpu/zero/vm/sharedRuntime_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -109,8 +109,7 @@
 
 
 static RuntimeStub* generate_empty_runtime_stub(const char* name) {
-  CodeBuffer buffer(name, 0, 0);
-  return RuntimeStub::new_runtime_stub(name, &buffer, 0, 0, NULL, false);
+  return CAST_FROM_FN_PTR(RuntimeStub*,zero_stub);
 }
 
 static SafepointBlob* generate_empty_safepoint_blob() {
@@ -121,7 +120,6 @@
   return CAST_FROM_FN_PTR(DeoptimizationBlob*,zero_stub);
 }
 
-
 void SharedRuntime::generate_deopt_blob() {
   _deopt_blob = generate_empty_deopt_blob();
 }
--- a/src/cpu/zero/vm/stack_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/stack_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -99,7 +99,7 @@
   int shadow_pages_size() const {
     return _shadow_pages_size;
   }
-  int abi_stack_available(Thread *thread) const;
+  ssize_t abi_stack_available(Thread *thread) const;
 
  public:
   void overflow_check(int required_words, TRAPS);
--- a/src/cpu/zero/vm/stack_zero.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/stack_zero.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -47,10 +47,11 @@
 // This method returns the amount of ABI stack available for us
 // to use under normal circumstances.  Note that the returned
 // value can be negative.
-inline int ZeroStack::abi_stack_available(Thread *thread) const {
-  int stack_used = thread->stack_base() - (address) &stack_used;
-  int stack_free = thread->stack_size() - stack_used;
-  return stack_free - shadow_pages_size();
+inline ssize_t ZeroStack::abi_stack_available(Thread *thread) const {
+  ssize_t stack_used = thread->stack_base() - (address) &stack_used
+    + (StackYellowPages+StackRedPages+StackShadowPages) * os::vm_page_size();
+  ssize_t stack_free = thread->stack_size() - stack_used;
+  return stack_free;
 }
 
 #endif // CPU_ZERO_VM_STACK_ZERO_INLINE_HPP
--- a/src/cpu/zero/vm/vm_version_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/vm_version_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2009 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -36,4 +36,12 @@
 # include "os_bsd.inline.hpp"
 #endif
 
-// This file is intentionally empty
+
+void VM_Version::initialize() {
+  get_processor_features();
+  // This machine does not allow unaligned memory accesses
+  if (! FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    warning("Unaligned memory access is not available on this CPU");
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+  }
+}
--- a/src/cpu/zero/vm/vm_version_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/cpu/zero/vm/vm_version_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007 Red Hat, Inc.
+ * Copyright 2015 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,7 +30,16 @@
 #include "runtime/vm_version.hpp"
 
 class VM_Version : public Abstract_VM_Version {
+
  public:
+  static void get_processor_features() {
+#ifdef __ARM_ARCH_7A__
+    Abstract_VM_Version::_supports_cx8 = true;
+#endif
+  }
+
+  static void initialize();
+
   static const char* cpu_features() {
     return "";
   }
--- a/src/os/aix/vm/os_aix.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/aix/vm/os_aix.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -55,6 +55,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -388,9 +389,9 @@
   // default should be 4K.
   size_t data_page_size = SIZE_4K;
   {
-    void* p = ::malloc(SIZE_16M);
+    void* p = os::malloc(SIZE_16M, mtInternal);
     data_page_size = os::Aix::query_pagesize(p);
-    ::free(p);
+    os::free(p);
   }
 
   // query default shm page size (LDR_CNTRL SHMPSIZE)
@@ -2139,7 +2140,7 @@
   if (!pd_commit_memory(addr, size, exec)) {
     // add extra info in product mode for vm_exit_out_of_memory():
     PRODUCT_ONLY(warn_fail_commit_memory(addr, size, exec, errno);)
-    vm_exit_out_of_memory(size, mesg);
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
   }
 }
 
--- a/src/os/aix/vm/perfMemory_aix.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/aix/vm/perfMemory_aix.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -201,6 +201,7 @@
 // the backing store files. Returns true if the directory is considered
 // a secure location. Returns false if the statbuf is a symbolic link or
 // if an error occurred.
+//
 static bool is_statbuf_secure(struct stat *statp) {
   if (S_ISLNK(statp->st_mode) || !S_ISDIR(statp->st_mode)) {
     // The path represents a link or some non-directory file type,
@@ -209,15 +210,18 @@
     return false;
   }
   // We have an existing directory, check if the permissions are safe.
+  //
   if ((statp->st_mode & (S_IWGRP|S_IWOTH)) != 0) {
     // The directory is open for writing and could be subjected
     // to a symlink or a hard link attack. Declare it insecure.
+    //
     return false;
   }
-  // See if the uid of the directory matches the effective uid of the process.
-  //
-  if (statp->st_uid != geteuid()) {
+  // If user is not root then see if the uid of the directory matches the effective uid of the process.
+  uid_t euid = geteuid();
+  if ((euid != 0) && (statp->st_uid != euid)) {
     // The directory was not created by this user, declare it insecure.
+    //
     return false;
   }
   return true;
@@ -228,6 +232,7 @@
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
 // is a symbolic link or if an error occurred.
+//
 static bool is_directory_secure(const char* path) {
   struct stat statbuf;
   int result = 0;
--- a/src/os/aix/vm/porting_aix.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/aix/vm/porting_aix.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -22,6 +22,8 @@
  *
  */
 
+#include "memory/allocation.inline.hpp"
+#include "runtime/os.hpp"
 #include "loadlib_aix.hpp"
 #include "porting_aix.hpp"
 #include "utilities/debug.hpp"
@@ -84,7 +86,7 @@
     while (n) {
       node* p = n;
       n = n->next;
-      free(p->v);
+      os::free(p->v);
       delete p;
     }
   }
@@ -96,7 +98,7 @@
       }
     }
     node* p = new node;
-    p->v = strdup(s);
+    p->v = os::strdup_check_oom(s);
     p->next = first;
     first = p;
     return p->v;
--- a/src/os/bsd/dtrace/libjvm_db.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/bsd/dtrace/libjvm_db.c	Mon Apr 13 16:44:26 2020 +0100
@@ -559,13 +559,14 @@
   CHECK_FAIL(err);
 
   result[0] = '\0';
-  strncat(result, klassString, size);
-  size -= strlen(klassString);
-  strncat(result, ".", size);
-  size -= 1;
-  strncat(result, nameString, size);
-  size -= strlen(nameString);
-  strncat(result, signatureString, size);
+  if (snprintf(result, size,
+    "%s.%s%s",
+    klassString,
+    nameString,
+    signatureString) >= size) {
+    // truncation
+    goto fail;
+  }
 
   if (nameString != NULL) free(nameString);
   if (klassString != NULL) free(klassString);
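 
 // The hunk above drops the chain of strncat calls, whose running size
 // bookkeeping undercounted the remaining space, in favour of a single
 // snprintf plus an explicit truncation check. The general idiom, as a
 // hedged sketch (hypothetical helper, not part of the patch):
 //
 //   #include <cstddef>
 //   #include <cstdio>
 //
 //   // snprintf returns the length the fully formatted string would need,
 //   // so a result >= the buffer size means the output was truncated and
 //   // can be treated as a failure, mirroring the patch's "goto fail" path.
 //   static bool format_name(char* out, size_t size, const char* klass,
 //                           const char* name, const char* sig) {
 //     int n = std::snprintf(out, size, "%s.%s%s", klass, name, sig);
 //     return n >= 0 && (size_t) n < size;
 //   }
 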
@@ -1072,9 +1073,9 @@
       CHECK_FAIL(err);
   }
   if (deoptimized) {
-    strncat(result + 1, " [deoptimized frame]; ", size-1);
+    strncat(result, " [deoptimized frame]; ", size - strlen(result) - 1);
   } else {
-    strncat(result + 1, " [compiled] ", size-1);
+    strncat(result, " [compiled] ", size - strlen(result) - 1);
   }
   if (debug)
       fprintf(stderr, "name_for_nmethod: END: method name: %s, vf_cnt: %d\n\n",
--- a/src/os/bsd/vm/chaitin_bsd.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "opto/chaitin.hpp"
-#include "opto/machnode.hpp"
-
-void PhaseRegAlloc::pd_preallocate_hook() {
-  // no action
-}
-
-#ifdef ASSERT
-void PhaseRegAlloc::pd_postallocate_verify_hook() {
-  // no action
-}
-#endif
-
-
-// Reconciliation History
-// chaitin_solaris.cpp  1.7 99/07/12 23:54:22
-// End
--- a/src/os/bsd/vm/decoder_machO.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/bsd/vm/decoder_machO.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -97,6 +97,7 @@
   char * symname = mach_find_in_stringtable((char*) ((uintptr_t)mach_base + stroff), strsize, found_strx);
   if (symname) {
       strncpy(buf, symname, buflen);
+      buf[buflen - 1] = '\0';
       return true;
   }
   DEBUG_ONLY(tty->print_cr("no string or null string found."));
--- a/src/os/bsd/vm/os_bsd.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/bsd/vm/os_bsd.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -2981,7 +2981,7 @@
   if (!pd_commit_memory(addr, size, exec)) {
     // add extra info in product mode for vm_exit_out_of_memory():
     PRODUCT_ONLY(warn_fail_commit_memory(addr, size, exec, errno);)
-    vm_exit_out_of_memory(size, mesg);
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
   }
 }
 
@@ -4769,6 +4769,14 @@
             (size_t)(StackYellowPages+StackRedPages+StackShadowPages+
                     2*BytesPerWord COMPILER2_PRESENT(+1)) * Bsd::page_size());
 
+#ifdef ZERO
+  // If this is Zero, allow at the very minimum one page each for the
+  // Zero stack and the native stack.  This won't make any difference
+  // for 4k pages, but is significant for large pages.
+  os::Bsd::min_stack_allowed = MAX2(os::Bsd::min_stack_allowed,
+             (size_t)(StackYellowPages+StackRedPages+StackShadowPages+2) * Bsd::page_size());
+#endif
+
   size_t threadStackSizeInBytes = ThreadStackSize * K;
   if (threadStackSizeInBytes != 0 &&
       threadStackSizeInBytes < os::Bsd::min_stack_allowed) {
--- a/src/os/linux/vm/chaitin_linux.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "opto/chaitin.hpp"
-#include "opto/machnode.hpp"
-
-void PhaseRegAlloc::pd_preallocate_hook() {
-  // no action
-}
-
-#ifdef ASSERT
-void PhaseRegAlloc::pd_postallocate_verify_hook() {
-  // no action
-}
-#endif
-
-
-// Reconciliation History
-// chaitin_solaris.cpp  1.7 99/07/12 23:54:22
-// End
--- a/src/os/linux/vm/globals_linux.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/globals_linux.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,10 @@
           "Load DLLs with executable-stack attribute in the VM Thread") \
                                                                         \
   product(bool, UseSHM, false,                                          \
-          "Use SYSV shared memory for large pages")
+          "Use SYSV shared memory for large pages")                     \
+                                                                        \
+  diagnostic(bool, PrintActiveCpus, false,                              \
+          "Print the number of CPUs detected in os::active_processor_count")
 
 //
 // Defines Linux-specific default values. The flags are available on all
--- a/src/os/linux/vm/osThread_linux.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/osThread_linux.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 #endif
--- a/src/os/linux/vm/os_linux.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/os_linux.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -69,6 +69,10 @@
 # include "assembler_x86.inline.hpp"
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 # include "nativeInst_sparc.hpp"
@@ -120,6 +124,16 @@
 # include <inttypes.h>
 # include <sys/ioctl.h>
 
+#include <sys/prctl.h>
+
+#ifndef _GNU_SOURCE
+  #define _GNU_SOURCE
+  #include <sched.h>
+  #undef _GNU_SOURCE
+#else
+  #include <sched.h>
+#endif
+
 // if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling
 // getrusage() is prepared to handle the associated failure.
 #ifndef RUSAGE_THREAD
@@ -134,6 +148,11 @@
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
 
 #define LARGEPAGES_BIT (1 << 6)
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183		/* ARM AARCH64 */
+#endif
+
 ////////////////////////////////////////////////////////////////////////////////
 // global variables
 julong os::Linux::_physical_memory = 0;
@@ -282,13 +301,19 @@
 #elif defined(PPC32)
 static char cpu_arch[] = "ppc";
 #elif defined(PPC64)
+#if defined(VM_LITTLE_ENDIAN)
+static char cpu_arch[] = "ppc64le";
+#else
 static char cpu_arch[] = "ppc64";
+#endif
 #elif defined(SPARC)
 #  ifdef _LP64
 static char cpu_arch[] = "sparcv9";
 #  else
 static char cpu_arch[] = "sparc";
 #  endif
+#elif defined(AARCH64)
+static char cpu_arch[] = "aarch64";
 #else
 #error Add appropriate cpu_arch setting
 #endif
@@ -380,7 +405,7 @@
  *        ...
  *        7: The default directories, normally /lib and /usr/lib.
  */
-#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
+#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390) || defined(AARCH64)) || defined(BUILTIN_SIM)
 #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
 #else
 #define DEFAULT_LIBPATH "/lib:/usr/lib"
@@ -771,6 +796,10 @@
   }
 }
 
+void os::Linux::expand_stack_to(address bottom) {
+  _expand_stack_to(bottom);
+}
+
 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
   assert(t!=NULL, "just checking");
   assert(t->osthread()->expanding_stack(), "expand should be set");
@@ -1456,8 +1485,12 @@
 
 #ifndef SYS_clock_getres
 
-#if defined(IA32) || defined(AMD64)
-#define SYS_clock_getres IA32_ONLY(266)  AMD64_ONLY(229)
+#if defined(IA32) || defined(AMD64) || defined(AARCH64)
+#ifdef BUILTIN_SIM
+#define SYS_clock_getres 229
+#else
+#define SYS_clock_getres IA32_ONLY(266)  AMD64_ONLY(229) AARCH64_ONLY(114)
+#endif
 #define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
 #else
 #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
@@ -1986,6 +2019,10 @@
   #define EM_486          6               /* Intel 80486 */
   #endif
 
+  #ifndef EM_AARCH64
+  #define EM_AARCH64	183
+  #endif
+
   static const arch_t arch_array[]={
     {EM_386,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
     {EM_486,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
@@ -2006,7 +2043,9 @@
     {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
     {EM_MIPS,        EM_MIPS,    ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
     {EM_PARISC,      EM_PARISC,  ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
-    {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}
+    {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
+    {EM_SH,          EM_SH,      ELFCLASS32, ELFDATA2LSB, (char*)"SH"}, /* Support little endian only*/
+    {EM_AARCH64,     EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"} /* Support little endian only*/
   };
 
   #if  (defined IA32)
@@ -2037,9 +2076,13 @@
     static  Elf32_Half running_arch_code=EM_MIPS;
   #elif  (defined M68K)
     static  Elf32_Half running_arch_code=EM_68K;
+  #elif  (defined SH)
+    static  Elf32_Half running_arch_code=EM_SH;
+  #elif  (defined AARCH64)
+    static  Elf32_Half running_arch_code=EM_AARCH64;
   #else
     #error Method os::dll_load requires that one of following is defined:\
-         IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K
+      IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, SH, AARCH64
   #endif
 
   // Identify compatability class for VM's architecture and library's architecture
@@ -2165,6 +2208,37 @@
   return true;
 }
 
+bool _print_lsb_file(const char* filename, outputStream* st) {
+  int fd = open(filename, O_RDONLY);
+  if (fd == -1) {
+     return false;
+  }
+
+  char buf[512], *d_i, *d_r, *d_c;
+  int bytes;
+
+  if ((bytes = read(fd, buf, sizeof(buf)-1)) == sizeof(buf)-1) {
+     close(fd);
+     return false;
+  }
+  close(fd);
+
+  buf[bytes] = '\n';
+  buf[bytes+1] = '\0';
+  d_i = strstr(buf, "DISTRIB_ID=");
+  d_r = strstr(buf, "DISTRIB_RELEASE=");
+  d_c = strstr(buf, "DISTRIB_CODENAME=");
+  if (!d_i || !d_r || !d_c) {
+     return false;
+  }
+  d_i = strchr(d_i, '=') + 1;  *strchrnul(d_i, '\n') = '\0';
+  d_r = strchr(d_r, '=') + 1;  *strchrnul(d_r, '\n') = '\0';
+  d_c = strchr(d_c, '=') + 1;  *strchrnul(d_c, '\n') = '\0';
+  st->print("%s %s (%s)", d_i, d_r, d_c);
+
+  return true;
+}
+
 void os::print_dll_info(outputStream *st) {
    st->print_cr("Dynamic libraries:");
 
@@ -2221,6 +2295,7 @@
       !_print_ascii_file("/etc/SuSE-release", st) &&
       !_print_ascii_file("/etc/turbolinux-release", st) &&
       !_print_ascii_file("/etc/gentoo-release", st) &&
+      !_print_lsb_file("/etc/lsb-release", st) &&
       !_print_ascii_file("/etc/debian_version", st) &&
       !_print_ascii_file("/etc/ltib-release", st) &&
       !_print_ascii_file("/etc/angstrom-version", st)) {
@@ -2750,7 +2825,7 @@
 
   if (!recoverable_mmap_error(err)) {
     warn_fail_commit_memory(addr, size, exec, err);
-    vm_exit_out_of_memory(size, "committing reserved memory.");
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory.");
   }
 
   return err;
@@ -2767,7 +2842,7 @@
   if (err != 0) {
     // the caller wants all commit errors to exit with the specified mesg:
     warn_fail_commit_memory(addr, size, exec, err);
-    vm_exit_out_of_memory(size, mesg);
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
   }
 }
 
@@ -2803,7 +2878,7 @@
   if (err != 0) {
     // the caller wants all commit errors to exit with the specified mesg:
     warn_fail_commit_memory(addr, size, alignment_hint, exec, err);
-    vm_exit_out_of_memory(size, mesg);
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
   }
 }
 
@@ -2872,12 +2947,7 @@
   unsigned int cpu;
   int retval = -1;
 
-#if defined(IA32)
-# ifndef SYS_getcpu
-# define SYS_getcpu 318
-# endif
-  retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
-#elif defined(AMD64)
+#if defined(AMD64) || defined(BUILTIN_SIM)
 // Unfortunately we have to bring all these macros here from vsyscall.h
 // to be able to compile on old linuxes.
 # define __NR_vgetcpu 2
@@ -2887,6 +2957,11 @@
   typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
   vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
   retval = vgetcpu(&cpu, NULL, NULL);
+#elif defined(IA32) || defined(AARCH64)
+# ifndef SYS_getcpu
+# define SYS_getcpu AARCH64_ONLY(168) NOT_AARCH64(318)
+# endif
+  retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
 #endif
 
   return (retval == -1) ? retval : cpu;
@@ -3006,6 +3081,53 @@
   return res  != (uintptr_t) MAP_FAILED;
 }
 
+static
+address get_stack_commited_bottom(address bottom, size_t size) {
+  address nbot = bottom;
+  address ntop = bottom + size;
+
+  size_t page_sz = os::vm_page_size();
+  unsigned pages = size / page_sz;
+
+  unsigned char vec[1];
+  unsigned imin = 1, imax = pages + 1, imid;
+  int mincore_return_value;
+
+  while (imin < imax) {
+    imid = (imax + imin) / 2;
+    nbot = ntop - (imid * page_sz);
+
+    // Use a trick with mincore to check whether the page is mapped or not.
+    // mincore sets vec to 1 if page resides in memory and to 0 if page
+    // is swapped out, but if the page we are asking for is unmapped
+    // it returns -1 and sets errno to ENOMEM.
+    mincore_return_value = mincore(nbot, page_sz, vec);
+
+    if (mincore_return_value == -1) {
+      // Page is not mapped: go up
+      // to find the first mapped page.
+      if (errno != EAGAIN) {
+        assert(errno == ENOMEM, "Unexpected mincore errno");
+        imax = imid;
+      }
+    } else {
+      // Page is mapped: go down
+      // to find the first unmapped page.
+      imin = imid + 1;
+    }
+  }
+
+  nbot = nbot + page_sz;
+
+  // Adjust stack bottom one page up if last checked page is not mapped
+  if (mincore_return_value == -1) {
+    nbot = nbot + page_sz;
+  }
+
+  return nbot;
+}
+
+
 // Linux uses a growable mapping for the stack, and if the mapping for
 // the stack guard pages is not removed when we detach a thread the
 // stack cannot grow beyond the pages where the stack guard was
@@ -3020,59 +3142,37 @@
 // So, we need to know the extent of the stack mapping when
 // create_stack_guard_pages() is called.
 
-// Find the bounds of the stack mapping.  Return true for success.
-//
 // We only need this for stacks that are growable: at the time of
 // writing thread stacks don't use growable mappings (i.e. those
 // creeated with MAP_GROWSDOWN), and aren't marked "[stack]", so this
 // only applies to the main thread.
 
-static
-bool get_stack_bounds(uintptr_t *bottom, uintptr_t *top) {
-
-  char buf[128];
-  int fd, sz;
-
-  if ((fd = ::open("/proc/self/maps", O_RDONLY)) < 0) {
-    return false;
-  }
-
-  const char kw[] = "[stack]";
-  const int kwlen = sizeof(kw)-1;
-
-  // Address part of /proc/self/maps couldn't be more than 128 bytes
-  while ((sz = os::get_line_chars(fd, buf, sizeof(buf))) > 0) {
-     if (sz > kwlen && ::memcmp(buf+sz-kwlen, kw, kwlen) == 0) {
-        // Extract addresses
-        if (sscanf(buf, "%" SCNxPTR "-%" SCNxPTR, bottom, top) == 2) {
-           uintptr_t sp = (uintptr_t) __builtin_frame_address(0);
-           if (sp >= *bottom && sp <= *top) {
-              ::close(fd);
-              return true;
-           }
-        }
-     }
-  }
-
- ::close(fd);
-  return false;
-}
-
-
 // If the (growable) stack mapping already extends beyond the point
 // where we're going to put our guard pages, truncate the mapping at
 // that point by munmap()ping it.  This ensures that when we later
 // munmap() the guard pages we don't leave a hole in the stack
-// mapping. This only affects the main/initial thread, but guard
-// against future OS changes
+// mapping. This only affects the main/initial thread
+
 bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
-  uintptr_t stack_extent, stack_base;
-  bool chk_bounds = NOT_DEBUG(os::Linux::is_initial_thread()) DEBUG_ONLY(true);
-  if (chk_bounds && get_stack_bounds(&stack_extent, &stack_base)) {
-      assert(os::Linux::is_initial_thread(),
-           "growable stack in non-initial thread");
-    if (stack_extent < (uintptr_t)addr)
-      ::munmap((void*)stack_extent, (uintptr_t)addr - stack_extent);
+
+  if (os::Linux::is_initial_thread()) {
+    // As we manually grow the stack up to its bottom inside create_attached_thread(),
+    // it's likely that os::Linux::initial_thread_stack_bottom is mapped and
+    // we don't need to do anything special.
+    // Check that cheap case first, before calling the heavier probe below.
+    uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom();
+    unsigned char vec[1];
+
+    if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) {
+      // Fallback to slow path on all errors, including EAGAIN
+      stack_extent = (uintptr_t) get_stack_commited_bottom(
+                                    os::Linux::initial_thread_stack_bottom(),
+                                    (size_t)addr - stack_extent);
+    }
+
+    if (stack_extent < (uintptr_t)addr) {
+      ::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent));
+    }
   }
 
   return os::commit_memory(addr, size, !ExecMem);
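Both get_stack_commited_bottom() and the rewritten pd_create_stack_guard_pages() above lean on a property of mincore(): it fails with ENOMEM when the queried range is not mapped at all, and succeeds (reporting residency in vec) when it is, which is what lets the binary search locate the lowest committed stack page. A tiny self-contained probe of that behaviour (illustrative only; the patch additionally treats EAGAIN as a reason to fall back to the slow path):

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

// Returns 1 if the page containing 'addr' is mapped, 0 if it is unmapped,
// and -1 on an unexpected error.
static int page_is_mapped(const void* addr) {
  long page_sz = sysconf(_SC_PAGESIZE);
  void* page = (void*)((uintptr_t)addr & ~(uintptr_t)(page_sz - 1));
  unsigned char vec[1];
  if (mincore(page, (size_t)page_sz, vec) == 0) {
    return 1;                          // mapped (resident or swapped out)
  }
  return (errno == ENOMEM) ? 0 : -1;   // ENOMEM means the page is unmapped
}

int main() {
  int on_stack = 0;
  printf("stack page mapped: %d\n", page_is_mapped(&on_stack));
  printf("page at 0x1000:    %d\n", page_is_mapped((void*)0x1000));
  return 0;
}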
@@ -3081,13 +3181,13 @@
 // If this is a growable mapping, remove the guard pages entirely by
 // munmap()ping them.  If not, just call uncommit_memory(). This only
 // affects the main/initial thread, but guard against future OS changes
+// It's safe to always unmap the guard pages for the initial thread because we
+// always place them right after the end of the mapped region.
+
 bool os::remove_stack_guard_pages(char* addr, size_t size) {
   uintptr_t stack_extent, stack_base;
-  bool chk_bounds = NOT_DEBUG(os::Linux::is_initial_thread()) DEBUG_ONLY(true);
-  if (chk_bounds && get_stack_bounds(&stack_extent, &stack_base)) {
-      assert(os::Linux::is_initial_thread(),
-           "growable stack in non-initial thread");
-
+
+  if (os::Linux::is_initial_thread()) {
     return ::munmap(addr, size) == 0;
   }
 
@@ -3304,7 +3404,7 @@
 
 #ifndef ZERO
   large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
-                     ARM_ONLY(2 * M) PPC_ONLY(4 * M);
+                     ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M);
 #endif // ZERO
 
   FILE *fp = fopen("/proc/meminfo", "r");
@@ -4740,6 +4840,48 @@
   }
 }
 
+/* Per task speculation control */
+#ifndef PR_GET_SPECULATION_CTRL
+# define PR_GET_SPECULATION_CTRL    52
+#endif
+#ifndef PR_SET_SPECULATION_CTRL
+# define PR_SET_SPECULATION_CTRL    53
+#endif
+/* Speculation control variants */
+#ifndef PR_SPEC_STORE_BYPASS
+# define PR_SPEC_STORE_BYPASS          0
+#endif
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+
+#ifndef PR_SPEC_NOT_AFFECTED
+# define PR_SPEC_NOT_AFFECTED          0
+#endif
+#ifndef PR_SPEC_PRCTL
+# define PR_SPEC_PRCTL                 (1UL << 0)
+#endif
+#ifndef PR_SPEC_ENABLE
+# define PR_SPEC_ENABLE                (1UL << 1)
+#endif
+#ifndef PR_SPEC_DISABLE
+# define PR_SPEC_DISABLE               (1UL << 2)
+#endif
+#ifndef PR_SPEC_FORCE_DISABLE
+# define PR_SPEC_FORCE_DISABLE         (1UL << 3)
+#endif
+#ifndef PR_SPEC_DISABLE_NOEXEC
+# define PR_SPEC_DISABLE_NOEXEC        (1UL << 4)
+#endif
+
+static void set_speculation() __attribute__((constructor));
+static void set_speculation() {
+  if ( prctl(PR_SET_SPECULATION_CTRL,
+             PR_SPEC_STORE_BYPASS,
+             PR_SPEC_DISABLE_NOEXEC, 0, 0) == 0 ) {
+    return;
+  }
+  prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+}
+
 // this is called _before_ the most of global arguments have been parsed
 void os::init(void) {
   char dummy;   /* used to get a guess on initial stack address */
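The constructor added above disables speculative store bypass for the whole process (the Spectre variant 4 mitigation), preferring the newer PR_SPEC_DISABLE_NOEXEC mode and falling back to PR_SPEC_DISABLE when the kernel rejects it. A small sketch that merely queries the resulting state through PR_GET_SPECULATION_CTRL, redefining the constants defensively just as the patch does (availability depends on the running kernel):

#include <cstdio>
#include <sys/prctl.h>

#ifndef PR_GET_SPECULATION_CTRL
# define PR_GET_SPECULATION_CTRL 52
#endif
#ifndef PR_SPEC_STORE_BYPASS
# define PR_SPEC_STORE_BYPASS    0
#endif
#ifndef PR_SPEC_DISABLE
# define PR_SPEC_DISABLE         (1UL << 2)
#endif

int main() {
  // A negative result means the kernel does not implement this prctl or the
  // CPU is not affected; otherwise it is a bitmask of PR_SPEC_* flags.
  long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
  if (state < 0) {
    printf("speculation control not available\n");
  } else {
    printf("store bypass %s (state=0x%lx)\n",
           (state & PR_SPEC_DISABLE) ? "disabled" : "not force-disabled", state);
  }
  return 0;
}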
@@ -4797,6 +4939,7 @@
 
   pthread_mutex_init(&dl_mutex, NULL);
 
+NOT_ZERO (
   // If the pagesize of the VM is greater than 8K determine the appropriate
   // number of initial guard pages.  The user can change this with the
   // command line arguments, if needed.
@@ -4805,6 +4948,7 @@
     StackRedPages = 1;
     StackShadowPages = round_to((StackShadowPages*Linux::vm_default_page_size()), vm_page_size()) / vm_page_size();
   }
+ )
 }
 
 // To install functions for atexit system call
@@ -4859,6 +5003,14 @@
             (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() +
                     (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size());
 
+#ifdef ZERO
+  // If this is Zero, allow at the very minimum one page each for the
+  // Zero stack and the native stack.  This won't make any difference
+  // for 4k pages, but is significant for large pages.
+  os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed,
+             (size_t)(StackYellowPages+StackRedPages+StackShadowPages+2) * Linux::page_size());
+#endif
+
   size_t threadStackSizeInBytes = ThreadStackSize * K;
   if (threadStackSizeInBytes != 0 &&
       threadStackSizeInBytes < os::Linux::min_stack_allowed) {
@@ -4875,7 +5027,7 @@
 
   Linux::capture_initial_stack(JavaThread::stack_size_at_create());
 
-#if defined(IA32)
+#if defined(IA32) && !defined(ZERO)
   workaround_expand_exec_shield_cs_limit();
 #endif
 
@@ -4989,12 +5141,42 @@
   }
 };
 
+static int os_cpu_count(const cpu_set_t* cpus) {
+  int count = 0;
+  // only look up to the number of configured processors
+  for (int i = 0; i < os::processor_count(); i++) {
+    if (CPU_ISSET(i, cpus)) {
+      count++;
+    }
+  }
+  return count;
+}
+
+// Get the current number of available processors for this process.
+// This value can change at any time during a process's lifetime.
+// sched_getaffinity gives an accurate answer as it accounts for cpusets.
+// If anything goes wrong we fallback to returning the number of online
+// processors - which can be greater than the number available to the process.
 int os::active_processor_count() {
-  // Linux doesn't yet have a (official) notion of processor sets,
-  // so just return the number of online processors.
-  int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
-  assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
-  return online_cpus;
+  cpu_set_t cpus;  // can represent at most 1024 (CPU_SETSIZE) processors
+  int cpus_size = sizeof(cpu_set_t);
+  int cpu_count = 0;
+
+  // pid 0 means the current thread - which we have to assume represents the process
+  if (sched_getaffinity(0, cpus_size, &cpus) == 0) {
+    cpu_count = os_cpu_count(&cpus);
+    if (PrintActiveCpus) {
+      tty->print_cr("active_processor_count: sched_getaffinity processor count: %d", cpu_count);
+    }
+  }
+  else {
+    cpu_count = ::sysconf(_SC_NPROCESSORS_ONLN);
+    warning("sched_getaffinity failed (%s)- using online processor count (%d) "
+            "which may exceed available processors", strerror(errno), cpu_count);
+  }
+
+  assert(cpu_count > 0 && cpu_count <= processor_count(), "sanity check");
+  return cpu_count;
 }
 
 void os::set_native_thread_name(const char *name) {
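The rewritten os::active_processor_count() above respects CPU affinity masks and cpusets (for example under container CPU restrictions) instead of blindly reporting the number of online processors, and the new PrintActiveCpus diagnostic flag from globals_linux.hpp makes the detected count visible. A minimal equivalent outside HotSpot; CPU_COUNT is used for brevity where the patch iterates with CPU_ISSET:

#ifndef _GNU_SOURCE
#define _GNU_SOURCE            // for cpu_set_t and sched_getaffinity()
#endif
#include <cstdio>
#include <sched.h>
#include <unistd.h>

int main() {
  cpu_set_t cpus;              // covers at most CPU_SETSIZE (1024) processors
  int count;

  // pid 0 queries the calling thread, which we take as representative of
  // the whole process, just as the patch does.
  if (sched_getaffinity(0, sizeof(cpu_set_t), &cpus) == 0) {
    count = CPU_COUNT(&cpus);
  } else {
    // Fall back to the online-processor count, which may exceed what this
    // process is actually allowed to use.
    count = (int) sysconf(_SC_NPROCESSORS_ONLN);
  }
  printf("active processors: %d\n", count);
  return 0;
}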
@@ -5216,33 +5398,11 @@
     errno = ENAMETOOLONG;
     return -1;
   }
-  int fd;
   int o_delete = (oflag & O_DELETE);
   oflag = oflag & ~O_DELETE;
 
-  fd = ::open64(path, oflag, mode);
-  if (fd == -1) return -1;
-
-  //If the open succeeded, the file might still be a directory
-  {
-    struct stat64 buf64;
-    int ret = ::fstat64(fd, &buf64);
-    int st_mode = buf64.st_mode;
-
-    if (ret != -1) {
-      if ((st_mode & S_IFMT) == S_IFDIR) {
-        errno = EISDIR;
-        ::close(fd);
-        return -1;
-      }
-    } else {
-      ::close(fd);
-      return -1;
-    }
-  }
-
     /*
-     * All file descriptors that are opened in the JVM and not
+     * All file descriptors that are opened in the Java process and not
      * specifically destined for a subprocess should have the
      * close-on-exec flag set.  If we don't set it, then careless 3rd
      * party native code might fork and exec without closing all
@@ -5263,12 +5423,49 @@
      * 4843136: (process) pipe file descriptor from Runtime.exec not being closed
      * 6339493: (process) Runtime.exec does not close all file descriptors on Solaris 9
      */
+  // Modern Linux kernels (2.6.23 and later, 2007) support O_CLOEXEC with open().
+  // O_CLOEXEC is preferable to using FD_CLOEXEC on an open file descriptor
+  // because it saves a system call and removes a small window where the flag
+  // is unset.  On ancient Linux kernels the O_CLOEXEC flag will be ignored
+  // and we fall back to using FD_CLOEXEC (see below).
+#ifdef O_CLOEXEC
+  oflag |= O_CLOEXEC;
+#endif
+
+  int fd = ::open64(path, oflag, mode);
+  if (fd == -1) return -1;
+
+  // If the open succeeded, the file might still be a directory.
+  {
+    struct stat64 buf64;
+    int ret = ::fstat64(fd, &buf64);
+    int st_mode = buf64.st_mode;
+
+    if (ret != -1) {
+      if ((st_mode & S_IFMT) == S_IFDIR) {
+        errno = EISDIR;
+        ::close(fd);
+        return -1;
+      }
+    } else {
+      ::close(fd);
+      return -1;
+    }
+  }
+
 #ifdef FD_CLOEXEC
-    {
-        int flags = ::fcntl(fd, F_GETFD);
-        if (flags != -1)
-            ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
+  // Validate that the use of the O_CLOEXEC flag on open above worked.
+  // With recent kernels, we will perform this check exactly once.
+  static sig_atomic_t O_CLOEXEC_is_known_to_work = 0;
+  if (!O_CLOEXEC_is_known_to_work) {
+    int flags = ::fcntl(fd, F_GETFD);
+    if (flags != -1) {
+      if ((flags & FD_CLOEXEC) != 0)
+        O_CLOEXEC_is_known_to_work = 1;
+      else
+        ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
     }
+  }
 #endif
 
   if (o_delete != 0) {
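The block above opens with O_CLOEXEC where available and then uses fcntl() once to verify that the flag actually took effect, falling back to FD_CLOEXEC only on kernels old enough to ignore O_CLOEXEC silently. A condensed sketch of the same open-then-verify pattern (hypothetical helper, not the HotSpot function; the one-time "known to work" caching is omitted):

#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

// Open a file with the close-on-exec flag set, verifying that O_CLOEXEC
// took effect and falling back to FD_CLOEXEC if it did not.
static int open_cloexec(const char* path, int oflag) {
#ifdef O_CLOEXEC
  oflag |= O_CLOEXEC;
#endif
  int fd = open(path, oflag);
  if (fd == -1) return -1;

#ifdef FD_CLOEXEC
  int flags = fcntl(fd, F_GETFD);
  if (flags != -1 && (flags & FD_CLOEXEC) == 0) {
    // The kernel ignored O_CLOEXEC (pre-2.6.23): set the flag explicitly.
    fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
  }
#endif
  return fd;
}

int main() {
  int fd = open_cloexec("/etc/hostname", O_RDONLY);
  if (fd >= 0) {
    printf("fd %d opened with close-on-exec set\n", fd);
    close(fd);
  }
  return 0;
}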
@@ -6053,11 +6250,19 @@
 extern char** environ;
 
 #ifndef __NR_fork
-#define __NR_fork IA32_ONLY(2) IA64_ONLY(not defined) AMD64_ONLY(57)
+#ifdef BUILTIN_SIM
+#define __NR_fork 57
+#else
+#define __NR_fork IA32_ONLY(2) IA64_ONLY(not defined) AMD64_ONLY(57) AARCH64_ONLY(1079)
+#endif
 #endif
 
 #ifndef __NR_execve
-#define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59)
+#ifdef BUILTIN_SIM
+#define __NR_execve 59
+#else
+#define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59) AARCH64_ONLY(221)
+#endif
 #endif
 
 #if defined(SPARC)
--- a/src/os/linux/vm/os_linux.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/os_linux.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -239,6 +239,8 @@
   static int safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime);
 
 private:
+  static void expand_stack_to(address bottom);
+
   typedef int (*sched_getcpu_func_t)(void);
   typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
   typedef int (*numa_max_node_func_t)(void);
--- a/src/os/linux/vm/os_linux.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/os_linux.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 # include "atomic_linux_x86.inline.hpp"
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "orderAccess_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "atomic_linux_sparc.inline.hpp"
 # include "orderAccess_linux_sparc.inline.hpp"
--- a/src/os/linux/vm/thread_linux.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/linux/vm/thread_linux.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,11 @@
 # include "orderAccess_linux_x86.inline.hpp"
 # include "prefetch_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "atomic_linux_aarch64.inline.hpp"
+# include "orderAccess_linux_aarch64.inline.hpp"
+# include "prefetch_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "atomic_linux_sparc.inline.hpp"
 # include "orderAccess_linux_sparc.inline.hpp"
--- a/src/os/posix/vm/os_posix.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/posix/vm/os_posix.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -523,7 +523,11 @@
   strncpy(buffer, "none", size);
 
   const struct {
-    int i;
+    // NB: i is an unsigned int here because SA_RESETHAND is on some
+    // systems 0x80000000, which is implicitly unsigned.  Assigning
+    // it to an int field would be an overflow in unsigned-to-signed
+    // conversion.
+    unsigned int i;
     const char* s;
   } flaginfo [] = {
     { SA_NOCLDSTOP, "SA_NOCLDSTOP" },
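The new comment above records a subtle conversion issue: on common Linux systems SA_RESETHAND is 0x80000000, which does not fit in a signed 32-bit int, so storing it in an int field relies on unsigned-to-signed conversion of an out-of-range value. A two-line illustration of why the field was widened to unsigned int (values shown are the usual glibc definitions):

#include <csignal>
#include <cstdio>

int main() {
  unsigned int ok  = SA_RESETHAND;   // fits: 0x80000000 is already unsigned
  long long    raw = SA_RESETHAND;   // widen so the value prints unchanged
  printf("SA_RESETHAND = 0x%llx, as unsigned int = 0x%x\n", raw, ok);
  // int bad = SA_RESETHAND;         // out of range for a 32-bit signed int
  return 0;
}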
--- a/src/os/solaris/dtrace/libjvm_db.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/solaris/dtrace/libjvm_db.c	Mon Apr 13 16:44:26 2020 +0100
@@ -559,13 +559,14 @@
   CHECK_FAIL(err);
 
   result[0] = '\0';
-  strncat(result, klassString, size);
-  size -= strlen(klassString);
-  strncat(result, ".", size);
-  size -= 1;
-  strncat(result, nameString, size);
-  size -= strlen(nameString);
-  strncat(result, signatureString, size);
+  if (snprintf(result, size,
+    "%s.%s%s",
+    klassString,
+    nameString,
+    signatureString) >= size) {
+    // truncation
+    goto fail;
+  }
 
   if (nameString != NULL) free(nameString);
   if (klassString != NULL) free(klassString);
@@ -1072,9 +1073,9 @@
       CHECK_FAIL(err);
   }
   if (deoptimized) {
-    strncat(result + 1, " [deoptimized frame]; ", size-1);
+    strncat(result, " [deoptimized frame]; ", size - strlen(result) - 1);
   } else {
-    strncat(result + 1, " [compiled] ", size-1);
+    strncat(result, " [compiled] ", size - strlen(result) - 1);
   }
   if (debug)
       fprintf(stderr, "name_for_nmethod: END: method name: %s, vf_cnt: %d\n\n",
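The two hunks above replace hand-maintained strncat() size bookkeeping with a single snprintf() whose return value is checked for truncation, and they correct the later strncat() calls to pass the space remaining in the buffer (size - strlen(result) - 1) rather than the total buffer size. A small illustration of both idioms with made-up strings:

#include <cstdio>
#include <cstring>

int main() {
  char result[32];

  // Build "<klass>.<name><signature>" in one call; snprintf() returns the
  // length it wanted to write, so >= sizeof(result) signals truncation.
  int needed = snprintf(result, sizeof(result), "%s.%s%s",
                        "java/lang/String", "indexOf", "(I)I");
  if (needed < 0 || (size_t) needed >= sizeof(result)) {
    printf("truncated (needed %d bytes)\n", needed);
    return 1;
  }

  // strncat()'s size argument is the space left, not the buffer size; the
  // appended text may still be cut short, but it stays inside the buffer.
  strncat(result, " [compiled] ", sizeof(result) - strlen(result) - 1);
  printf("%s\n", result);
  return 0;
}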
--- a/src/os/solaris/vm/chaitin_solaris.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "opto/chaitin.hpp"
-#include "opto/machnode.hpp"
-
-void PhaseRegAlloc::pd_preallocate_hook() {
-  // no action
-}
-
-#ifdef ASSERT
-void PhaseRegAlloc::pd_postallocate_verify_hook() {
-  // no action
-}
-#endif
-
-
-//Reconciliation History
-// 1.1 99/02/12 15:35:26 chaitin_win32.cpp
-// 1.2 99/02/18 15:38:56 chaitin_win32.cpp
-// 1.4 99/03/09 10:37:48 chaitin_win32.cpp
-// 1.6 99/03/25 11:07:44 chaitin_win32.cpp
-// 1.8 99/06/22 16:38:58 chaitin_win32.cpp
-//End
--- a/src/os/solaris/vm/os_solaris.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/solaris/vm/os_solaris.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -833,7 +833,7 @@
       // allocate new buffer and initialize
       info = (Dl_serinfo*)malloc(_info.dls_size);
       if (info == NULL) {
-        vm_exit_out_of_memory(_info.dls_size,
+        vm_exit_out_of_memory(_info.dls_size, OOM_MALLOC_ERROR,
                               "init_system_properties_values info");
       }
       info->dls_size = _info.dls_size;
@@ -875,7 +875,7 @@
       common_path = malloc(bufsize);
       if (common_path == NULL) {
         free(info);
-        vm_exit_out_of_memory(bufsize,
+        vm_exit_out_of_memory(bufsize, OOM_MALLOC_ERROR,
                               "init_system_properties_values common_path");
       }
       sprintf(common_path, COMMON_DIR "/lib/%s", cpu_arch);
@@ -888,7 +888,7 @@
       if (library_path == NULL) {
         free(info);
         free(common_path);
-        vm_exit_out_of_memory(bufsize,
+        vm_exit_out_of_memory(bufsize, OOM_MALLOC_ERROR,
                               "init_system_properties_values library_path");
       }
       library_path[0] = '\0';
@@ -1632,7 +1632,8 @@
   // %%% this is used only in threadLocalStorage.cpp
   if (thr_setspecific((thread_key_t)index, value)) {
     if (errno == ENOMEM) {
-       vm_exit_out_of_memory(SMALLINT, "thr_setspecific: out of swap space");
+       vm_exit_out_of_memory(SMALLINT, OOM_MALLOC_ERROR,
+                             "thr_setspecific: out of swap space");
     } else {
       fatal(err_msg("os::thread_local_storage_at_put: thr_setspecific failed "
                     "(%s)", strerror(errno)));
@@ -2309,7 +2310,9 @@
   st->print(", physical " UINT64_FORMAT "k", os::physical_memory()>>10);
   st->print("(" UINT64_FORMAT "k free)", os::available_memory() >> 10);
   st->cr();
-  (void) check_addr0(st);
+  if (VMError::fatal_error_in_progress()) {
+     (void) check_addr0(st);
+  }
 }
 
 // Taken from /usr/include/sys/machsig.h  Supposed to be architecture specific
@@ -2872,7 +2875,7 @@
 
   if (!recoverable_mmap_error(err)) {
     warn_fail_commit_memory(addr, bytes, exec, err);
-    vm_exit_out_of_memory(bytes, "committing reserved memory.");
+    vm_exit_out_of_memory(bytes, OOM_MMAP_ERROR, "committing reserved memory.");
   }
 
   return err;
@@ -2889,7 +2892,7 @@
   if (err != 0) {
     // the caller wants all commit errors to exit with the specified mesg:
     warn_fail_commit_memory(addr, bytes, exec, err);
-    vm_exit_out_of_memory(bytes, mesg);
+    vm_exit_out_of_memory(bytes, OOM_MMAP_ERROR, mesg);
   }
 }
 
@@ -2935,7 +2938,7 @@
   if (err != 0) {
     // the caller wants all commit errors to exit with the specified mesg:
     warn_fail_commit_memory(addr, bytes, alignment_hint, exec, err);
-    vm_exit_out_of_memory(bytes, mesg);
+    vm_exit_out_of_memory(bytes, OOM_MMAP_ERROR, mesg);
   }
 }
 
--- a/src/os/windows/vm/chaitin_windows.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "opto/chaitin.hpp"
-#include "opto/machnode.hpp"
-
-// Disallow the use of the frame pointer (EBP) for implicit null exceptions
-// on win95/98.  If we do not do this, the OS gets confused and gives a stack
-// error.
-void PhaseRegAlloc::pd_preallocate_hook() {
-#ifndef _WIN64
-  if (ImplicitNullChecks && !os::win32::is_nt()) {
-    for (uint block_num=1; block_num<_cfg._num_blocks; block_num++) {
-      Block *block = _cfg._blocks[block_num];
-
-      Node *block_end = block->end();
-      if (block_end->is_MachNullCheck() &&
-          block_end->as_Mach()->ideal_Opcode() != Op_Con) {
-        // The last instruction in the block is an implicit null check.
-        // Fix its input so that it does not load into the frame pointer.
-        _matcher.pd_implicit_null_fixup(block_end->in(1)->as_Mach(),
-                                        block_end->as_MachNullCheck()->_vidx);
-      }
-    }
-  }
-#else
-  // WIN64==itanium on XP
-#endif
-}
-
-#ifdef ASSERT
-// Verify that no implicit null check uses the frame pointer (EBP) as
-// its register on win95/98.  Use of the frame pointer in an implicit
-// null check confuses the OS, yielding a stack error.
-void PhaseRegAlloc::pd_postallocate_verify_hook() {
-#ifndef _WIN64
-  if (ImplicitNullChecks && !os::win32::is_nt()) {
-    for (uint block_num=1; block_num<_cfg._num_blocks; block_num++) {
-      Block *block = _cfg._blocks[block_num];
-
-      Node *block_end = block->_nodes[block->_nodes.size()-1];
-      if (block_end->is_MachNullCheck() && block_end->as_Mach()->ideal_Opcode() != Op_Con) {
-        // The last instruction in the block is an implicit
-        // null check.  Verify that this instruction does not
-        // use the frame pointer.
-        int reg = get_reg_first(block_end->in(1)->in(block_end->as_MachNullCheck()->_vidx));
-        assert(reg != EBP_num,
-               "implicit null check using frame pointer on win95/98");
-      }
-    }
-  }
-#else
-  // WIN64==itanium on XP
-#endif
-}
-#endif
--- a/src/os/windows/vm/os_windows.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/windows/vm/os_windows.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -3325,7 +3325,7 @@
   assert(mesg != NULL, "mesg must be specified");
   if (!pd_commit_memory(addr, size, exec)) {
     warn_fail_commit_memory(addr, size, exec);
-    vm_exit_out_of_memory(size, mesg);
+    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
   }
 }
 
--- a/src/os/windows/vm/perfMemory_windows.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os/windows/vm/perfMemory_windows.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,7 @@
 #include "oops/oop.inline.hpp"
 #include "os_windows.inline.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/os.hpp"
 #include "runtime/perfMemory.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
@@ -1388,7 +1389,7 @@
   // the file has been successfully created and the file mapping
   // object has been created.
   sharedmem_fileHandle = fh;
-  sharedmem_fileName = strdup(filename);
+  sharedmem_fileName = os::strdup(filename);
 
   return fmh;
 }
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -982,7 +982,7 @@
      // JVM needs to know exact stack location, abort if it fails
      if (rslt != 0) {
        if (rslt == ENOMEM) {
-         vm_exit_out_of_memory(0, "pthread_getattr_np");
+         vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
        } else {
          fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
        }
--- a/src/os_cpu/bsd_zero/vm/atomic_bsd_zero.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/bsd_zero/vm/atomic_bsd_zero.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -239,7 +239,9 @@
   // operation.  Note that some platforms only support this with the
   // limitation that the only valid value to store is the immediate
   // constant 1.  There is a test for this in JNI_CreateJavaVM().
-  return __sync_lock_test_and_set (dest, exchange_value);
+  jint result = __sync_lock_test_and_set (dest, exchange_value);
+  __sync_synchronize();
+  return result;
 #endif // M68K
 #endif // ARM
 }
@@ -252,7 +254,9 @@
 #ifdef M68K
   return m68k_lock_test_and_set(dest, exchange_value);
 #else
-  return __sync_lock_test_and_set (dest, exchange_value);
+  intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+  __sync_synchronize();
+  return result;
 #endif // M68K
 #endif // ARM
 }
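The Zero changes above add __sync_synchronize() after __sync_lock_test_and_set() because GCC documents the latter as an acquire barrier only, while HotSpot expects Atomic::xchg to behave as a full fence; the new AArch64 port below applies the same pattern through its FULL_MEM_BARRIER macro. A generic sketch of that idiom, assuming GCC's __sync builtins:

#include <cstdio>

// Exchange *dest with new_value and return the previous value with
// full-fence semantics: the builtin alone is only an acquire barrier.
static int full_fence_xchg(volatile int* dest, int new_value) {
  int old = __sync_lock_test_and_set(dest, new_value);
  __sync_synchronize();   // upgrade to a full memory barrier
  return old;
}

int main() {
  volatile int flag = 0;
  int previous = full_fence_xchg(&flag, 1);
  printf("previous=%d now=%d\n", previous, (int) flag);
  return 0;
}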
--- a/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -372,7 +372,7 @@
   int res = pthread_getattr_np(pthread_self(), &attr);
   if (res != 0) {
     if (res == ENOMEM) {
-      vm_exit_out_of_memory(0, "pthread_getattr_np");
+      vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
     }
     else {
       fatal(err_msg("pthread_getattr_np failed with errno = " INT32_FORMAT,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/assembler_linux_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_aarch64.inline.hpp"
+#include "runtime/os.hpp"
+#include "runtime/threadLocalStorage.hpp"
+
+
+// get_thread can be called anywhere inside generated code, so we need
+// to save whatever non-callee-saved context might get clobbered by the
+// call into the C thread-local lookup or, indeed, by the call setup
+// code. x86 appears to save the C argument registers.
+
+void MacroAssembler::get_thread(Register dst) {
+  // call pthread_getspecific
+  // void * pthread_getspecific(pthread_key_t key);
+
+  // Save all call-clobbered regs except dst, plus r19 and r20.
+  RegSet saved_regs = RegSet::range(r0, r20) + lr - dst;
+  push(saved_regs, sp);
+  mov(c_rarg0, ThreadLocalStorage::thread_index());
+  mov(r19, CAST_FROM_FN_PTR(address, pthread_getspecific));
+  blrt(r19, 1, 0, 1);
+  if (dst != c_rarg0) {
+    mov(dst, c_rarg0);
+  }
+  // restore pushed registers
+  pop(saved_regs, sp);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP
+
+#include "runtime/atomic.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_aarch64.hpp"
+
+// Implementation of class atomic
+
+#define FULL_MEM_BARRIER  __sync_synchronize()
+#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+
+inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
+inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
+
+inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
+inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
+
+
+inline jint Atomic::add(jint add_value, volatile jint* dest)
+{
+ return __sync_add_and_fetch(dest, add_value);
+}
+
+inline void Atomic::inc(volatile jint* dest)
+{
+ add(1, dest);
+}
+
+inline void Atomic::inc_ptr(volatile void* dest)
+{
+ add_ptr(1, dest);
+}
+
+inline void Atomic::dec (volatile jint* dest)
+{
+ add(-1, dest);
+}
+
+inline void Atomic::dec_ptr(volatile void* dest)
+{
+ add_ptr(-1, dest);
+}
+
+inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
+{
+  jint res = __sync_lock_test_and_set (dest, exchange_value);
+  FULL_MEM_BARRIER;
+  return res;
+}
+
+inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
+{
+  return (void *) xchg_ptr((intptr_t) exchange_value,
+                           (volatile intptr_t*) dest);
+}
+
+
+inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value)
+{
+ return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+}
+
+inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
+inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
+
+inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
+{
+ return __sync_add_and_fetch(dest, add_value);
+}
+
+inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest)
+{
+  return (void *) add_ptr(add_value, (volatile intptr_t *) dest);
+}
+
+inline void Atomic::inc_ptr(volatile intptr_t* dest)
+{
+ add_ptr(1, dest);
+}
+
+inline void Atomic::dec_ptr(volatile intptr_t* dest)
+{
+ add_ptr(-1, dest);
+}
+
+inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
+{
+  intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
+  FULL_MEM_BARRIER;
+  return res;
+}
+
+inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
+{
+ return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+}
+
+inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value)
+{
+ return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+}
+
+inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value)
+{
+  return (void *) cmpxchg_ptr((intptr_t) exchange_value,
+                              (volatile intptr_t*) dest,
+                              (intptr_t) compare_value);
+}
+
+inline jlong Atomic::load(volatile jlong* src) { return *src; }
+
+#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/bytes_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2   Bytes::swap_u2(u2 x) {
+  return bswap_16(x);
+}
+
+inline u4   Bytes::swap_u4(u4 x) {
+  return bswap_32(x);
+}
+
+inline u8 Bytes::swap_u8(u8 x) {
+  return bswap_64(x);
+}
+
+#endif // OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {
+  case 8:  to[7] = from[7];
+  case 7:  to[6] = from[6];
+  case 6:  to[5] = from[5];
+  case 5:  to[4] = from[4];
+  case 4:  to[3] = from[3];
+  case 3:  to[2] = from[2];
+  case 2:  to[1] = from[1];
+  case 1:  to[0] = from[0];
+  case 0:  break;
+  default:
+    (void)memcpy(to, from, count * HeapWordSize);
+    break;
+  }
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {
+  case 8:  to[7] = from[7];
+  case 7:  to[6] = from[6];
+  case 6:  to[5] = from[5];
+  case 5:  to[4] = from[4];
+  case 4:  to[3] = from[3];
+  case 3:  to[2] = from[2];
+  case 2:  to[1] = from[1];
+  case 1:  to[0] = from[0];
+  case 0:  break;
+  default:
+    while (count-- > 0) {
+      *to++ = *from++;
+    }
+    break;
+  }
+}
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_words(from, to, count);
+}
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) {
+  (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
+  pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+  _Copy_conjoint_jshorts_atomic(from, to, count);
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+  _Copy_conjoint_jints_atomic(from, to, count);
+}
+
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+  _Copy_conjoint_jlongs_atomic(from, to, count);
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
+  assert(!UseCompressedOops, "foo!");
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
+  _Copy_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_bytes(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_jshorts(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+   _Copy_arrayof_conjoint_jints(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+  assert(!UseCompressedOops, "foo!");
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+#endif // OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, DontYieldALot,            false);
+define_pd_global(intx, ThreadStackSize,          2048); // 0 => use system default
+define_pd_global(intx, VMThreadStackSize,        2048);
+
+define_pd_global(intx, CompilerThreadStackSize,  0);
+
+define_pd_global(uintx,JVMInvokeMethodSlack,     8192);
+
+// Used on 64 bit platforms for UseCompressedOops base address
+define_pd_global(uintx,HeapBaseMinAddress,       2*G);
+// Only used on 64 bit Windows platforms
+define_pd_global(bool, UseVectoredExceptions,    false);
+
+extern __thread Thread *aarch64_currentThread;
+
+#endif // OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/linux_aarch64.S	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,25 @@
+#ifdef BUILTIN_SIM
+
+        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
+        .align  16
+        .type   SafeFetch32,@function
+        // Prototype: int SafeFetch32 (int * Adr, int ErrValue) 
+SafeFetch32:
+        movl    %esi, %eax
+Fetch32PFI:
+        movl    (%rdi), %eax
+Fetch32Resume:
+        ret
+
+        .globl SafeFetchN, FetchNPFI, FetchNResume
+        .align  16
+        .type   SafeFetchN,@function
+        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue) 
+SafeFetchN:
+        movq    %rsi, %rax
+FetchNPFI:
+        movq    (%rdi), %rax
+FetchNResume:
+        ret
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/linux_aarch64.ad	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,68 @@
+//
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// AArch64 Linux Architecture Description File
+
+//----------OS-DEPENDENT ENCODING BLOCK----------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams.  Encoding classes generate functions which are
+// called by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction.  Operands specify their base encoding
+// interface with the interface keyword.  Four interfaces are
+// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
+// COND_INTER.  REG_INTER causes an operand to generate a function
+// which returns its register number when queried.  CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried.  MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.  Instructions specify two basic values for encoding.
+// They use the ins_encode keyword to specify their encoding class
+// (which must be one of the class names specified in the encoding
+// block), and they use the opcode keyword to specify, in order, their
+// primary, secondary, and tertiary opcode.  Only the opcode sections
+// which a particular instruction needs for encoding need to be
+// specified.
+encode %{
+  // Build emit functions for each basic byte or larger field in the intel
+  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
+  // code in the enc_class source block.  Emit functions will live in the
+  // main source block for now.  In future, we can generalize this by
+  // adding a syntax that specifies the sizes of fields in an order,
+  // so that the adlc can build the emit functions automagically
+
+  enc_class Java_To_Runtime(method meth) %{
+  %}
+
+%}
+
+
+// Platform dependent source
+
+source %{
+
+%}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2007, 2008, 2009 Red Hat, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
+
+#include "runtime/atomic.hpp"
+#include "atomic_linux_aarch64.inline.hpp"
+#include "runtime/orderAccess.hpp"
+#include "vm_version_aarch64.hpp"
+
+// Implementation of class OrderAccess.
+
+inline void OrderAccess::loadload()   { acquire(); }
+inline void OrderAccess::storestore() { release(); }
+inline void OrderAccess::loadstore()  { acquire(); }
+inline void OrderAccess::storeload()  { fence(); }
+
+inline void OrderAccess::acquire() {
+  READ_MEM_BARRIER;
+}
+
+inline void OrderAccess::release() {
+  WRITE_MEM_BARRIER;
+}
+
+inline void OrderAccess::fence() {
+  FULL_MEM_BARRIER;
+}
+
+inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p)
+{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jshort   OrderAccess::load_acquire(volatile jshort*  p)
+{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jint     OrderAccess::load_acquire(volatile jint*    p)
+{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jlong    OrderAccess::load_acquire(volatile jlong*   p)
+{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jubyte    OrderAccess::load_acquire(volatile jubyte*   p)
+{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jushort   OrderAccess::load_acquire(volatile jushort*  p)
+{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline juint     OrderAccess::load_acquire(volatile juint*    p)
+{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline julong   OrderAccess::load_acquire(volatile julong*  p)
+{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jfloat   OrderAccess::load_acquire(volatile jfloat*  p)
+{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline jdouble  OrderAccess::load_acquire(volatile jdouble* p)
+{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t*   p)
+{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+inline void*    OrderAccess::load_ptr_acquire(volatile void*       p)
+{ void* data; __atomic_load((void* volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
+inline void*    OrderAccess::load_ptr_acquire(const volatile void* p)
+{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
+
+inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jint*    p, jint    v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jushort* p, jushort v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile juint*   p, juint   v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile julong*  p, julong  v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v)
+{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
+inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v)
+{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); }
+
+inline void     OrderAccess::store_fence(jbyte*   p, jbyte   v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jshort*  p, jshort  v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jint*    p, jint    v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jlong*   p, jlong   v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jubyte*  p, jubyte  v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jushort* p, jushort v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(juint*   p, juint   v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(julong*  p, julong  v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jfloat*  p, jfloat  v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_fence(jdouble* p, jdouble v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+inline void     OrderAccess::store_ptr_fence(void**    p, void*    v)
+{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
+
+inline void     OrderAccess::release_store_fence(volatile jbyte*   p, jbyte   v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jshort*  p, jshort  v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jint*    p, jint    v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jlong*   p, jlong   v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jubyte*  p, jubyte  v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { release_store(p, v); fence(); }
+inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); }
+
+inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); }
+inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { release_store_ptr(p, v); fence(); }
+
+#endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
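A minimal sketch of how these primitives pair up (the publish/consume functions and field names are assumptions, not part of the patch): a release_store of a flag after a plain store of the payload guarantees that a reader who observes the flag via load_acquire also observes the payload.

    static volatile jint _payload = 0;
    static volatile jint _ready   = 0;

    void publish(jint v) {
      _payload = v;                              // plain store of the payload
      OrderAccess::release_store(&_ready, 1);    // release: payload ordered before the flag
    }

    jint consume() {
      while (OrderAccess::load_acquire(&_ready) == 0) {
        // spin: acquire orders the flag load before the payload load below
      }
      return _payload;                           // sees the value stored by publish()
    }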
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// no precompiled headers
+#include "assembler_aarch64.inline.hpp"
+#include "classfile/classLoader.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm_linux.h"
+#include "memory/allocation.inline.hpp"
+#include "mutex_linux.inline.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm.h"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/extendedPC.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/timer.hpp"
+#include "thread_linux.inline.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
+
+// put OS-includes here
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <pthread.h>
+# include <signal.h>
+# include <errno.h>
+# include <dlfcn.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/resource.h>
+# include <pthread.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/utsname.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+# include <pwd.h>
+# include <poll.h>
+# include <ucontext.h>
+# include <fpu_control.h>
+
+#ifdef BUILTIN_SIM
+#define REG_SP REG_RSP
+#define REG_PC REG_RIP
+#define REG_FP REG_RBP
+#define SPELL_REG_SP "rsp"
+#define SPELL_REG_FP "rbp"
+#else
+#define REG_FP 29
+
+#define SPELL_REG_SP "sp"
+#define SPELL_REG_FP "x29"
+#endif
+
+address os::current_stack_pointer() {
+  register void *esp __asm__ (SPELL_REG_SP);
+  return (address) esp;
+}
+
+char* os::non_memory_address_word() {
+  // Must never look like an address returned by reserve_memory,
+  // even in its subfields (as defined by the CPU immediate fields,
+  // if the CPU splits constants across multiple instructions).
+
+  return (char*) 0xffffffffffff;
+}
+
+void os::initialize_thread(Thread *thr) {
+}
+
+address os::Linux::ucontext_get_pc(ucontext_t * uc) {
+#ifdef BUILTIN_SIM
+  return (address)uc->uc_mcontext.gregs[REG_PC];
+#else
+  return (address)uc->uc_mcontext.pc;
+#endif
+}
+
+intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) {
+#ifdef BUILTIN_SIM
+  return (intptr_t*)uc->uc_mcontext.gregs[REG_SP];
+#else
+  return (intptr_t*)uc->uc_mcontext.sp;
+#endif
+}
+
+intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) {
+#ifdef BUILTIN_SIM
+  return (intptr_t*)uc->uc_mcontext.gregs[REG_FP];
+#else
+  return (intptr_t*)uc->uc_mcontext.regs[REG_FP];
+#endif
+}
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread
+// is currently interrupted by SIGPROF.
+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal
+// frames. Currently we don't do that on Linux, so it's the same as
+// os::fetch_frame_from_context().
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
+  ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) {
+
+  assert(thread != NULL, "just checking");
+  assert(ret_sp != NULL, "just checking");
+  assert(ret_fp != NULL, "just checking");
+
+  return os::fetch_frame_from_context(uc, ret_sp, ret_fp);
+}
+
+ExtendedPC os::fetch_frame_from_context(void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+
+  ExtendedPC  epc;
+  ucontext_t* uc = (ucontext_t*)ucVoid;
+
+  if (uc != NULL) {
+    epc = ExtendedPC(os::Linux::ucontext_get_pc(uc));
+    if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc);
+    if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc);
+  } else {
+    // construct empty ExtendedPC for return value checking
+    epc = ExtendedPC(NULL);
+    if (ret_sp) *ret_sp = (intptr_t *)NULL;
+    if (ret_fp) *ret_fp = (intptr_t *)NULL;
+  }
+
+  return epc;
+}
+
+frame os::fetch_frame_from_context(void* ucVoid) {
+  intptr_t* sp;
+  intptr_t* fp;
+  ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp);
+  return frame(sp, fp, epc.pc());
+}
+
+// By default, gcc always saves the frame pointer (x29 here; %rbp under the
+// built-in simulator) on the stack.  It may be turned off by -fomit-frame-pointer.
+frame os::get_sender_for_C_frame(frame* fr) {
+  return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
+}
+
+intptr_t* _get_previous_fp() {
+  register intptr_t **ebp __asm__ (SPELL_REG_FP);
+  return (intptr_t*) *ebp;   // we want what it points to.
+}
+
+
+frame os::current_frame() {
+  intptr_t* fp = _get_previous_fp();
+  frame myframe((intptr_t*)os::current_stack_pointer(),
+                (intptr_t*)fp,
+                CAST_FROM_FN_PTR(address, os::current_frame));
+  if (os::is_first_C_frame(&myframe)) {
+    // stack is not walkable
+    return frame();
+  } else {
+    return os::get_sender_for_C_frame(&myframe);
+  }
+}
+
+// Utility functions
+
+// From IA32 System Programming Guide
+enum {
+  trap_page_fault = 0xE
+};
+
+#ifdef BUILTIN_SIM
+extern "C" void Fetch32PFI () ;
+extern "C" void Fetch32Resume () ;
+extern "C" void FetchNPFI () ;
+extern "C" void FetchNResume () ;
+#endif
+
+// An operation in Unsafe has faulted.  We're going to return to the
+// instruction after the faulting load or store.  We also set
+// pending_unsafe_access_error so that at some point in the future our
+// user will get a helpful message.
+static address handle_unsafe_access(JavaThread* thread, address pc) {
+  // pc is the instruction which we must emulate
+  // doing a no-op is fine:  return garbage from the load
+  // therefore, compute npc
+  address npc = pc + NativeCall::instruction_size;
+
+  // request an async exception
+  thread->set_pending_unsafe_access_error();
+
+  // return address of next instruction to execute
+  return npc;
+}
+
+extern "C" JNIEXPORT int
+JVM_handle_linux_signal(int sig,
+                        siginfo_t* info,
+                        void* ucVoid,
+                        int abort_if_unrecognized) {
+  ucontext_t* uc = (ucontext_t*) ucVoid;
+
+  Thread* t = ThreadLocalStorage::get_thread_slow();
+
+  // Must do this before SignalHandlerMark: if crash protection is installed
+  // we will longjmp away (no destructors can be run).
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
+  SignalHandlerMark shm(t);
+
+  // Note: it's not uncommon that JNI code uses signal/sigset to install,
+  // then restore, certain signal handlers (e.g. to temporarily block SIGPIPE,
+  // or have a SIGILL handler when detecting CPU type). When that happens,
+  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+  // avoid an unnecessary crash when libjsig is not preloaded, try to handle
+  // signals that do not require siginfo/ucontext first.
+
+  if (sig == SIGPIPE || sig == SIGXFSZ) {
+    // allow chained handler to go first
+    if (os::Linux::chained_handler(sig, info, ucVoid)) {
+      return true;
+    } else {
+      if (PrintMiscellaneous && (WizardMode || Verbose)) {
+        char buf[64];
+        warning("Ignoring %s - see bugs 4229104 or 646499219",
+                os::exception_name(sig, buf, sizeof(buf)));
+      }
+      return true;
+    }
+  }
+
+  JavaThread* thread = NULL;
+  VMThread* vmthread = NULL;
+  if (os::Linux::signal_handlers_are_installed) {
+    if (t != NULL) {
+      if (t->is_Java_thread()) {
+        thread = (JavaThread*)t;
+      } else if (t->is_VM_thread()) {
+        vmthread = (VMThread*)t;
+      }
+    }
+  }
+/*
+  NOTE: does not seem to work on linux.
+  if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
+    // can't decode this kind of signal
+    info = NULL;
+  } else {
+    assert(sig == info->si_signo, "bad siginfo");
+  }
+*/
+  // decide if this trap can be handled by a stub
+  address stub = NULL;
+
+  address pc          = NULL;
+
+  //%note os_trap_1
+  if (info != NULL && uc != NULL && thread != NULL) {
+    pc = (address) os::Linux::ucontext_get_pc(uc);
+
+#ifdef BUILTIN_SIM
+    if (pc == (address) Fetch32PFI) {
+       uc->uc_mcontext.gregs[REG_PC] = intptr_t(Fetch32Resume) ;
+       return 1 ;
+    }
+    if (pc == (address) FetchNPFI) {
+       uc->uc_mcontext.gregs[REG_PC] = intptr_t (FetchNResume) ;
+       return 1 ;
+    }
+#else
+    if (StubRoutines::is_safefetch_fault(pc)) {
+      uc->uc_mcontext.pc = intptr_t(StubRoutines::continuation_for_safefetch_fault(pc));
+      return 1;
+    }
+#endif
+
+    // Handle ALL stack overflow variations here
+    if (sig == SIGSEGV) {
+      address addr = (address) info->si_addr;
+
+      // check if fault address is within thread stack
+      if (addr < thread->stack_base() &&
+          addr >= thread->stack_base() - thread->stack_size()) {
+        // stack overflow
+        if (thread->in_stack_yellow_zone(addr)) {
+          thread->disable_stack_yellow_zone();
+          if (thread->thread_state() == _thread_in_Java) {
+            // Throw a stack overflow exception.  Guard pages will be reenabled
+            // while unwinding the stack.
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+          } else {
+            // Thread was in the vm or native code.  Return and try to finish.
+            return 1;
+          }
+        } else if (thread->in_stack_red_zone(addr)) {
+          // Fatal red zone violation.  Disable the guard pages and fall through
+          // to handle_unexpected_exception way down below.
+          thread->disable_stack_red_zone();
+          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+
+          // This is a likely cause, but hard to verify. Let's just print
+          // it as a hint.
+          tty->print_raw_cr("Please check if any of your loaded .so files has "
+                            "enabled executable stack (see man page execstack(8))");
+        } else {
+          // Accessing stack address below sp may cause SEGV if current
+          // thread has MAP_GROWSDOWN stack. This should only happen when
+          // current thread was created by user code with MAP_GROWSDOWN flag
+          // and then attached to VM. See notes in os_linux.cpp.
+          if (thread->osthread()->expanding_stack() == 0) {
+             thread->osthread()->set_expanding_stack();
+             if (os::Linux::manually_expand_stack(thread, addr)) {
+               thread->osthread()->clear_expanding_stack();
+               return 1;
+             }
+             thread->osthread()->clear_expanding_stack();
+          } else {
+             fatal("recursive segv. expanding stack.");
+          }
+        }
+      }
+    }
+
+    if (thread->thread_state() == _thread_in_Java) {
+      // Java thread running in Java code => find exception handler if any
+      // a fault inside compiled code, the interpreter, or a stub
+
+      // Handle signal from NativeJump::patch_verified_entry().
+      if ((sig == SIGILL || sig == SIGTRAP)
+          && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+        if (TraceTraps) {
+          tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
+        }
+        stub = SharedRuntime::get_handle_wrong_method_stub();
+      } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+        stub = SharedRuntime::get_poll_stub(pc);
+      } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
+        // BugId 4454115: A read from a MappedByteBuffer can fault
+        // here if the underlying file has been truncated.
+        // Do not crash the VM in such a case.
+        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+        nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
+        if (nm != NULL && nm->has_unsafe_access()) {
+          stub = handle_unsafe_access(thread, pc);
+        }
+      } else if (sig == SIGFPE &&
+                 (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
+        stub = SharedRuntime::continuation_for_implicit_exception(
+                 thread, pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
+      } else if (sig == SIGSEGV &&
+               !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
+          // Determination of interpreter/vtable stub/compiled code null exception
+          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+      }
+    } else if (thread->thread_state() == _thread_in_vm &&
+               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+               thread->doing_unsafe_access()) {
+        stub = handle_unsafe_access(thread, pc);
+    }
+
+    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
+    // and the heap gets shrunk before the field access.
+    if ((sig == SIGSEGV) || (sig == SIGBUS)) {
+      address addr = JNI_FastGetField::find_slowcase_pc(pc);
+      if (addr != (address)-1) {
+        stub = addr;
+      }
+    }
+
+    // Check to see if we caught the safepoint code in the
+    // process of write protecting the memory serialization page.
+    // It write enables the page immediately after protecting it
+    // so we can just return to retry the write.
+    if ((sig == SIGSEGV) &&
+        os::is_memory_serialize_page(thread, (address) info->si_addr)) {
+      // Block the current thread until the memory serialize page permission is restored.
+      os::block_on_serialize_page_trap();
+      return true;
+    }
+  }
+
+  if (stub != NULL) {
+    // save all thread context in case we need to restore it
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+
+#ifdef BUILTIN_SIM
+    uc->uc_mcontext.gregs[REG_PC] = (greg_t)stub;
+#else
+    uc->uc_mcontext.pc = (__u64)stub;
+#endif
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+     return true;
+  }
+
+  if (!abort_if_unrecognized) {
+    // caller wants another chance, so give it to him
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+
+  VMError err(t, sig, pc, info, ucVoid);
+  err.report_and_die();
+
+  ShouldNotReachHere();
+  return true; // Mute compiler
+}
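A hedged sketch of the situation the note near the top of this handler warns about: a JNI library temporarily installing its own handler with sigaction and later restoring whatever was there (possibly the JVM's). The function and handler names are hypothetical, not part of the patch.

    #include <setjmp.h>
    #include <signal.h>
    #include <stddef.h>

    static sigjmp_buf probe_env;

    static void cpu_probe_handler(int sig) {
      siglongjmp(probe_env, 1);         // skip past the faulting probe instruction
    }

    // Hypothetical JNI-side CPU-feature probe.
    void detect_cpu_feature_with_probe() {
      struct sigaction sa, old;
      sigemptyset(&sa.sa_mask);
      sa.sa_flags   = 0;
      sa.sa_handler = cpu_probe_handler;
      sigaction(SIGILL, &sa, &old);     // temporarily take over SIGILL
      if (sigsetjmp(probe_env, 1) == 0) {
        // ... execute an instruction that raises SIGILL on older CPUs ...
      }
      sigaction(SIGILL, &old, NULL);    // restore the previous (possibly JVM) handler
    }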
+
+void os::Linux::init_thread_fpu_state(void) {
+}
+
+int os::Linux::get_fpu_control_word(void) {
+  return 0;
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+}
+
+// SSE is an x86 feature; the query has no meaning on AArch64, so simply
+// report support.
+bool os::supports_sse() {
+  return true;
+}
+
+bool os::is_allocatable(size_t bytes) {
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+size_t os::Linux::min_stack_allowed  = 64 * K;
+
+// pthread on aarch64 is always in floating stack mode
+bool os::Linux::supports_variable_stack_size() {  return true; }
+
+// return default stack size for thr_type
+size_t os::Linux::default_stack_size(os::ThreadType thr_type) {
+  // default stack size (compiler thread needs larger stack)
+  size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M);
+  return s;
+}
+
+size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
+  // Creating a guard page is very expensive. Java threads have HotSpot
+  // guard pages, so only enable the glibc guard page for non-Java threads.
+  return (thr_type == java_thread ? 0 : page_size());
+}
+
+// Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\  JavaThread created by VM does not have glibc
+//    |    glibc guard page    | - guard, attached Java thread usually has
+//    |                        |/  1 page glibc guard.
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |  HotSpot Guard Pages   | - red and yellow pages
+//    |                        |/
+//    +------------------------+ JavaThread::stack_yellow_zone_base()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// Non-Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\
+//    |  glibc guard page      | - usually 1 page
+//    |                        |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// ** P1 (aka bottom) and size (P2 = P1 + size) are the address and stack size returned from
+//    pthread_attr_getstack()
+
+static void current_stack_region(address * bottom, size_t * size) {
+  if (os::Linux::is_initial_thread()) {
+     // initial thread needs special handling because pthread_getattr_np()
+     // may return a bogus value.
+     *bottom = os::Linux::initial_thread_stack_bottom();
+     *size   = os::Linux::initial_thread_stack_size();
+  } else {
+     pthread_attr_t attr;
+
+     int rslt = pthread_getattr_np(pthread_self(), &attr);
+
+     // JVM needs to know exact stack location, abort if it fails
+     if (rslt != 0) {
+       if (rslt == ENOMEM) {
+         vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
+       } else {
+         fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
+       }
+     }
+
+     if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) {
+         fatal("Can not locate current stack attributes!");
+     }
+
+     pthread_attr_destroy(&attr);
+
+  }
+  assert(os::current_stack_pointer() >= *bottom &&
+         os::current_stack_pointer() < *bottom + *size, "just checking");
+}
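A standalone sketch of the P1/P2 arithmetic described in the diagram above, mirroring the non-initial-thread branch of current_stack_region() (the helper name is an assumption; error handling omitted).

    #include <pthread.h>   // pthread_getattr_np is a GNU extension (_GNU_SOURCE)

    static char* stack_base_of_current_thread() {
      pthread_attr_t attr;
      void*  bottom;                      // P1 in the diagram
      size_t size;
      pthread_getattr_np(pthread_self(), &attr);
      pthread_attr_getstack(&attr, &bottom, &size);
      pthread_attr_destroy(&attr);
      return (char*)bottom + size;        // P2 == Thread::stack_base()
    }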
+
+address os::current_stack_base() {
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return (bottom + size);
+}
+
+size_t os::current_stack_size() {
+  // stack size includes normal stack and HotSpot guard pages
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return size;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+void os::print_context(outputStream *st, void *context) {
+  if (context == NULL) return;
+
+  ucontext_t *uc = (ucontext_t*)context;
+  st->print_cr("Registers:");
+#ifdef BUILTIN_SIM
+  st->print(  "RAX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RAX]);
+  st->print(", RBX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBX]);
+  st->print(", RCX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RCX]);
+  st->print(", RDX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDX]);
+  st->cr();
+  st->print(  "RSP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSP]);
+  st->print(", RBP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBP]);
+  st->print(", RSI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSI]);
+  st->print(", RDI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDI]);
+  st->cr();
+  st->print(  "R8 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R8]);
+  st->print(", R9 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R9]);
+  st->print(", R10=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R10]);
+  st->print(", R11=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R11]);
+  st->cr();
+  st->print(  "R12=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R12]);
+  st->print(", R13=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R13]);
+  st->print(", R14=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R14]);
+  st->print(", R15=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R15]);
+  st->cr();
+  st->print(  "RIP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RIP]);
+  st->print(", EFLAGS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EFL]);
+  st->print(", CSGSFS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_CSGSFS]);
+  st->print(", ERR=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ERR]);
+  st->cr();
+  st->print("  TRAPNO=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_TRAPNO]);
+  st->cr();
+#else
+  for (int r = 0; r < 31; r++)
+    st->print_cr("R%d=" INTPTR_FORMAT, r, uc->uc_mcontext.regs[r]);
+#endif
+  st->cr();
+
+  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", sp);
+  print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t));
+  st->cr();
+
+  // Note: it may be unsafe to inspect memory near pc. For example, pc may
+  // point to garbage if entry point in an nmethod is corrupted. Leave
+  // this at the end, and hope for the best.
+  address pc = os::Linux::ucontext_get_pc(uc);
+  st->print_cr("Instructions: (pc=" PTR_FORMAT ")", pc);
+  print_hex_dump(st, pc - 32, pc + 32, sizeof(char));
+}
+
+void os::print_register_info(outputStream *st, void *context) {
+  if (context == NULL) return;
+
+  ucontext_t *uc = (ucontext_t*)context;
+
+  st->print_cr("Register to memory mapping:");
+  st->cr();
+
+  // this is horrendously verbose but the layout of the registers in the
+  // context does not match how we defined our abstract Register set, so
+  // we can't just iterate through the gregs area
+
+  // this is only for the "general purpose" registers
+
+#ifdef BUILTIN_SIM
+  st->print("RAX="); print_location(st, uc->uc_mcontext.gregs[REG_RAX]);
+  st->print("RBX="); print_location(st, uc->uc_mcontext.gregs[REG_RBX]);
+  st->print("RCX="); print_location(st, uc->uc_mcontext.gregs[REG_RCX]);
+  st->print("RDX="); print_location(st, uc->uc_mcontext.gregs[REG_RDX]);
+  st->print("RSP="); print_location(st, uc->uc_mcontext.gregs[REG_RSP]);
+  st->print("RBP="); print_location(st, uc->uc_mcontext.gregs[REG_RBP]);
+  st->print("RSI="); print_location(st, uc->uc_mcontext.gregs[REG_RSI]);
+  st->print("RDI="); print_location(st, uc->uc_mcontext.gregs[REG_RDI]);
+  st->print("R8 ="); print_location(st, uc->uc_mcontext.gregs[REG_R8]);
+  st->print("R9 ="); print_location(st, uc->uc_mcontext.gregs[REG_R9]);
+  st->print("R10="); print_location(st, uc->uc_mcontext.gregs[REG_R10]);
+  st->print("R11="); print_location(st, uc->uc_mcontext.gregs[REG_R11]);
+  st->print("R12="); print_location(st, uc->uc_mcontext.gregs[REG_R12]);
+  st->print("R13="); print_location(st, uc->uc_mcontext.gregs[REG_R13]);
+  st->print("R14="); print_location(st, uc->uc_mcontext.gregs[REG_R14]);
+  st->print("R15="); print_location(st, uc->uc_mcontext.gregs[REG_R15]);
+#else
+  for (int r = 0; r < 31; r++)
+    st->print_cr("R%d=" INTPTR_FORMAT, r, uc->uc_mcontext.regs[r]);
+#endif
+  st->cr();
+}
+
+void os::setup_fpu() {
+}
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+}
+#endif
+
+extern "C" {
+  int SpinPause() {
+    return 0;
+  }
+
+  void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+    if (from > to) {
+      jshort *end = from + count;
+      while (from < end)
+        *(to++) = *(from++);
+    }
+    else if (from < to) {
+      jshort *end = from;
+      from += count - 1;
+      to   += count - 1;
+      while (from >= end)
+        *(to--) = *(from--);
+    }
+  }
+  void _Copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+    if (from > to) {
+      jint *end = from + count;
+      while (from < end)
+        *(to++) = *(from++);
+    }
+    else if (from < to) {
+      jint *end = from;
+      from += count - 1;
+      to   += count - 1;
+      while (from >= end)
+        *(to--) = *(from--);
+    }
+  }
+  void _Copy_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+    if (from > to) {
+      jlong *end = from + count;
+      while (from < end)
+        os::atomic_copy64(from++, to++);
+    }
+    else if (from < to) {
+      jlong *end = from;
+      from += count - 1;
+      to   += count - 1;
+      while (from >= end)
+        os::atomic_copy64(from--, to--);
+    }
+  }
+
+  void _Copy_arrayof_conjoint_bytes(HeapWord* from,
+                                    HeapWord* to,
+                                    size_t    count) {
+    memmove(to, from, count);
+  }
+  void _Copy_arrayof_conjoint_jshorts(HeapWord* from,
+                                      HeapWord* to,
+                                      size_t    count) {
+    memmove(to, from, count * 2);
+  }
+  void _Copy_arrayof_conjoint_jints(HeapWord* from,
+                                    HeapWord* to,
+                                    size_t    count) {
+    memmove(to, from, count * 4);
+  }
+  void _Copy_arrayof_conjoint_jlongs(HeapWord* from,
+                                     HeapWord* to,
+                                     size_t    count) {
+    memmove(to, from, count * 8);
+  }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP
+
+  static void setup_fpu();
+  static bool supports_sse();
+
+  static jlong rdtsc();
+
+  static bool is_allocatable(size_t bytes);
+
+  // Used to register dynamic code cache area with the OS
+  // Note: Currently only used in 64 bit Windows implementations
+  static bool register_code_area(char *low, char *high) { return true; }
+
+  // Atomically copy 64 bits of data
+  static void atomic_copy64(volatile void *src, volatile void *dst) {
+#if defined(PPC) && !defined(_LP64)
+    double tmp;
+    asm volatile ("lfd  %0, 0(%1)\n"
+                  "stfd %0, 0(%2)\n"
+                  : "=f"(tmp)
+                  : "b"(src), "b"(dst));
+#elif defined(S390) && !defined(_LP64)
+    double tmp;
+    asm volatile ("ld  %0, 0(%1)\n"
+                  "std %0, 0(%2)\n"
+                  : "=r"(tmp)
+                  : "a"(src), "a"(dst));
+#else
+    *(jlong *) dst = *(jlong *) src;
+#endif
+  }
+
+#endif // OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP
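A small sketch of atomic_copy64 in use (the field and function names are assumptions, not part of the patch): because the copy is a single 64-bit load/store on _LP64, a concurrent reader never observes a half-written jlong.

    static volatile jlong _published_nanos = 0;

    void publish_time(volatile jlong* src) {
      // readers of _published_nanos see either the old or the new value, never a mix
      os::atomic_copy64(src, &_published_nanos);
    }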
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// See http://www.technovelty.org/code/c/reading-rdtsc.html for details
+inline jlong os::rdtsc() {
+  uint64_t res;
+  uint32_t ts1, ts2;
+  __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+  res = ((uint64_t)ts1 | (uint64_t)ts2 << 32);
+  return (jlong)res;
+}
+
+#endif // OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP
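A sketch of a typical caller (do_work is a placeholder, not part of the patch); note that rdtsc is an x86 instruction, so this inline is only meaningful when running under the built-in x86 simulator.

    extern void do_work();   // hypothetical workload being measured

    jlong time_work_in_tsc_ticks() {
      jlong start = os::rdtsc();
      do_work();
      return os::rdtsc() - start;   // elapsed time-stamp-counter ticks
    }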
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/prefetch_linux_aarch64.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP
+#define OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+
+inline void Prefetch::read (void *loc, intx interval) {
+#ifndef BUILTIN_SIM
+  if (interval >= 0)
+    asm("prfm PLDL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval));
+#endif
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+#ifndef BUILTIN_SIM
+  if (interval >= 0)
+    asm("prfm PSTL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval));
+#endif
+}
+
+#endif // OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP
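A sketch of the usual prefetch-ahead pattern built on these hints (the 64-byte distance and the consume() callee are assumptions, not part of the patch).

    extern void consume(jint v);   // hypothetical consumer of each element

    static void scan(jint* a, int n) {
      for (int i = 0; i < n; i++) {
        Prefetch::read(a + i, 64);   // hint: start fetching roughly one cache line ahead
        consume(a[i]);
      }
    }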
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/threadLocalStorage.hpp"
+#include "thread_linux.inline.hpp"
+
+void ThreadLocalStorage::generate_code_for_get_thread() {
+    // nothing we can do here for user-level thread
+}
+
+void ThreadLocalStorage::pd_init() {
+}
+
+__thread Thread *aarch64_currentThread;
+
+void ThreadLocalStorage::pd_set_thread(Thread* thread) {
+  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
+  aarch64_currentThread = thread;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP
+
+  // Processor dependent parts of ThreadLocalStorage
+
+public:
+
+  static Thread* thread() {
+    return aarch64_currentThread;
+  }
+
+#endif // OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/thread_linux_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/frame.inline.hpp"
+#include "thread_linux.inline.hpp"
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
+// currently interrupted by SIGPROF
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
+  void* ucontext, bool isInJava) {
+
+  assert(Thread::current() == this, "caller must be current thread");
+  return pd_get_top_frame(fr_addr, ucontext, isInJava);
+}
+
+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
+  return pd_get_top_frame(fr_addr, ucontext, isInJava);
+}
+
+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
+  assert(this->is_Java_thread(), "must be JavaThread");
+  JavaThread* jt = (JavaThread *)this;
+
+  // If we have a last_Java_frame, then we should use it even if
+  // isInJava == true.  It should be more reliable than ucontext info.
+  if (jt->has_last_Java_frame()) {
+    *fr_addr = jt->pd_last_frame();
+    return true;
+  }
+
+  // At this point, we don't have a last_Java_frame, so
+  // we try to glean some information out of the ucontext
+  // if we were running Java code when SIGPROF came in.
+  if (isInJava) {
+    ucontext_t* uc = (ucontext_t*) ucontext;
+
+    intptr_t* ret_fp;
+    intptr_t* ret_sp;
+    ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc,
+      &ret_sp, &ret_fp);
+    if (addr.pc() == NULL || ret_sp == NULL ) {
+      // ucontext wasn't useful
+      return false;
+    }
+
+    frame ret_frame(ret_sp, ret_fp, addr.pc());
+    if (!ret_frame.safe_for_sender(jt)) {
+#ifdef COMPILER2
+      // C2 uses the frame pointer as a general register; see if a NULL fp helps
+      frame ret_frame2(ret_sp, NULL, addr.pc());
+      if (!ret_frame2.safe_for_sender(jt)) {
+        // nothing else to try if the frame isn't good
+        return false;
+      }
+      ret_frame = ret_frame2;
+#else
+      // nothing else to try if the frame isn't good
+      return false;
+#endif /* COMPILER2 */
+    }
+    *fr_addr = ret_frame;
+    return true;
+  }
+
+  // nothing else to try
+  return false;
+}
+
+void JavaThread::cache_global_variables() { }
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/thread_linux_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP
+
+ private:
+#ifdef ASSERT
+  // The spill stack holds N callee-save registers at each Java call and
+  // grows downwards towards the limit.  We need the limit to check that we
+  // have space for a spill, and the base so we can identify all live spill
+  // frames at GC (eventually).
+  address          _spill_stack;
+  address          _spill_stack_base;
+  address          _spill_stack_limit;
+#endif // ASSERT
+
+  void pd_initialize() {
+    _anchor.clear();
+  }
+
+  frame pd_last_frame() {
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+    if (_anchor.last_Java_pc() != NULL) {
+      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
+    } else {
+      // This will pick up pc from sp
+      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp());
+    }
+  }
+
+ public:
+  // Mutators are highly dangerous....
+  intptr_t* last_Java_fp()                       { return _anchor.last_Java_fp(); }
+  void  set_last_Java_fp(intptr_t* fp)           { _anchor.set_last_Java_fp(fp);   }
+
+  void set_base_of_stack_pointer(intptr_t* base_sp) {
+  }
+
+  static ByteSize last_Java_fp_offset()          {
+    return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
+  }
+
+  intptr_t* base_of_stack_pointer() {
+    return NULL;
+  }
+  void record_base_of_stack_pointer() {
+  }
+
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
+    bool isInJava);
+
+  bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
+private:
+  bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
+public:
+
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
+
+#endif // OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/vmStructs_linux_aarch64.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* Threads (NOTE: incomplete) */                                                                                                   \
+  /******************************/                                                                                                   \
+  nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
+  /* This must be the last entry, and must be present */                                                                             \
+  last_entry()
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+                                                                          \
+  /**********************/                                                \
+  /* Posix Thread IDs   */                                                \
+  /**********************/                                                \
+                                                                          \
+  declare_integer_type(OSThread::thread_id_t)                             \
+  declare_unsigned_integer_type(pthread_t)                                \
+                                                                          \
+  /* This must be the last entry, and must be present */                  \
+  last_entry()
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+                                                                        \
+  /* This must be the last entry, and must be present */                \
+  last_entry()
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+                                                                        \
+  /* This must be the last entry, and must be present */                \
+  last_entry()
+
+#endif // OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/vm_version_linux_aarch64.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_aarch64.hpp"
+
--- a/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -529,7 +529,7 @@
     // JVM needs to know exact stack location, abort if it fails
     if (rslt != 0) {
       if (rslt == ENOMEM) {
-        vm_exit_out_of_memory(0, "pthread_getattr_np");
+        vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
       } else {
         fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
       }
--- a/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -175,7 +175,7 @@
     // JVM needs to know exact stack location, abort if it fails
     if (rslt != 0) {
       if (rslt == ENOMEM) {
-        vm_exit_out_of_memory(0, "pthread_getattr_np");
+        vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
       } else {
         fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
       }
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -716,7 +716,7 @@
      // JVM needs to know exact stack location, abort if it fails
      if (rslt != 0) {
        if (rslt == ENOMEM) {
-         vm_exit_out_of_memory(0, "pthread_getattr_np");
+         vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
        } else {
          fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
        }
@@ -881,8 +881,27 @@
  * updates (JDK-8023956).
  */
 void os::workaround_expand_exec_shield_cs_limit() {
-#if defined(IA32)
+#if defined(IA32) && !defined(ZERO)
   size_t page_size = os::vm_page_size();
+
+  /*
+   * JDK-8197429
+   *
+   * Expand the stack mapping to the end of the initial stack before
+   * attempting to install the codebuf.  This is needed because newer
+   * Linux kernels impose a distance of a megabyte between stack
+   * memory and other memory regions.  If we try to install the
+   * codebuf before expanding the stack the installation will appear
+   * to succeed but we'll get a segfault later if we expand the stack
+   * in Java code.
+   *
+   */
+  if (os::Linux::is_initial_thread()) {
+    address limit = Linux::initial_thread_stack_bottom();
+    limit += (StackYellowPages + StackRedPages) * page_size;
+    os::Linux::expand_stack_to(limit);
+  }
+
   /*
    * Take the highest VA the OS will give us and exec
    *
@@ -901,6 +920,16 @@
   char* hint = (char*) (Linux::initial_thread_stack_bottom() -
                         ((StackYellowPages + StackRedPages + 1) * page_size));
   char* codebuf = os::pd_attempt_reserve_memory_at(page_size, hint);
+
+  if (codebuf == NULL) {
+    // JDK-8197429: There may be a stack gap of one megabyte between
+    // the limit of the stack and the nearest memory region: this is a
+    // Linux kernel workaround for CVE-2017-1000364.  If we failed to
+    // map our codebuf, try again at an address one megabyte lower.
+    hint -= 1 * M;
+    codebuf = os::pd_attempt_reserve_memory_at(page_size, hint);
+  }
+
   if ( (codebuf == NULL) || (!os::commit_memory(codebuf, page_size, true)) ) {
     return; // No matter, we tried, best effort.
   }
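
The two JDK-8197429 additions above share one idea: the kernel's stack guard gap (the CVE-2017-1000364 mitigation) can make a mapping near the initial stack fail, so the stack is expanded first and the codebuf reservation is retried one megabyte lower. A rough Linux-only analogue of that reserve-at-hint-then-step-down logic; try_reserve_at is an invented helper, not os::pd_attempt_reserve_memory_at, and the hint address is arbitrary:

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdio>

    static void* try_reserve_at(void* hint, size_t size) {
      void* p = mmap(hint, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;
      if (p != hint) { munmap(p, size); return NULL; }  // kernel placed it elsewhere
      return p;
    }

    int main() {
      const size_t page = 4096;
      char* hint = reinterpret_cast<char*>(0x70000000);  // arbitrary demo address
      void* buf = try_reserve_at(hint, page);
      if (buf == NULL) {
        // JDK-8197429 fallback: assume a 1 MB guard gap and retry below it.
        buf = try_reserve_at(hint - 1024 * 1024, page);
      }
      std::printf("reserved at %p\n", buf);
      if (buf != NULL) munmap(buf, page);
      return 0;
    }
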
--- a/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -160,6 +160,16 @@
         return prev;
     }
 }
+
+#ifdef __ARM_ARCH_7A__
+/* Perform an atomic compare and swap: if the current value of `*PTR'
+   is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
+   `*PTR' before the operation.*/
+extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
+					       jlong oldval,
+					       jlong newval);
+
+#endif	// __ARM_ARCH_7A__
 #endif // ARM
 
 inline void Atomic::store(jint store_value, volatile jint* dest) {
@@ -233,7 +243,9 @@
   // operation.  Note that some platforms only support this with the
   // limitation that the only valid value to store is the immediate
   // constant 1.  There is a test for this in JNI_CreateJavaVM().
-  return __sync_lock_test_and_set (dest, exchange_value);
+  jint result = __sync_lock_test_and_set (dest, exchange_value);
+  __sync_synchronize();
+  return result;
 #endif // M68K
 #endif // ARM
 }
@@ -246,7 +258,9 @@
 #ifdef M68K
   return m68k_lock_test_and_set(dest, exchange_value);
 #else
-  return __sync_lock_test_and_set (dest, exchange_value);
+  intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+  __sync_synchronize();
+  return result;
 #endif // M68K
 #endif // ARM
 }
@@ -274,7 +288,11 @@
                              volatile jlong* dest,
                              jlong compare_value) {
 
+#ifndef	__ARM_ARCH_7A__
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+#else
+  return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
+#endif
 }
 
 inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
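
Two separate fixes land in this file: __sync_lock_test_and_set only guarantees an acquire barrier, so a trailing __sync_synchronize() is added to give xchg full-fence semantics, and 64-bit cmpxchg on ARMv7 is routed to a hand-written ldrexd/strexd helper. A portable sketch of both ideas; xchg_full_fence and cmpxchg64 are illustrative names, and the __atomic builtin (which GCC can lower to an ldrexd/strexd loop on ARMv7-A, or route through libatomic) stands in for arm_val_compare_and_swap_long:

    #include <cstdint>
    #include <cstdio>

    static int xchg_full_fence(volatile int* dest, int new_value) {
      int old = __sync_lock_test_and_set(dest, new_value);  // acquire barrier only
      __sync_synchronize();                                 // promote to a full fence
      return old;
    }

    static int64_t cmpxchg64(volatile int64_t* dest, int64_t compare, int64_t exchange) {
      // Sequentially consistent ordering is the conservative choice here.
      __atomic_compare_exchange_n(dest, &compare, exchange, false,
                                  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
      return compare;  // now holds the previous value of *dest, as cmpxchg expects
    }

    int main() {
      volatile int flag = 0;
      volatile int64_t word = 42;
      std::printf("xchg old=%d, cas old=%lld\n",
                  xchg_full_fence(&flag, 1),
                  (long long) cmpxchg64(&word, 42, 43));
      return 0;
    }
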
--- a/src/os_cpu/linux_zero/vm/globals_linux_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_zero/vm/globals_linux_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -26,17 +26,19 @@
 #ifndef OS_CPU_LINUX_ZERO_VM_GLOBALS_LINUX_ZERO_HPP
 #define OS_CPU_LINUX_ZERO_VM_GLOBALS_LINUX_ZERO_HPP
 
+#include "utilities/macros.hpp"
+
 //
 // Set the default values for platform dependent flags used by the
 // runtime system.  See globals.hpp for details of what they do.
 //
 
 define_pd_global(bool,  DontYieldALot,           false);
-define_pd_global(intx,  ThreadStackSize,         1536);
+define_pd_global(intx,  ThreadStackSize,         1664);
 #ifdef _LP64
-define_pd_global(intx,  VMThreadStackSize,       1024);
+define_pd_global(intx,  VMThreadStackSize,       PPC_ONLY(1664) NOT_PPC(1024));
 #else
-define_pd_global(intx,  VMThreadStackSize,       512);
+define_pd_global(intx,  VMThreadStackSize,       PPC_ONLY(1152) NOT_PPC(512));
 #endif // _LP64
 define_pd_global(intx,  CompilerThreadStackSize, 0);
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -53,6 +53,12 @@
 #include "thread_linux.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
 
 address os::current_stack_pointer() {
   // return the address of the current function
@@ -114,6 +120,11 @@
   return frame(NULL, NULL); // silence compile warnings
 }
 
+#ifdef HOTSPOT_ASM
+extern "C" int asm_check_null_ptr(ucontext_t *uc);
+extern int Thumb2_Install_Safepoint_PC(ucontext_t *uc, int magicBytes);
+#endif // HOTSPOT_ASM
+
 extern "C" JNIEXPORT int
 JVM_handle_linux_signal(int sig,
                         siginfo_t* info,
@@ -121,6 +132,26 @@
                         int abort_if_unrecognized) {
   ucontext_t* uc = (ucontext_t*) ucVoid;
 
+#ifdef HOTSPOT_ASM
+  if (sig == SIGSEGV) {
+    // check to see if this was the result of a back edge safepoint check
+    if (os::is_poll_address((address)info->si_addr)) {
+      // check that this is a legitimate safepoint rather
+      // than any old illegal access to the polling page.
+      // if the check code returns true it will patch
+      // the return address to enter the safepoint check code
+      // n.b. the offset into the page gives us twice the offset to
+      // the magic word in bytes
+      int magicByteOffset = ((address)info->si_addr - (address)os::get_polling_page()) / 2;
+      if (Thumb2_Install_Safepoint_PC(uc, magicByteOffset)) {
+        return true;
+      }
+    } else if (asm_check_null_ptr(uc)) {
+      return 1;
+    }
+  }
+#endif // HOTSPOT_ASM
+
   Thread* t = ThreadLocalStorage::get_thread_slow();
 
   SignalHandlerMark shm(t);
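
The HOTSPOT_ASM block above classifies a SIGSEGV as either a deliberate safepoint poll (a read of the polling page) or an ordinary fault, and reuses the offset into the page as data. A stripped-down illustration of that classification; polling_page and page_size are assumed demo values, not what os::get_polling_page() would return:

    #include <cstdint>
    #include <cstdio>

    static const uintptr_t polling_page = 0x74000000UL;  // assumed base address
    static const uintptr_t page_size    = 4096;

    static bool is_poll_address(uintptr_t fault_addr) {
      return fault_addr >= polling_page && fault_addr < polling_page + page_size;
    }

    int main() {
      uintptr_t fault = polling_page + 0x40;  // pretend this came from siginfo_t::si_addr
      if (is_poll_address(fault)) {
        // As in the hunk: the offset into the page carries extra information
        // (there, twice the byte offset of the magic word used to patch the PC).
        uintptr_t encoded = (fault - polling_page) / 2;
        std::printf("safepoint poll, encoded offset = %lu\n", (unsigned long) encoded);
      } else {
        std::puts("ordinary segfault");
      }
      return 0;
    }
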
@@ -292,8 +323,16 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 // thread stack
-
+#ifdef PPC
+#ifdef _LP64
+// Default for 64 bit must be at least 1600 K
+size_t os::Linux::min_stack_allowed = 1664 * K;
+#else
+size_t os::Linux::min_stack_allowed = 1152 * K;
+#endif
+#else
 size_t os::Linux::min_stack_allowed = 64 * K;
+#endif
 
 bool os::Linux::supports_variable_stack_size() {
   return true;
@@ -319,7 +358,7 @@
   int res = pthread_getattr_np(pthread_self(), &attr);
   if (res != 0) {
     if (res == ENOMEM) {
-      vm_exit_out_of_memory(0, "pthread_getattr_np");
+      vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
     }
     else {
       fatal(err_msg("pthread_getattr_np failed with errno = %d", res));
@@ -422,6 +461,7 @@
     value = *adr;
     return value;
   }
+
   intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) {
     intptr_t value = errValue;
     value = *adr;
@@ -508,7 +548,6 @@
   }
 };
 #endif // !_LP64
-
 #ifndef PRODUCT
 void os::verify_stack_alignment() {
 }
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/linux_zero/vm/os_linux_zero.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -36,18 +36,30 @@
 
   // Atomically copy 64 bits of data
   static void atomic_copy64(volatile void *src, volatile void *dst) {
-#if defined(PPC32)
+#if defined(PPC32) && !defined(__SPE__)
     double tmp;
-    asm volatile ("lfd  %0, 0(%1)\n"
-                  "stfd %0, 0(%2)\n"
-                  : "=f"(tmp)
-                  : "b"(src), "b"(dst));
+    asm volatile ("lfd  %0, %2\n"
+                  "stfd %0, %1\n"
+                  : "=&f"(tmp), "=Q"(*(volatile double*)dst)
+                  : "Q"(*(volatile double*)src));
+#elif defined(PPC32) && defined(__SPE__)
+    long tmp;
+    asm volatile ("evldd  %0, %2\n"
+                  "evstdd %0, %1\n"
+                  : "=&r"(tmp), "=Q"(*(volatile long*)dst)
+                  : "Q"(*(volatile long*)src));
 #elif defined(S390) && !defined(_LP64)
     double tmp;
-    asm volatile ("ld  %0, 0(%1)\n"
-                  "std %0, 0(%2)\n"
+    asm volatile ("ld  %0, %2\n"
+                  "std %0, %1\n"
+                  : "=&f"(tmp), "=Q"(*(volatile double*)dst)
+                  : "Q"(*(volatile double*)src));
+#elif defined(__ARM_ARCH_7A__)
+    jlong tmp;
+    asm volatile ("ldrexd  %0, [%1]\n"
                   : "=r"(tmp)
-                  : "a"(src), "a"(dst));
+                  : "r"(src), "m"(src));
+    *(jlong *) dst = tmp;
 #else
     *(jlong *) dst = *(jlong *) src;
 #endif
--- a/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -579,7 +579,7 @@
   // on the thread stack, which could get a mapping error when touched.
   address addr = (address) info->si_addr;
   if (sig == SIGBUS && info->si_code == BUS_OBJERR && info->si_errno == ENOMEM) {
-    vm_exit_out_of_memory(0, "Out of swap space to map in thread stack.");
+    vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "Out of swap space to map in thread stack.");
   }
 
   VMError err(t, sig, pc, info, ucVoid);
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -26,10 +26,240 @@
 #include "runtime/os.hpp"
 #include "vm_version_sparc.hpp"
 
-# include <sys/auxv.h>
-# include <sys/auxv_SPARC.h>
-# include <sys/systeminfo.h>
-# include <kstat.h>
+#include <sys/auxv.h>
+#include <sys/auxv_SPARC.h>
+#include <sys/systeminfo.h>
+#include <kstat.h>
+#include <picl.h>
+#include <dlfcn.h>
+#include <link.h>
+
+extern "C" static int PICL_visit_cpu_helper(picl_nodehdl_t nodeh, void *result);
+
+// Functions from the library we need (signatures should match those in picl.h)
+extern "C" {
+  typedef int (*picl_initialize_func_t)(void);
+  typedef int (*picl_shutdown_func_t)(void);
+  typedef int (*picl_get_root_func_t)(picl_nodehdl_t *nodehandle);
+  typedef int (*picl_walk_tree_by_class_func_t)(picl_nodehdl_t rooth,
+      const char *classname, void *c_args,
+      int (*callback_fn)(picl_nodehdl_t hdl, void *args));
+  typedef int (*picl_get_prop_by_name_func_t)(picl_nodehdl_t nodeh, const char *nm,
+      picl_prophdl_t *ph);
+  typedef int (*picl_get_propval_func_t)(picl_prophdl_t proph, void *valbuf, size_t sz);
+  typedef int (*picl_get_propinfo_func_t)(picl_prophdl_t proph, picl_propinfo_t *pi);
+}
+
+class PICL {
+  // Pointers to functions in the library
+  picl_initialize_func_t _picl_initialize;
+  picl_shutdown_func_t _picl_shutdown;
+  picl_get_root_func_t _picl_get_root;
+  picl_walk_tree_by_class_func_t _picl_walk_tree_by_class;
+  picl_get_prop_by_name_func_t _picl_get_prop_by_name;
+  picl_get_propval_func_t _picl_get_propval;
+  picl_get_propinfo_func_t _picl_get_propinfo;
+  // Handle to the library that is returned by dlopen
+  void *_dl_handle;
+
+  bool open_library();
+  void close_library();
+
+  template<typename FuncType> bool bind(FuncType& func, const char* name);
+  bool bind_library_functions();
+
+  // Get a value of the integer property. The value in the tree can be either 32 or 64 bit
+  // depending on the platform. The result is converted to int.
+  int get_int_property(picl_nodehdl_t nodeh, const char* name, int* result) {
+    picl_propinfo_t pinfo;
+    picl_prophdl_t proph;
+    if (_picl_get_prop_by_name(nodeh, name, &proph) != PICL_SUCCESS ||
+        _picl_get_propinfo(proph, &pinfo) != PICL_SUCCESS) {
+      return PICL_FAILURE;
+    }
+
+    if (pinfo.type != PICL_PTYPE_INT && pinfo.type != PICL_PTYPE_UNSIGNED_INT) {
+      assert(false, "Invalid property type");
+      return PICL_FAILURE;
+    }
+    if (pinfo.size == sizeof(int64_t)) {
+      int64_t val;
+      if (_picl_get_propval(proph, &val, sizeof(int64_t)) != PICL_SUCCESS) {
+        return PICL_FAILURE;
+      }
+      *result = static_cast<int>(val);
+    } else if (pinfo.size == sizeof(int32_t)) {
+      int32_t val;
+      if (_picl_get_propval(proph, &val, sizeof(int32_t)) != PICL_SUCCESS) {
+        return PICL_FAILURE;
+      }
+      *result = static_cast<int>(val);
+    } else {
+      assert(false, "Unexpected integer property size");
+      return PICL_FAILURE;
+    }
+    return PICL_SUCCESS;
+  }
+
+  // Visitor and a state machine that visits integer properties and verifies that the
+  // values are the same. Stores the unique value observed.
+  class UniqueValueVisitor {
+    PICL *_picl;
+    enum {
+      INITIAL,        // Start state, no assignments happened
+      ASSIGNED,       // Assigned a value
+      INCONSISTENT    // Inconsistent value seen
+    } _state;
+    int _value;
+  public:
+    UniqueValueVisitor(PICL* picl) : _picl(picl), _state(INITIAL) { }
+    int value() {
+      assert(_state == ASSIGNED, "Precondition");
+      return _value;
+    }
+    void set_value(int value) {
+      assert(_state == INITIAL, "Precondition");
+      _value = value;
+      _state = ASSIGNED;
+    }
+    bool is_initial()       { return _state == INITIAL;      }
+    bool is_assigned()      { return _state == ASSIGNED;     }
+    bool is_inconsistent()  { return _state == INCONSISTENT; }
+    void set_inconsistent() { _state = INCONSISTENT;         }
+
+    bool visit(picl_nodehdl_t nodeh, const char* name) {
+      assert(!is_inconsistent(), "Precondition");
+      int curr;
+      if (_picl->get_int_property(nodeh, name, &curr) == PICL_SUCCESS) {
+        if (!is_assigned()) { // first iteration
+          set_value(curr);
+        } else if (curr != value()) { // following iterations
+          set_inconsistent();
+        }
+        return true;
+      }
+      return false;
+    }
+  };
+
+  class CPUVisitor {
+    UniqueValueVisitor _l1_visitor;
+    UniqueValueVisitor _l2_visitor;
+    int _limit; // number of times visit() can be run
+  public:
+    CPUVisitor(PICL *picl, int limit) : _l1_visitor(picl), _l2_visitor(picl), _limit(limit) {}
+    static int visit(picl_nodehdl_t nodeh, void *arg) {
+      CPUVisitor *cpu_visitor = static_cast<CPUVisitor*>(arg);
+      UniqueValueVisitor* l1_visitor = cpu_visitor->l1_visitor();
+      UniqueValueVisitor* l2_visitor = cpu_visitor->l2_visitor();
+      if (!l1_visitor->is_inconsistent()) {
+        l1_visitor->visit(nodeh, "l1-dcache-line-size");
+      }
+      static const char* l2_data_cache_line_property_name = NULL;
+      // On the first visit determine the name of the l2 cache line size property and memoize it.
+      if (l2_data_cache_line_property_name == NULL) {
+        assert(!l2_visitor->is_inconsistent(), "First iteration cannot be inconsistent");
+        l2_data_cache_line_property_name = "l2-cache-line-size";
+        if (!l2_visitor->visit(nodeh, l2_data_cache_line_property_name)) {
+          l2_data_cache_line_property_name = "l2-dcache-line-size";
+          l2_visitor->visit(nodeh, l2_data_cache_line_property_name);
+        }
+      } else {
+        if (!l2_visitor->is_inconsistent()) {
+          l2_visitor->visit(nodeh, l2_data_cache_line_property_name);
+        }
+      }
+
+      if (l1_visitor->is_inconsistent() && l2_visitor->is_inconsistent()) {
+        return PICL_WALK_TERMINATE;
+      }
+      cpu_visitor->_limit--;
+      if (cpu_visitor->_limit <= 0) {
+        return PICL_WALK_TERMINATE;
+      }
+      return PICL_WALK_CONTINUE;
+    }
+    UniqueValueVisitor* l1_visitor() { return &_l1_visitor; }
+    UniqueValueVisitor* l2_visitor() { return &_l2_visitor; }
+  };
+  int _L1_data_cache_line_size;
+  int _L2_data_cache_line_size;
+public:
+  static int visit_cpu(picl_nodehdl_t nodeh, void *state) {
+    return CPUVisitor::visit(nodeh, state);
+  }
+
+  PICL(bool is_fujitsu, bool is_sun4v) : _L1_data_cache_line_size(0), _L2_data_cache_line_size(0), _dl_handle(NULL) {
+    if (!open_library()) {
+      return;
+    }
+    if (_picl_initialize() == PICL_SUCCESS) {
+      picl_nodehdl_t rooth;
+      if (_picl_get_root(&rooth) == PICL_SUCCESS) {
+        const char* cpu_class = "cpu";
+        // If it's a Fujitsu machine, it's a "core"
+        if (is_fujitsu) {
+          cpu_class = "core";
+        }
+        CPUVisitor cpu_visitor(this, (is_sun4v && !is_fujitsu) ? 1 : os::processor_count());
+        _picl_walk_tree_by_class(rooth, cpu_class, &cpu_visitor, PICL_visit_cpu_helper);
+        if (cpu_visitor.l1_visitor()->is_assigned()) { // Is there a value?
+          _L1_data_cache_line_size = cpu_visitor.l1_visitor()->value();
+        }
+        if (cpu_visitor.l2_visitor()->is_assigned()) {
+          _L2_data_cache_line_size = cpu_visitor.l2_visitor()->value();
+        }
+      }
+      _picl_shutdown();
+    }
+    close_library();
+  }
+
+  unsigned int L1_data_cache_line_size() const { return _L1_data_cache_line_size; }
+  unsigned int L2_data_cache_line_size() const { return _L2_data_cache_line_size; }
+};
+
+
+extern "C" static int PICL_visit_cpu_helper(picl_nodehdl_t nodeh, void *result) {
+  return PICL::visit_cpu(nodeh, result);
+}
+
+template<typename FuncType>
+bool PICL::bind(FuncType& func, const char* name) {
+  func = reinterpret_cast<FuncType>(dlsym(_dl_handle, name));
+  return func != NULL;
+}
+
+bool PICL::bind_library_functions() {
+  assert(_dl_handle != NULL, "library should be open");
+  return bind(_picl_initialize,         "picl_initialize"        ) &&
+         bind(_picl_shutdown,           "picl_shutdown"          ) &&
+         bind(_picl_get_root,           "picl_get_root"          ) &&
+         bind(_picl_walk_tree_by_class, "picl_walk_tree_by_class") &&
+         bind(_picl_get_prop_by_name,   "picl_get_prop_by_name"  ) &&
+         bind(_picl_get_propval,        "picl_get_propval"       ) &&
+         bind(_picl_get_propinfo,       "picl_get_propinfo"      );
+}
+
+bool PICL::open_library() {
+  _dl_handle = dlopen("libpicl.so.1", RTLD_LAZY);
+  if (_dl_handle == NULL) {
+    warning("PICL (libpicl.so.1) is missing. Performance will not be optimal.");
+    return false;
+  }
+  if (!bind_library_functions()) {
+    assert(false, "unexpected PICL API change");
+    close_library();
+    return false;
+  }
+  return true;
+}
+
+void PICL::close_library() {
+  assert(_dl_handle != NULL, "library should be open");
+  dlclose(_dl_handle);
+  _dl_handle = NULL;
+}
 
 // We need to keep these here as long as we have to build on Solaris
 // versions before 10.
@@ -248,5 +478,9 @@
     kstat_close(kc);
   }
 
+  // Figure out cache line sizes using PICL
+  PICL picl((features & sparc64_family_m) != 0, (features & sun4v_m) != 0);
+  _L2_data_cache_line_size = picl.L2_data_cache_line_size();
+
   return features;
 }
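
PICL is bound lazily above: the library is dlopen'ed, each entry point is resolved with dlsym into a typed function pointer, and a missing library or symbol simply disables the feature instead of adding a hard link-time dependency. The same pattern in miniature, using libm and cos as stand-ins for libpicl.so.1 and its entry points (link with -ldl on older glibc):

    #include <dlfcn.h>
    #include <cstdio>

    template <typename FuncType>
    static bool bind(void* handle, FuncType& func, const char* name) {
      func = reinterpret_cast<FuncType>(dlsym(handle, name));
      return func != NULL;
    }

    int main() {
      void* handle = dlopen("libm.so.6", RTLD_LAZY);
      if (handle == NULL) { std::puts("library missing, feature disabled"); return 0; }
      double (*cos_fn)(double) = NULL;
      if (bind(handle, cos_fn, "cos")) {
        std::printf("cos(0) = %f\n", cos_fn(0.0));
      }
      dlclose(handle);
      return 0;
    }
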
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -734,7 +734,7 @@
   // on the thread stack, which could get a mapping error when touched.
   address addr = (address) info->si_addr;
   if (sig == SIGBUS && info->si_code == BUS_OBJERR && info->si_errno == ENOMEM) {
-    vm_exit_out_of_memory(0, "Out of swap space to map in thread stack.");
+    vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "Out of swap space to map in thread stack.");
   }
 
   VMError err(t, sig, pc, info, ucVoid);
--- a/src/share/tools/hsdis/Makefile	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/tools/hsdis/Makefile	Mon Apr 13 16:44:26 2020 +0100
@@ -68,6 +68,9 @@
 CONFIGURE_ARGS= --host=$(MINGW) --target=$(MINGW)
 else
 CPU             = $(shell uname -m)
+ifneq		 ($(findstring arm,$(CPU)),)
+ARCH=arm
+else
 ARCH1=$(CPU:x86_64=amd64)
 ARCH=$(ARCH1:i686=i386)
 CFLAGS/i386	+= -m32
@@ -75,8 +78,10 @@
 CFLAGS/sparcv9	+= -m64
 CFLAGS/amd64	+= -m64
 CFLAGS/ppc64	+= -m64
+CFLAGS/aarch64	+= 
+endif
 CFLAGS		+= $(CFLAGS/$(ARCH))
-CFLAGS		+= -fPIC
+CFLAGS		+= -fPIC -g
 OS		= linux
 LIB_EXT		= .so
 CC 		= gcc
@@ -85,6 +90,14 @@
 DLDFLAGS	+= -shared
 LDFLAGS         += -ldl
 OUTFLAGS	+= -o $@
+# special case x86-aarch64 hybrid
+ifeq ($(ARCH),amd64)
+ifeq ($(BUILD_AARCH64),true)
+CONFIGURE_ARGS= --target=aarch64-none-linux-gnu
+LP64=1
+CFLAGS+= -DTARGET_ARCH_aarch64
+endif
+endif
 ## OS = Windows ##
 else   # !SunOS, !Linux => Windows
 OS		= windows
@@ -119,7 +132,7 @@
 BINUTILSDIR	= $(shell cd $(BINUTILS);pwd)
 endif
 
-CPPFLAGS	+= -I$(BINUTILSDIR)/include -I$(BINUTILS)/bfd -I$(TARGET_DIR)/bfd
+CPPFLAGS	+= -I$(BINUTILSDIR)/include -I$(BINUTILSDIR)/bfd -I$(TARGET_DIR)/bfd
 CPPFLAGS	+= -DLIBARCH_$(LIBARCH) -DLIBARCH=\"$(LIBARCH)\" -DLIB_EXT=\"$(LIB_EXT)\"
 
 TARGET_DIR	= build/$(OS)-$(JDKARCH)
@@ -146,7 +159,7 @@
 demo: $(TARGET) $(DEMO_TARGET)
 
 $(LIBRARIES): $(TARGET_DIR) $(TARGET_DIR)/Makefile
-	if [ ! -f $@ ]; then cd $(TARGET_DIR); make all-opcodes; fi
+	if [ ! -f $@ ]; then cd $(TARGET_DIR); make all-opcodes "CFLAGS=$(CFLAGS)"; fi
 
 $(TARGET_DIR)/Makefile:
 	(cd $(TARGET_DIR); CC=$(CC) CFLAGS="$(CFLAGS)" $(BINUTILSDIR)/configure --disable-nls $(CONFIGURE_ARGS))
--- a/src/share/tools/hsdis/hsdis.c	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/tools/hsdis/hsdis.c	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,8 @@
 #include <dis-asm.h>
 #include <inttypes.h>
 
+#include <string.h>
+
 #ifndef bool
 #define bool int
 #define true 1
@@ -368,6 +370,7 @@
     }
     p = q;
   }
+  *iop = '\0';
 }
 
 static void print_help(struct hsdis_app_data* app_data,
@@ -409,15 +412,25 @@
 #ifdef LIBARCH_i386
     res = "i386";
 #endif
+#ifdef LIBARCH_aarch64
+    res = "aarch64";
+#endif
 #ifdef LIBARCH_amd64
+#ifdef TARGET_ARCH_aarch64
+    res = "aarch64";
+#else
     res = "i386:x86-64";
 #endif
+#endif
 #ifdef LIBARCH_sparc
     res = "sparc:v8plusb";
 #endif
 #ifdef LIBARCH_sparcv9
     res = "sparc:v9b";
 #endif
+#ifdef LIBARCH_arm
+    res = "arm";
+#endif
 #ifdef LIBARCH_ppc64
     res = "powerpc:common64";
 #endif
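
Besides the new LIBARCH cases, the hsdis.c hunk adds a terminating NUL after the option-copying loop; without it, the tail of the caller's buffer is whatever garbage was already there. A cut-down illustration of that bug class; copy_options and its arguments are invented for illustration, not hsdis internals:

    #include <cstdio>
    #include <cstring>

    static void copy_options(const char* src, char* out, std::size_t out_size) {
      char* iop = out;
      char* limit = out + out_size - 1;      // leave room for the terminator
      for (const char* p = src; *p != '\0' && iop < limit; ++p) {
        if (*p != ' ') *iop++ = *p;          // copy everything except spaces
      }
      *iop = '\0';                           // without this the tail stays garbage
    }

    int main() {
      char buf[32];
      std::memset(buf, 'X', sizeof(buf));    // simulate a dirty, reused buffer
      copy_options("hsdis print-pc", buf, sizeof(buf));
      std::printf("%s\n", buf);              // "hsdisprint-pc", reliably terminated
      return 0;
    }
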
--- a/src/share/vm/adlc/adlparse.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/adlc/adlparse.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -2778,7 +2778,8 @@
   const char* param = NULL;
   inst._parameters.reset();
   while ((param = inst._parameters.iter()) != NULL) {
-    OperandForm* opForm = (OperandForm*) inst._localNames[param];
+    OpClassForm* opForm = inst._localNames[param]->is_opclass();
+    assert(opForm != NULL, "sanity");
     encoding->add_parameter(opForm->_ident, param);
   }
 
@@ -3218,7 +3219,8 @@
   const char* param = NULL;
   inst._parameters.reset();
   while ((param = inst._parameters.iter()) != NULL) {
-    OperandForm* opForm = (OperandForm*) inst._localNames[param];
+    OpClassForm* opForm = inst._localNames[param]->is_opclass();
+    assert(opForm != NULL, "sanity");
     encoding->add_parameter(opForm->_ident, param);
   }
 
--- a/src/share/vm/adlc/dfa.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/adlc/dfa.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -757,19 +757,27 @@
 }
 
 int Expr::compute_min(const Expr *c1, const Expr *c2) {
-  int result = c1->_min_value + c2->_min_value;
-  assert( result >= 0, "Invalid cost computation");
+  int v1 = c1->_min_value;
+  int v2 = c2->_min_value;
+  assert(0 <= v2 && v2 <= Expr::Max, "sanity");
+  assert(v1 <= Expr::Max - v2, "Invalid cost computation");
 
-  return result;
+  return v1 + v2;
 }
 
+
 int Expr::compute_max(const Expr *c1, const Expr *c2) {
-  int result = c1->_max_value + c2->_max_value;
-  if( result < 0 ) {  // check for overflow
-    result = Expr::Max;
+  int v1 = c1->_max_value;
+  int v2 = c2->_max_value;
+
+  // Check for overflow without producing UB. If v2 is positive
+  // and not larger than Max, the subtraction cannot underflow.
+  assert(0 <= v2 && v2 <= Expr::Max, "sanity");
+  if (v1 > Expr::Max - v2) {
+    return Expr::Max;
   }
 
-  return result;
+  return v1 + v2;
 }
 
 void Expr::print() const {
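
The compute_max change replaces "add, then test whether the sum went negative" (signed overflow is undefined behaviour, so that test may be optimized away) with a pre-check against Expr::Max. The same saturating-add idea in isolation, with kMax standing in for Expr::Max:

    #include <cassert>
    #include <climits>
    #include <cstdio>

    static const int kMax = INT_MAX;  // stand-in for Expr::Max

    static int saturating_add(int v1, int v2) {
      assert(0 <= v2 && v2 <= kMax && "cost operands are non-negative");
      if (v1 > kMax - v2) {           // would overflow: clamp instead of wrapping
        return kMax;
      }
      return v1 + v2;
    }

    int main() {
      std::printf("%d\n", saturating_add(10, 20));       // 30
      std::printf("%d\n", saturating_add(kMax - 1, 5));  // clamped to kMax
      return 0;
    }
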
--- a/src/share/vm/adlc/formssel.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/adlc/formssel.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -565,12 +565,6 @@
     attr = (Attribute *)attr->_next;
   }
 
-  // Ugly: until a better fix is implemented, disable rematerialization for
-  // negD nodes because they are proved to be problematic.
-  if (is_ideal_negD()) {
-    return false;
-  }
-
   // Constants
   if( _components.count() == 1 && _components[0]->is(Component::USE_DEF) )
     rematerialize = true;
@@ -930,7 +924,8 @@
   const char *name;
   const char *kill_name = NULL;
   for (_parameters.reset(); (name = _parameters.iter()) != NULL;) {
-    OperandForm *opForm = (OperandForm*)_localNames[name];
+    OpClassForm *opForm = _localNames[name]->is_opclass();
+    assert(opForm != NULL, "sanity");
 
     Effect* e = NULL;
     {
@@ -947,7 +942,8 @@
       // complex so simply enforce the restriction during parse.
       if (kill_name != NULL &&
           e->isa(Component::TEMP) && !e->isa(Component::DEF)) {
-        OperandForm* kill = (OperandForm*)_localNames[kill_name];
+        OpClassForm* kill = _localNames[kill_name]->is_opclass();
+        assert(kill != NULL, "sanity");
         globalAD->syntax_err(_linenum, "%s: %s %s must be at the end of the argument list\n",
                              _ident, kill->_ident, kill_name);
       } else if (e->isa(Component::KILL) && !e->isa(Component::USE)) {
@@ -1249,7 +1245,8 @@
       !is_short_branch() &&     // Don't match another short branch variant
       reduce_result() != NULL &&
       strcmp(reduce_result(), short_branch->reduce_result()) == 0 &&
-      _matrule->equivalent(AD.globalNames(), short_branch->_matrule)) {
+      _matrule->equivalent(AD.globalNames(), short_branch->_matrule) &&
+      equivalent_predicates(this, short_branch)) {
     // The instructions are equivalent.
 
     // Now verify that both instructions have the same parameters and
@@ -2339,7 +2336,8 @@
   // Add parameters that "do not appear in match rule".
   const char *name;
   for (_parameters.reset(); (name = _parameters.iter()) != NULL;) {
-    OperandForm *opForm = (OperandForm*)_localNames[name];
+    OpClassForm *opForm = _localNames[name]->is_opclass();
+    assert(opForm != NULL, "sanity");
 
     if ( _components.operand_position(name) == -1 ) {
       _components.insert(name, opForm->_ident, Component::INVALID, false);
--- a/src/share/vm/adlc/main.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/adlc/main.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -236,6 +236,11 @@
   AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp");
   AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp");
 #endif
+#ifdef TARGET_ARCH_aarch64
+  AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp");
+  AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp");
+  AD.addInclude(AD._CPP_file, "vmreg_aarch64.inline.hpp");
+#endif
 #ifdef TARGET_ARCH_sparc
   AD.addInclude(AD._CPP_file, "assembler_sparc.inline.hpp");
   AD.addInclude(AD._CPP_file, "nativeInst_sparc.hpp");
--- a/src/share/vm/adlc/output_c.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/adlc/output_c.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1095,7 +1095,7 @@
         fprintf(fp, "  // Identify previous instruction if inside this block\n");
         fprintf(fp, "  if( ");
         print_block_index(fp, inst_position);
-        fprintf(fp, " > 0 ) {\n    Node *n = block->_nodes.at(");
+        fprintf(fp, " > 0 ) {\n    Node *n = block->get_node(");
         print_block_index(fp, inst_position);
         fprintf(fp, ");\n    inst%d = (n->is_Mach()) ? ", inst_position);
         fprintf(fp, "n->as_Mach() : NULL;\n  }\n");
--- a/src/share/vm/asm/assembler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/asm/assembler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 #endif
@@ -63,7 +66,7 @@
   _oop_recorder= code->oop_recorder();
   DEBUG_ONLY( _short_branch_delta = 0; )
   if (_code_begin == NULL)  {
-    vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
+    vm_exit_out_of_memory(0, OOM_MMAP_ERROR, err_msg("CodeCache: no room for %s",
                                      code->name()));
   }
 }
--- a/src/share/vm/asm/assembler.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/asm/assembler.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,10 @@
 # include "register_x86.hpp"
 # include "vm_version_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "register_aarch64.hpp"
+# include "vm_version_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "register_sparc.hpp"
 # include "vm_version_sparc.hpp"
@@ -455,6 +459,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.hpp"
 #endif
--- a/src/share/vm/asm/codeBuffer.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/asm/codeBuffer.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -94,7 +94,7 @@
   bool        _locs_own;        // did I allocate the locs myself?
   bool        _frozen;          // no more expansion of this section
   bool        _scratch_emit;    // buffer is used for scratch emit, don't relocate!
-  char        _index;           // my section number (SECT_INST, etc.)
+  signed char _index;           // my section number (SECT_INST, etc.)
   CodeBuffer* _outer;           // enclosing CodeBuffer
 
   // (Note:  _locs_point used to be called _last_reloc_offset.)
@@ -570,6 +570,9 @@
 #ifdef TARGET_ARCH_x86
 # include "codeBuffer_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "codeBuffer_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "codeBuffer_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -905,6 +905,13 @@
     return false;
   }
 
+// AARCH64 cannot handle shifts that are neither 0 nor log2 of the type size
+#ifdef AARCH64
+  if (*log2_scale != 0 &&
+      (1 << *log2_scale) != type2aelembytes(x->basic_type(), true))
+    return false;
+#endif
+
   // If the value is pinned then it will be always be computed so
   // there's no profit to reshaping the expression.
   return !root->is_pinned();
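
The new guard bails out of address-expression reshaping on AArch64 unless the shift is 0 or exactly log2 of the element size, since those are the only scales the addressing modes can encode. The condition in isolation; shift_supported and element_size_in_bytes are stand-ins for the canonicalizer's variables and type2aelembytes:

    #include <cstdio>

    static bool shift_supported(int log2_scale, int element_size_in_bytes) {
      return log2_scale == 0 || (1 << log2_scale) == element_size_in_bytes;
    }

    int main() {
      std::printf("%d %d %d\n",
                  shift_supported(0, 8),   // 1: no scaling is always fine
                  shift_supported(3, 8),   // 1: shift by 3 matches 8-byte elements
                  shift_supported(2, 8));  // 0: the canonicalizer must give up
      return 0;
    }
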
--- a/src/share/vm/c1/c1_Compilation.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_Compilation.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #include "code/debugInfoRec.hpp"
 #include "compiler/compileLog.hpp"
 
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
 
 typedef enum {
   _t_compile,
@@ -291,6 +294,29 @@
   if (!setup_code_buffer(code(), allocator()->num_calls())) {
     BAILOUT_("size requested greater than avail code buffer size", 0);
   }
+
+#ifdef BUILTIN_SIM
+  if (NotifySimulator) {
+    // Names are up to 65536 chars long.  UTF8-coded strings are up to
+    // 3 bytes per character.  We concatenate three such strings.
+    // Yes, I know this is ridiculous, but it's debug code and glibc
+    // allocates large arrays very efficiently.
+    size_t len = (65536 * 3) * 3;
+    char *name = new char[len];
+
+    strncpy(name, _method->holder()->name()->as_utf8(), len);
+    for (char *p = strpbrk(name, "/"); p; p = strpbrk(p, "/")) {
+      *p = '.';
+    }
+    strncat(name, ".", len);
+    strncat(name, _method->name()->as_utf8(), len);
+    strncat(name, _method->signature()->as_symbol()->as_utf8(), len);
+    unsigned char *base = code()->insts()->start();
+    AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck)->notifyCompile(name, base);
+//    delete[] name;
+  }
+#endif
+
   code()->initialize_oop_recorder(env()->oop_recorder());
 
   _masm = new C1_MacroAssembler(code());
--- a/src/share/vm/c1/c1_Defs.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_Defs.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "register_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "register_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "register_sparc.hpp"
 #endif
@@ -53,6 +56,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_Defs_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_Defs_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_Defs_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_FpuStackSim.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_FpuStackSim.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_FpuStackSim_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_FpuStackSim_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_FpuStackSim_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_FrameMap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_FrameMap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmreg_sparc.inline.hpp"
 #endif
--- a/src/share/vm/c1/c1_FrameMap.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_FrameMap.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -85,6 +85,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_FrameMap_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_FrameMap_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_FrameMap_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_LIR.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIR.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -67,7 +67,7 @@
 
 #endif
 
-#ifdef ARM
+#if defined(ARM) || defined (TARGET_ARCH_aarch64)
 
 FloatRegister LIR_OprDesc::as_float_reg() const {
   return as_FloatRegister(fpu_regnr());
@@ -147,7 +147,11 @@
 #endif
 #ifdef _LP64
   assert(base()->is_cpu_register(), "wrong base operand");
+#ifndef TARGET_ARCH_aarch64
   assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand");
+#else
+  assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand");
+#endif
   assert(base()->type() == T_OBJECT || base()->type() == T_LONG,
          "wrong type for addresses");
 #else
@@ -420,6 +424,13 @@
   _stub = new ArrayCopyStub(this);
 }
 
+#ifdef TARGET_ARCH_aarch64
+LIR_OpUpdateCRC32::LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res)
+  : LIR_Op(lir_updatecrc32, res, NULL)
+  , _crc(crc)
+  , _val(val) {
+}
+#endif
 
 //-------------------verify--------------------------
 
@@ -546,7 +557,7 @@
       assert(opConvert->_info == NULL, "must be");
       if (opConvert->_opr->is_valid())       do_input(opConvert->_opr);
       if (opConvert->_result->is_valid())    do_output(opConvert->_result);
-#ifdef PPC
+#if defined(PPC) || defined(TARGET_ARCH_aarch64)
       if (opConvert->_tmp1->is_valid())      do_temp(opConvert->_tmp1);
       if (opConvert->_tmp2->is_valid())      do_temp(opConvert->_tmp2);
 #endif
@@ -865,6 +876,21 @@
     }
 
 
+#ifdef TARGET_ARCH_aarch64
+// LIR_OpUpdateCRC32
+    case lir_updatecrc32: {
+      assert(op->as_OpUpdateCRC32() != NULL, "must be");
+      LIR_OpUpdateCRC32* opUp = (LIR_OpUpdateCRC32*)op;
+
+      assert(opUp->_crc->is_valid(), "used");          do_input(opUp->_crc);     do_temp(opUp->_crc);
+      assert(opUp->_val->is_valid(), "used");          do_input(opUp->_val);     do_temp(opUp->_val);
+      assert(opUp->_result->is_valid(), "used");       do_output(opUp->_result);
+      assert(opUp->_info == NULL, "no info for LIR_OpUpdateCRC32");
+
+      break;
+    }
+#endif
+
 // LIR_OpLock
     case lir_lock:
     case lir_unlock: {
@@ -1045,6 +1071,12 @@
   masm->emit_code_stub(stub());
 }
 
+#ifdef TARGET_ARCH_aarch64
+void LIR_OpUpdateCRC32::emit_code(LIR_Assembler* masm) {
+  masm->emit_updatecrc32(this);
+}
+#endif
+
 void LIR_Op0::emit_code(LIR_Assembler* masm) {
   masm->emit_op0(this);
 }
@@ -1741,6 +1773,10 @@
      case lir_dynamic_call:          s = "dynamic";       break;
      // LIR_OpArrayCopy
      case lir_arraycopy:             s = "arraycopy";     break;
+#ifdef TARGET_ARCH_aarch64
+     // LIR_OpUpdateCRC32
+     case lir_updatecrc32:           s = "updatecrc32";   break;
+#endif
      // LIR_OpLock
      case lir_lock:                  s = "lock";          break;
      case lir_unlock:                s = "unlock";        break;
@@ -1789,6 +1825,15 @@
   tmp()->print(out);     out->print(" ");
 }
 
+#ifdef TARGET_ARCH_aarch64
+// LIR_OpUpdateCRC32
+void LIR_OpUpdateCRC32::print_instr(outputStream* out) const {
+  crc()->print(out);     out->print(" ");
+  val()->print(out);     out->print(" ");
+  result_opr()->print(out); out->print(" ");
+}
+#endif
+
 // LIR_OpCompareAndSwap
 void LIR_OpCompareAndSwap::print_instr(outputStream* out) const {
   addr()->print(out);      out->print(" ");
--- a/src/share/vm/c1/c1_LIR.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIR.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -439,7 +439,7 @@
   // for compatibility with RInfo
   int fpu () const                                  { return lo_reg_half(); }
 #endif // X86
-#if defined(SPARC) || defined(ARM) || defined(PPC)
+#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64)
   FloatRegister as_float_reg   () const;
   FloatRegister as_double_reg  () const;
 #endif
@@ -527,7 +527,7 @@
      , _type(type)
      , _disp(0) { verify(); }
 
-#if defined(X86) || defined(ARM)
+#if defined(X86) || defined(ARM) || defined(AARCH64)
   LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type):
        _base(base)
      , _index(index)
@@ -602,7 +602,7 @@
                                                                              LIR_OprDesc::fpu_register         |
                                                                              LIR_OprDesc::double_size); }
 #endif
-#ifdef X86
+#if defined(X86) || defined(AARCH64)
   static LIR_Opr double_fpu(int reg)            { return (LIR_Opr)(intptr_t)((reg  << LIR_OprDesc::reg1_shift) |
                                                                              (reg  << LIR_OprDesc::reg2_shift) |
                                                                              LIR_OprDesc::double_type          |
@@ -842,6 +842,9 @@
 class      LIR_OpJavaCall;
 class      LIR_OpRTCall;
 class    LIR_OpArrayCopy;
+#ifdef TARGET_ARCH_aarch64
+class    LIR_OpUpdateCRC32;
+#endif
 class    LIR_OpLock;
 class    LIR_OpTypeCheck;
 class    LIR_OpCompareAndSwap;
@@ -945,6 +948,11 @@
   , begin_opArrayCopy
       , lir_arraycopy
   , end_opArrayCopy
+#ifdef TARGET_ARCH_aarch64
+  , begin_opUpdateCRC32
+      , lir_updatecrc32
+  , end_opUpdateCRC32
+#endif
   , begin_opLock
     , lir_lock
     , lir_unlock
@@ -1097,6 +1105,9 @@
   virtual LIR_Op2* as_Op2() { return NULL; }
   virtual LIR_Op3* as_Op3() { return NULL; }
   virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
+#ifdef TARGET_ARCH_aarch64
+  virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
+#endif
   virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
   virtual LIR_OpCompareAndSwap* as_OpCompareAndSwap() { return NULL; }
   virtual LIR_OpProfileCall* as_OpProfileCall() { return NULL; }
@@ -1251,6 +1262,28 @@
 };
 
 
+#ifdef TARGET_ARCH_aarch64
+// LIR_OpUpdateCRC32
+class LIR_OpUpdateCRC32: public LIR_Op {
+  friend class LIR_OpVisitState;
+
+private:
+  LIR_Opr   _crc;
+  LIR_Opr   _val;
+
+public:
+
+  LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res);
+
+  LIR_Opr crc() const                            { return _crc; }
+  LIR_Opr val() const                            { return _val; }
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32()  { return this; }
+  void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+#endif
+
 // --------------------------------------------------
 // LIR_Op0
 // --------------------------------------------------
@@ -1404,7 +1437,7 @@
  private:
    Bytecodes::Code _bytecode;
    ConversionStub* _stub;
-#ifdef PPC
+#if defined(PPC) || defined(TARGET_ARCH_aarch64)
   LIR_Opr _tmp1;
   LIR_Opr _tmp2;
 #endif
@@ -1419,7 +1452,7 @@
 #endif
      , _bytecode(code)                           {}
 
-#ifdef PPC
+#if defined(PPC) || defined(TARGET_ARCH_aarch64)
    LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub
                  ,LIR_Opr tmp1, LIR_Opr tmp2)
      : LIR_Op1(lir_convert, opr, result)
@@ -1431,7 +1464,7 @@
 
   Bytecodes::Code bytecode() const               { return _bytecode; }
   ConversionStub* stub() const                   { return _stub; }
-#ifdef PPC
+#if defined(PPC) || defined(TARGET_ARCH_aarch64)
   LIR_Opr tmp1() const                           { return _tmp1; }
   LIR_Opr tmp2() const                           { return _tmp2; }
 #endif
@@ -2007,7 +2040,14 @@
 #ifdef PPC
   void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); }
 #endif
+#if defined (TARGET_ARCH_aarch64)
+  void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst,
+	       ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) {
+    append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr()));
+  }
+#else
   void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); }
+#endif
 
   void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and,  left, right, dst)); }
   void logical_or  (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or,   left, right, dst)); }
@@ -2142,6 +2182,10 @@
 
   void arraycopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, ciArrayKlass* expected_type, int flags, CodeEmitInfo* info) { append(new LIR_OpArrayCopy(src, src_pos, dst, dst_pos, length, tmp, expected_type, flags, info)); }
 
+#ifdef TARGET_ARCH_aarch64
+  void update_crc32(LIR_Opr crc, LIR_Opr val, LIR_Opr res)  { append(new LIR_OpUpdateCRC32(crc, val, res)); }
+#endif
+
   void fpop_raw()                                { append(new LIR_Op0(lir_fpop_raw)); }
 
   void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci);
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,10 +30,15 @@
 #include "c1/c1_MacroAssembler.hpp"
 #include "c1/c1_ValueStack.hpp"
 #include "ci/ciInstance.hpp"
+#include "runtime/os.hpp"
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 # include "vmreg_sparc.inline.hpp"
@@ -107,6 +112,9 @@
  , _pending_non_safepoint_offset(0)
 {
   _slow_case_stubs = new CodeStubList();
+#ifdef TARGET_ARCH_aarch64
+  init(); // Target-dependent initialization
+#endif
 }
 
 
@@ -853,7 +861,7 @@
           stringStream st;
           st.print("bad oop %s at %d", r->as_Register()->name(), _masm->offset());
 #ifdef SPARC
-          _masm->_verify_oop(r->as_Register(), strdup(st.as_string()), __FILE__, __LINE__);
+          _masm->_verify_oop(r->as_Register(), os::strdup(st.as_string(), mtCompiler), __FILE__, __LINE__);
 #else
           _masm->verify_oop(r->as_Register());
 #endif
--- a/src/share/vm/c1/c1_LIRAssembler.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -192,6 +192,9 @@
   void emit_opBranch(LIR_OpBranch* op);
   void emit_opLabel(LIR_OpLabel* op);
   void emit_arraycopy(LIR_OpArrayCopy* op);
+#ifdef TARGET_ARCH_aarch64
+  void emit_updatecrc32(LIR_OpUpdateCRC32* op);
+#endif
   void emit_opConvert(LIR_OpConvert* op);
   void emit_alloc_obj(LIR_OpAllocObj* op);
   void emit_alloc_array(LIR_OpAllocArray* op);
@@ -254,6 +257,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_LIRAssembler_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_LIRAssembler_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_LIRAssembler_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1623,6 +1623,11 @@
   } else {
     __ unsigned_shift_right(addr, CardTableModRefBS::card_shift, tmp);
   }
+
+  if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+    __ membar_storestore();
+  }
+
   if (can_inline_as_constant(card_table_base)) {
     __ move(LIR_OprFact::intConst(0),
               new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE));
@@ -2062,7 +2067,7 @@
     assert(log2_scale == 0, "must not have a scale");
     addr = new LIR_Address(base_op, index_op->as_jint(), dst_type);
   } else {
-#ifdef X86
+#if defined(X86) || defined(AARCH64)
 #ifdef _LP64
     if (!index_op->is_illegal() && index_op->type() == T_INT) {
       LIR_Opr tmp = new_pointer_register();
@@ -2988,6 +2993,14 @@
     do_Reference_get(x);
     break;
 
+#ifdef TARGET_ARCH_aarch64
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    do_update_CRC32(x);
+    break;
+#endif
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -247,6 +247,9 @@
   void do_NIOCheckIndex(Intrinsic* x);
   void do_FPIntrinsics(Intrinsic* x);
   void do_Reference_get(Intrinsic* x);
+#ifdef TARGET_ARCH_aarch64
+  void do_update_CRC32(Intrinsic* x);
+#endif
 
   void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
 
--- a/src/share/vm/c1/c1_LinearScan.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmreg_sparc.inline.hpp"
 #endif
@@ -2192,7 +2195,8 @@
 
   LIR_Opr res = operand_for_interval(interval);
 
-#ifdef X86
+#if defined(X86) || defined(AARCH64)
+
   // new semantic for is_last_use: not only set on definite end of interval,
   // but also before hole
   // This may still miss some cases (e.g. for dead values), but it is not necessary that the
--- a/src/share/vm/c1/c1_LinearScan.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_LinearScan.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -976,6 +976,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_LinearScan_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_LinearScan_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_LinearScan_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_MacroAssembler.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_MacroAssembler.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 #endif
@@ -64,6 +67,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_MacroAssembler_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_MacroAssembler_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_MacroAssembler_sparc.hpp"
 #endif
--- a/src/share/vm/c1/c1_Runtime1.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -58,6 +58,9 @@
 #include "utilities/copy.hpp"
 #include "utilities/events.hpp"
 
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
 
 // Implementation of StubAssembler
 
@@ -187,6 +190,23 @@
   StubAssembler* sasm = new StubAssembler(&code, name_for(id), id);
   // generate code for runtime stub
   OopMapSet* oop_maps;
+#ifdef BUILTIN_SIM
+  AArch64Simulator *simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+  if (NotifySimulator) {
+    size_t len = 65536;
+    char *name = new char[len];
+
+    // tell the sim about the new stub code
+    strncpy(name, name_for(id), len);
+    // replace spaces with underscore so we can write to file and reparse
+    for (char *p = strpbrk(name, " "); p; p = strpbrk(p, " ")) {
+      *p = '_';
+    }
+    unsigned char *base = buffer_blob->code_begin();
+    simulator->notifyCompile(name, base);
+//    delete[] name;
+  }
+#endif
   oop_maps = generate_code_for(id, sasm);
   assert(oop_maps == NULL || sasm->frame_size() != no_frame_size,
          "if stub has an oop map it must have a valid frame size");
@@ -224,6 +244,12 @@
                                                  sasm->frame_size(),
                                                  oop_maps,
                                                  sasm->must_gc_arguments());
+#ifdef BUILTIN_SIM
+  if (NotifySimulator) {
+    unsigned char *base = buffer_blob->code_begin();
+    simulator->notifyRelocate(base, blob->code_begin() - base);
+  }
+#endif
   // install blob
   assert(blob != NULL, "blob must exist");
   _blobs[id] = blob;
@@ -790,6 +816,8 @@
 //
 //
 
+// AArch64 provides its own implementation which just deopts
+#ifndef TARGET_ARCH_aarch64
 JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id ))
   NOT_PRODUCT(_patch_code_slowcase_cnt++;)
 
@@ -995,7 +1023,8 @@
               Disassembler::decode(copy_buff, copy_buff + *byte_count, tty);
             }
 
-#if defined(SPARC) || defined(PPC)
+            // !!! AARCH64 FIXME -- this is not the same as JDK8 !!!
+#if defined(SPARC) || defined(PPC) || defined(AARCH64)
             // Update the oop location in the nmethod with the proper
             // oop.  When the code was generated, a NULL was stuffed
             // in the oop table and that table needs to be update to
@@ -1054,7 +1083,9 @@
           ICache::invalidate_range(instr_pc, *byte_count);
           NativeGeneralJump::replace_mt_safe(instr_pc, copy_buff);
 
-          if (stub_id == Runtime1::load_klass_patching_id) {
+          // !!! AARCH64 FIXME -- this is not the same as JDK8 !!!
+          if (stub_id == Runtime1::load_klass_patching_id
+              AARCH64_ONLY(|| stub_id == Runtime1::access_field_patching_id)) {
             // update relocInfo to oop
             nmethod* nm = CodeCache::find_nmethod(instr_pc);
             assert(nm != NULL, "invalid nmethod_pc");
@@ -1104,6 +1135,7 @@
     Universe::heap()->register_nmethod(nm);
   }
 JRT_END
+#endif
 
 //
 // Entry point for compiled code. We want to patch a nmethod.
--- a/src/share/vm/c1/c1_globals.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/c1/c1_globals.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_globals_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_globals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_globals_sparc.hpp"
 #endif
--- a/src/share/vm/ci/ciInstanceKlass.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/ci/ciInstanceKlass.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -211,13 +211,42 @@
 
 // ------------------------------------------------------------------
 // ciInstanceKlass::uses_default_loader
-bool ciInstanceKlass::uses_default_loader() {
+bool ciInstanceKlass::uses_default_loader() const {
   // Note:  We do not need to resolve the handle or enter the VM
   // in order to test null-ness.
   return _loader == NULL;
 }
 
 // ------------------------------------------------------------------
+
+/**
+ * Return the basic type of the boxed value for a box klass, or T_OBJECT otherwise.
+ */
+BasicType ciInstanceKlass::box_klass_type() const {
+  if (uses_default_loader() && is_loaded()) {
+    return SystemDictionary::box_klass_type(get_klassOop());
+  } else {
+    return T_OBJECT;
+  }
+}
+
+/**
+ * Is this boxing klass?
+ */
+bool ciInstanceKlass::is_box_klass() const {
+  return is_java_primitive(box_klass_type());
+}
+
+/**
+ *  Is this boxed value offset?
+ */
+bool ciInstanceKlass::is_boxed_value_offset(int offset) const {
+  BasicType bt = box_klass_type();
+  return is_java_primitive(bt) &&
+         (offset == java_lang_boxing_object::value_offset_in_bytes(bt));
+}
+
+// ------------------------------------------------------------------
 // ciInstanceKlass::is_in_package
 //
 // Is this klass in the given package?
--- a/src/share/vm/ci/ciInstanceKlass.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/ci/ciInstanceKlass.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -218,10 +218,14 @@
   ciInstanceKlass* implementor();
 
   // Is the defining class loader of this class the default loader?
-  bool uses_default_loader();
+  bool uses_default_loader() const;
 
   bool is_java_lang_Object();
 
+  BasicType box_klass_type() const;
+  bool is_box_klass() const;
+  bool is_boxed_value_offset(int offset) const;
+
   // Is this klass in the given package?
   bool is_in_package(const char* packagename) {
     return is_in_package(packagename, (int) strlen(packagename));
--- a/src/share/vm/ci/ciMethod.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/ci/ciMethod.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -954,6 +954,23 @@
 }
 
 // ------------------------------------------------------------------
+// ciMethod::has_option_value
+//
+template<typename T>
+bool ciMethod::has_option_value(const char* option, T& value) {
+  check_is_loaded();
+  VM_ENTRY_MARK;
+  methodHandle mh(THREAD, get_methodOop());
+  return CompilerOracle::has_option_value(mh, option, value);
+}
+// Explicit instantiation for all OptionTypes supported.
+template bool ciMethod::has_option_value<intx>(const char* option, intx& value);
+template bool ciMethod::has_option_value<uintx>(const char* option, uintx& value);
+template bool ciMethod::has_option_value<bool>(const char* option, bool& value);
+template bool ciMethod::has_option_value<ccstr>(const char* option, ccstr& value);
+template bool ciMethod::has_option_value<double>(const char* option, double& value);
+
+// ------------------------------------------------------------------
 // ciMethod::can_be_compiled
 //
 // Have previous compilations of this method succeeded?
@@ -1144,6 +1161,44 @@
 bool ciMethod::is_accessor    () const {         FETCH_FLAG_FROM_VM(is_accessor); }
 bool ciMethod::is_initializer () const {         FETCH_FLAG_FROM_VM(is_initializer); }
 
+bool ciMethod::is_boxing_method() const {
+  if (holder()->is_box_klass()) {
+    switch (intrinsic_id()) {
+      case vmIntrinsics::_Boolean_valueOf:
+      case vmIntrinsics::_Byte_valueOf:
+      case vmIntrinsics::_Character_valueOf:
+      case vmIntrinsics::_Short_valueOf:
+      case vmIntrinsics::_Integer_valueOf:
+      case vmIntrinsics::_Long_valueOf:
+      case vmIntrinsics::_Float_valueOf:
+      case vmIntrinsics::_Double_valueOf:
+        return true;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
+bool ciMethod::is_unboxing_method() const {
+  if (holder()->is_box_klass()) {
+    switch (intrinsic_id()) {
+      case vmIntrinsics::_booleanValue:
+      case vmIntrinsics::_byteValue:
+      case vmIntrinsics::_charValue:
+      case vmIntrinsics::_shortValue:
+      case vmIntrinsics::_intValue:
+      case vmIntrinsics::_longValue:
+      case vmIntrinsics::_floatValue:
+      case vmIntrinsics::_doubleValue:
+        return true;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
 BCEscapeAnalyzer  *ciMethod::get_bcea() {
 #ifdef COMPILER2
   if (_bcea == NULL) {
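ciMethod::has_option_value<T> above is defined out of line in the .cpp file and then explicitly instantiated for every supported option type, so callers in other translation units link against the prebuilt instantiations without seeing the template body. An illustrative standalone sketch of that pattern using a hypothetical Options class; everything lives in one file here, so the instantiation lines only mirror the list added to ciMethod.cpp.

  #include <cstdio>
  #include <cstring>

  // Hypothetical typed option store; the real ciMethod forwards to CompilerOracle.
  class Options {
   public:
    template <typename T>
    bool get(const char* name, T& value);
  };

  // Out-of-line definition, as it would live in a .cpp file.
  template <typename T>
  bool Options::get(const char* name, T& value) {
    if (strcmp(name, "Unroll") == 0) { value = static_cast<T>(8); return true; }
    return false;  // option not present: leave 'value' untouched
  }

  // Explicit instantiation for the supported types, mirroring ciMethod.cpp.
  template bool Options::get<int>(const char* name, int& value);
  template bool Options::get<double>(const char* name, double& value);

  int main() {
    Options o;
    int n = 0; double d = 0.0;
    printf("%d %d\n",   o.get("Unroll", n), n);    // 1 8
    printf("%d %.1f\n", o.get("Unroll", d), d);    // 1 8.0
    printf("%d\n",      o.get("Missing", n));      // 0
    return 0;
  }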
--- a/src/share/vm/ci/ciMethod.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/ci/ciMethod.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -248,6 +248,8 @@
   bool should_print_assembly();
   bool break_at_execute();
   bool has_option(const char *option);
+  template<typename T>
+  bool has_option_value(const char* option, T& value);
   bool can_be_compiled();
   bool can_be_osr_compiled(int entry_bci);
   void set_not_compilable(const char* reason = NULL);
@@ -291,6 +293,8 @@
   bool is_accessor    () const;
   bool is_initializer () const;
   bool can_be_statically_bound() const           { return _can_be_statically_bound; }
+  bool is_boxing_method() const;
+  bool is_unboxing_method() const;
 
   // Print the bytecodes of this method.
   void print_codes_on(outputStream* st);
--- a/src/share/vm/ci/ciTypeFlow.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/ci/ciTypeFlow.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -403,7 +403,7 @@
   // Set the rest of the locals to bottom.
   Cell cell = state->next_cell(state->tos());
   state->set_stack_size(0);
-  int limit = state->limit_cell();
+  Cell limit = state->limit_cell();
   for (; cell < limit; cell = state->next_cell(cell)) {
     state->set_type_at(cell, state->bottom_type());
   }
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/classFileParser.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -3919,9 +3919,15 @@
   methodOop m = k->lookup_method(vmSymbols::finalize_method_name(),
                                  vmSymbols::void_method_signature());
   if (m != NULL && !m->is_empty_method()) {
-    f = true;
+      f = true;
   }
-  assert(f == k->has_finalizer(), "inconsistent has_finalizer");
+
+  // The spec doesn't prevent an agent from redefining an empty finalizer.
+  // Although that is generally a bad idea and the redefined finalizer
+  // will not work as expected, we shouldn't abort the VM in this case.
+  if (!k->has_redefined_this_or_super()) {
+    assert(f == k->has_finalizer(), "inconsistent has_finalizer");
+  }
 #endif
 
   // Check if this klass supports the java.lang.Cloneable interface
--- a/src/share/vm/classfile/classFileStream.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/classFileStream.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/classfile/classLoader.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/classLoader.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -288,13 +288,17 @@
 }
 
 LazyClassPathEntry::LazyClassPathEntry(char* path, const struct stat* st) : ClassPathEntry() {
-  _path = strdup(path);
+  _path = os::strdup_check_oom(path);
   _st = *st;
   _meta_index = NULL;
   _resolved_entry = NULL;
   _has_error = false;
 }
 
+LazyClassPathEntry::~LazyClassPathEntry() {
+  os::free((void*) _path);
+}
+
 bool LazyClassPathEntry::is_jar_file() {
   return ((_st.st_mode & S_IFREG) == S_IFREG);
 }
@@ -431,7 +435,7 @@
         default:
         {
           if (!skipCurrentJar && cur_entry != NULL) {
-            char* new_name = strdup(package_name);
+            char* new_name = os::strdup_check_oom(package_name);
             boot_class_path_packages.append(new_name);
           }
         }
--- a/src/share/vm/classfile/classLoader.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/classLoader.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -132,6 +132,8 @@
   bool is_jar_file();
   const char* name()  { return _path; }
   LazyClassPathEntry(char* path, const struct stat* st);
+  virtual ~LazyClassPathEntry();
+
   ClassFileStream* open_stream(const char* name, TRAPS);
   void set_meta_index(MetaIndex* meta_index) { _meta_index = meta_index; }
   virtual bool is_lazy();
--- a/src/share/vm/classfile/javaClasses.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/javaClasses.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -137,7 +137,7 @@
       tty->print_cr("  name: %s, sig: %s, flags: %08x", fs.name()->as_C_string(), fs.signature()->as_C_string(), fs.access_flags().as_int());
     }
 #endif //PRODUCT
-    fatal("Invalid layout of preloaded class");
+    vm_exit_during_initialization("Invalid layout of preloaded class: use -XX:+TraceClassLoading to see the origin of the problem class");
   }
   dest_offset = fd.offset();
 }
@@ -760,6 +760,23 @@
   }
 }
 
+// Returns the Java name for this Java mirror (Resource allocated)
+// See Klass::external_name().
+// For primitive type Java mirrors, the type name is returned.
+const char* java_lang_Class::as_external_name(oop java_class) {
+  assert(java_lang_Class::is_instance(java_class), "must be a Class object");
+  const char* name = NULL;
+  if (is_primitive(java_class)) {
+    name = type2name(primitive_type(java_class));
+  } else {
+    klassOop k = as_klassOop(java_class);
+    name = instanceKlass::cast(k)->external_name();
+  }
+  if (name == NULL) {
+    name = "<null>";
+  }
+  return name;
+}
 
 oop java_lang_Class::primitive_mirror(BasicType t) {
   oop mirror = Universe::java_mirror(t);
@@ -3358,7 +3375,7 @@
     tty->print_cr("  name: %s, sig: %s, flags: %08x", fs.name()->as_C_string(), fs.signature()->as_C_string(), fs.access_flags().as_int());
   }
 #endif //PRODUCT
-  fatal("Invalid layout of preloaded class");
+  vm_exit_during_initialization("Invalid layout of preloaded class: use -XX:+TraceClassLoading to see the origin of the problem class");
   return -1;
 }
 
--- a/src/share/vm/classfile/javaClasses.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/javaClasses.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -254,6 +254,7 @@
   }
   static Symbol* as_signature(oop java_class, bool intern_if_not_found, TRAPS);
   static void print_signature(oop java_class, outputStream *st);
+  static const char* as_external_name(oop java_class);
   // Testing
   static bool is_instance(oop obj) {
     return obj != NULL && obj->klass() == SystemDictionary::Class_klass();
--- a/src/share/vm/classfile/stackMapTable.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/stackMapTable.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/classfile/systemDictionary.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/systemDictionary.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -2159,7 +2159,6 @@
 
   // Better never do a GC while we're holding these oops
   No_Safepoint_Verifier nosafepoint;
-
   klassOop klass1 = find_class(d_index1, d_hash1, constraint_name, class_loader1);
   klassOop klass2 = find_class(d_index2, d_hash2, constraint_name, class_loader2);
   return constraints()->add_entry(constraint_name, klass1, class_loader1,
--- a/src/share/vm/classfile/verifier.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/verifier.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -48,6 +48,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
@@ -656,6 +659,7 @@
 
 
     bool this_uninit = false;  // Set to true when invokespecial <init> initialized 'this'
+    bool verified_exc_handlers = false;
 
     // Merge with the next instruction
     {
@@ -687,6 +691,18 @@
         }
       }
 
+      // Look for possible jump target in exception handlers and see if it
+      // matches current_frame.  Do this check here for astore*, dstore*,
+      // fstore*, istore*, and lstore* opcodes because they can change the type
+      // state by adding a local.  The JVM Spec says that the incoming type state
+      // should be used for this check.  So, do the check here before a possible
+      // local is added to the type state.
+      if (Bytecodes::is_store_into_local(opcode) && bci >= ex_min && bci < ex_max) {
+        verify_exception_handler_targets(
+          bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
+        verified_exc_handlers = true;
+      }
+
       switch (opcode) {
         case Bytecodes::_nop :
           no_control_flow = false; break;
@@ -1663,9 +1679,13 @@
       }  // end switch
     }  // end Merge with the next instruction
 
-    // Look for possible jump target in exception handlers and see if it
-    // matches current_frame
-    if (bci >= ex_min && bci < ex_max) {
+    // Look for possible jump target in exception handlers and see if it matches
+    // current_frame.  Don't do this check if it has already been done (for
+    // [a,d,f,i,l]store* opcodes).  This check cannot be done earlier because
+    // opcodes, such as invokespecial, may set the this_uninit flag.
+    assert(!(verified_exc_handlers && this_uninit),
+      "Exception handler targets got verified before this_uninit got set");
+    if (!verified_exc_handlers && bci >= ex_min && bci < ex_max) {
       verify_exception_handler_targets(
         bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
     }
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/classfile/vmSymbols.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -68,7 +68,7 @@
   template(java_lang_Float,                           "java/lang/Float")                          \
   template(java_lang_Double,                          "java/lang/Double")                         \
   template(java_lang_Byte,                            "java/lang/Byte")                           \
-  template(java_lang_Byte_Cache,                      "java/lang/Byte$ByteCache")                 \
+  template(java_lang_Byte_ByteCache,                  "java/lang/Byte$ByteCache")                 \
   template(java_lang_Short,                           "java/lang/Short")                          \
   template(java_lang_Short_ShortCache,                "java/lang/Short$ShortCache")               \
   template(java_lang_Integer,                         "java/lang/Integer")                        \
@@ -745,6 +745,18 @@
    do_name(     decrypt_name,                                      "implDecrypt")                                       \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
+  /* support for java.util.zip */                                                                                       \
+  AARCH64_ONLY(                         \
+  do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
+  do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
+   do_name(     update_name,                                      "update")                                             \
+  do_intrinsic(_updateBytesCRC32,          java_util_zip_CRC32,   updateBytes_name, updateBytes_signature,       F_SN)  \
+   do_name(     updateBytes_name,                                "updateBytes")                                         \
+   do_signature(updateBytes_signature,                           "(I[BII)I")                                            \
+  do_intrinsic(_updateByteBufferCRC32,     java_util_zip_CRC32,   updateByteBuffer_name, updateByteBuffer_signature, F_SN) \
+   do_name(     updateByteBuffer_name,                           "updateByteBuffer")                                    \
+   do_signature(updateByteBuffer_signature,                      "(IJII)I")                                             \
+   )                                    \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
--- a/src/share/vm/code/codeBlob.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/codeBlob.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -42,6 +42,9 @@
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 #endif
--- a/src/share/vm/code/compiledIC.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/compiledIC.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -528,7 +528,7 @@
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
 
   assert(method_holder->data()    == 0           || method_holder->data()    == (intptr_t)callee(), "a) MT-unsafe modification of inline cache");
-  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, "b) MT-unsafe modification of inline cache");
+  //assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, "b) MT-unsafe modification of inline cache");
 
   // Update stub
   method_holder->set_data((intptr_t)callee());
--- a/src/share/vm/code/compiledIC.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/compiledIC.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -32,6 +32,9 @@
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 #endif
--- a/src/share/vm/code/icBuffer.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/icBuffer.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -40,6 +40,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 #endif
--- a/src/share/vm/code/nmethod.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/nmethod.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,6 +46,10 @@
 #include "shark/sharkCompiler.hpp"
 #endif
 
+#ifdef BUILTIN_SIM
+#include "../../../../../simulator/simulator.hpp"
+#endif
+
 #ifdef DTRACE_ENABLED
 
 // Only bother with this argument setup if dtrace is available
@@ -679,6 +683,15 @@
       Universe::heap()->register_nmethod(this);
     }
     debug_only(verify_scavenge_root_oops());
+
+#ifdef BUILTIN_SIM
+    if (NotifySimulator) {
+      unsigned char *base = code_buffer->insts()->start();
+      long delta = entry_point() - base;
+      AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck)->notifyRelocate(base, delta);
+    }
+#endif
+
     CodeCache::commit(this);
   }
 
@@ -758,6 +771,7 @@
 
     code_buffer->copy_oops_to(this);
     debug_only(verify_scavenge_root_oops());
+
     CodeCache::commit(this);
   }
 
@@ -874,6 +888,14 @@
     }
     debug_only(verify_scavenge_root_oops());
 
+#ifdef BUILTIN_SIM
+    if (NotifySimulator) {
+      unsigned char *base = code_buffer->insts()->start();
+      long delta = entry_point() - base;
+      AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck)->notifyRelocate(base, delta);
+    }
+#endif
+
     CodeCache::commit(this);
 
     // Copy contents of ExceptionHandlerTable to nmethod
--- a/src/share/vm/code/relocInfo.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/relocInfo.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,10 @@
 # include "assembler_x86.inline.hpp"
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 # include "nativeInst_sparc.hpp"
--- a/src/share/vm/code/relocInfo.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/relocInfo.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -425,6 +425,9 @@
 #ifdef TARGET_ARCH_x86
 # include "relocInfo_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "relocInfo_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "relocInfo_sparc.hpp"
 #endif
--- a/src/share/vm/code/stubs.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/stubs.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -67,7 +67,7 @@
   intptr_t size = round_to(buffer_size, 2*BytesPerWord);
   BufferBlob* blob = BufferBlob::create(name, size);
   if( blob == NULL) {
-    vm_exit_out_of_memory(size, err_msg("CodeCache: no room for %s", name));
+    vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, err_msg("CodeCache: no room for %s", name));
   }
   _stub_interface  = stub_interface;
   _buffer_size     = blob->content_size();
--- a/src/share/vm/code/vmreg.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/vmreg.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,6 +30,9 @@
 #ifdef TARGET_ARCH_x86
 # include "register_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "register_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "register_sparc.hpp"
 #endif
@@ -51,6 +54,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/adGlobals_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/adGlobals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/adGlobals_sparc.hpp"
 #endif
@@ -185,6 +191,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmreg_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmreg_sparc.hpp"
 #endif
--- a/src/share/vm/code/vtableStubs.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/code/vtableStubs.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -60,7 +60,7 @@
     const int bytes = chunk_factor * real_size + pd_code_alignment();
     BufferBlob* blob = BufferBlob::create("vtable chunks", bytes);
     if (blob == NULL) {
-      vm_exit_out_of_memory(bytes, "CodeCache: no room for vtable chunks");
+      vm_exit_out_of_memory(bytes, OOM_MALLOC_ERROR, "CodeCache: no room for vtable chunks");
     }
     _chunk = blob->content_begin();
     _chunk_end = _chunk + bytes;
--- a/src/share/vm/compiler/compileBroker.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/compileBroker.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1815,8 +1815,10 @@
     tty->print("%7d ", (int) tty->time_stamp().milliseconds());  // print timestamp
     tty->print("%4d ", compile_id);    // print compilation number
     tty->print("%s ", (is_osr ? "%" : " "));
-    int code_size = (task->code() == NULL) ? 0 : task->code()->total_size();
-    tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, (int)time.milliseconds(), task->num_inlined_bytecodes());
+    if (task->code() != NULL) {
+      tty->print("size: %d(%d) ", task->code()->total_size(), task->code()->insts_size());
+    }
+    tty->print_cr("time: %d inlined: %d bytes", (int)time.milliseconds(), task->num_inlined_bytecodes());
   }
 
   // Disable compilation, if required.
--- a/src/share/vm/compiler/compileBroker.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/compileBroker.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -155,7 +155,8 @@
     // these methods should be called in a thread safe context
 
     void set_current_method(const char* method) {
-      strncpy(_current_method, method, (size_t)cmname_buffer_length);
+      strncpy(_current_method, method, (size_t)cmname_buffer_length-1);
+      _current_method[cmname_buffer_length-1] = '\0';
       if (UsePerfData) _perf_current_method->set_value(method);
     }
 
--- a/src/share/vm/compiler/compilerOracle.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/compilerOracle.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,7 @@
 #include "oops/symbol.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/jniHandles.hpp"
+#include "runtime/os.hpp"
 
 class MethodMatcher : public CHeapObj<mtCompiler> {
  public:
@@ -167,44 +168,148 @@
   }
 }
 
+enum OptionType {
+  IntxType,
+  UintxType,
+  BoolType,
+  CcstrType,
+  DoubleType,
+  UnknownType
+};
 
-class MethodOptionMatcher: public MethodMatcher {
-  const char * option;
- public:
-  MethodOptionMatcher(Symbol* class_name, Mode class_mode,
-                             Symbol* method_name, Mode method_mode,
-                             Symbol* signature, const char * opt, MethodMatcher* next):
-    MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next) {
-    option = opt;
+/* Methods to map real type names to OptionType */
+template<typename T>
+static OptionType get_type_for() {
+  return UnknownType;
+};
+
+template<> OptionType get_type_for<intx>() {
+  return IntxType;
+}
+
+template<> OptionType get_type_for<uintx>() {
+  return UintxType;
+}
+
+template<> OptionType get_type_for<bool>() {
+  return BoolType;
+}
+
+template<> OptionType get_type_for<ccstr>() {
+  return CcstrType;
+}
+
+template<> OptionType get_type_for<double>() {
+  return DoubleType;
+}
+
+template<typename T>
+static const T copy_value(const T value) {
+  return value;
+}
+
+template<> const ccstr copy_value<ccstr>(const ccstr value) {
+  return (const ccstr)strdup(value);
+}
+
+template <typename T>
+class TypedMethodOptionMatcher : public MethodMatcher {
+  const char* _option;
+  OptionType _type;
+  const T _value;
+
+public:
+  TypedMethodOptionMatcher(Symbol* class_name, Mode class_mode,
+                           Symbol* method_name, Mode method_mode,
+                           Symbol* signature, const char* opt,
+                           const T value,  MethodMatcher* next) :
+    MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next),
+                  _type(get_type_for<T>()), _value(copy_value<T>(value)) {
+    _option = os::strdup_check_oom(opt);
   }
 
-  bool match(methodHandle method, const char* opt) {
-    MethodOptionMatcher* current = this;
+  ~TypedMethodOptionMatcher() {
+    os::free((void*)_option);
+  }
+
+  TypedMethodOptionMatcher* match(methodHandle method, const char* opt) {
+    TypedMethodOptionMatcher* current = this;
     while (current != NULL) {
-      current = (MethodOptionMatcher*)current->find(method);
+      current = (TypedMethodOptionMatcher*)current->find(method);
       if (current == NULL) {
-        return false;
+        return NULL;
       }
-      if (strcmp(current->option, opt) == 0) {
-        return true;
+      if (strcmp(current->_option, opt) == 0) {
+        return current;
       }
       current = current->next();
     }
-    return false;
+    return NULL;
+  }
+
+  TypedMethodOptionMatcher* next() {
+    return (TypedMethodOptionMatcher*)_next;
   }
 
-  MethodOptionMatcher* next() {
-    return (MethodOptionMatcher*)_next;
-  }
+  OptionType get_type(void) {
+      return _type;
+  };
+
+  T value() { return _value; }
 
-  virtual void print() {
+  void print() {
+    ttyLocker ttyl;
     print_base();
-    tty->print(" %s", option);
+    tty->print(" %s", _option);
+    tty->print(" <unknown option type>");
     tty->cr();
   }
 };
 
+template<>
+void TypedMethodOptionMatcher<intx>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" intx %s", _option);
+  tty->print(" = " INTX_FORMAT, _value);
+  tty->cr();
+};
 
+template<>
+void TypedMethodOptionMatcher<uintx>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" uintx %s", _option);
+  tty->print(" = " UINTX_FORMAT, _value);
+  tty->cr();
+};
+
+template<>
+void TypedMethodOptionMatcher<bool>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" bool %s", _option);
+  tty->print(" = %s", _value ? "true" : "false");
+  tty->cr();
+};
+
+template<>
+void TypedMethodOptionMatcher<ccstr>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" const char* %s", _option);
+  tty->print(" = '%s'", _value);
+  tty->cr();
+};
+
+template<>
+void TypedMethodOptionMatcher<double>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" double %s", _option);
+  tty->print(" = %f", _value);
+  tty->cr();
+};
 
 // this must parallel the command_names below
 enum OracleCommand {
@@ -266,23 +371,47 @@
   return lists[command];
 }
 
-
-
+template<typename T>
 static MethodMatcher* add_option_string(Symbol* class_name, MethodMatcher::Mode c_mode,
                                         Symbol* method_name, MethodMatcher::Mode m_mode,
                                         Symbol* signature,
-                                        const char* option) {
-  lists[OptionCommand] = new MethodOptionMatcher(class_name, c_mode, method_name, m_mode,
-                                                 signature, option, lists[OptionCommand]);
+                                        const char* option,
+                                        T value) {
+  lists[OptionCommand] = new TypedMethodOptionMatcher<T>(class_name, c_mode, method_name, m_mode,
+                                                         signature, option, value, lists[OptionCommand]);
   return lists[OptionCommand];
 }
 
+template<typename T>
+static bool get_option_value(methodHandle method, const char* option, T& value) {
+   TypedMethodOptionMatcher<T>* m;
+   if (lists[OptionCommand] != NULL
+       && (m = ((TypedMethodOptionMatcher<T>*)lists[OptionCommand])->match(method, option)) != NULL
+       && m->get_type() == get_type_for<T>()) {
+       value = m->value();
+       return true;
+   } else {
+     return false;
+   }
+}
 
 bool CompilerOracle::has_option_string(methodHandle method, const char* option) {
-  return lists[OptionCommand] != NULL &&
-    ((MethodOptionMatcher*)lists[OptionCommand])->match(method, option);
+  bool value = false;
+  get_option_value(method, option, value);
+  return value;
 }
 
+template<typename T>
+bool CompilerOracle::has_option_value(methodHandle method, const char* option, T& value) {
+  return ::get_option_value(method, option, value);
+}
+
+// Explicit instantiation for all OptionTypes supported.
+template bool CompilerOracle::has_option_value<intx>(methodHandle method, const char* option, intx& value);
+template bool CompilerOracle::has_option_value<uintx>(methodHandle method, const char* option, uintx& value);
+template bool CompilerOracle::has_option_value<bool>(methodHandle method, const char* option, bool& value);
+template bool CompilerOracle::has_option_value<ccstr>(methodHandle method, const char* option, ccstr& value);
+template bool CompilerOracle::has_option_value<double>(methodHandle method, const char* option, double& value);
 
 bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) {
   quietly = true;
@@ -440,6 +569,106 @@
 
 
 
+// Scan the next flag and value in line; return a MethodMatcher object on success, NULL on failure.
+// On failure, error_msg contains a description of the first error.
+// For future extensions: set error_msg on the first error.
+static MethodMatcher* scan_flag_and_value(const char* type, const char* line, int& total_bytes_read,
+                                          Symbol* c_name, MethodMatcher::Mode c_match,
+                                          Symbol* m_name, MethodMatcher::Mode m_match,
+                                          Symbol* signature,
+                                          char* errorbuf, const int buf_size) {
+  total_bytes_read = 0;
+  int bytes_read = 0;
+  char flag[256];
+
+  // Read flag name.
+  if (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", flag, &bytes_read) == 1) {
+    line += bytes_read;
+    total_bytes_read += bytes_read;
+
+    // Read value.
+    if (strcmp(type, "intx") == 0) {
+      intx value;
+      if (sscanf(line, "%*[ \t]" INTX_FORMAT "%n", &value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s ", flag, type);
+      }
+    } else if (strcmp(type, "uintx") == 0) {
+      uintx value;
+      if (sscanf(line, "%*[ \t]" UINTX_FORMAT "%n", &value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "ccstr") == 0) {
+      ResourceMark rm;
+      char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1);
+      if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "ccstrlist") == 0) {
+      // Accumulates several strings into one. The internal type is ccstr.
+      ResourceMark rm;
+      char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1);
+      char* next_value = value;
+      if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        line += bytes_read;
+        next_value += bytes_read;
+        char* end_value = next_value-1;
+        while (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) {
+          total_bytes_read += bytes_read;
+          line += bytes_read;
+          *end_value = ' '; // override '\0'
+          next_value += bytes_read;
+          end_value = next_value-1;
+        }
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "bool") == 0) {
+      char value[256];
+      if (sscanf(line, "%*[ \t]%255[a-zA-Z]%n", value, &bytes_read) == 1) {
+        if (strcmp(value, "true") == 0) {
+          total_bytes_read += bytes_read;
+          return add_option_string(c_name, c_match, m_name, m_match, signature, flag, true);
+        } else if (strcmp(value, "false") == 0) {
+          total_bytes_read += bytes_read;
+          return add_option_string(c_name, c_match, m_name, m_match, signature, flag, false);
+        } else {
+          jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+        }
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "double") == 0) {
+      char buffer[2][256];
+      // Decimal separator '.' has been replaced with ' ' or '/' earlier,
+      // so read integer and fraction part of double value separately.
+      if (sscanf(line, "%*[ \t]%255[0-9]%*[ /\t]%255[0-9]%n", buffer[0], buffer[1], &bytes_read) == 2) {
+        char value[512] = "";
+        jio_snprintf(value, sizeof(value), "%s.%s", buffer[0], buffer[1]);
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, atof(value));
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else {
+      jio_snprintf(errorbuf, buf_size, "  Type %s not supported ", type);
+    }
+  } else {
+    jio_snprintf(errorbuf, buf_size, "  Flag name for type %s should be alphanumeric ", type);
+  }
+  return NULL;
+}
+
 void CompilerOracle::parse_from_line(char* line) {
   if (line[0] == '\0') return;
   if (line[0] == '#')  return;
@@ -469,8 +698,10 @@
   int bytes_read;
   OracleCommand command = parse_command_name(line, &bytes_read);
   line += bytes_read;
+  ResourceMark rm;
 
   if (command == UnknownCommand) {
+    ttyLocker ttyl;
     tty->print_cr("CompilerOracle: unrecognized line");
     tty->print_cr("  \"%s\"", original_line);
     return;
@@ -492,7 +723,7 @@
   char method_name[256];
   char sig[1024];
   char errorbuf[1024];
-  const char* error_msg = NULL;
+  const char* error_msg = NULL; // description of the first error that appears
   MethodMatcher* match = NULL;
 
   if (scan_line(line, class_name, &c_match, method_name, &m_match, &bytes_read, error_msg)) {
@@ -511,43 +742,77 @@
     }
 
     if (command == OptionCommand) {
-      // Look for trailing options to support
-      // ciMethod::has_option("string") to control features in the
-      // compiler.  Multiple options may follow the method name.
-      char option[256];
+      // Look for trailing options.
+      //
+      // Two types of trailing options are
+      // supported:
+      //
+      // (1) CompileCommand=option,Klass::method,flag
+      // (2) CompileCommand=option,Klass::method,type,flag,value
+      //
+      // Type (1) is used to enable a boolean flag for a method.
+      //
+      // Type (2) is used to support options with a value. Values can have
+      // the following types: intx, uintx, bool, ccstr, ccstrlist, and double.
+      //
+      // For future extensions: extend scan_flag_and_value()
+      char option[256]; // stores flag for Type (1) and type of Type (2)
       while (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", option, &bytes_read) == 1) {
         if (match != NULL && !_quiet) {
           // Print out the last match added
+          ttyLocker ttyl;
           tty->print("CompilerOracle: %s ", command_names[command]);
           match->print();
         }
-        match = add_option_string(c_name, c_match, m_name, m_match, signature, strdup(option));
         line += bytes_read;
-      }
+
+        if (strcmp(option, "intx") == 0
+            || strcmp(option, "uintx") == 0
+            || strcmp(option, "bool") == 0
+            || strcmp(option, "ccstr") == 0
+            || strcmp(option, "ccstrlist") == 0
+            || strcmp(option, "double") == 0
+            ) {
+
+          // Type (2) option: parse flag name and value.
+          match = scan_flag_and_value(option, line, bytes_read,
+                                      c_name, c_match, m_name, m_match, signature,
+                                      errorbuf, sizeof(errorbuf));
+          if (match == NULL) {
+            error_msg = errorbuf;
+            break;
+          }
+          line += bytes_read;
+        } else {
+          // Type (1) option
+          match = add_option_string(c_name, c_match, m_name, m_match, signature, option, true);
+        }
+      } // while(
     } else {
-      bytes_read = 0;
-      sscanf(line, "%*[ \t]%n", &bytes_read);
-      if (line[bytes_read] != '\0') {
-        jio_snprintf(errorbuf, sizeof(errorbuf), "  Unrecognized text after command: %s", line);
-        error_msg = errorbuf;
-      } else {
-        match = add_predicate(command, c_name, c_match, m_name, m_match, signature);
-      }
+      match = add_predicate(command, c_name, c_match, m_name, m_match, signature);
     }
   }
 
-  if (match != NULL) {
-    if (!_quiet) {
-      ResourceMark rm;
-      tty->print("CompilerOracle: %s ", command_names[command]);
-      match->print();
-    }
-  } else {
+  ttyLocker ttyl;
+  if (error_msg != NULL) {
+    // an error has happened
     tty->print_cr("CompilerOracle: unrecognized line");
     tty->print_cr("  \"%s\"", original_line);
     if (error_msg != NULL) {
       tty->print_cr("%s", error_msg);
     }
+  } else {
+    // check for remaining characters
+    bytes_read = 0;
+    sscanf(line, "%*[ \t]%n", &bytes_read);
+    if (line[bytes_read] != '\0') {
+      tty->print_cr("CompilerOracle: unrecognized line");
+      tty->print_cr("  \"%s\"", original_line);
+      tty->print_cr("  Unrecognized text %s after command ", line);
+    } else if (match != NULL && !_quiet) {
+      tty->print("CompilerOracle: %s ", command_names[command]);
+      match->print();
+    }
   }
 }
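With these changes CompilerOracle accepts both option forms described in the comment above, for example (flag names here are hypothetical):

  -XX:CompileCommand=option,java/lang/String.indexOf,MyBoolFlag
  -XX:CompileCommand=option,java/lang/String.indexOf,intx,MyIntFlag,42

An illustrative standalone sketch of the intx branch of scan_flag_and_value, reusing the same two-step sscanf formats; plain %ld is assumed here in place of INTX_FORMAT.

  #include <cstdio>

  int main() {
    // Tail of the command after the method and the "intx" keyword.
    const char* line = " MyIntFlag 42";
    char flag[256];
    long value = 0;
    int bytes_read = 0;
    // Read the flag name, then the value, tracking consumed characters with %n.
    if (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", flag, &bytes_read) == 1) {
      line += bytes_read;
      if (sscanf(line, "%*[ \t]%ld%n", &value, &bytes_read) == 1) {
        printf("flag=%s value=%ld\n", flag, value);   // flag=MyIntFlag value=42
        return 0;
      }
    }
    printf("parse error\n");
    return 1;
  }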
 
--- a/src/share/vm/compiler/compilerOracle.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/compilerOracle.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -64,6 +64,11 @@
   // Check to see if this method has option set for it
   static bool has_option_string(methodHandle method, const char * option);
 
+  // Check if the method has the option with a value set. If so, overwrite value and return true;
+  // otherwise leave value unchanged and return false.
+  template<typename T>
+  static bool has_option_value(methodHandle method, const char* option, T& value);
+
   // Reads from string instead of file
   static void parse_from_string(const char* command_string, void (*parser)(char*));
 
--- a/src/share/vm/compiler/disassembler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/disassembler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,9 @@
 #ifdef TARGET_ARCH_x86
 # include "depChecker_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "depChecker_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "depChecker_sparc.hpp"
 #endif
@@ -281,6 +284,7 @@
         strlen((const char*)arg) > sizeof(buffer) - 1) {
       // Only print this when the mach changes
       strncpy(buffer, (const char*)arg, sizeof(buffer) - 1);
+      buffer[sizeof(buffer) - 1] = '\0';
       output()->print_cr("[Disassembling for mach='%s']", arg);
     }
   } else if (match(event, "format bytes-per-line")) {
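The strncpy changes here and in compileBroker.hpp follow the same rule: strncpy does not NUL-terminate the destination when the source is at least as long as the count, so copy at most size-1 bytes and write the terminator explicitly. A minimal sketch of the pattern with a hypothetical 8-byte buffer:

  #include <cstddef>
  #include <cstdio>
  #include <cstring>

  // Copy 'src' into a fixed-size buffer, always leaving it NUL-terminated.
  static void copy_truncated(char* dst, size_t dst_len, const char* src) {
    strncpy(dst, src, dst_len - 1);   // may fill the buffer without a terminator
    dst[dst_len - 1] = '\0';          // so terminate explicitly
  }

  int main() {
    char buf[8];
    copy_truncated(buf, sizeof(buf), "a.very.long.method.name");
    printf("%s\n", buf);              // prints "a.very." -- truncated but safe
    return 0;
  }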
--- a/src/share/vm/compiler/disassembler.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/disassembler.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -71,6 +71,9 @@
 #ifdef TARGET_ARCH_x86
 # include "disassembler_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "disassembler_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "disassembler_sparc.hpp"
 #endif
--- a/src/share/vm/compiler/methodLiveness.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/methodLiveness.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -572,15 +572,15 @@
 
 
 MethodLiveness::BasicBlock::BasicBlock(MethodLiveness *analyzer, int start, int limit) :
-         _gen((uintptr_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
+         _gen((BitMap::bm_word_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
                          analyzer->bit_map_size_bits()),
-         _kill((uintptr_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
+         _kill((BitMap::bm_word_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
                          analyzer->bit_map_size_bits()),
-         _entry((uintptr_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
+         _entry((BitMap::bm_word_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
                          analyzer->bit_map_size_bits()),
-         _normal_exit((uintptr_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
+         _normal_exit((BitMap::bm_word_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
                          analyzer->bit_map_size_bits()),
-         _exception_exit((uintptr_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
+         _exception_exit((BitMap::bm_word_t*)analyzer->arena()->Amalloc(BytesPerWord * analyzer->bit_map_size_words()),
                          analyzer->bit_map_size_bits()),
          _last_bci(-1) {
   _analyzer = analyzer;
@@ -998,7 +998,7 @@
 }
 
 MethodLivenessResult MethodLiveness::BasicBlock::get_liveness_at(ciMethod* method, int bci) {
-  MethodLivenessResult answer(NEW_RESOURCE_ARRAY(uintptr_t, _analyzer->bit_map_size_words()),
+  MethodLivenessResult answer(NEW_RESOURCE_ARRAY(BitMap::bm_word_t, _analyzer->bit_map_size_words()),
                 _analyzer->bit_map_size_bits());
   answer.set_is_valid();
 
--- a/src/share/vm/compiler/oopMap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/compiler/oopMap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,9 +33,13 @@
 #include "memory/resourceArea.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/signature.hpp"
+#include "utilities/dtrace.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Defs.hpp"
 #endif
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL1(provider, gc__collection__delete, *uintptr_t);
+#endif /* !USDT2 */
 
 // OopMapStream
 
@@ -677,6 +681,9 @@
                     " - Derived: " INTPTR_FORMAT "  Base: " INTPTR_FORMAT " (Offset: " INTX_FORMAT ")",
           p2i(derived_loc), p2i((address)*derived_loc), p2i((address)base), offset);
     }
+#ifndef USDT2
+  HS_DTRACE_PROBE1(hotspot, gc__collection__delete, entry);
+#endif /* !USDT2 */
 
     // Delete entry
     delete entry;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -2704,7 +2704,7 @@
   if (ResizeOldPLAB && CMSOldPLABResizeQuicker) {
     size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks);
     n_blks +=  CMSOldPLABReactivityFactor*multiple*n_blks;
-    n_blks = MIN2(n_blks, CMSOldPLABMax);
+    n_blks = MIN2(n_blks, (size_t)CMSOldPLABMax);
   }
   assert(n_blks > 0, "Error");
   _cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -59,6 +59,12 @@
 #include "runtime/vmThread.hpp"
 #include "services/memoryService.hpp"
 #include "services/runtimeService.hpp"
+#include "utilities/dtrace.hpp"
+
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__contig__begin, bool, bool, size_t, bool);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__contig__end, bool, bool, size_t, bool);
+#endif /* !USDT2 */
 
 // statics
 CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
@@ -964,7 +970,7 @@
   if (free_percentage < desired_free_percentage) {
     size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
     assert(desired_capacity >= capacity(), "invalid expansion size");
-    expand_bytes = MAX2(desired_capacity - capacity(), MinHeapDeltaBytes);
+    expand_bytes = MAX2((long unsigned int) (desired_capacity - capacity()), (long unsigned int) MinHeapDeltaBytes);
   }
   if (expand_bytes > 0) {
     if (PrintGCDetails && Verbose) {
@@ -1654,7 +1660,13 @@
                                             size_t size,
                                             bool   tlab)
 {
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__contig__begin, full, clear_all_soft_refs, size, tlab);
+#endif /* !USDT2 */
   collector()->collect(full, clear_all_soft_refs, size, tlab);
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__contig__end, full, clear_all_soft_refs, size, tlab);
+#endif /* !USDT2 */
 }
 
 void CMSCollector::collect(bool   full,
@@ -6500,7 +6512,7 @@
     HeapWord* curAddr = _markBitMap.startWord();
     while (curAddr < _markBitMap.endWord()) {
       size_t remaining  = pointer_delta(_markBitMap.endWord(), curAddr);
-      MemRegion chunk(curAddr, MIN2(CMSBitMapYieldQuantum, remaining));
+      MemRegion chunk(curAddr, MIN2((size_t) CMSBitMapYieldQuantum, remaining));
       _markBitMap.clear_large_range(chunk);
       if (ConcurrentMarkSweepThread::should_yield() &&
           !foregroundGCIsActive() &&
@@ -6794,7 +6806,7 @@
     return;
   }
   // Double capacity if possible
-  size_t new_capacity = MIN2(_capacity*2, MarkStackSizeMax);
+  size_t new_capacity = MIN2((size_t) _capacity*2, (size_t) MarkStackSizeMax);
   // Do not give up existing stack until we have managed to
   // get the double capacity that we desired.
   ReservedSpace rs(ReservedSpace::allocation_align_size_up(
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -3622,7 +3622,7 @@
   // of things to do) or totally (at the very end).
   size_t target_size;
   if (partially) {
-    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
+    target_size = MIN2((uintx)_task_queue->max_elems()/3, GCDrainStackTargetSize);
   } else {
     target_size = 0;
   }
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -77,7 +77,7 @@
     assert(delta > 0, "just checking");
     if (!_vs.expand_by(delta)) {
       // Do better than this for Merlin
-      vm_exit_out_of_memory(delta, "offset table expansion");
+      vm_exit_out_of_memory(delta, OOM_MMAP_ERROR, "offset table expansion");
     }
     assert(_vs.high() == high + delta, "invalid expansion");
     // Initialization of the contents is left to the
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1714,7 +1714,7 @@
 
   verify_region_sets_optional();
 
-  size_t expand_bytes = MAX2(word_size * HeapWordSize, MinHeapDeltaBytes);
+  size_t expand_bytes = MAX2(word_size * HeapWordSize, (size_t)MinHeapDeltaBytes);
   ergo_verbose1(ErgoHeapSizing,
                 "attempt heap expansion",
                 ergo_format_reason("allocation request failed")
@@ -1806,7 +1806,7 @@
     if (G1ExitOnExpansionFailure &&
         _g1_storage.uncommitted_size() >= aligned_expand_bytes) {
       // We had head room...
-      vm_exit_out_of_memory(aligned_expand_bytes, "G1 heap expansion");
+      vm_exit_out_of_memory(aligned_expand_bytes, OOM_MMAP_ERROR, "G1 heap expansion");
     }
   }
   return successful;
@@ -3781,7 +3781,7 @@
   uint array_length = g1_policy()->young_cset_region_length();
   _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC);
   if (_surviving_young_words == NULL) {
-    vm_exit_out_of_memory(sizeof(size_t) * array_length,
+    vm_exit_out_of_memory(sizeof(size_t) * array_length, OOM_MALLOC_ERROR,
                           "Not enough space for young surv words summary.");
   }
   memset(_surviving_young_words, 0, (size_t) array_length * sizeof(size_t));
@@ -4589,7 +4589,7 @@
                       PADDING_ELEM_NUM;
   _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
   if (_surviving_young_words_base == NULL)
-    vm_exit_out_of_memory(array_length * sizeof(size_t),
+    vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
                           "Not enough space for young surv histo.");
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
   memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -50,8 +50,13 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__G1__begin, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__G1__end, *uintptr_t, *uintptr_t);
+#endif /* !USDT2 */
 class HeapRegion;
 
 void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
@@ -89,6 +94,9 @@
   // The marking doesn't preserve the marks of biased objects.
   BiasedLocking::preserve_marks();
 
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__G1__begin, &sh, sh->gc_cause());
+#endif /* !USDT2 */
   mark_sweep_phase1(marked_for_unloading, clear_all_softrefs);
 
   mark_sweep_phase2();
@@ -108,6 +116,9 @@
   GenRemSet* rs = sh->rem_set();
   rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/);
 
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__G1__end, &sh, sh->gc_cause());
+#endif /* !USDT2 */
   // "free at last gc" is calculated from these.
   // CHF: cheating for now!!!
   //  Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -286,7 +286,7 @@
   _fine_grain_regions = new PerRegionTablePtr[_max_fine_entries];
 
   if (_fine_grain_regions == NULL) {
-    vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
+    vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries, OOM_MALLOC_ERROR,
                           "Failed to allocate _fine_grain_entries.");
   }
 
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -54,6 +54,12 @@
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/workgroup.hpp"
+#include "utilities/dtrace.hpp"
+
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__parnew__begin, bool, bool, size_t, bool);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__parnew__end, bool, bool, size_t, bool);
+#endif /* !USDT2 */
 
 #ifdef _MSC_VER
 #pragma warning( push )
@@ -192,7 +198,7 @@
   const size_t num_overflow_elems = of_stack->size();
   const size_t space_available = queue->max_elems() - queue->size();
   const size_t num_take_elems = MIN3(space_available / 4,
-                                     ParGCDesiredObjsFromOverflowList,
+                                     (size_t)ParGCDesiredObjsFromOverflowList,
                                      num_overflow_elems);
   // Transfer the most recent num_take_elems from the overflow
   // stack to our work queue.
@@ -919,6 +925,9 @@
                                bool   clear_all_soft_refs,
                                size_t size,
                                bool   is_tlab) {
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__parnew__begin, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
   assert(full || size > 0, "otherwise we don't want to collect");
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
@@ -1070,6 +1079,10 @@
     gch->print_heap_change(gch_prev_used);
   }
 
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__parnew__end, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
+
   if (PrintGCDetails && ParallelGCVerbose) {
     TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
     TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -43,7 +43,7 @@
   _time_stamp_index(0)
 {
   if (!os::create_thread(this, os::pgc_thread))
-    vm_exit_out_of_memory(0, "Cannot create GC thread. Out of system resources.");
+    vm_exit_out_of_memory(0, OOM_MALLOC_ERROR, "Cannot create GC thread. Out of system resources.");
 
   if (PrintGCTaskTimeStamps) {
     _time_stamps = NEW_C_HEAP_ARRAY(GCTaskTimeStamp, GCTaskTimeStampEntries, mtGC);
--- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -99,7 +99,7 @@
     // Expand
     size_t expand_by = requested_blocks_size_in_bytes - current_blocks_size_in_bytes;
     if (!_virtual_space.expand_by(expand_by)) {
-      vm_exit_out_of_memory(expand_by, "object start array expansion");
+      vm_exit_out_of_memory(expand_by, OOM_MMAP_ERROR, "object start array expansion");
     }
     // Clear *only* the newly allocated region
     memset(_blocks_region.end(), clean_block, expand_by);
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -72,7 +72,7 @@
   if (_virtual_space != NULL && _virtual_space->expand_by(_reserved_byte_size)) {
     _region_start = covered_region.start();
     _region_size = covered_region.word_size();
-    idx_t* map = (idx_t*)_virtual_space->reserved_low_addr();
+    BitMap::bm_word_t* map = (BitMap::bm_word_t*)_virtual_space->reserved_low_addr();
     _beg_bits.set_map(map);
     _beg_bits.set_size(bits / 2);
     _end_bits.set_map(map + words / 2);
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -43,8 +43,14 @@
 #include "runtime/java.hpp"
 #include "runtime/vmThread.hpp"
 #include "services/memTracker.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/vmError.hpp"
 
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__parscavenge__heap__begin, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__parscavenge__heap__end, *uintptr_t, *uintptr_t);
+#endif /* !USDT2 */
+
 PSYoungGen*  ParallelScavengeHeap::_young_gen = NULL;
 PSOldGen*    ParallelScavengeHeap::_old_gen = NULL;
 PSPermGen*   ParallelScavengeHeap::_perm_gen = NULL;
@@ -815,7 +821,13 @@
   }
 
   VM_ParallelGCSystemGC op(gc_count, full_gc_count, cause);
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__parscavenge__heap__begin, &op, cause);
+#endif /* !USDT2 */
   VMThread::execute(&op);
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__parscavenge__heap__end, &op, cause);
+#endif /* !USDT2 */
 }
 
 // This interface assumes that it's being called by the
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -58,11 +58,18 @@
 #include "services/management.hpp"
 #include "services/memoryService.hpp"
 #include "services/memTracker.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/stack.inline.hpp"
 
 #include <math.h>
 
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__ParallelCompact__clear, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__parallel__collect, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__move, *uintptr_t, *uintptr_t, *uintptr_t, *uintptr_t);
+#endif /* !USDT2 */
+
 // All sizes are in HeapWords.
 const size_t ParallelCompactData::Log2RegionSize  = 16; // 64K words
 const size_t ParallelCompactData::RegionSize      = (size_t)1 << Log2RegionSize;
@@ -469,6 +476,9 @@
 
 void ParallelCompactData::clear()
 {
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__ParallelCompact__clear, &_region_data, _region_data->data_location());
+#endif /* !USDT2 */
   memset(_region_data, 0, _region_vspace->committed_size());
   memset(_block_data, 0, _block_vspace->committed_size());
 }
@@ -939,8 +949,8 @@
 void PSParallelCompact::initialize_dead_wood_limiter()
 {
   const size_t max = 100;
-  _dwl_mean = double(MIN2(ParallelOldDeadWoodLimiterMean, max)) / 100.0;
-  _dwl_std_dev = double(MIN2(ParallelOldDeadWoodLimiterStdDev, max)) / 100.0;
+  _dwl_mean = double(MIN2((size_t) ParallelOldDeadWoodLimiterMean, max)) / 100.0;
+  _dwl_std_dev = double(MIN2((size_t) ParallelOldDeadWoodLimiterStdDev, max)) / 100.0;
   _dwl_first_term = 1.0 / (sqrt(2.0 * M_PI) * _dwl_std_dev);
   DEBUG_ONLY(_dwl_initialized = true;)
   _dwl_adjustment = normal_distribution(1.0);
@@ -2011,6 +2021,9 @@
          "should be in vm thread");
 
   ParallelScavengeHeap* heap = gc_heap();
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__parallel__collect, heap, heap->gc_cause());
+#endif /* !USDT2 */
   GCCause::Cause gc_cause = heap->gc_cause();
   assert(!heap->is_gc_active(), "not reentrant");
 
@@ -3509,6 +3522,9 @@
   // past the end of the partial object entering the region (if any).
   HeapWord* const dest_addr = sd.partial_obj_end(dp_region);
   HeapWord* const new_top = _space_info[space_id].new_top();
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__move, &beg_addr, &end_addr, &dest_addr, &new_top);
+#endif /* !USDT2 */
   assert(new_top >= dest_addr, "bad new_top value");
   const size_t words = pointer_delta(new_top, dest_addr);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -349,7 +349,7 @@
     HeapWord*            _partial_obj_addr;
     region_sz_t          _partial_obj_size;
     region_sz_t volatile _dc_and_los;
-    bool                 _blocks_filled;
+    bool        volatile _blocks_filled;
 
 #ifdef ASSERT
     size_t               _blocks_filled_count;   // Number of block table fills.
@@ -503,7 +503,9 @@
 inline bool
 ParallelCompactData::RegionData::blocks_filled() const
 {
-  return _blocks_filled;
+  bool result = _blocks_filled;
+  OrderAccess::acquire();
+  return result;
 }
 
 #ifdef ASSERT
@@ -517,6 +519,7 @@
 inline void
 ParallelCompactData::RegionData::set_blocks_filled()
 {
+  OrderAccess::release();
   _blocks_filled = true;
   // Debug builds count the number of times the table was filled.
   DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
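
[Editor's note] The psParallelCompact.hpp hunks make _blocks_filled volatile and pair the write with OrderAccess::release() and the read with OrderAccess::acquire(), so a thread that sees blocks_filled() return true also sees the block-table contents written before the flag. A minimal sketch of the same publication pattern, using C++11 atomics in place of OrderAccess (standalone, not the HotSpot primitives):

#include <atomic>
#include <cassert>
#include <thread>

static int               g_block_table[4];        // data published by the filler
static std::atomic<bool> g_blocks_filled(false);  // the flag guarding it

void fill_blocks() {
  for (int i = 0; i < 4; i++) g_block_table[i] = i * i;
  // Release: everything written above becomes visible to an acquire reader.
  g_blocks_filled.store(true, std::memory_order_release);
}

void use_blocks() {
  // Acquire: once the flag reads true, the table contents are visible too.
  while (!g_blocks_filled.load(std::memory_order_acquire)) {
    std::this_thread::yield();
  }
  assert(g_block_table[3] == 9);
}

int main() {
  std::thread writer(fill_blocks), reader(use_blocks);
  writer.join();
  reader.join();
  return 0;
}
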
--- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -69,7 +69,7 @@
   _last_used = current_live;
 
   // We have different alignment constraints than the rest of the heap.
-  const size_t alignment = MAX2(MinPermHeapExpansion,
+  const size_t alignment = MAX2((size_t) MinPermHeapExpansion,
                                 virtual_space()->alignment());
 
   // Compute the desired size:
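
[Editor's note] Like several hunks in this changeset, the MAX2 call above gains an explicit (size_t) cast so both arguments have the same type: MAX2/MIN2 are function templates, and deduction fails when one operand is a VM flag of type uintx (or intx) and the other is a size_t on targets where those are distinct types. A small standalone sketch of the failure mode and the fix; the typedef is an illustrative stand-in, chosen so the types differ on typical 64-bit builds:

#include <cstddef>
#include <cstdio>

template <class T> static T MAX2(T a, T b) { return a > b ? a : b; }

// Stand-in for a uintx VM flag: on typical 64-bit targets this is a different
// type from size_t, so mixed calls cannot deduce a single T.
typedef unsigned int flag_t;
static flag_t MinPermHeapExpansion = 256 * 1024;

int main() {
  size_t alignment = 64 * 1024;

  // MAX2(MinPermHeapExpansion, alignment);   // would not compile: T deduced
  //                                          // as both flag_t and size_t.
  size_t result = MAX2((size_t) MinPermHeapExpansion, alignment);  // the fix

  std::printf("alignment chosen: %zu\n", result);
  return 0;
}
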
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -55,8 +55,17 @@
 #include "runtime/vmThread.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/memoryService.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/stack.inline.hpp"
 
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSScavenge__begin, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSScavenge__end, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSParallelCompact__begin, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSParallelCompact__end, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSMarkSweep__begin, *uintptr_t, *uintptr_t);
+  HS_DTRACE_PROBE_DECL2(provider, gc__collection__PSMarkSweep__end, *uintptr_t, *uintptr_t);
+#endif /* !USDT2 */
 
 HeapWord*                  PSScavenge::_to_space_top_before_gc = NULL;
 int                        PSScavenge::_consecutive_skipped_scavenges = 0;
@@ -231,7 +240,13 @@
   PSAdaptiveSizePolicy* policy = heap->size_policy();
   IsGCActiveMark mark;
 
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSScavenge__begin, &heap, heap->gc_cause());
+#endif /* !USDT2 */
   const bool scavenge_done = PSScavenge::invoke_no_policy();
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSScavenge__end, &heap, heap->gc_cause());
+#endif /* !USDT2 */
   const bool need_full_gc = !scavenge_done ||
     policy->should_full_GC(heap->old_gen()->free_in_bytes());
   bool full_gc_done = false;
@@ -248,9 +263,21 @@
     const bool clear_all_softrefs = cp->should_clear_all_soft_refs();
 
     if (UseParallelOldGC) {
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSParallelCompact__begin, &heap, heap->gc_cause());
+#endif /* !USDT2 */
       full_gc_done = PSParallelCompact::invoke_no_policy(clear_all_softrefs);
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSParallelCompact__end, &heap, heap->gc_cause());
+#endif /* !USDT2 */
     } else {
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSMarkSweep__begin, &heap, heap->gc_cause());
+#endif /* !USDT2 */
       full_gc_done = PSMarkSweep::invoke_no_policy(clear_all_softrefs);
+#ifndef USDT2
+  HS_DTRACE_PROBE2(hotspot, gc__collection__PSMarkSweep__end, &heap, heap->gc_cause());
+#endif /* !USDT2 */
     }
   }
 
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -32,6 +32,9 @@
 #ifdef TARGET_ARCH_x86
 # include "interp_masm_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "interp_masm_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "interp_masm_sparc.hpp"
 #endif
@@ -116,6 +119,11 @@
     java_lang_math_pow,                                         // implementation of java.lang.Math.pow   (x,y)
     java_lang_math_exp,                                         // implementation of java.lang.Math.exp   (x)
     java_lang_ref_reference_get,                                // implementation of java.lang.ref.Reference.get()
+#ifdef TARGET_ARCH_aarch64
+    java_util_zip_CRC32_update,                                 // implementation of java.util.zip.CRC32.update()
+    java_util_zip_CRC32_updateBytes,                            // implementation of java.util.zip.CRC32.updateBytes()
+    java_util_zip_CRC32_updateByteBuffer,                       // implementation of java.util.zip.CRC32.updateByteBuffer()
+#endif
     number_of_method_entries,
     invalid = -1
   };
--- a/src/share/vm/interpreter/bytecode.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecode.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,6 +46,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "orderAccess_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "orderAccess_linux_sparc.inline.hpp"
 #endif
@@ -2112,8 +2115,7 @@
             if ( *count_addr > 0 ) {
               if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
                 obj = (oop)NULL;
-              }
-              else {
+              } else {
                 if (cache->is_long() || cache->is_double()) {
                   obj = (oop) STACK_OBJECT(-3);
                 } else {
@@ -2134,7 +2136,7 @@
           // QQQ Need to make this as inlined as possible. Probably need to split all the bytecode cases
           // out so c++ compiler has a chance for constant prop to fold everything possible away.
 
-          oop obj;
+          oop obj,too;
           int count;
           TosState tos_type = cache->flag_state();
 
@@ -2158,8 +2160,9 @@
             if (tos_type == itos) {
               obj->release_int_field_put(field_offset, STACK_INT(-1));
             } else if (tos_type == atos) {
-              VERIFY_OOP(STACK_OBJECT(-1));
-              obj->release_obj_field_put(field_offset, STACK_OBJECT(-1));
+              too = (oop) STACK_OBJECT(-1);
+              VERIFY_OOP(too);
+              obj->release_obj_field_put(field_offset, too);
             } else if (tos_type == btos) {
               obj->release_byte_field_put(field_offset, STACK_INT(-1));
             } else if (tos_type == ztos) {
@@ -2181,7 +2184,8 @@
             if (tos_type == itos) {
               obj->int_field_put(field_offset, STACK_INT(-1));
             } else if (tos_type == atos) {
-              VERIFY_OOP(STACK_OBJECT(-1));
+              too = (oop) STACK_OBJECT(-1);
+              VERIFY_OOP(too);
               // On IA64 we perform all stores of references with release semantics.
               // This guarantees that everybody using this reference sees a fully
               // initialized object. On PPC64 we emit a storestore barrier after
@@ -2310,7 +2314,8 @@
       }
       CASE(_checkcast):
           if (STACK_OBJECT(-1) != NULL) {
-            VERIFY_OOP(STACK_OBJECT(-1));
+            oop too = (oop) STACK_OBJECT(-1);
+            VERIFY_OOP(too);
             u2 index = Bytes::get_Java_u2(pc+1);
             // Constant pool may have actual klass or unresolved klass. If it is
             // unresolved we must resolve it.
@@ -2347,7 +2352,8 @@
             // Profile instanceof with null_seen and receiver.
             BI_PROFILE_UPDATE_INSTANCEOF(/*null_seen=*/true, NULL);
           } else {
-            VERIFY_OOP(STACK_OBJECT(-1));
+            oop too = (oop) STACK_OBJECT(-1);
+            VERIFY_OOP(too);
             u2 index = Bytes::get_Java_u2(pc+1);
             // Constant pool may have actual klass or unresolved klass. If it is
             // unresolved we must resolve it.
@@ -2576,7 +2582,8 @@
         // another compliant java compiler.
         if (cache->is_forced_virtual()) {
           methodOop callee;
-          CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
+          oop too = (oop) STACK_OBJECT(-(cache->parameter_size()));
+          CHECK_NULL(too);
           if (cache->is_vfinal()) {
             callee = cache->f2_as_vfinal_method();
             // Profile 'special case of invokeinterface' final call.
@@ -2663,7 +2670,8 @@
         {
           methodOop callee;
           if ((Bytecodes::Code)opcode == Bytecodes::_invokevirtual) {
-            CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
+            oop too = (oop) STACK_OBJECT(-(cache->parameter_size()));
+            CHECK_NULL(too);
             if (cache->is_vfinal()) {
               callee = cache->f2_as_vfinal_method();
               // Profile final call.
@@ -2702,7 +2710,8 @@
             }
           } else {
             if ((Bytecodes::Code)opcode == Bytecodes::_invokespecial) {
-              CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
+              oop too = (oop) STACK_OBJECT(-(cache->parameter_size()));
+              CHECK_NULL(too);
             }
             callee = cache->f1_as_method();
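
[Editor's note] The bytecodeInterpreter.cpp hunks above replace direct uses of STACK_OBJECT(-1) inside VERIFY_OOP(...) and CHECK_NULL(...) with a local `oop too` read from the stack first. Since those are macros, handing them the raw stack expression can expand it, and hence re-read the slot, more than once; binding it to a temporary keeps the read at a single, well-defined point. A minimal standalone sketch of that single-read pattern; the macro and names below are hypothetical, not the interpreter's:

#include <cassert>
#include <cstdio>

static int g_reads = 0;
static int g_stack[4] = { 0, 0, 0, 42 };

// Stand-in for STACK_OBJECT(): every use re-reads the operand stack slot.
static int stack_object(int offset) { g_reads++; return g_stack[4 + offset]; }

// Hypothetical checking macro that expands its argument twice, as a
// verification macro might in a debug build.
#define VERIFY_VALUE(v) do { assert((v) != 0); std::printf("checked %d\n", (v)); } while (0)

int main() {
  // Naive form: the macro argument is evaluated twice -> two stack reads.
  VERIFY_VALUE(stack_object(-1));
  std::printf("reads so far: %d\n", g_reads);   // 2

  // Patched form: read the slot once into a temporary, then verify it.
  int too = stack_object(-1);
  VERIFY_VALUE(too);
  std::printf("reads so far: %d\n", g_reads);   // 3 (only one more)
  return 0;
}
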
 
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
@@ -60,7 +63,16 @@
     jlong   l;
     jdouble d;
     uint32_t      v[2];
-};
+}
+#ifndef _LP64
+  /* Hotspot only aligns the union to the uintptr_t type, that is 32 bit
+     on a 32-bit CPU. Accesses to double values should be 64-bit aligned
+     on at least MIPS and SPARC. Declare it to GCC for all 32-bit CPUs,
+     as it might also help GCC to select the best instruction on other
+     CPUs. */
+  __attribute__ ((packed, aligned (4)))
+#endif
+;
 
 
 typedef class BytecodeInterpreter* interpreterState;
@@ -173,7 +185,16 @@
     jlong   l;
     jdouble d;
     uint32_t      v[2];
-};
+}
+#ifndef _LP64
+  /* Hotspot only aligns the union to the uintptr_t type, that is 32 bit
+     on a 32-bit CPU. Accesses to double values should be 64-bit aligned
+     on at least MIPS and SPARC. Declare it to GCC for all 32-bit CPUs,
+     as it might also help GCC to select the best instruction on other
+     CPUs. */
+  __attribute__ ((packed, aligned (4)))
+#endif
+;
 
 /*
  * Generic 32-bit wide "Java slot" definition. This type occurs
@@ -592,6 +613,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytecodeInterpreter_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytecodeInterpreter_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytecodeInterpreter_sparc.hpp"
 #endif
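
[Editor's note] The two value-union hunks in this file attach __attribute__((packed, aligned(4))) on 32-bit builds, telling GCC the union may sit on a 4-byte boundary even though it contains jlong/jdouble members, so generated code does not assume natural 8-byte alignment. A small sketch of the effect on the type's alignment (GCC/Clang only; the members mirror the union in the patch):

#include <cstddef>
#include <cstdio>
#include <stdint.h>

// Same shape as the interpreter's value union; jlong/jdouble stand-ins.
union NaturalValue {
  long long l;
  double    d;
  uint32_t  v[2];
};

union PackedValue {
  long long l;
  double    d;
  uint32_t  v[2];
} __attribute__ ((packed, aligned (4)));

int main() {
  // On an LP64 build this typically prints 8 then 4; on the 32-bit targets the
  // patch cares about, the attribute guarantees GCC assumes only 4-byte alignment.
  std::printf("natural alignment: %zu\n", (size_t) __alignof__(NaturalValue));
  std::printf("packed  alignment: %zu\n", (size_t) __alignof__(PackedValue));
  return 0;
}
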
--- a/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,6 +46,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytecodeInterpreter_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytecodeInterpreter_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytecodeInterpreter_sparc.inline.hpp"
 #endif
--- a/src/share/vm/interpreter/bytecodeStream.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodeStream.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/bytecodes.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodes.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/bytecodes.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/bytecodes.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -292,6 +292,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytecodes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytecodes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytecodes_sparc.hpp"
 #endif
@@ -421,6 +424,7 @@
   static bool        is_astore      (Code code)    { return (code == _astore || code == _astore_0 || code == _astore_1
                                                                              || code == _astore_2 || code == _astore_3); }
 
+  static bool        is_store_into_local(Code code){ return (_istore <= code && code <= _astore_3); }
   static bool        is_zero_const  (Code code)    { return (code == _aconst_null || code == _iconst_0
                                                            || code == _fconst_0 || code == _dconst_0); }
   static bool        is_invoke      (Code code)    { return (_invokevirtual <= code && code <= _invokedynamic); }
--- a/src/share/vm/interpreter/cppInterpreter.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/cppInterpreter.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -84,6 +84,9 @@
 #ifdef TARGET_ARCH_x86
 # include "cppInterpreter_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "cppInterpreter_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "cppInterpreter_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/cppInterpreterGenerator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/cppInterpreterGenerator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -52,6 +52,9 @@
 #ifdef TARGET_ARCH_x86
 # include "cppInterpreterGenerator_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "cppInterpreterGenerator_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "cppInterpreterGenerator_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/interpreter.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/interpreter.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -148,6 +148,9 @@
 #ifdef TARGET_ARCH_x86
 # include "interpreter_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "interpreter_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "interpreter_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/interpreterGenerator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/interpreterGenerator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -44,6 +44,9 @@
 #ifdef TARGET_ARCH_x86
 # include "interpreterGenerator_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "interpreterGenerator_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "interpreterGenerator_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -59,6 +59,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vm_version_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vm_version_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vm_version_sparc.hpp"
 #endif
@@ -1093,7 +1096,7 @@
     return;
   }
   if (set_handler_blob() == NULL) {
-    vm_exit_out_of_memory(blob_size, "native signature handlers");
+    vm_exit_out_of_memory(blob_size, OOM_MALLOC_ERROR, "native signature handlers");
   }
 
   BufferBlob* bb = BufferBlob::create("Signature Handler Temp Buffer",
@@ -1231,7 +1234,7 @@
   // preparing the same method will be sure to see non-null entry & mirror.
 IRT_END
 
-#if defined(IA32) || defined(AMD64) || defined(ARM)
+#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64)
 IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address))
   if (src_address == dest_address) {
     return;
@@ -1269,5 +1272,7 @@
   if (MethodHandles::has_member_arg(cname, mname)) {
     oop member_name = java_lang_invoke_DirectMethodHandle::member((oop)dmh);
     thread->set_vm_result(member_name);
+  } else {
+    thread->set_vm_result(NULL);
   }
 IRT_END
--- a/src/share/vm/interpreter/interpreterRuntime.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/interpreterRuntime.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -168,7 +168,7 @@
                                         methodOopDesc* method,
                                         intptr_t* from, intptr_t* to);
 
-#if defined(IA32) || defined(AMD64) || defined(ARM)
+#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64)
   // Popframe support (only needed on x86, AMD64 and ARM)
   static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address);
 #endif
@@ -177,6 +177,9 @@
 #ifdef TARGET_ARCH_x86
 # include "interpreterRT_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "interpreterRT_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "interpreterRT_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/templateInterpreter.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/templateInterpreter.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -186,6 +186,9 @@
 #ifdef TARGET_ARCH_x86
 # include "templateInterpreter_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "templateInterpreter_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "templateInterpreter_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -89,6 +89,9 @@
 #ifdef TARGET_ARCH_x86
 # include "templateInterpreterGenerator_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "templateInterpreterGenerator_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "templateInterpreterGenerator_sparc.hpp"
 #endif
--- a/src/share/vm/interpreter/templateTable.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/templateTable.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -56,6 +56,11 @@
 
 
 void Template::generate(InterpreterMacroAssembler* masm) {
+#ifdef TARGET_OS_ARCH_linux_aarch64
+  if (NotifySimulator
+      && bytecode() != Bytecodes::_return)
+    masm->notify(Assembler::bytecode_start);
+#endif
   // parameter passing
   TemplateTable::_desc = this;
   TemplateTable::_masm = masm;
--- a/src/share/vm/interpreter/templateTable.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/interpreter/templateTable.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_x86
 # include "interp_masm_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "interp_masm_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "interp_masm_sparc.hpp"
 #endif
@@ -365,6 +368,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "templateTable_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "templateTable_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "templateTable_sparc.hpp"
 #endif
--- a/src/share/vm/memory/allocation.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/allocation.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -211,7 +211,7 @@
     }
     if (p == NULL) p = os::malloc(bytes, mtChunk, CURRENT_PC);
     if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM) {
-      vm_exit_out_of_memory(bytes, "ChunkPool::allocate");
+      vm_exit_out_of_memory(bytes, OOM_MALLOC_ERROR, "ChunkPool::allocate");
     }
     return p;
   }
@@ -323,7 +323,7 @@
    default: {
      void *p =  os::malloc(bytes, mtChunk, CALLER_PC);
      if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM) {
-       vm_exit_out_of_memory(bytes, "Chunk::new");
+       vm_exit_out_of_memory(bytes, OOM_MALLOC_ERROR, "Chunk::new");
      }
      return p;
    }
@@ -484,7 +484,7 @@
 }
 
 void Arena::signal_out_of_memory(size_t sz, const char* whence) const {
-  vm_exit_out_of_memory(sz, whence);
+  vm_exit_out_of_memory(sz, OOM_MALLOC_ERROR, whence);
 }
 
 // Grow a new Chunk
--- a/src/share/vm/memory/allocation.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/allocation.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -515,8 +515,15 @@
 #define NEW_RESOURCE_ARRAY_IN_THREAD(thread, type, size)\
   (type*) resource_allocate_bytes(thread, (size) * sizeof(type))
 
+#define NEW_RESOURCE_ARRAY_IN_THREAD_RETURN_NULL(thread, type, size)\
+  (type*) resource_allocate_bytes(thread, (size) * sizeof(type), AllocFailStrategy::RETURN_NULL)
+
 #define REALLOC_RESOURCE_ARRAY(type, old, old_size, new_size)\
-  (type*) resource_reallocate_bytes((char*)(old), (old_size) * sizeof(type), (new_size) * sizeof(type) )
+  (type*) resource_reallocate_bytes((char*)(old), (old_size) * sizeof(type), (new_size) * sizeof(type))
+
+#define REALLOC_RESOURCE_ARRAY_RETURN_NULL(type, old, old_size, new_size)\
+  (type*) resource_reallocate_bytes((char*)(old), (old_size) * sizeof(type),\
+                                    (new_size) * sizeof(type), AllocFailStrategy::RETURN_NULL)
 
 #define FREE_RESOURCE_ARRAY(type, old, size)\
   resource_free_bytes((char*)(old), (size) * sizeof(type))
@@ -527,11 +534,29 @@
 #define NEW_RESOURCE_OBJ(type)\
   NEW_RESOURCE_ARRAY(type, 1)
 
+#define NEW_RESOURCE_OBJ_RETURN_NULL(type)\
+  NEW_RESOURCE_ARRAY_RETURN_NULL(type, 1)
+
+#define NEW_C_HEAP_ARRAY3(type, size, memflags, pc, allocfail)\
+  (type*) AllocateHeap((size) * sizeof(type), memflags, pc, allocfail)
+
+#define NEW_C_HEAP_ARRAY2(type, size, memflags, pc)\
+  (type*) (AllocateHeap((size) * sizeof(type), memflags, pc))
+
 #define NEW_C_HEAP_ARRAY(type, size, memflags)\
   (type*) (AllocateHeap((size) * sizeof(type), memflags))
 
+#define NEW_C_HEAP_ARRAY2_RETURN_NULL(type, size, memflags, pc)\
+  NEW_C_HEAP_ARRAY3(type, (size), memflags, pc, AllocFailStrategy::RETURN_NULL)
+
+#define NEW_C_HEAP_ARRAY_RETURN_NULL(type, size, memflags)\
+  NEW_C_HEAP_ARRAY3(type, (size), memflags, (address)0, AllocFailStrategy::RETURN_NULL)
+
 #define REALLOC_C_HEAP_ARRAY(type, old, size, memflags)\
-  (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags))
+  (type*) (ReallocateHeap((char*)(old), (size) * sizeof(type), memflags))
+
+#define REALLOC_C_HEAP_ARRAY_RETURN_NULL(type, old, size, memflags)\
+  (type*) (ReallocateHeap((char*)(old), (size) * sizeof(type), memflags, AllocFailStrategy::RETURN_NULL))
 
 #define FREE_C_HEAP_ARRAY(type,old,memflags) \
   FreeHeap((char*)(old), memflags)
@@ -539,12 +564,8 @@
 #define NEW_C_HEAP_OBJ(type, memflags)\
   NEW_C_HEAP_ARRAY(type, 1, memflags)
 
-
-#define NEW_C_HEAP_ARRAY2(type, size, memflags, pc)\
-  (type*) (AllocateHeap((size) * sizeof(type), memflags, pc))
-
-#define REALLOC_C_HEAP_ARRAY2(type, old, size, memflags, pc)\
-  (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags, pc))
+#define NEW_C_HEAP_OBJ_RETURN_NULL(type, memflags)\
+  NEW_C_HEAP_ARRAY_RETURN_NULL(type, 1, memflags)
 
 #define NEW_C_HEAP_OBJ2(type, memflags, pc)\
   NEW_C_HEAP_ARRAY2(type, 1, memflags, pc)
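
[Editor's note] The allocation.hpp hunk adds _RETURN_NULL variants of the C-heap and resource-array macros; they thread AllocFailStrategy::RETURN_NULL through to AllocateHeap/ReallocateHeap instead of the default EXIT_OOM behaviour, so callers that can tolerate failure get NULL back rather than a VM abort. A standalone sketch of that failure-strategy pattern; simplified, with malloc standing in for the NMT-aware allocators:

#include <cstdio>
#include <cstdlib>

// Illustrative stand-in for HotSpot's AllocFailStrategy.
struct AllocFailStrategy { enum AllocFailEnum { EXIT_OOM, RETURN_NULL }; };
typedef AllocFailStrategy::AllocFailEnum AllocFailType;

static char* allocate_heap(size_t size,
                           AllocFailType mode = AllocFailStrategy::EXIT_OOM) {
  char* p = (char*) std::malloc(size);
  if (p == NULL && mode == AllocFailStrategy::EXIT_OOM) {
    std::fprintf(stderr, "AllocateHeap failed for %zu bytes\n", size);
    std::exit(1);                       // the EXIT_OOM path aborts the VM
  }
  return p;                             // RETURN_NULL path: caller must check
}

// The two macro flavours, mirroring NEW_C_HEAP_ARRAY / ..._RETURN_NULL.
#define NEW_ARRAY(type, size) \
  (type*) allocate_heap((size) * sizeof(type))
#define NEW_ARRAY_RETURN_NULL(type, size) \
  (type*) allocate_heap((size) * sizeof(type), AllocFailStrategy::RETURN_NULL)

int main() {
  int* a = NEW_ARRAY(int, 16);                 // aborts on failure
  int* b = NEW_ARRAY_RETURN_NULL(int, 16);     // may be NULL; degrade gracefully
  if (b == NULL) std::puts("allocation failed, continuing without cache");
  std::free(a);
  std::free(b);
  return 0;
}
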
--- a/src/share/vm/memory/allocation.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/allocation.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,7 +35,7 @@
 #ifndef PRODUCT
 // Increments unsigned long value for statistics (not atomic on MP).
 inline void inc_stat_counter(volatile julong* dest, julong add_value) {
-#if defined(SPARC) || defined(X86)
+#if defined(SPARC) || defined(X86) || defined(AARCH64)
   // Sparc and X86 have atomic jlong (8 bytes) instructions
   julong value = Atomic::load((volatile jlong*)dest);
   value += add_value;
@@ -48,6 +48,9 @@
 #endif
 
 // allocate using malloc; will fail if no memory available
+#ifdef __GNUC__
+__attribute__((always_inline))
+#endif
 inline char* AllocateHeap(size_t size, MEMFLAGS flags, address pc = 0,
      AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM) {
   if (pc == 0) {
@@ -57,19 +60,24 @@
   #ifdef ASSERT
   if (PrintMallocFree) trace_heap_malloc(size, "AllocateHeap", p);
   #endif
-  if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM)
-    vm_exit_out_of_memory(size, "AllocateHeap");
+  if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM) {
+    vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "AllocateHeap");
+  }
   return p;
 }
 
+#ifdef __GNUC__
+__attribute__((always_inline))
+#endif
 inline char* ReallocateHeap(char *old, size_t size, MEMFLAGS flags,
     AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM) {
   char* p = (char*) os::realloc(old, size, flags, CURRENT_PC);
   #ifdef ASSERT
   if (PrintMallocFree) trace_heap_malloc(size, "ReallocateHeap", p);
   #endif
-  if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM)
-    vm_exit_out_of_memory(size, "ReallocateHeap");
+  if (p == NULL && alloc_failmode == AllocFailStrategy::EXIT_OOM) {
+    vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "ReallocateHeap");
+  }
   return p;
 }
 
@@ -129,7 +137,7 @@
 
   _addr = os::reserve_memory(_size, NULL, alignment, F);
   if (_addr == NULL) {
-    vm_exit_out_of_memory(_size, "Allocator (reserve)");
+    vm_exit_out_of_memory(_size, OOM_MMAP_ERROR, "Allocator (reserve)");
   }
 
   os::commit_memory_or_exit(_addr, _size, !ExecMem, "Allocator (commit)");
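
[Editor's note] Besides routing the out-of-memory exits through the new OOM_* codes, this hunk marks AllocateHeap/ReallocateHeap with __attribute__((always_inline)) under __GNUC__, presumably so the CURRENT_PC/CALLER_PC bookkeeping still records the real allocation site even when GCC would otherwise decline to inline them. A tiny sketch of the guarded attribute as used here (GCC/Clang only; the function is illustrative):

#include <cstdio>
#include <cstdlib>

// Guarded exactly like the patch: the attribute is a GCC extension, so it is
// only emitted when __GNUC__ is defined.
#ifdef __GNUC__
__attribute__((always_inline))
#endif
inline char* allocate_traced(size_t size, const char* caller) {
  std::printf("allocating %zu bytes for %s\n", size, caller);
  return (char*) std::malloc(size);
}

int main() {
  char* p = allocate_traced(32, "main");
  std::free(p);
  return 0;
}
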
--- a/src/share/vm/memory/blockOffsetTable.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/blockOffsetTable.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -80,7 +80,7 @@
     assert(delta > 0, "just checking");
     if (!_vs.expand_by(delta)) {
       // Do better than this for Merlin
-      vm_exit_out_of_memory(delta, "offset table expansion");
+      vm_exit_out_of_memory(delta, OOM_MMAP_ERROR, "offset table expansion");
     }
     assert(_vs.high() == high + delta, "invalid expansion");
   } else {
--- a/src/share/vm/memory/collectorPolicy.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/collectorPolicy.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -77,7 +77,7 @@
   }
   PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment()));
   // Don't increase Perm size limit above specified.
-  MaxPermSize = align_size_down(MaxPermSize, max_alignment());
+  MaxPermSize = MAX2(max_alignment(), align_size_down_(MaxPermSize, max_alignment()));
   if (PermSize > MaxPermSize) {
     PermSize = MaxPermSize;
   }
@@ -371,7 +371,7 @@
     // yield a size that is too small) and bound it by MaxNewSize above.
     // Ergonomics plays here by previously calculating the desired
     // NewSize and MaxNewSize.
-    max_new_size = MIN2(MAX2(max_new_size, NewSize), MaxNewSize);
+    max_new_size = MIN2(MAX2(max_new_size, (size_t) NewSize), (size_t) MaxNewSize);
   }
   assert(max_new_size > 0, "All paths should set max_new_size");
 
@@ -398,7 +398,7 @@
       // generally small compared to the NewRatio calculation.
       _min_gen0_size = NewSize;
       desired_new_size = NewSize;
-      max_new_size = MAX2(max_new_size, NewSize);
+      max_new_size = MAX2(max_new_size, (size_t)NewSize);
     } else {
       // For the case where NewSize is the default, use NewRatio
       // to size the minimum and initial generation sizes.
@@ -406,10 +406,10 @@
       // NewRatio is overly large, the resulting sizes can be too
       // small.
       _min_gen0_size = MAX2(scale_by_NewRatio_aligned(min_heap_byte_size()),
-                          NewSize);
+                          (size_t)NewSize);
       desired_new_size =
         MAX2(scale_by_NewRatio_aligned(initial_heap_byte_size()),
-             NewSize);
+             (size_t)NewSize);
     }
 
     assert(_min_gen0_size > 0, "Sanity check");
@@ -464,14 +464,14 @@
       // Adjust gen0 down to accomodate OldSize
       *gen0_size_ptr = heap_size - min_gen0_size;
       *gen0_size_ptr =
-        MAX2((uintx)align_size_down(*gen0_size_ptr, min_alignment()),
+        MAX2((size_t)align_size_down(*gen0_size_ptr, min_alignment()),
              min_alignment());
       assert(*gen0_size_ptr > 0, "Min gen0 is too large");
       result = true;
     } else {
       *gen1_size_ptr = heap_size - *gen0_size_ptr;
       *gen1_size_ptr =
-        MAX2((uintx)align_size_down(*gen1_size_ptr, min_alignment()),
+        MAX2((size_t)align_size_down(*gen1_size_ptr, min_alignment()),
                        min_alignment());
     }
   }
@@ -495,7 +495,7 @@
   // for setting the gen1 maximum.
   _max_gen1_size = max_heap_byte_size() - _max_gen0_size;
   _max_gen1_size =
-    MAX2((uintx)align_size_down(_max_gen1_size, min_alignment()),
+    MAX2((size_t)align_size_down(_max_gen1_size, min_alignment()),
          min_alignment());
   // If no explicit command line flag has been set for the
   // gen1 size, use what is left for gen1.
@@ -509,11 +509,11 @@
       "gen0 has an unexpected minimum size");
     set_min_gen1_size(min_heap_byte_size() - min_gen0_size());
     set_min_gen1_size(
-      MAX2((uintx)align_size_down(_min_gen1_size, min_alignment()),
+      MAX2((size_t)align_size_down(_min_gen1_size, min_alignment()),
            min_alignment()));
     set_initial_gen1_size(initial_heap_byte_size() - initial_gen0_size());
     set_initial_gen1_size(
-      MAX2((uintx)align_size_down(_initial_gen1_size, min_alignment()),
+      MAX2((size_t)align_size_down(_initial_gen1_size, min_alignment()),
            min_alignment()));
 
   } else {
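
[Editor's note] The first collectorPolicy.cpp hunk changes the MaxPermSize rounding from a plain align_size_down() to MAX2(max_alignment(), align_size_down_(MaxPermSize, max_alignment())): rounding a small MaxPermSize down to the generation alignment can drop it all the way to zero, and the MAX2 clamp keeps at least one alignment unit. A short arithmetic sketch of that corner case; the values are illustrative:

#include <cstddef>
#include <cstdio>

// Round v down to a multiple of alignment (a power of two), as align_size_down_ does.
static size_t align_size_down(size_t v, size_t alignment) {
  return v & ~(alignment - 1);
}

template <class T> static T MAX2(T a, T b) { return a > b ? a : b; }

int main() {
  const size_t max_alignment = 4 * 1024 * 1024;   // e.g. a 4M generation alignment
  size_t MaxPermSize = 1 * 1024 * 1024;           // a deliberately tiny setting

  size_t old_way = align_size_down(MaxPermSize, max_alignment);
  size_t new_way = MAX2(max_alignment, align_size_down(MaxPermSize, max_alignment));

  std::printf("old rounding: %zu bytes\n", old_way);   // 0 -- unusable
  std::printf("new rounding: %zu bytes\n", new_way);   // 4194304 -- clamped up
  return 0;
}
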
--- a/src/share/vm/memory/defNewGeneration.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/defNewGeneration.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -42,6 +42,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/stack.inline.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "thread_linux.inline.hpp"
@@ -58,7 +59,10 @@
 #ifdef TARGET_OS_FAMILY_bsd
 # include "thread_bsd.inline.hpp"
 #endif
-
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__defnew__begin, bool, bool, size_t, bool);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__defnew__end, bool, bool, size_t, bool);
+#endif /* !USDT2 */
 //
 // DefNewGeneration functions.
 
@@ -537,6 +541,9 @@
                                bool   clear_all_soft_refs,
                                size_t size,
                                bool   is_tlab) {
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__defnew__begin, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
   assert(full || size > 0, "otherwise we don't want to collect");
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
@@ -682,6 +689,10 @@
   jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
   update_time_of_last_gc(now);
 
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__defnew__end, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
+
   gch->trace_heap_after_gc(&gc_tracer);
   gc_tracer.report_tenuring_threshold(tenuring_threshold());
 
--- a/src/share/vm/memory/generation.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/generation.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -41,8 +41,14 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__contig__begin, bool, bool, size_t, bool);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__contig__end, bool, bool, size_t, bool);
+#endif /* !USDT2 */
+
 Generation::Generation(ReservedSpace rs, size_t initial_size, int level) :
   _level(level),
   _ref_processor(NULL) {
@@ -481,7 +487,13 @@
   SerialOldTracer* gc_tracer = GenMarkSweep::gc_tracer();
   gc_tracer->report_gc_start(gch->gc_cause(), gc_timer->gc_start());
 
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__contig__begin, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
   GenMarkSweep::invoke_at_safepoint(_level, ref_processor(), clear_all_soft_refs);
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__contig__end, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
 
   gc_timer->register_gc_end();
 
--- a/src/share/vm/memory/tenuredGeneration.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/memory/tenuredGeneration.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,12 @@
 #include "memory/tenuredGeneration.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "utilities/dtrace.hpp"
+
+#ifndef USDT2
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__tenured__begin, bool, bool, size_t, bool);
+  HS_DTRACE_PROBE_DECL4(provider, gc__collection__tenured__end, bool, bool, size_t, bool);
+#endif /* !USDT2 */
 
 TenuredGeneration::TenuredGeneration(ReservedSpace rs,
                                      size_t initial_byte_size, int level,
@@ -307,8 +313,14 @@
                                 size_t size,
                                 bool   is_tlab) {
   retire_alloc_buffers_before_full_gc();
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__tenured__begin, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
   OneContigSpaceCardGeneration::collect(full, clear_all_soft_refs,
                                         size, is_tlab);
+#ifndef USDT2
+  HS_DTRACE_PROBE4(hotspot, gc__collection__tenured__end, full, clear_all_soft_refs, size, is_tlab);
+#endif  /* !USDT2 */
 }
 
 void TenuredGeneration::update_gc_stats(int current_level,
--- a/src/share/vm/oops/constantPoolOop.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/constantPoolOop.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/oops/instanceKlass.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/instanceKlass.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1161,6 +1161,21 @@
   return NULL;
 }
 
+#ifdef ASSERT
+// search through class hierarchy and return true if this class or
+// one of the superclasses was redefined
+bool instanceKlass::has_redefined_this_or_super() const {
+  klassOop klass = as_klassOop();
+  while (klass != NULL) {
+    if (instanceKlass::cast(klass)->has_been_redefined()) {
+      return true;
+    }
+    klass = instanceKlass::cast(klass)->super();
+  }
+  return false;
+}
+#endif
+
 // lookup a method in all the interfaces that this class implements
 methodOop instanceKlass::lookup_method_in_all_interfaces(Symbol* name,
                                                          Symbol* signature) const {
--- a/src/share/vm/oops/instanceKlass.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/instanceKlass.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -733,6 +733,11 @@
   // subclass/subinterface checks
   bool implements_interface(klassOop k) const;
 
+#ifdef ASSERT
+  // check whether this class or one of its superclasses was redefined
+  bool has_redefined_this_or_super() const;
+#endif
+
   // Access to the implementor of an interface.
   klassOop implementor() const
   {
--- a/src/share/vm/oops/methodOop.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/methodOop.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -217,7 +217,12 @@
 
 int methodOopDesc::object_size(bool is_native) {
   // If native, then include pointers for native_function and signature_handler
+#ifdef TARGET_ARCH_aarch64
+  // aarch64 requires extra word for call format
+  int extra_bytes = (is_native) ? 3*sizeof(address*) : 0;
+#else
   int extra_bytes = (is_native) ? 2*sizeof(address*) : 0;
+#endif // TARGET_ARCH_aarch64
   int extra_words = align_size_up(extra_bytes, BytesPerWord) / BytesPerWord;
   return align_object_size(header_size() + extra_words);
 }
@@ -607,6 +612,17 @@
 }
 
 
+#ifdef TARGET_ARCH_aarch64
+void methodOopDesc::set_call_format(unsigned int call_format) {
+  unsigned int* call_format_p =  (unsigned int *)call_format_addr();
+  *call_format_p = call_format;
+}
+
+unsigned int methodOopDesc::call_format() {
+  return *(unsigned int *)call_format_addr();
+}
+#endif
+
 void methodOopDesc::set_signature_handler(address handler) {
   address* signature_handler =  signature_handler_addr();
   *signature_handler = handler;
--- a/src/share/vm/oops/methodOop.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/methodOop.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -421,6 +421,12 @@
   address signature_handler() const              { return *(signature_handler_addr()); }
   void set_signature_handler(address handler);
 
+#ifdef TARGET_ARCH_aarch64
+  address *call_format_addr() const        { return native_function_addr() + 2; }
+  static ByteSize call_format_offset()    { return in_ByteSize(sizeof(methodOopDesc) + 2 * wordSize);      }
+  void set_call_format(unsigned int call_format);
+  int unsigned call_format();
+#endif
   // Interpreter oopmap support
   void mask_for(int bci, InterpreterOopMap* mask);
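
[Editor's note] On aarch64 the patch appends a third trailing word to native methodOopDesc instances (object_size uses 3*sizeof(address*) instead of 2) and addresses it as native_function_addr() + 2, i.e. the call-format word sits right after the native-function and signature-handler slots. A standalone sketch of that "extra words appended after the object" layout; the struct and field names are illustrative stand-ins, and the slot order follows the comment in object_size():

#include <cstdio>
#include <cstdlib>

typedef unsigned char* address;

// Stand-in for the fixed part of a method object.
struct MethodHeader { int flags; int size_of_parameters; };

int main() {
  // Native methods get extra pointer-sized words appended at the end:
  // [0] native function, [1] signature handler, [2] call format (aarch64 only).
  const size_t extra_words = 3;
  char* raw = (char*) std::calloc(1, sizeof(MethodHeader) + extra_words * sizeof(address));

  address* native_function_addr   = (address*) (raw + sizeof(MethodHeader));
  address* signature_handler_addr = native_function_addr + 1;
  unsigned int* call_format_addr  = (unsigned int*) (native_function_addr + 2);

  *call_format_addr = 0xabu;   // what set_call_format() would store
  std::printf("call format = 0x%x\n", *call_format_addr);

  (void) signature_handler_addr;
  std::free(raw);
  return 0;
}
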
 
--- a/src/share/vm/oops/objArrayKlass.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/objArrayKlass.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,7 +46,7 @@
   const size_t beg_index = size_t(index);
   assert(beg_index < len || len == 0, "index too large");
 
-  const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride);
+  const size_t stride = MIN2(len - beg_index, (size_t)ObjArrayMarkingStride);
   const size_t end_index = beg_index + stride;
   T* const base = (T*)a->base();
   T* const beg = base + beg_index;
@@ -80,7 +80,7 @@
   const size_t beg_index = size_t(index);
   assert(beg_index < len || len == 0, "index too large");
 
-  const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride);
+  const size_t stride = MIN2(len - beg_index, (size_t)ObjArrayMarkingStride);
   const size_t end_index = beg_index + stride;
   T* const base = (T*)a->base();
   T* const beg = base + beg_index;
--- a/src/share/vm/oops/oop.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/oop.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,6 +46,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/oops/typeArrayOop.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/oops/typeArrayOop.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,6 +30,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "orderAccess_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "orderAccess_linux_sparc.inline.hpp"
 #endif
--- a/src/share/vm/opto/addnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/addnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -344,8 +344,8 @@
 const Type *AddINode::add_ring( const Type *t0, const Type *t1 ) const {
   const TypeInt *r0 = t0->is_int(); // Handy access
   const TypeInt *r1 = t1->is_int();
-  int lo = r0->_lo + r1->_lo;
-  int hi = r0->_hi + r1->_hi;
+  int lo = java_add(r0->_lo, r1->_lo);
+  int hi = java_add(r0->_hi, r1->_hi);
   if( !(r0->is_con() && r1->is_con()) ) {
     // Not both constants, compute approximate result
     if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
@@ -462,8 +462,8 @@
 const Type *AddLNode::add_ring( const Type *t0, const Type *t1 ) const {
   const TypeLong *r0 = t0->is_long(); // Handy access
   const TypeLong *r1 = t1->is_long();
-  jlong lo = r0->_lo + r1->_lo;
-  jlong hi = r0->_hi + r1->_hi;
+  jlong lo = java_add(r0->_lo, r1->_lo);
+  jlong hi = java_add(r0->_hi, r1->_hi);
   if( !(r0->is_con() && r1->is_con()) ) {
     // Not both constants, compute approximate result
     if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
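
[Editor's note] The addnode.cpp hunks compute the low/high bounds of the result type with java_add() instead of the plain + operator: signed overflow is undefined behaviour in C++, while Java addition must wrap, so the helper (introduced elsewhere in this changeset) is expected to do the arithmetic in an unsigned type and convert back. A minimal standalone sketch of that wrapping addition; these are my own definitions, shown only to illustrate the idea:

#include <cstdio>
#include <stdint.h>

typedef int32_t jint;
typedef int64_t jlong;

// Wrapping (Java-style) addition: do the sum in the unsigned counterpart,
// where overflow is well defined, then convert back. The conversion back to
// the signed type relies on the usual two's-complement behaviour.
static jint  java_add(jint in1, jint in2)   { return (jint)  ((uint32_t) in1 + (uint32_t) in2); }
static jlong java_add(jlong in1, jlong in2) { return (jlong) ((uint64_t) in1 + (uint64_t) in2); }

int main() {
  jint lo = java_add((jint) 0x7fffffff, (jint) 1);   // wraps to INT_MIN instead of UB
  std::printf("wrapped sum: %d\n", lo);
  return 0;
}
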
--- a/src/share/vm/opto/block.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/block.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,10 +35,6 @@
 #include "opto/rootnode.hpp"
 #include "utilities/copy.hpp"
 
-// Optimization - Graph Style
-
-
-//-----------------------------------------------------------------------------
 void Block_Array::grow( uint i ) {
   assert(i >= Max(), "must be an overflow");
   debug_only(_limit = i+1);
@@ -54,7 +50,6 @@
   Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
 }
 
-//=============================================================================
 void Block_List::remove(uint i) {
   assert(i < _cnt, "index out of bounds");
   Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
@@ -76,8 +71,6 @@
 }
 #endif
 
-//=============================================================================
-
 uint Block::code_alignment() {
   // Check for Root block
   if (_pre_order == 0) return CodeEntryAlignment;
@@ -113,16 +106,15 @@
   return unit_sz; // no particular alignment
 }
 
-//-----------------------------------------------------------------------------
 // Compute the size of first 'inst_cnt' instructions in this block.
 // Return the number of instructions left to compute if the block has
 // less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
 // exceeds OptoLoopAlignment.
 uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
                                     PhaseRegAlloc* ra) {
-  uint last_inst = _nodes.size();
+  uint last_inst = number_of_nodes();
   for( uint j = 0; j < last_inst && inst_cnt > 0; j++ ) {
-    uint inst_size = _nodes[j]->size(ra);
+    uint inst_size = get_node(j)->size(ra);
     if( inst_size > 0 ) {
       inst_cnt--;
       uint sz = sum_size + inst_size;
@@ -138,10 +130,9 @@
   return inst_cnt;
 }
 
-//-----------------------------------------------------------------------------
 uint Block::find_node( const Node *n ) const {
-  for( uint i = 0; i < _nodes.size(); i++ ) {
-    if( _nodes[i] == n )
+  for( uint i = 0; i < number_of_nodes(); i++ ) {
+    if( get_node(i) == n )
       return i;
   }
   ShouldNotReachHere();
@@ -150,7 +141,7 @@
 
 // Find and remove n from block list
 void Block::find_remove( const Node *n ) {
-  _nodes.remove(find_node(n));
+  remove_node(find_node(n));
 }
 
 bool Block::contains( const Node *n ) const {
@@ -161,7 +152,6 @@
   return false;
 }
 
-//------------------------------is_Empty---------------------------------------
 // Return empty status of a block.  Empty blocks contain only the head, other
 // ideal nodes, and an optional trailing goto.
 int Block::is_Empty() const {
@@ -172,10 +162,10 @@
   }
 
   int success_result = completely_empty;
-  int end_idx = _nodes.size()-1;
+  int end_idx = number_of_nodes() - 1;
 
   // Check for ending goto
-  if ((end_idx > 0) && (_nodes[end_idx]->is_MachGoto())) {
+  if ((end_idx > 0) && (get_node(end_idx)->is_MachGoto())) {
     success_result = empty_with_goto;
     end_idx--;
   }
@@ -188,7 +178,7 @@
   // Ideal nodes are allowable in empty blocks: skip them  Only MachNodes
   // turn directly into code, because only MachNodes have non-trivial
   // emit() functions.
-  while ((end_idx > 0) && !_nodes[end_idx]->is_Mach()) {
+  while ((end_idx > 0) && !get_node(end_idx)->is_Mach()) {
     end_idx--;
   }
 
@@ -200,7 +190,6 @@
   return not_empty;
 }
 
-//------------------------------has_uncommon_code------------------------------
 // Return true if the block's code implies that it is likely to be
 // executed infrequently.  Check to see if the block ends in a Halt or
 // a low probability call.
@@ -226,18 +215,17 @@
   return op == Op_Halt;
 }
 
-//------------------------------is_uncommon------------------------------------
 // True if block is low enough frequency or guarded by a test which
 // mostly does not go here.
-bool Block::is_uncommon( Block_Array &bbs ) const {
+bool PhaseCFG::is_uncommon(const Block* block) {
   // Initial blocks must never be moved, so are never uncommon.
-  if (head()->is_Root() || head()->is_Start())  return false;
+  if (block->head()->is_Root() || block->head()->is_Start())  return false;
 
   // Check for way-low freq
-  if( _freq < BLOCK_FREQUENCY(0.00001f) ) return true;
+  if(block->_freq < BLOCK_FREQUENCY(0.00001f) ) return true;
 
   // Look for code shape indicating uncommon_trap or slow path
-  if (has_uncommon_code()) return true;
+  if (block->has_uncommon_code()) return true;
 
   const float epsilon = 0.05f;
   const float guard_factor = PROB_UNLIKELY_MAG(4) / (1.f - epsilon);
@@ -245,8 +233,8 @@
   uint freq_preds = 0;
   uint uncommon_for_freq_preds = 0;
 
-  for( uint i=1; i<num_preds(); i++ ) {
-    Block* guard = bbs[pred(i)->_idx];
+  for( uint i=1; i< block->num_preds(); i++ ) {
+    Block* guard = get_block_for_node(block->pred(i));
     // Check to see if this block follows its guard 1 time out of 10000
     // or less.
     //
@@ -264,14 +252,14 @@
       uncommon_preds++;
     } else {
       freq_preds++;
-      if( _freq < guard->_freq * guard_factor ) {
+      if(block->_freq < guard->_freq * guard_factor ) {
         uncommon_for_freq_preds++;
       }
     }
   }
-  if( num_preds() > 1 &&
+  if( block->num_preds() > 1 &&
       // The block is uncommon if all preds are uncommon or
-      (uncommon_preds == (num_preds()-1) ||
+      (uncommon_preds == (block->num_preds()-1) ||
       // it is uncommon for all frequent preds.
        uncommon_for_freq_preds == freq_preds) ) {
     return true;
@@ -279,7 +267,6 @@
   return false;
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void Block::dump_bidx(const Block* orig, outputStream* st) const {
   if (_pre_order) st->print("B%d",_pre_order);
@@ -293,11 +280,11 @@
   }
 }
 
-void Block::dump_pred(const Block_Array *bbs, Block* orig, outputStream* st) const {
+void Block::dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st) const {
   if (is_connector()) {
     for (uint i=1; i<num_preds(); i++) {
-      Block *p = ((*bbs)[pred(i)->_idx]);
-      p->dump_pred(bbs, orig, st);
+      Block *p = cfg->get_block_for_node(pred(i));
+      p->dump_pred(cfg, orig, st);
     }
   } else {
     dump_bidx(orig, st);
@@ -305,7 +292,7 @@
   }
 }
 
-void Block::dump_head( const Block_Array *bbs, outputStream* st ) const {
+void Block::dump_head(const PhaseCFG* cfg, outputStream* st) const {
   // Print the basic block
   dump_bidx(this, st);
   st->print(": #\t");
@@ -319,26 +306,28 @@
   if( head()->is_block_start() ) {
     for (uint i=1; i<num_preds(); i++) {
       Node *s = pred(i);
-      if (bbs) {
-        Block *p = (*bbs)[s->_idx];
-        p->dump_pred(bbs, p, st);
+      if (cfg != NULL) {
+        Block *p = cfg->get_block_for_node(s);
+        p->dump_pred(cfg, p, st);
       } else {
         while (!s->is_block_start())
           s = s->in(0);
         st->print("N%d ", s->_idx );
       }
     }
-  } else
+  } else {
     st->print("BLOCK HEAD IS JUNK  ");
+  }
 
   // Print loop, if any
   const Block *bhead = this;    // Head of self-loop
   Node *bh = bhead->head();
-  if( bbs && bh->is_Loop() && !head()->is_Root() ) {
+
+  if ((cfg != NULL) && bh->is_Loop() && !head()->is_Root()) {
     LoopNode *loop = bh->as_Loop();
-    const Block *bx = (*bbs)[loop->in(LoopNode::LoopBackControl)->_idx];
+    const Block *bx = cfg->get_block_for_node(loop->in(LoopNode::LoopBackControl));
     while (bx->is_connector()) {
-      bx = (*bbs)[bx->pred(1)->_idx];
+      bx = cfg->get_block_for_node(bx->pred(1));
     }
     st->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
     // Dump any loop-specific bits, especially for CountedLoops.
@@ -357,29 +346,31 @@
   st->cr();
 }
 
-void Block::dump() const { dump(NULL); }
+void Block::dump() const {
+  dump(NULL);
+}
 
-void Block::dump( const Block_Array *bbs ) const {
-  dump_head(bbs);
-  uint cnt = _nodes.size();
-  for( uint i=0; i<cnt; i++ )
-    _nodes[i]->dump();
+void Block::dump(const PhaseCFG* cfg) const {
+  dump_head(cfg);
+  for (uint i=0; i< number_of_nodes(); i++) {
+    get_node(i)->dump();
+  }
   tty->print("\n");
 }
 #endif
 
-//=============================================================================
-//------------------------------PhaseCFG---------------------------------------
-PhaseCFG::PhaseCFG( Arena *a, RootNode *r, Matcher &m ) :
-  Phase(CFG),
-  _bbs(a),
-  _root(r),
-  _node_latency(NULL)
+PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
+: Phase(CFG)
+, _block_arena(arena)
+, _root(root)
+, _matcher(matcher)
+, _node_to_block_mapping(arena)
+, _node_latency(NULL)
 #ifndef PRODUCT
-  , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
+, _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
 #endif
 #ifdef ASSERT
-  , _raw_oops(a)
+, _raw_oops(arena)
 #endif
 {
   ResourceMark rm;
@@ -388,16 +379,15 @@
   // Node on demand.
   Node *x = new (C) GotoNode(NULL);
   x->init_req(0, x);
-  _goto = m.match_tree(x);
+  _goto = matcher.match_tree(x);
   assert(_goto != NULL, "");
   _goto->set_req(0,_goto);
 
   // Build the CFG in Reverse Post Order
-  _num_blocks = build_cfg();
-  _broot = _bbs[_root->_idx];
+  _number_of_blocks = build_cfg();
+  _root_block = get_block_for_node(_root);
 }
 
-//------------------------------build_cfg--------------------------------------
 // Build a proper looking CFG.  Make every block begin with either a StartNode
 // or a RegionNode.  Make every block end with either a Goto, If or Return.
 // The RootNode both starts and ends it's own block.  Do this with a recursive
@@ -448,11 +438,11 @@
       // 'p' now points to the start of this basic block
 
       // Put self in array of basic blocks
-      Block *bb = new (_bbs._arena) Block(_bbs._arena,p);
-      _bbs.map(p->_idx,bb);
-      _bbs.map(x->_idx,bb);
+      Block *bb = new (_block_arena) Block(_block_arena, p);
+      map_node_to_block(p, bb);
+      map_node_to_block(x, bb);
       if( x != p ) {                // Only for root is x == p
-        bb->_nodes.push((Node*)x);
+        bb->push_node((Node*)x);
       }
       // Now handle predecessors
       ++sum;                        // Count 1 for self block
@@ -481,17 +471,17 @@
       // Check if it the fist node pushed on stack at the beginning.
       if (idx == 0) break;          // end of the build
       // Find predecessor basic block
-      Block *pb = _bbs[x->_idx];
+      Block *pb = get_block_for_node(x);
       // Insert into nodes array, if not already there
-      if( !_bbs.lookup(proj->_idx) ) {
+      if (!has_block(proj)) {
         assert( x != proj, "" );
         // Map basic block of projection
-        _bbs.map(proj->_idx,pb);
-        pb->_nodes.push(proj);
+        map_node_to_block(proj, pb);
+        pb->push_node(proj);
       }
       // Insert self as a child of my predecessor block
-      pb->_succs.map(pb->_num_succs++, _bbs[np->_idx]);
-      assert( pb->_nodes[ pb->_nodes.size() - pb->_num_succs ]->is_block_proj(),
+      pb->_succs.map(pb->_num_succs++, get_block_for_node(np));
+      assert( pb->get_node(pb->number_of_nodes() - pb->_num_succs)->is_block_proj(),
               "too many control users, not a CFG?" );
     }
   }
@@ -499,13 +489,12 @@
   return sum;
 }
 
-//------------------------------insert_goto_at---------------------------------
 // Inserts a goto & corresponding basic block between
 // block[block_no] and its succ_no'th successor block
 void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
   // get block with block_no
-  assert(block_no < _num_blocks, "illegal block number");
-  Block* in  = _blocks[block_no];
+  assert(block_no < number_of_blocks(), "illegal block number");
+  Block* in  = get_block(block_no);
   // get successor block succ_no
   assert(succ_no < in->_num_succs, "illegal successor number");
   Block* out = in->_succs[succ_no];
@@ -514,20 +503,20 @@
   // surrounding blocks.
   float freq = in->_freq * in->succ_prob(succ_no);
   // get ProjNode corresponding to the succ_no'th successor of the in block
-  ProjNode* proj = in->_nodes[in->_nodes.size() - in->_num_succs + succ_no]->as_Proj();
+  ProjNode* proj = in->get_node(in->number_of_nodes() - in->_num_succs + succ_no)->as_Proj();
   // create region for basic block
   RegionNode* region = new (C) RegionNode(2);
   region->init_req(1, proj);
   // setup corresponding basic block
-  Block* block = new (_bbs._arena) Block(_bbs._arena, region);
-  _bbs.map(region->_idx, block);
+  Block* block = new (_block_arena) Block(_block_arena, region);
+  map_node_to_block(region, block);
   C->regalloc()->set_bad(region->_idx);
   // add a goto node
   Node* gto = _goto->clone(); // get a new goto node
   gto->set_req(0, region);
   // add it to the basic block
-  block->_nodes.push(gto);
-  _bbs.map(gto->_idx, block);
+  block->push_node(gto);
+  map_node_to_block(gto, block);
   C->regalloc()->set_bad(gto->_idx);
   // hook up successor block
   block->_succs.map(block->_num_succs++, out);
@@ -540,19 +529,17 @@
   // Set the frequency of the new block
   block->_freq = freq;
   // add new basic block to basic block list
-  _blocks.insert(block_no + 1, block);
-  _num_blocks++;
+  add_block_at(block_no + 1, block);
 }
 
-//------------------------------no_flip_branch---------------------------------
 // Does this block end in a multiway branch that cannot have the default case
 // flipped for another case?
 static bool no_flip_branch(Block *b) {
-  int branch_idx = b->_nodes.size() - b->_num_succs-1;
+  int branch_idx = b->number_of_nodes() - b->_num_succs-1;
   if (branch_idx < 1) {
     return false;
   }
-  Node *branch = b->_nodes[branch_idx];
+  Node *branch = b->get_node(branch_idx);
   if (branch->is_Catch()) {
     return true;
   }
@@ -572,7 +559,6 @@
   return false;
 }
 
-//------------------------------convert_NeverBranch_to_Goto--------------------
 // Check for NeverBranch at block end.  This needs to become a GOTO to the
 // true target.  NeverBranch are treated as a conditional branch that always
 // goes the same direction for most of the optimizer and are used to give a
@@ -581,16 +567,16 @@
 void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
   // Find true target
   int end_idx = b->end_idx();
-  int idx = b->_nodes[end_idx+1]->as_Proj()->_con;
+  int idx = b->get_node(end_idx+1)->as_Proj()->_con;
   Block *succ = b->_succs[idx];
   Node* gto = _goto->clone(); // get a new goto node
   gto->set_req(0, b->head());
-  Node *bp = b->_nodes[end_idx];
-  b->_nodes.map(end_idx,gto); // Slam over NeverBranch
-  _bbs.map(gto->_idx, b);
+  Node *bp = b->get_node(end_idx);
+  b->map_node(gto, end_idx); // Slam over NeverBranch
+  map_node_to_block(gto, b);
   C->regalloc()->set_bad(gto->_idx);
-  b->_nodes.pop();              // Yank projections
-  b->_nodes.pop();              // Yank projections
+  b->pop_node();              // Yank projections
+  b->pop_node();              // Yank projections
   b->_succs.map(0,succ);        // Map only successor
   b->_num_succs = 1;
   // remap successor's predecessors if necessary
@@ -606,11 +592,10 @@
   // Scan through block, yanking dead path from
   // all regions and phis.
   dead->head()->del_req(j);
-  for( int k = 1; dead->_nodes[k]->is_Phi(); k++ )
-    dead->_nodes[k]->del_req(j);
+  for( int k = 1; dead->get_node(k)->is_Phi(); k++ )
+    dead->get_node(k)->del_req(j);
 }
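
When the NeverBranch is slammed over with a goto, the never-taken successor loses one incoming edge, so its RegionNode head and the Phis that immediately follow it each drop input j. A minimal sketch of that cleanup, assuming simplified node and block types rather than the HotSpot ones:

    #include <cstddef>
    #include <vector>

    struct SimpleNode {
      bool is_phi = false;
      std::vector<SimpleNode*> in;                     // input edges
      void del_req(std::size_t j) { in.erase(in.begin() + j); }
    };

    struct SimpleBlock {
      std::vector<SimpleNode*> nodes;                  // nodes[0] is the Region head, Phis follow
    };

    // Remove predecessor edge j from the block head and from every leading Phi,
    // analogous to the dead-path yanking after convert_NeverBranch_to_Goto.
    void yank_dead_path(SimpleBlock* dead, std::size_t j) {
      dead->nodes[0]->del_req(j);                                        // region head
      for (std::size_t k = 1;
           k < dead->nodes.size() && dead->nodes[k]->is_phi; ++k) {
        dead->nodes[k]->del_req(j);                                      // matching Phi input
      }
    }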
 
-//------------------------------move_to_next-----------------------------------
 // Helper function to move block bx to the slot following b_index. Return
 // true if the move is successful, otherwise false
 bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
@@ -618,20 +603,22 @@
 
   // Return false if bx is already scheduled.
   uint bx_index = bx->_pre_order;
-  if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
+  if ((bx_index <= b_index) && (get_block(bx_index) == bx)) {
     return false;
   }
 
   // Find the current index of block bx on the block list
   bx_index = b_index + 1;
-  while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
-  assert(_blocks[bx_index] == bx, "block not found");
+  while (bx_index < number_of_blocks() && get_block(bx_index) != bx) {
+    bx_index++;
+  }
+  assert(get_block(bx_index) == bx, "block not found");
 
   // If the previous block conditionally falls into bx, return false,
   // because moving bx will create an extra jump.
   for(uint k = 1; k < bx->num_preds(); k++ ) {
-    Block* pred = _bbs[bx->pred(k)->_idx];
-    if (pred == _blocks[bx_index-1]) {
+    Block* pred = get_block_for_node(bx->pred(k));
+    if (pred == get_block(bx_index - 1)) {
       if (pred->_num_succs != 1) {
         return false;
       }
@@ -644,14 +631,13 @@
   return true;
 }
 
-//------------------------------move_to_end------------------------------------
 // Move empty and uncommon blocks to the end.
 void PhaseCFG::move_to_end(Block *b, uint i) {
   int e = b->is_Empty();
   if (e != Block::not_empty) {
     if (e == Block::empty_with_goto) {
       // Remove the goto, but leave the block.
-      b->_nodes.pop();
+      b->pop_node();
     }
     // Mark this block as a connector block, which will cause it to be
     // ignored in certain functions such as non_connector_successor().
@@ -662,31 +648,31 @@
   _blocks.push(b);
 }
 
-//---------------------------set_loop_alignment--------------------------------
 // Set loop alignment for every block
 void PhaseCFG::set_loop_alignment() {
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");
 
-  for (uint i = 1; i < last; i++ ) {
-    Block *b = _blocks[i];
-    if (b->head()->is_Loop()) {
-      b->set_loop_alignment(b);
+  for (uint i = 1; i < last; i++) {
+    Block* block = get_block(i);
+    if (block->head()->is_Loop()) {
+      block->set_loop_alignment(block);
     }
   }
 }
 
-//-----------------------------remove_empty------------------------------------
 // Make empty basic blocks to be "connector" blocks, Move uncommon blocks
 // to the end.
-void PhaseCFG::remove_empty() {
+void PhaseCFG::remove_empty_blocks() {
   // Move uncommon blocks to the end
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");
 
   for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_connector()) break;
+    Block* block = get_block(i);
+    if (block->is_connector()) {
+      break;
+    }
 
     // Check for NeverBranch at block end.  This needs to become a GOTO to the
     // true target.  NeverBranch are treated as a conditional branch that
@@ -694,30 +680,33 @@
     // to give a fake exit path to infinite loops.  At this late stage they
     // need to turn into Goto's so that when you enter the infinite loop you
     // indeed hang.
-    if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
-      convert_NeverBranch_to_Goto(b);
+    if (block->get_node(block->end_idx())->Opcode() == Op_NeverBranch) {
+      convert_NeverBranch_to_Goto(block);
+    }
 
     // Look for uncommon blocks and move to end.
     if (!C->do_freq_based_layout()) {
-      if( b->is_uncommon(_bbs) ) {
-        move_to_end(b, i);
+      if (is_uncommon(block)) {
+        move_to_end(block, i);
         last--;                   // No longer check for being uncommon!
-        if( no_flip_branch(b) ) { // Fall-thru case must follow?
-          b = _blocks[i];         // Find the fall-thru block
-          move_to_end(b, i);
+        if (no_flip_branch(block)) { // Fall-thru case must follow?
+          // Find the fall-thru block
+          block = get_block(i);
+          move_to_end(block, i);
           last--;
         }
-        i--;                      // backup block counter post-increment
+        // backup block counter post-increment
+        i--;
       }
     }
   }
 
   // Move empty blocks to the end
-  last = _num_blocks;
+  last = number_of_blocks();
   for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_Empty() != Block::not_empty) {
-      move_to_end(b, i);
+    Block* block = get_block(i);
+    if (block->is_Empty() != Block::not_empty) {
+      move_to_end(block, i);
       last--;
       i--;
     }
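
Both passes above use the same relocation idiom: once the block at index i has been moved to the end of the list, last is decremented so the moved block is not rescanned, and i is decremented so that whatever slid into slot i is examined on the next iteration. A tiny self-contained demonstration of the idiom on a plain vector of ints, with slot 0 standing in for the entry block that is never moved:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Move every element equal to `dead` to the back while preserving the
    // relative order of the survivors, using the same scan pattern as
    // remove_empty_blocks(), just on ints instead of Blocks.
    void move_matching_to_end(std::vector<int>& v, int dead) {
      std::size_t last = v.size();
      for (std::size_t i = 1; i < last; i++) {
        if (v[i] == dead) {
          int b = v[i];
          v.erase(v.begin() + i);   // take it out of the list...
          v.push_back(b);           // ...and append it at the end
          last--;                   // no longer scan the moved element
          i--;                      // re-examine whatever slid into slot i
        }
      }
    }

    int main() {
      std::vector<int> v = {7, 1, 0, 2, 0, 3};
      move_matching_to_end(v, 0);
      assert((v == std::vector<int>{7, 1, 2, 3, 0, 0}));
      return 0;
    }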
@@ -732,9 +721,9 @@
   assert(block->_num_succs == 2, "must have 2 successors");
 
   // Get the If node and the projection for the first successor.
-  MachIfNode *iff   = block->_nodes[block->_nodes.size()-3]->as_MachIf();
-  ProjNode   *proj0 = block->_nodes[block->_nodes.size()-2]->as_Proj();
-  ProjNode   *proj1 = block->_nodes[block->_nodes.size()-1]->as_Proj();
+  MachIfNode *iff   = block->get_node(block->number_of_nodes()-3)->as_MachIf();
+  ProjNode   *proj0 = block->get_node(block->number_of_nodes()-2)->as_Proj();
+  ProjNode   *proj1 = block->get_node(block->number_of_nodes()-1)->as_Proj();
   ProjNode   *projt = (proj0->Opcode() == Op_IfTrue)  ? proj0 : proj1;
   ProjNode   *projf = (proj0->Opcode() == Op_IfFalse) ? proj0 : proj1;
 
@@ -760,12 +749,12 @@
   }
   assert(iff->_prob <= 2*PROB_NEVER, "Trap based checks are expected to trap never!");
   // Map the successors properly
-  block->_succs.map(0, _bbs[proj_never ->raw_out(0)->_idx]);   // The target of the trap.
-  block->_succs.map(1, _bbs[proj_always->raw_out(0)->_idx]);   // The fall through target.
+  block->_succs.map(0, get_block_for_node(proj_never ->raw_out(0)));   // The target of the trap.
+  block->_succs.map(1, get_block_for_node(proj_always->raw_out(0)));   // The fall through target.
 
-  if (block->_nodes[block->_nodes.size() - block->_num_succs + 1] != proj_always) {
-    block->_nodes.map(block->_nodes.size() - block->_num_succs + 0, proj_never);
-    block->_nodes.map(block->_nodes.size() - block->_num_succs + 1, proj_always);
+  if (block->get_node(block->number_of_nodes() - block->_num_succs + 1) != proj_always) {
+    block->map_node(proj_never,  block->number_of_nodes() - block->_num_succs + 0);
+    block->map_node(proj_always, block->number_of_nodes() - block->_num_succs + 1);
   }
 
   // Place the fall through block after this block.
@@ -780,108 +769,108 @@
   return bnext;
 }
 
-//-----------------------------fixup_flow--------------------------------------
 // Fix up the final control flow for basic blocks.
 void PhaseCFG::fixup_flow() {
   // Fixup final control flow for the blocks.  Remove jump-to-next
   // block. If neither arm of an IF follows the conditional branch, we
   // have to add a second jump after the conditional.  We place the
   // TRUE branch target in succs[0] for both GOTOs and IFs.
-  for (uint i=0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    b->_pre_order = i;          // turn pre-order into block-index
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->_pre_order = i;          // turn pre-order into block-index
 
     // Connector blocks need no further processing.
-    if (b->is_connector()) {
-      assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
-             "All connector blocks should sink to the end");
+    if (block->is_connector()) {
+      assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end");
       continue;
     }
-    assert(b->is_Empty() != Block::completely_empty,
-           "Empty blocks should be connectors");
+    assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors");
 
-    Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
-    Block *bs0 = b->non_connector_successor(0);
+    Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL;
+    Block* bs0 = block->non_connector_successor(0);
 
     // Check for multi-way branches where I cannot negate the test to
     // exchange the true and false targets.
-    if (no_flip_branch(b)) {
+    if (no_flip_branch(block)) {
       // Find fall through case - if must fall into its target
       // Get the index of the branch's first successor.
-      int branch_idx = b->_nodes.size() - b->_num_succs;
+      int branch_idx = block->number_of_nodes() - block->_num_succs;
 
       // The branch is 1 before the branch's first successor.
-      Node *bra = b->_nodes[branch_idx-1];
+      Node *branch = block->get_node(branch_idx-1);
 
       // Handle no-flip branches which have implicit checks and which require
       // special block ordering and individual semantics of the 'fall through
       // case'.
       if ((TrapBasedNullChecks || TrapBasedRangeChecks) &&
-          bra->is_Mach() && bra->as_Mach()->is_TrapBasedCheckNode()) {
-        bnext = fixup_trap_based_check(bra, b, i, bnext);
+          branch->is_Mach() && branch->as_Mach()->is_TrapBasedCheckNode()) {
+        bnext = fixup_trap_based_check(branch, block, i, bnext);
       } else {
         // Else, default handling for no-flip branches
-        for (uint j2 = 0; j2 < b->_num_succs; j2++) {
-          const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
+        for (uint j2 = 0; j2 < block->_num_succs; j2++) {
+          const ProjNode* p = block->get_node(branch_idx + j2)->as_Proj();
           if (p->_con == 0) {
             // successor j2 is fall through case
-            if (b->non_connector_successor(j2) != bnext) {
+            if (block->non_connector_successor(j2) != bnext) {
               // but it is not the next block => insert a goto
               insert_goto_at(i, j2);
             }
             // Put taken branch in slot 0
-            if (j2 == 0 && b->_num_succs == 2) {
+            if (j2 == 0 && block->_num_succs == 2) {
               // Flip targets in succs map
-              Block *tbs0 = b->_succs[0];
-              Block *tbs1 = b->_succs[1];
-              b->_succs.map(0, tbs1);
-              b->_succs.map(1, tbs0);
+              Block *tbs0 = block->_succs[0];
+              Block *tbs1 = block->_succs[1];
+              block->_succs.map(0, tbs1);
+              block->_succs.map(1, tbs0);
             }
             break;
           }
         }
       }
+
       // Remove all CatchProjs
-      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+      for (uint j = 0; j < block->_num_succs; j++) {
+        block->pop_node();
+      }
 
-    } else if (b->_num_succs == 1) {
+    } else if (block->_num_succs == 1) {
       // Block ends in a Goto?
       if (bnext == bs0) {
         // We fall into next block; remove the Goto
-        b->_nodes.pop();
+        block->pop_node();
       }
 
-    } else if( b->_num_succs == 2 ) { // Block ends in a If?
+    } else if(block->_num_succs == 2) { // Block ends in a If?
       // Get opcode of 1st projection (matches _succs[0])
       // Note: Since this basic block has 2 exits, the last 2 nodes must
       //       be projections (in any order), the 3rd last node must be
       //       the IfNode (we have excluded other 2-way exits such as
       //       CatchNodes already).
-      MachNode *iff   = b->_nodes[b->_nodes.size()-3]->as_Mach();
-      ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
-      ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+      MachNode* iff   = block->get_node(block->number_of_nodes() - 3)->as_Mach();
+      ProjNode* proj0 = block->get_node(block->number_of_nodes() - 2)->as_Proj();
+      ProjNode* proj1 = block->get_node(block->number_of_nodes() - 1)->as_Proj();
 
       // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
-      assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
-      assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
+      assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
+      assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");
 
-      Block *bs1 = b->non_connector_successor(1);
+      Block* bs1 = block->non_connector_successor(1);
 
       // Check for neither successor block following the current
       // block ending in a conditional. If so, move one of the
       // successors after the current one, provided that the
       // successor was previously unscheduled, but moveable
       // (i.e., all paths to it involve a branch).
-      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
+      if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) {
         // Choose the more common successor based on the probability
         // of the conditional branch.
-        Block *bx = bs0;
-        Block *by = bs1;
+        Block* bx = bs0;
+        Block* by = bs1;
 
         // _prob is the probability of taking the true path. Make
         // p the probability of taking successor #1.
         float p = iff->as_MachIf()->_prob;
-        if( proj0->Opcode() == Op_IfTrue ) {
+        if (proj0->Opcode() == Op_IfTrue) {
           p = 1.0 - p;
         }
 
@@ -908,14 +897,16 @@
       // succs[1].
       if (bnext == bs0) {
         // Fall-thru case in succs[0], so flip targets in succs map
-        Block *tbs0 = b->_succs[0];
-        Block *tbs1 = b->_succs[1];
-        b->_succs.map( 0, tbs1 );
-        b->_succs.map( 1, tbs0 );
+        Block* tbs0 = block->_succs[0];
+        Block* tbs1 = block->_succs[1];
+        block->_succs.map(0, tbs1);
+        block->_succs.map(1, tbs0);
         // Flip projection for each target
-        { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
+        ProjNode* tmp = proj0;
+        proj0 = proj1;
+        proj1 = tmp;
 
-      } else if( bnext != bs1 ) {
+      } else if(bnext != bs1) {
         // Need a double-branch
         // The existing conditional branch need not change.
         // Add a unconditional branch to the false target.
@@ -925,12 +916,12 @@
       }
 
       // Make sure we TRUE branch to the target
-      if( proj0->Opcode() == Op_IfFalse ) {
+      if (proj0->Opcode() == Op_IfFalse) {
         iff->as_MachIf()->negate();
       }
 
-      b->_nodes.pop();          // Remove IfFalse & IfTrue projections
-      b->_nodes.pop();
+      block->pop_node();          // Remove IfFalse & IfTrue projections
+      block->pop_node();
 
     } else {
       // Multi-exit block, e.g. a switch statement
@@ -1009,11 +1000,11 @@
   NOT_PRODUCT(bool foundNode = false;)
 
   // for all blocks
-  for (uint i = 0; i < _num_blocks; i++) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
     Block *b = _blocks[i];
     // For all instructions in the current block.
-    for (uint j = 0; j < b->_nodes.size(); j++) {
-      Node *n = b->_nodes[j];
+    for (uint j = 0; j < b->number_of_nodes(); j++) {
+      Node *n = b->get_node(j);
       if (n->is_Mach() && n->as_Mach()->requires_late_expand()) {
 #ifndef PRODUCT
         if (TraceLateExpand) {
@@ -1116,8 +1107,8 @@
         Node *n2 = NULL;
         for (int k = 0; k < new_nodes.length(); ++k) {
           n2 = new_nodes.at(k);
-          b->_nodes.insert(++index, n2);
-          _bbs.map(n2->_idx, b);
+          b->insert_node(n2, ++index);
+          map_node_to_block(n2, b);
         }
 
         // Add old node n to remove and remove them all from block.
@@ -1158,7 +1149,6 @@
 }
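
The late-expand loop above inserts the freshly generated nodes into the block right after the index of the node being expanded, maps each of them to the block, and later removes the original node. A condensed stand-alone sketch that folds those steps together, assuming hypothetical simplified types and a plain map in place of the arena-backed node-to-block table:

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct Node {};
    struct Block { std::vector<Node*> nodes; };

    // Splice the expansion of nodes[index] in right after it, keep the
    // node-to-block side map consistent, then drop the original node.
    void expand_in_place(Block* b, std::size_t index,
                         const std::vector<Node*>& new_nodes,
                         std::unordered_map<const Node*, Block*>& node_to_block) {
      std::size_t insert_at = index;
      for (Node* n2 : new_nodes) {
        b->nodes.insert(b->nodes.begin() + (++insert_at), n2);   // after the original
        node_to_block[n2] = b;                                   // map new node to its block
      }
      node_to_block.erase(b->nodes[index]);                      // unmap the expanded node...
      b->nodes.erase(b->nodes.begin() + index);                  // ...and remove it from the block
    }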
 
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited  ) const {
   const Node *x = end->is_block_proj();
@@ -1175,57 +1165,58 @@
   } while( !p->is_block_start() );
 
   // Recursively visit
-  for( uint i=1; i<p->req(); i++ )
-    _dump_cfg(p->in(i),visited);
+  for (uint i = 1; i < p->req(); i++) {
+    _dump_cfg(p->in(i), visited);
+  }
 
   // Dump the block
-  _bbs[p->_idx]->dump(&_bbs);
+  get_block_for_node(p)->dump(this);
 }
 
 void PhaseCFG::dump( ) const {
-  tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
-  if( _blocks.size() ) {        // Did we do basic-block layout?
-    for( uint i=0; i<_num_blocks; i++ )
-      _blocks[i]->dump(&_bbs);
+  tty->print("\n--- CFG --- %d BBs\n", number_of_blocks());
+  if (_blocks.size()) {        // Did we do basic-block layout?
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      const Block* block = get_block(i);
+      block->dump(this);
+    }
   } else {                      // Else do it with a DFS
-    VectorSet visited(_bbs._arena);
+    VectorSet visited(_block_arena);
     _dump_cfg(_root,visited);
   }
 }
 
 void PhaseCFG::dump_headers() {
-  for( uint i = 0; i < _num_blocks; i++ ) {
-    if( _blocks[i] == NULL ) continue;
-    _blocks[i]->dump_head(&_bbs);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (block != NULL) {
+      block->dump_head(this);
+    }
   }
 }
 
-void PhaseCFG::verify( ) const {
+void PhaseCFG::verify() const {
 #ifdef ASSERT
   // Verify sane CFG
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    uint cnt = b->_nodes.size();
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    uint cnt = block->number_of_nodes();
     uint j;
     for (j = 0; j < cnt; j++)  {
-      Node *n = b->_nodes[j];
-      assert( _bbs[n->_idx] == b, "" );
-      if (j >= 1 && n->is_Mach() &&
-          n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
-        assert(j == 1 || b->_nodes[j-1]->is_Phi(),
-               "CreateEx must be first instruction in block");
+      Node *n = block->get_node(j);
+      assert(get_block_for_node(n) == block, "");
+      if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
+        assert(j == 1 || block->get_node(j-1)->is_Phi(), "CreateEx must be first instruction in block");
       }
       for (uint k = 0; k < n->req(); k++) {
         Node *def = n->in(k);
         if (def && def != n) {
-          assert(_bbs[def->_idx] || def->is_Con(),
-                 "must have block; constants for debug info ok");
+          assert(get_block_for_node(def) || def->is_Con(), "must have block; constants for debug info ok");
           // Verify that instructions in the block is in correct order.
           // Uses must follow their definition if they are at the same block.
           // Mostly done to check that MachSpillCopy nodes are placed correctly
           // when CreateEx node is moved in build_ifg_physical().
-          if (_bbs[def->_idx] == b &&
-              !(b->head()->is_Loop() && n->is_Phi()) &&
+          if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) &&
               // See (+++) comment in reg_split.cpp
               !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
             bool is_loop = false;
@@ -1237,29 +1228,29 @@
                 }
               }
             }
-            assert(is_loop || b->find_node(def) < j, "uses must follow definitions");
+            assert(is_loop || block->find_node(def) < j, "uses must follow definitions");
           }
         }
       }
     }
 
-    j = b->end_idx();
-    Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
-    assert( bp, "last instruction must be a block proj" );
-    assert( bp == b->_nodes[j], "wrong number of successors for this block" );
+    j = block->end_idx();
+    Node* bp = (Node*)block->get_node(block->number_of_nodes() - 1)->is_block_proj();
+    assert(bp, "last instruction must be a block proj");
+    assert(bp == block->get_node(j), "wrong number of successors for this block");
     if (bp->is_Catch()) {
-      while (b->_nodes[--j]->is_MachProj()) ;
-      assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call");
+      while (block->get_node(--j)->is_MachProj()) {
+        ;
+      }
+      assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call");
     } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
-      assert(b->_num_succs == 2, "Conditional branch must have two targets");
+      assert(block->_num_succs == 2, "Conditional branch must have two targets");
     }
   }
 #endif
 }
 #endif
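
The central ordering invariant verify() enforces is that, apart from Phis and loop-head cases, any input of a node that lives in the same block must appear at a smaller index, in other words uses follow definitions. A simplified stand-alone checker for that property over hypothetical node and block types:

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct Node {
      bool is_phi = false;
      std::vector<const Node*> inputs;
    };
    struct Block { std::vector<const Node*> nodes; };

    // Return true if every same-block input of every non-Phi node appears
    // earlier in the block's node list.
    bool uses_follow_defs(const Block& b) {
      std::unordered_map<const Node*, std::size_t> index_of;
      for (std::size_t j = 0; j < b.nodes.size(); j++) index_of[b.nodes[j]] = j;

      for (std::size_t j = 0; j < b.nodes.size(); j++) {
        const Node* n = b.nodes[j];
        if (n->is_phi) continue;                        // Phis may see later definitions
        for (const Node* def : n->inputs) {
          if (def == nullptr || def == n) continue;
          auto it = index_of.find(def);                 // inputs from other blocks are ignored
          if (it != index_of.end() && it->second >= j) return false;
        }
      }
      return true;
    }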
 
-//=============================================================================
-//------------------------------UnionFind--------------------------------------
 UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
   Copy::zero_to_bytes( _indices, sizeof(uint)*max );
 }
@@ -1284,7 +1275,6 @@
   for( uint i=0; i<max; i++ ) map(i,i);
 }
 
-//------------------------------Find_compress----------------------------------
 // Straight out of Tarjan's union-find algorithm
 uint UnionFind::Find_compress( uint idx ) {
   uint cur  = idx;
@@ -1304,7 +1294,6 @@
   return idx;
 }
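
Find_compress is the standard path-compressing find from Tarjan's union-find structure. For reference, a compact generic version of the same algorithm, independent of the HotSpot UnionFind class (which additionally treats index 0 as a reserved sentinel):

    #include <cstdint>
    #include <numeric>
    #include <vector>

    class SimpleUnionFind {
      std::vector<uint32_t> parent;            // parent[i] == i means i leads its set
    public:
      explicit SimpleUnionFind(uint32_t n) : parent(n) {
        std::iota(parent.begin(), parent.end(), 0u);   // every element starts in its own set
      }
      // Find the leader of idx, compressing the path on the way back.
      uint32_t find(uint32_t idx) {
        uint32_t root = idx;
        while (parent[root] != root) root = parent[root];   // walk up to the leader
        while (parent[idx] != root) {                       // second pass: re-point the chain
          uint32_t next = parent[idx];
          parent[idx] = root;
          idx = next;
        }
        return root;
      }
      // Merge the sets containing a and b.
      void unite(uint32_t a, uint32_t b) { parent[find(a)] = find(b); }
    };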
 
-//------------------------------Find_const-------------------------------------
 // Like Find above, but no path compress, so bad asymptotic behavior
 uint UnionFind::Find_const( uint idx ) const {
   if( idx == 0 ) return idx;    // Ignore the zero idx
@@ -1319,7 +1308,6 @@
   return next;
 }
 
-//------------------------------Union------------------------------------------
 // union 2 sets together.
 void UnionFind::Union( uint idx1, uint idx2 ) {
   uint src = Find(idx1);
@@ -1388,9 +1376,6 @@
 }
 #endif
 
-//=============================================================================
-
-//------------------------------edge_order-------------------------------------
 // Comparison function for edges
 static int edge_order(CFGEdge **e0, CFGEdge **e1) {
   float freq0 = (*e0)->freq();
@@ -1405,7 +1390,6 @@
   return dist1 - dist0;
 }
 
-//------------------------------trace_frequency_order--------------------------
 // Comparison function for edges
 extern "C" int trace_frequency_order(const void *p0, const void *p1) {
   Trace *tr0 = *(Trace **) p0;
@@ -1431,17 +1415,15 @@
   return diff;
 }
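
trace_frequency_order() follows the classic qsort comparator shape: an extern "C" function that receives pointers to the array slots (here Trace*), casts them back, and returns a negative, zero, or positive ordering value; reorder_traces() later feeds it to qsort(new_traces + 1, ...). A small stand-alone example of the same pattern, sorting hypothetical traces by descending frequency only (the real comparator also breaks ties on other fields):

    #include <cstdio>
    #include <cstdlib>

    struct SimpleTrace { double freq; };

    // qsort hands the comparator pointers to the array slots, i.e. SimpleTrace**.
    extern "C" int by_descending_freq(const void* p0, const void* p1) {
      const SimpleTrace* t0 = *(SimpleTrace* const*) p0;
      const SimpleTrace* t1 = *(SimpleTrace* const*) p1;
      if (t0->freq < t1->freq) return  1;   // lower frequency sorts later
      if (t0->freq > t1->freq) return -1;
      return 0;
    }

    int main() {
      SimpleTrace a{0.5}, b{2.0}, c{1.0};
      SimpleTrace* traces[] = { &a, &b, &c };
      std::qsort(traces, 3, sizeof(traces[0]), by_descending_freq);
      for (SimpleTrace* t : traces) std::printf("%.1f\n", t->freq);   // 2.0 1.0 0.5
      return 0;
    }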
 
-//------------------------------find_edges-------------------------------------
 // Find edges of interest, i.e, those which can fall through. Presumes that
 // edges which don't fall through are of low frequency and can be generally
 // ignored.  Initialize the list of traces.
-void PhaseBlockLayout::find_edges()
-{
+void PhaseBlockLayout::find_edges() {
   // Walk the blocks, creating edges and Traces
   uint i;
   Trace *tr = NULL;
-  for (i = 0; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* b = _cfg.get_block(i);
     tr = new Trace(b, next, prev);
     traces[tr->id()] = tr;
 
@@ -1465,7 +1447,7 @@
       if (n->num_preds() != 1) break;
 
       i++;
-      assert(n = _cfg._blocks[i], "expecting next block");
+      assert(n = _cfg.get_block(i), "expecting next block");
       tr->append(n);
       uf->map(n->_pre_order, tr->id());
       traces[n->_pre_order] = NULL;
@@ -1489,8 +1471,8 @@
   }
 
   // Group connector blocks into one trace
-  for (i++; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i++; i < _cfg.number_of_blocks(); i++) {
+    Block *b = _cfg.get_block(i);
     assert(b->is_connector(), "connector blocks at the end");
     tr->append(b);
     uf->map(b->_pre_order, tr->id());
@@ -1498,10 +1480,8 @@
   }
 }
 
-//------------------------------union_traces----------------------------------
 // Union two traces together in uf, and null out the trace in the list
-void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
-{
+void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) {
   uint old_id = old_trace->id();
   uint updated_id = updated_trace->id();
 
@@ -1525,10 +1505,8 @@
   traces[hi_id] = NULL;
 }
 
-//------------------------------grow_traces-------------------------------------
 // Append traces together via the most frequently executed edges
-void PhaseBlockLayout::grow_traces()
-{
+void PhaseBlockLayout::grow_traces() {
   // Order the edges, and drive the growth of Traces via the most
   // frequently executed edges.
   edges->sort(edge_order);
@@ -1570,11 +1548,9 @@
   }
 }
 
-//------------------------------merge_traces-----------------------------------
 // Embed one trace into another, if the fork or join points are sufficiently
 // balanced.
-void PhaseBlockLayout::merge_traces(bool fall_thru_only)
-{
+void PhaseBlockLayout::merge_traces(bool fall_thru_only) {
   // Walk the edge list a another time, looking at unprocessed edges.
   // Fold in diamonds
   for (int i = 0; i < edges->length(); i++) {
@@ -1628,7 +1604,7 @@
         src_trace->insert_after(src_block, targ_trace);
         union_traces(src_trace, targ_trace);
       } else if (src_at_tail) {
-        if (src_trace != trace(_cfg._broot)) {
+        if (src_trace != trace(_cfg.get_root_block())) {
           e->set_state(CFGEdge::connected);
           targ_trace->insert_before(targ_block, src_trace);
           union_traces(targ_trace, src_trace);
@@ -1637,7 +1613,7 @@
     } else if (e->state() == CFGEdge::open) {
       // Append traces, even without a fall-thru connection.
       // But leave root entry at the beginning of the block list.
-      if (targ_trace != trace(_cfg._broot)) {
+      if (targ_trace != trace(_cfg.get_root_block())) {
         e->set_state(CFGEdge::connected);
         src_trace->append(targ_trace);
         union_traces(src_trace, targ_trace);
@@ -1646,11 +1622,9 @@
   }
 }
 
-//----------------------------reorder_traces-----------------------------------
 // Order the sequence of the traces in some desirable way, and fixup the
 // jumps at the end of each block.
-void PhaseBlockLayout::reorder_traces(int count)
-{
+void PhaseBlockLayout::reorder_traces(int count) {
   ResourceArea *area = Thread::current()->resource_area();
   Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
   Block_List worklist;
@@ -1665,15 +1639,14 @@
   }
 
   // The entry block should be first on the new trace list.
-  Trace *tr = trace(_cfg._broot);
+  Trace *tr = trace(_cfg.get_root_block());
   assert(tr == new_traces[0], "entry trace misplaced");
 
   // Sort the new trace list by frequency
   qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);
 
   // Patch up the successor blocks
-  _cfg._blocks.reset();
-  _cfg._num_blocks = 0;
+  _cfg.clear_blocks();
   for (int i = 0; i < new_count; i++) {
     Trace *tr = new_traces[i];
     if (tr != NULL) {
@@ -1682,17 +1655,15 @@
   }
 }
 
-//------------------------------PhaseBlockLayout-------------------------------
 // Order basic blocks based on frequency
-PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
-  Phase(BlockLayout),
-  _cfg(cfg)
-{
+PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg)
+: Phase(BlockLayout)
+, _cfg(cfg) {
   ResourceMark rm;
   ResourceArea *area = Thread::current()->resource_area();
 
   // List of traces
-  int size = _cfg._num_blocks + 1;
+  int size = _cfg.number_of_blocks() + 1;
   traces = NEW_ARENA_ARRAY(area, Trace *, size);
   memset(traces, 0, size*sizeof(Trace*));
   next = NEW_ARENA_ARRAY(area, Block *, size);
@@ -1725,11 +1696,10 @@
   // Re-order all the remaining traces by frequency
   reorder_traces(size);
 
-  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
+  assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink");
 }
 
 
-//------------------------------backedge---------------------------------------
 // Edge e completes a loop in a trace. If the target block is head of the
 // loop, rotate the loop block so that the loop ends in a conditional branch.
 bool Trace::backedge(CFGEdge *e) {
@@ -1781,14 +1751,12 @@
   return loop_rotated;
 }
 
-//------------------------------fixup_blocks-----------------------------------
 // push blocks onto the CFG list
 // ensure that blocks have the correct two-way branch sense
 void Trace::fixup_blocks(PhaseCFG &cfg) {
   Block *last = last_block();
   for (Block *b = first_block(); b != NULL; b = next(b)) {
-    cfg._blocks.push(b);
-    cfg._num_blocks++;
+    cfg.add_block(b);
     if (!b->is_connector()) {
       int nfallthru = b->num_fall_throughs();
       if (b != last) {
@@ -1797,9 +1765,9 @@
           Block *bnext = next(b);
           Block *bs0 = b->non_connector_successor(0);
 
-          MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
-          ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
-          ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+          MachNode *iff = b->get_node(b->number_of_nodes() - 3)->as_Mach();
+          ProjNode *proj0 = b->get_node(b->number_of_nodes() - 2)->as_Proj();
+          ProjNode *proj1 = b->get_node(b->number_of_nodes() - 1)->as_Proj();
 
           if (bnext == bs0) {
             // Fall-thru case in succs[0], should be in succs[1]
@@ -1811,8 +1779,8 @@
             b->_succs.map( 1, tbs0 );
 
             // Flip projections to match targets
-            b->_nodes.map(b->_nodes.size()-2, proj1);
-            b->_nodes.map(b->_nodes.size()-1, proj0);
+            b->map_node(proj1, b->number_of_nodes() - 2);
+            b->map_node(proj0, b->number_of_nodes() - 1);
           }
         }
       }
--- a/src/share/vm/opto/block.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/block.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -48,13 +48,12 @@
   friend class VMStructs;
   uint _size;                   // allocated size, as opposed to formal limit
   debug_only(uint _limit;)      // limit to formal domain
+  Arena *_arena;                // Arena to allocate in
 protected:
   Block **_blocks;
   void grow( uint i );          // Grow array node to fit
 
 public:
-  Arena *_arena;                // Arena to allocate in
-
   Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) {
     debug_only(_limit=0);
     _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
@@ -77,7 +76,7 @@
 public:
   uint _cnt;
   Block_List() : Block_Array(Thread::current()->resource_area()), _cnt(0) {}
-  void push( Block *b ) { map(_cnt++,b); }
+  void push( Block *b ) {  map(_cnt++,b); }
   Block *pop() { return _blocks[--_cnt]; }
   Block *rpop() { Block *b = _blocks[0]; _blocks[0]=_blocks[--_cnt]; return b;}
   void remove( uint i );
@@ -106,15 +105,53 @@
 // any optimization pass.  They are created late in the game.
 class Block : public CFGElement {
   friend class VMStructs;
- public:
+
+private:
   // Nodes in this block, in order
   Node_List _nodes;
 
+public:
+
+  // Get the node at index 'at_index', if 'at_index' is out of bounds return NULL
+  Node* get_node(uint at_index) const {
+    return _nodes[at_index];
+  }
+
+  // Get the number of nodes in this block
+  uint number_of_nodes() const {
+    return _nodes.size();
+  }
+
+  // Map a node 'node' to index 'to_index' in the block, if the index is out of bounds the size of the node list is increased
+  void map_node(Node* node, uint to_index) {
+    _nodes.map(to_index, node);
+  }
+
+  // Insert a node 'node' at index 'at_index', moving all nodes at a higher index up by one step; if 'at_index' is out of bounds we crash
+  void insert_node(Node* node, uint at_index) {
+    _nodes.insert(at_index, node);
+  }
+
+  // Remove a node at index 'at_index'
+  void remove_node(uint at_index) {
+    _nodes.remove(at_index);
+  }
+
+  // Push a node 'node' onto the node list
+  void push_node(Node* node) {
+    _nodes.push(node);
+  }
+
+  // Pop the last node off the node list
+  Node* pop_node() {
+    return _nodes.pop();
+  }
+
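
These accessors are the core of the refactoring: _nodes becomes private and every caller goes through a named operation instead of poking the list directly. A rough stand-alone model of the same interface over a std::vector (the real Node_List is arena allocated and its map() grows the list on out-of-range indices, which this sketch only mimics loosely):

    #include <cstddef>
    #include <vector>

    struct Node {};   // stand-in for the C2 Node

    class SimpleBlock {
      std::vector<Node*> _nodes;                       // nodes in schedule order
    public:
      Node* get_node(std::size_t at_index) const { return _nodes[at_index]; }
      std::size_t number_of_nodes() const        { return _nodes.size(); }

      // Overwrite the slot at to_index, growing the list if needed.
      void map_node(Node* node, std::size_t to_index) {
        if (to_index >= _nodes.size()) _nodes.resize(to_index + 1, nullptr);
        _nodes[to_index] = node;
      }
      void insert_node(Node* node, std::size_t at_index) {
        _nodes.insert(_nodes.begin() + at_index, node);
      }
      void remove_node(std::size_t at_index) { _nodes.erase(_nodes.begin() + at_index); }
      void push_node(Node* node)             { _nodes.push_back(node); }
      Node* pop_node() {                               // remove and return the last node
        Node* last = _nodes.back();
        _nodes.pop_back();
        return last;
      }
    };

With this shape, the callers changed in block.cpp read naturally: dropping trailing projections becomes block->pop_node(), and overwriting a branch with a goto becomes b->map_node(gto, end_idx).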
   // Basic blocks have a Node which defines Control for all Nodes pinned in
   // this block.  This Node is a RegionNode.  Exception-causing Nodes
   // (division, subroutines) and Phi functions are always pinned.  Later,
   // every Node will get pinned to some block.
-  Node *head() const { return _nodes[0]; }
+  Node *head() const { return get_node(0); }
 
   // CAUTION: num_preds() is ONE based, so that predecessor numbers match
   // input edges to Regions and Phis.
@@ -275,7 +312,7 @@
 
   // Add an instruction to an existing block.  It must go after the head
   // instruction and before the end instruction.
-  void add_inst( Node *n ) { _nodes.insert(end_idx(),n); }
+  void add_inst( Node *n ) { insert_node(n, end_idx()); }
   // Find node in block. Fails if node not in block.
   uint find_node( const Node *n ) const;
   // Find and remove n from block list
@@ -283,23 +320,6 @@
   // Check wether the node is in the block.
   bool contains( const Node *n ) const;
 
-  // helper function that adds caller save registers to MachProjNode
-  void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
-  // Schedule a call next in the block
-  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);
-
-  // Perform basic-block local scheduling
-  Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot);
-  void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
-  void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
-  bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
-  // Cleanup if any code lands between a Call and his Catch
-  void call_catch_cleanup(Block_Array &bbs, Compile *C);
-  // Detect implicit-null-check opportunities.  Basically, find NULL checks
-  // with suitable memory ops nearby.  Use the memory op to do the NULL check.
-  // I can generate a memory op if there is not one nearby.
-  void implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons);
-
   // Return the empty status of a block
   enum { not_empty, empty_with_goto, completely_empty };
   int is_Empty() const;
@@ -331,17 +351,13 @@
   // Examine block's code shape to predict if it is not commonly executed.
   bool has_uncommon_code() const;
 
-  // Use frequency calculations and code shape to predict if the block
-  // is uncommon.
-  bool is_uncommon( Block_Array &bbs ) const;
-
 #ifndef PRODUCT
   // Debugging print of basic block
   void dump_bidx(const Block* orig, outputStream* st = tty) const;
-  void dump_pred(const Block_Array *bbs, Block* orig, outputStream* st = tty) const;
-  void dump_head( const Block_Array *bbs, outputStream* st = tty ) const;
+  void dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st = tty) const;
+  void dump_head(const PhaseCFG* cfg, outputStream* st = tty) const;
   void dump() const;
-  void dump( const Block_Array *bbs ) const;
+  void dump(const PhaseCFG* cfg) const;
 #endif
 };
 
@@ -351,14 +367,95 @@
 class PhaseCFG : public Phase {
   friend class VMStructs;
  private:
+
+  // Root of whole program
+  RootNode* _root;
+
+  // The block containing the root node
+  Block* _root_block;
+
+  // List of basic blocks that are created during CFG creation
+  Block_List _blocks;
+
+  // Count of basic blocks
+  uint _number_of_blocks;
+
+  // Arena for the blocks to be stored in
+  Arena* _block_arena;
+
+  // The matcher for this compilation
+  Matcher& _matcher;
+
+  // Map nodes to owning basic block
+  Block_Array _node_to_block_mapping;
+
+  // Loop from the root
+  CFGLoop* _root_loop;
+
+  // Outmost loop frequency
+  float _outer_loop_frequency;
+
   // Build a proper looking cfg.  Return count of basic blocks
   uint build_cfg();
 
-  // Perform DFS search.
+  // Build the dominator tree so that we know where we can move instructions
+  void build_dominator_tree();
+
+  // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
+  void estimate_block_frequency();
+
+  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
+  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
+  // Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
+  void global_code_motion();
+
+  // Schedule Nodes early in their basic blocks.
+  bool schedule_early(VectorSet &visited, Node_List &roots);
+
+  // For each node, find the latest block it can be scheduled into
+  // and then select the cheapest block between the latest and earliest
+  // block to place the node.
+  void schedule_late(VectorSet &visited, Node_List &stack);
+
+  // Compute the (backwards) latency of a node from a single use
+  int latency_from_use(Node *n, const Node *def, Node *use);
+
+  // Compute the (backwards) latency of a node from the uses of this instruction
+  void partial_latency_of_defs(Node *n);
+
+  // Compute the instruction global latency with a backwards walk
+  void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
+
+  // Pick a block between early and late that is a cheaper alternative
+  // to late. Helper for schedule_late.
+  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+  bool schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call);
+  void set_next_call(Block* block, Node* n, VectorSet& next_call);
+  void needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call);
+
+  // Perform basic-block local scheduling
+  Node* select(Block* block, Node_List& worklist, GrowableArray<int>& ready_cnt, VectorSet& next_call, uint sched_slot);
+
+  // Schedule a call next in the block
+  uint sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call);
+
+  // Cleanup if any code lands between a Call and his Catch
+  void call_catch_cleanup(Block* block);
+
+  Node* catch_cleanup_find_cloned_def(Block* use_blk, Node* def, Block* def_blk, int n_clone_idx);
+  void  catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx);
+
+  // Detect implicit-null-check opportunities.  Basically, find NULL checks
+  // with suitable memory ops nearby.  Use the memory op to do the NULL check.
+  // I can generate a memory op if there is not one nearby.
+  void implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons);
+
+  // Perform a Depth First Search (DFS).
   // Setup 'vertex' as DFS to vertex mapping.
   // Setup 'semi' as vertex to DFS mapping.
   // Set 'parent' to DFS parent.
-  uint DFS( Tarjan *tarjan );
+  uint do_DFS(Tarjan* tarjan, uint rpo_counter);
 
   // Helper function to insert a node into a block
   void schedule_node_into_block( Node *n, Block *b );
@@ -369,80 +466,18 @@
   void schedule_pinned_nodes( VectorSet &visited );
 
   // I'll need a few machine-specific GotoNodes.  Clone from this one.
-  MachNode *_goto;
+  // Used when building the CFG and creating end nodes for blocks.
+  MachNode* _goto;
 
   Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
   void verify_anti_dependences(Block* LCA, Node* load) {
-    assert(LCA == _bbs[load->_idx], "should already be scheduled");
+    assert(LCA == get_block_for_node(load), "should already be scheduled");
     insert_anti_dependences(LCA, load, true);
   }
 
- public:
-  PhaseCFG( Arena *a, RootNode *r, Matcher &m );
-
-  uint _num_blocks;             // Count of basic blocks
-  Block_List _blocks;           // List of basic blocks
-  RootNode *_root;              // Root of whole program
-  Block_Array _bbs;             // Map Nodes to owning Basic Block
-  Block *_broot;                // Basic block of root
-  uint _rpo_ctr;
-  CFGLoop* _root_loop;
-  float _outer_loop_freq;       // Outmost loop frequency
-
-  // Per node latency estimation, valid only during GCM
-  GrowableArray<uint> *_node_latency;
-
-#ifndef PRODUCT
-  bool _trace_opto_pipelining;  // tracing flag
-#endif
-
-#ifdef ASSERT
-  Unique_Node_List _raw_oops;
-#endif
-
-  // Build dominators
-  void Dominators();
-
-  // Estimate block frequencies based on IfNode probabilities
-  void Estimate_Block_Frequency();
-
-  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
-  // basic blocks; i.e. _bbs now maps _idx for all Nodes to some Block.
-  void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
-
-  // Compute the (backwards) latency of a node from the uses
-  void latency_from_uses(Node *n);
-
-  // Compute the (backwards) latency of a node from a single use
-  int latency_from_use(Node *n, const Node *def, Node *use);
-
-  // Compute the (backwards) latency of a node from the uses of this instruction
-  void partial_latency_of_defs(Node *n);
-
-  // Schedule Nodes early in their basic blocks.
-  bool schedule_early(VectorSet &visited, Node_List &roots);
-
-  // For each node, find the latest block it can be scheduled into
-  // and then select the cheapest block between the latest and earliest
-  // block to place the node.
-  void schedule_late(VectorSet &visited, Node_List &stack);
-
-  // Pick a block between early and late that is a cheaper alternative
-  // to late. Helper for schedule_late.
-  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
-
-  // Compute the instruction global latency with a backwards walk
-  void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
-
-  // Set loop alignment
-  void set_loop_alignment();
-
-  // Remove empty basic blocks
-  void remove_empty();
-  Block *fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext);
-  void fixup_flow();
   bool move_to_next(Block* bx, uint b_index);
   void move_to_end(Block* bx, uint b_index);
+
   void insert_goto_at(uint block_no, uint succ_no);
 
   // Check for NeverBranch at block end.  This needs to become a GOTO to the
@@ -454,10 +489,114 @@
 
   CFGLoop* create_loop_tree();
 
-  // Insert a node into a block, and update the _bbs
-  void insert( Block *b, uint idx, Node *n ) {
-    b->_nodes.insert( idx, n );
-    _bbs.map( n->_idx, b );
+ public:
+  PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);
+
+  void set_latency_for_node(Node* node, int latency) {
+    _node_latency->at_put_grow(node->_idx, latency);
+  }
+
+  uint get_latency_for_node(Node* node) {
+    return _node_latency->at_grow(node->_idx);
+  }
+
+  // Get the outer most frequency
+  float get_outer_loop_frequency() const {
+    return _outer_loop_frequency;
+  }
+
+  // Get the root node of the CFG
+  RootNode* get_root_node() const {
+    return _root;
+  }
+
+  // Get the block of the root node
+  Block* get_root_block() const {
+    return _root_block;
+  }
+
+  // Add a block at a position and moves the later ones one step
+  void add_block_at(uint pos, Block* block) {
+    _blocks.insert(pos, block);
+    _number_of_blocks++;
+  }
+
+  // Adds a block to the top of the block list
+  void add_block(Block* block) {
+    _blocks.push(block);
+    _number_of_blocks++;
+  }
+
+  // Clear the list of blocks
+  void clear_blocks() {
+    _blocks.reset();
+    _number_of_blocks = 0;
+  }
+
+  // Get the block at position pos in _blocks
+  Block* get_block(uint pos) const {
+    return _blocks[pos];
+  }
+
+  // Number of blocks
+  uint number_of_blocks() const {
+    return _number_of_blocks;
+  }
+
+  // set which block this node should reside in
+  void map_node_to_block(const Node* node, Block* block) {
+    _node_to_block_mapping.map(node->_idx, block);
+  }
+
+  // removes the mapping from a node to a block
+  void unmap_node_from_block(const Node* node) {
+    _node_to_block_mapping.map(node->_idx, NULL);
+  }
+
+  // get the block in which this node resides
+  Block* get_block_for_node(const Node* node) const {
+    return _node_to_block_mapping[node->_idx];
+  }
+
+  // does this node reside in a block; return true if it does
+  bool has_block(const Node* node) const {
+    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
+  }
+
+  // Per node latency estimation, valid only during GCM
+  GrowableArray<uint> *_node_latency;
+
+#ifndef PRODUCT
+  bool _trace_opto_pipelining;  // tracing flag
+#endif
+
+  // Use frequency calculations and code shape to predict if the block
+  // is uncommon.
+  bool is_uncommon(const Block* block);
+
+#ifdef ASSERT
+  Unique_Node_List _raw_oops;
+#endif
+
+  // Do global code motion by first building dominator tree and estimate block frequency
+  // Returns true on success
+  bool do_global_code_motion();
+
+  // Compute the (backwards) latency of a node from the uses
+  void latency_from_uses(Node *n);
+
+  // Set loop alignment
+  void set_loop_alignment();
+
+  // Remove empty basic blocks
+  void remove_empty_blocks();
+  Block *fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext);
+  void fixup_flow();
+
+  // Insert a node into a block at index and map the node to the block
+  void insert(Block *b, uint idx, Node *n) {
+    b->insert_node(n , idx);
+    map_node_to_block(n, b);
   }
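
Together with the node accessors on Block, the block-list accessors declared above replace the old public _blocks/_num_blocks pair, so passes now iterate with number_of_blocks() and get_block(i) and mutate through add_block(), add_block_at() and clear_blocks(). A hedged stand-alone sketch of a pass written against that kind of interface, with simplified stand-in types rather than the real PhaseCFG:

    #include <cstddef>
    #include <vector>

    struct Block { bool connector = false; };

    class SimpleCFG {
      std::vector<Block*> _blocks;     // replaces the public _blocks / _num_blocks pair
    public:
      void add_block(Block* b)                     { _blocks.push_back(b); }
      void add_block_at(std::size_t pos, Block* b) { _blocks.insert(_blocks.begin() + pos, b); }
      void clear_blocks()                          { _blocks.clear(); }
      Block* get_block(std::size_t pos) const      { return _blocks[pos]; }
      std::size_t number_of_blocks() const         { return _blocks.size(); }
    };

    // A traversal in the style of fixup_flow(): walk every block through the accessors.
    std::size_t count_connectors(const SimpleCFG& cfg) {
      std::size_t n = 0;
      for (std::size_t i = 0; i < cfg.number_of_blocks(); i++) {
        if (cfg.get_block(i)->connector) n++;
      }
      return n;
    }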
 
   // Check all nodes and late expand them if necessary.
@@ -549,7 +688,7 @@
     _child(NULL),
     _exit_prob(1.0f) {}
   CFGLoop* parent() { return _parent; }
-  void push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk);
+  void push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg);
   void add_member(CFGElement *s) { _members.push(s); }
   void add_nested_loop(CFGLoop* cl);
   Block* head() {
--- a/src/share/vm/opto/buildOopMap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/buildOopMap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -35,6 +35,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmreg_sparc.inline.hpp"
 #endif
@@ -87,7 +90,6 @@
 // OptoReg::Bad for not-callee-saved.
 
 
-//------------------------------OopFlow----------------------------------------
 // Structure to pass around
 struct OopFlow : public ResourceObj {
   short *_callees;              // Array mapping register to callee-saved
@@ -119,12 +121,11 @@
   OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
 };
 
-//------------------------------compute_reach----------------------------------
 // Given reaching-defs for this block start, compute it for this block end
 void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
 
-  for( uint i=0; i<_b->_nodes.size(); i++ ) {
-    Node *n = _b->_nodes[i];
+  for( uint i=0; i<_b->number_of_nodes(); i++ ) {
+    Node *n = _b->get_node(i);
 
     if( n->jvms() ) {           // Build an OopMap here?
       JVMState *jvms = n->jvms();
@@ -177,7 +178,6 @@
   }
 }
 
-//------------------------------merge------------------------------------------
 // Merge the given flow into the 'this' flow
 void OopFlow::merge( OopFlow *flow, int max_reg ) {
   assert( _b == NULL, "merging into a happy flow" );
@@ -197,14 +197,12 @@
 
 }
 
-//------------------------------clone------------------------------------------
 void OopFlow::clone( OopFlow *flow, int max_size ) {
   _b = flow->_b;
   memcpy( _callees, flow->_callees, sizeof(short)*max_size);
   memcpy( _defs   , flow->_defs   , sizeof(Node*)*max_size);
 }
 
-//------------------------------make-------------------------------------------
 OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
   short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
   Node **defs    = NEW_ARENA_ARRAY(A,Node*,max_size+1);
@@ -215,7 +213,6 @@
   return flow;
 }
 
-//------------------------------bit twiddlers----------------------------------
 static int get_live_bit( int *live, int reg ) {
   return live[reg>>LogBitsPerInt] &   (1<<(reg&(BitsPerInt-1))); }
 static void set_live_bit( int *live, int reg ) {
@@ -223,7 +220,6 @@
 static void clr_live_bit( int *live, int reg ) {
          live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
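
The three helpers above treat an int array as a flat bitmap over register numbers: word index reg >> LogBitsPerInt, bit index reg & (BitsPerInt-1). The same bit twiddling in a stand-alone form, written with explicit 32-bit words instead of the HotSpot BitsPerInt constants:

    #include <cstdint>

    // Flat bitmap over register numbers, one bit per register, 32 bits per word.
    static inline bool get_live_bit(const uint32_t* live, int reg) {
      return (live[reg >> 5] >> (reg & 31)) & 1u;
    }
    static inline void set_live_bit(uint32_t* live, int reg) {
      live[reg >> 5] |= (1u << (reg & 31));
    }
    static inline void clr_live_bit(uint32_t* live, int reg) {
      live[reg >> 5] &= ~(1u << (reg & 31));
    }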
 
-//------------------------------build_oop_map----------------------------------
 // Build an oopmap from the current flow info
 OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
   int framesize = regalloc->_framesize;
@@ -412,28 +408,29 @@
   return omap;
 }
 
-//------------------------------do_liveness------------------------------------
 // Compute backwards liveness on registers
-static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
-  int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
-  int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
-  Node *root = cfg->C->root();
+static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) {
+  int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints);
+  int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints];
+  Node* root = cfg->get_root_node();
   // On CISC platforms, get the node representing the stack pointer  that regalloc
   // used for spills
   Node *fp = NodeSentinel;
   if (UseCISCSpill && root->req() > 1) {
     fp = root->in(1)->in(TypeFunc::FramePtr);
   }
-  memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
+  memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt));
   // Push preds onto worklist
-  for( uint i=1; i<root->req(); i++ )
-    worklist->push(cfg->_bbs[root->in(i)->_idx]);
+  for (uint i = 1; i < root->req(); i++) {
+    Block* block = cfg->get_block_for_node(root->in(i));
+    worklist->push(block);
+  }
 
   // ZKM.jar includes tiny infinite loops which are unreached from below.
   // If we missed any blocks, we'll retry here after pushing all missed
   // blocks on the worklist.  Normally this outer loop never trips more
   // than once.
-  while( 1 ) {
+  while (1) {
 
     while( worklist->size() ) { // Standard worklist algorithm
       Block *b = worklist->rpop();
@@ -453,8 +450,8 @@
       }
 
       // Now walk tmp_live up the block backwards, computing live
-      for( int k=b->_nodes.size()-1; k>=0; k-- ) {
-        Node *n = b->_nodes[k];
+      for( int k=b->number_of_nodes()-1; k>=0; k-- ) {
+        Node *n = b->get_node(k);
         // KILL def'd bits
         int first = regalloc->get_reg_first(n);
         int second = regalloc->get_reg_second(n);
@@ -537,37 +534,42 @@
         for( l=0; l<max_reg_ints; l++ )
           old_live[l] = tmp_live[l];
         // Push preds onto worklist
-        for( l=1; l<(int)b->num_preds(); l++ )
-          worklist->push(cfg->_bbs[b->pred(l)->_idx]);
+        for (l = 1; l < (int)b->num_preds(); l++) {
+          Block* block = cfg->get_block_for_node(b->pred(l));
+          worklist->push(block);
+        }
       }
     }
 
     // Scan for any missing safepoints.  Happens to infinite loops
     // ala ZKM.jar
     uint i;
-    for( i=1; i<cfg->_num_blocks; i++ ) {
-      Block *b = cfg->_blocks[i];
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      Block* block = cfg->get_block(i);
       uint j;
-      for( j=1; j<b->_nodes.size(); j++ )
-        if( b->_nodes[j]->jvms() &&
-            (*safehash)[b->_nodes[j]] == NULL )
+      for (j = 1; j < block->number_of_nodes(); j++) {
+        if (block->get_node(j)->jvms() && (*safehash)[block->get_node(j)] == NULL) {
            break;
-      if( j<b->_nodes.size() ) break;
+        }
+      }
+      if (j < block->number_of_nodes()) {
+        break;
+      }
     }
-    if( i == cfg->_num_blocks )
+    if (i == cfg->number_of_blocks()) {
       break;                    // Got 'em all
+    }
 #ifndef PRODUCT
     if( PrintOpto && Verbose )
       tty->print_cr("retripping live calc");
 #endif
     // Force the issue (expensively): recheck everybody
-    for( i=1; i<cfg->_num_blocks; i++ )
-      worklist->push(cfg->_blocks[i]);
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      worklist->push(cfg->get_block(i));
+    }
   }
-
 }
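
do_liveness() is a standard backwards dataflow fixpoint: a block's live-out set is the union of its successors' live-in sets, the block is walked bottom-up killing definitions and adding uses, and the block's predecessors are re-queued whenever its live-in set changes. A compact generic sketch of that worklist loop, assuming a toy block type and std::set in place of the packed register bitmaps:

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Instr { std::set<int> defs, uses; };
    struct BB {
      std::vector<Instr> code;
      std::vector<BB*> preds, succs;
      std::set<int> live_in;                   // fixpoint result per block
    };

    // Backwards liveness over a small CFG; registers are plain ints here.
    void compute_liveness(std::vector<BB*>& blocks) {
      std::vector<BB*> worklist(blocks.begin(), blocks.end());
      while (!worklist.empty()) {
        BB* b = worklist.back();
        worklist.pop_back();

        std::set<int> live;                    // union of successor live-in sets
        for (BB* s : b->succs) live.insert(s->live_in.begin(), s->live_in.end());

        for (std::size_t i = b->code.size(); i-- > 0; ) {   // walk the block backwards
          for (int d : b->code[i].defs) live.erase(d);      // KILL defined registers
          for (int u : b->code[i].uses) live.insert(u);     // GEN used registers
        }

        if (live != b->live_in) {              // changed: predecessors must be revisited
          b->live_in = std::move(live);
          for (BB* p : b->preds) worklist.push_back(p);
        }
      }
    }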
 
-//------------------------------BuildOopMaps-----------------------------------
 // Collect GC mask info - where are all the OOPs?
 void Compile::BuildOopMaps() {
   NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
@@ -588,12 +590,12 @@
   OopFlow *free_list = NULL;    // Free, unused
 
   // Array mapping blocks to completed oopflows
-  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
-  memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
+  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks());
+  memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) );
 
 
   // Do the first block 'by hand' to prime the worklist
-  Block *entry = _cfg->_blocks[1];
+  Block *entry = _cfg->get_block(1);
   OopFlow *rootflow = OopFlow::make(A,max_reg,this);
   // Initialize to 'bottom' (not 'top')
   memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
@@ -619,7 +621,9 @@
 
     Block *b = worklist.pop();
     // Ignore root block
-    if( b == _cfg->_broot ) continue;
+    if (b == _cfg->get_root_block()) {
+      continue;
+    }
     // Block is already done?  Happens if block has several predecessors,
     // he can get on the worklist more than once.
     if( flows[b->_pre_order] ) continue;
@@ -629,10 +633,9 @@
     // pred to this block.  Otherwise we have to grab a new OopFlow.
     OopFlow *flow = NULL;       // Flag for finding optimized flow
     Block *pred = (Block*)0xdeadbeef;
-    uint j;
     // Scan this block's preds to find a done predecessor
-    for( j=1; j<b->num_preds(); j++ ) {
-      Block *p = _cfg->_bbs[b->pred(j)->_idx];
+    for (uint j = 1; j < b->num_preds(); j++) {
+      Block* p = _cfg->get_block_for_node(b->pred(j));
       OopFlow *p_flow = flows[p->_pre_order];
       if( p_flow ) {            // Predecessor is done
         assert( p_flow->_b == p, "cross check" );
--- a/src/share/vm/opto/bytecodeInfo.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -84,16 +84,35 @@
   assert(!UseOldInlining, "do not use for old stuff");
 }
 
+/**
+ *  Return true when EA is ON and a java constructor is called or
+ *  a super constructor is called from an inlined java constructor.
+ *  Also return true for boxing methods.
+ */
 static bool is_init_with_ea(ciMethod* callee_method,
                             ciMethod* caller_method, Compile* C) {
-  // True when EA is ON and a java constructor is called or
-  // a super constructor is called from an inlined java constructor.
-  return C->do_escape_analysis() && EliminateAllocations &&
-         ( callee_method->is_initializer() ||
-           (caller_method->is_initializer() &&
-            caller_method != C->method() &&
-            caller_method->holder()->is_subclass_of(callee_method->holder()))
-         );
+  if (!C->do_escape_analysis() || !EliminateAllocations) {
+    return false; // EA is off
+  }
+  if (callee_method->is_initializer()) {
+    return true; // constructor
+  }
+  if (caller_method->is_initializer() &&
+      caller_method != C->method() &&
+      caller_method->holder()->is_subclass_of(callee_method->holder())) {
+    return true; // super constructor is called from inlined constructor
+  }
+  if (C->eliminate_boxing() && callee_method->is_boxing_method()) {
+    return true;
+  }
+  return false;
+}
+
+/**
+ *  Force inlining of unboxing accessors.
+ */
+static bool is_unboxing_method(ciMethod* callee_method, Compile* C) {
+  return C->eliminate_boxing() && callee_method->is_unboxing_method();
 }
 
 // positive filter: should callee be inlined?
@@ -143,6 +162,7 @@
   // bump the max size if the call is frequent
   if ((freq >= InlineFrequencyRatio) ||
       (call_site_count >= InlineFrequencyCount) ||
+      is_unboxing_method(callee_method, C) ||
       is_init_with_ea(callee_method, caller_method, C)) {
 
     max_inline_size = C->freq_inline_size();
@@ -238,6 +258,11 @@
 
   // Now perform checks which are heuristic
 
+  if (is_unboxing_method(callee_method, C)) {
+    // Inline unboxing methods.
+    return false;
+  }
+
   if (!callee_method->force_inline()) {
     if (callee_method->has_compiled_code() &&
         callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
@@ -288,9 +313,8 @@
     }
 
     if (is_init_with_ea(callee_method, caller_method, C)) {
-
       // Escape Analysis: inline all executed constructors
-
+      return false;
     } else if (!callee_method->was_executed_more_than(MIN2(MinInliningThreshold,
                                                            CompileThreshold >> 1))) {
       set_msg("executed < MinInliningThreshold times");
--- a/src/share/vm/opto/c2_globals.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/c2_globals.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c2_globals_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c2_globals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c2_globals_sparc.hpp"
 #endif
@@ -414,10 +417,10 @@
   develop(intx, WarmCallMaxSize, 999999,                                    \
           "size of the largest inlinable method")                           \
                                                                             \
-  product(intx, MaxNodeLimit, 65000,                                        \
+  product(intx, MaxNodeLimit, 80000,                                        \
           "Maximum number of nodes")                                        \
                                                                             \
-  product(intx, NodeLimitFudgeFactor, 1000,                                 \
+  product(intx, NodeLimitFudgeFactor, 2000,                                 \
           "Fudge Factor for certain optimizations")                         \
                                                                             \
   product(bool, UseJumpTables, true,                                        \
@@ -450,12 +453,15 @@
   notproduct(bool, PrintEliminateLocks, false,                              \
           "Print out when locks are eliminated")                            \
                                                                             \
-  diagnostic(bool, EliminateAutoBox, false,                                 \
-          "Private flag to control optimizations for autobox elimination")  \
+  product(bool, EliminateAutoBox, true,                                     \
+          "Control optimizations for autobox elimination")                  \
                                                                             \
   product(intx, AutoBoxCacheMax, 128,                                       \
           "Sets max value cached by the java.lang.Integer autobox cache")   \
                                                                             \
+  experimental(bool, AggressiveUnboxing, false,                             \
+          "Control optimizations for aggressive boxing elimination")        \
+                                                                            \
   develop(bool, TraceLateExpand, false, "Trace late expand of nodes.")      \
                                                                             \
   product(bool, DoEscapeAnalysis, true,                                     \
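
c2_globals.hpp turns EliminateAutoBox into an on-by-default product flag, adds the experimental AggressiveUnboxing flag, and raises the MaxNodeLimit/NodeLimitFudgeFactor budgets. As a rough illustration only (the real globals.hpp machinery is more elaborate than this), flag tables of this x-macro shape are typically expanded a second time to materialize plain globals:

// Simplified sketch, not the actual HotSpot expansion.
#define MY_C2_FLAGS(product, experimental)                                  \
  product(bool, EliminateAutoBox, true,                                     \
          "Control optimizations for autobox elimination")                  \
  experimental(bool, AggressiveUnboxing, false,                             \
          "Control optimizations for aggressive boxing elimination")

#define MATERIALIZE_FLAG(type, name, value, doc) type name = value;
MY_C2_FLAGS(MATERIALIZE_FLAG, MATERIALIZE_FLAG)
#undef MATERIALIZE_FLAG
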
--- a/src/share/vm/opto/c2compiler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/c2compiler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -128,9 +131,10 @@
   bool subsume_loads = SubsumeLoads;
   bool do_escape_analysis = DoEscapeAnalysis &&
     !env->jvmti_can_access_local_variables();
+  bool eliminate_boxing = EliminateAutoBox;
   while (!env->failing()) {
     // Attempt to compile while subsuming loads into machine instructions.
-    Compile C(env, this, target, entry_bci, subsume_loads, do_escape_analysis);
+    Compile C(env, this, target, entry_bci, subsume_loads, do_escape_analysis, eliminate_boxing);
 
 
     // Check result and retry if appropriate.
@@ -145,6 +149,12 @@
         do_escape_analysis = false;
         continue;  // retry
       }
+      if (C.has_boxed_value()) {
+        // Recompile without boxing elimination regardless of the failure reason.
+        assert(eliminate_boxing, "must make progress");
+        eliminate_boxing = false;
+        continue;  // retry
+      }
       // Pass any other failure reason up to the ciEnv.
       // Note that serious, irreversible failures are already logged
       // on the ciEnv via env->record_method_not_compilable().
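
The c2compiler.cpp hunk adds boxing elimination to the set of optimizations that are switched off and retried when a compilation attempt bails out. A minimal sketch of that progressive-fallback loop follows; Result and run_compile() are hypothetical stand-ins for the ciEnv/Compile interaction, not the real API.

#include <cstdio>

struct Result {
  bool failed;
  bool retry_no_subsuming_loads;
  bool retry_no_escape_analysis;
  bool has_boxed_value;
};

static Result run_compile(bool subsume_loads, bool do_escape_analysis,
                          bool eliminate_boxing) {
  std::printf("compile: loads=%d ea=%d box=%d\n",
              (int)subsume_loads, (int)do_escape_analysis, (int)eliminate_boxing);
  Result r = { false, false, false, false };   // pretend the attempt succeeded
  return r;
}

void compile_with_fallbacks() {
  bool subsume_loads = true, do_escape_analysis = true, eliminate_boxing = true;
  for (;;) {
    Result r = run_compile(subsume_loads, do_escape_analysis, eliminate_boxing);
    if (!r.failed) return;
    // Drop one optimization per retry, mirroring the hunk above.
    if (r.retry_no_subsuming_loads && subsume_loads)      { subsume_loads = false;      continue; }
    if (r.retry_no_escape_analysis && do_escape_analysis) { do_escape_analysis = false; continue; }
    if (r.has_boxed_value && eliminate_boxing)            { eliminate_boxing = false;   continue; }
    return;  // nothing left to turn off; pass the failure up
  }
}
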
--- a/src/share/vm/opto/callGenerator.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/callGenerator.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -134,7 +134,7 @@
     kit.C->log()->elem("direct_call bci='%d'", jvms->bci());
   }
 
-  CallStaticJavaNode *call = new (kit.C) CallStaticJavaNode(tf(), target, method(), kit.bci());
+  CallStaticJavaNode *call = new (kit.C) CallStaticJavaNode(kit.C, tf(), target, method(), kit.bci());
   _call_node = call;  // Save the call node in case we need it later
   if (!is_static) {
     // Make an explicit receiver null_check as part of this call.
@@ -304,29 +304,34 @@
 
 void LateInlineCallGenerator::do_late_inline() {
   // Can't inline it
-  if (call_node() == NULL || call_node()->outcnt() == 0 ||
-      call_node()->in(0) == NULL || call_node()->in(0)->is_top()) {
+  CallStaticJavaNode* call = call_node();
+  if (call == NULL || call->outcnt() == 0 ||
+      call->in(0) == NULL || call->in(0)->is_top()) {
     return;
   }
 
-  const TypeTuple *r = call_node()->tf()->domain();
+  const TypeTuple *r = call->tf()->domain();
   for (int i1 = 0; i1 < method()->arg_size(); i1++) {
-    if (call_node()->in(TypeFunc::Parms + i1)->is_top() && r->field_at(TypeFunc::Parms + i1) != Type::HALF) {
+    if (call->in(TypeFunc::Parms + i1)->is_top() && r->field_at(TypeFunc::Parms + i1) != Type::HALF) {
       assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
       return;
     }
   }
 
-  if (call_node()->in(TypeFunc::Memory)->is_top()) {
+  if (call->in(TypeFunc::Memory)->is_top()) {
     assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
     return;
   }
 
-  CallStaticJavaNode* call = call_node();
+  Compile* C = Compile::current();
+  // Remove inlined methods from Compiler's lists.
+  if (call->is_macro()) {
+    C->remove_macro_node(call);
+  }
 
   // Make a clone of the JVMState that appropriate to use for driving a parse
-  Compile* C = Compile::current();
-  JVMState* jvms     = call->jvms()->clone_shallow(C);
+  JVMState* old_jvms = call->jvms();
+  JVMState* jvms = old_jvms->clone_shallow(C);
   uint size = call->req();
   SafePointNode* map = new (C) SafePointNode(size, jvms);
   for (uint i1 = 0; i1 < size; i1++) {
@@ -340,16 +345,23 @@
     map->set_req(TypeFunc::Memory, mem);
   }
 
-  // Make enough space for the expression stack and transfer the incoming arguments
-  int nargs    = method()->arg_size();
+  uint nargs = method()->arg_size();
+  // blow away old call arguments
+  Node* top = C->top();
+  for (uint i1 = 0; i1 < nargs; i1++) {
+    map->set_req(TypeFunc::Parms + i1, top);
+  }
   jvms->set_map(map);
+
+  // Make enough space in the expression stack to transfer
+  // the incoming arguments and return value.
   map->ensure_stack(jvms, jvms->method()->max_stack());
-  if (nargs > 0) {
-    for (int i1 = 0; i1 < nargs; i1++) {
-      map->set_req(i1 + jvms->argoff(), call->in(TypeFunc::Parms + i1));
-    }
+  for (uint i1 = 0; i1 < nargs; i1++) {
+    map->set_argument(jvms, i1, call->in(TypeFunc::Parms + i1));
   }
 
+  // This check is done here because the for_method_handle_inline() method
+  // needs the JVMState for the inlined state.
   if (!do_late_inline_check(jvms)) {
     map->disconnect_inputs(NULL, C);
     return;
@@ -480,6 +492,26 @@
   return new LateInlineStringCallGenerator(method, inline_cg);
 }
 
+class LateInlineBoxingCallGenerator : public LateInlineCallGenerator {
+
+ public:
+  LateInlineBoxingCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
+    LateInlineCallGenerator(method, inline_cg) {}
+
+  virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
+    C->add_boxing_late_inline(this);
+
+    JVMState* new_jvms =  DirectCallGenerator::generate(jvms);
+    return new_jvms;
+  }
+};
+
+CallGenerator* CallGenerator::for_boxing_late_inline(ciMethod* method, CallGenerator* inline_cg) {
+  return new LateInlineBoxingCallGenerator(method, inline_cg);
+}
 
 //---------------------------WarmCallGenerator--------------------------------
 // Internal class which handles initial deferral of inlining decisions.
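
LateInlineBoxingCallGenerator above follows the same shape as the existing late-inline generators: at parse time it emits an ordinary direct call and records itself on a per-compile list (add_boxing_late_inline), and do_late_inline() later replaces the surviving call with the parsed body. A rough sketch of that decorator shape follows, using hypothetical types rather than the real CallGenerator API.

#include <vector>

struct JState {};                       // stands in for JVMState

struct Gen {                            // stands in for CallGenerator
  virtual ~Gen() {}
  virtual JState* generate(JState* jvms) = 0;
};

struct LateInlineGen : Gen {
  Gen* direct_cg;                       // emits the plain call now
  Gen* inline_cg;                       // used later when the call is replaced
  std::vector<LateInlineGen*>* queue;   // e.g. the boxing late-inline list

  LateInlineGen(Gen* d, Gen* i, std::vector<LateInlineGen*>* q)
    : direct_cg(d), inline_cg(i), queue(q) {}

  virtual JState* generate(JState* jvms) {
    queue->push_back(this);             // revisit after parsing finishes
    return direct_cg->generate(jvms);   // for now, just a direct call
  }
};
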
--- a/src/share/vm/opto/callGenerator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/callGenerator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -125,6 +125,7 @@
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
   static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
   static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
+  static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
 
   // How to make a call but defer the decision whether to inline or not.
   static CallGenerator* for_warm_call(WarmCallInfo* ci,
--- a/src/share/vm/opto/callnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/callnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -518,7 +518,9 @@
 
 
 void JVMState::dump_on(outputStream* st) const {
-  if (_map && !((uintptr_t)_map & 1)) {
+  bool print_map = _map && !((uintptr_t)_map & 1) &&
+                  ((caller() == NULL) || (caller()->map() != _map));
+  if (print_map) {
     if (_map->len() > _map->req()) {  // _map->has_exceptions()
       Node* ex = _map->in(_map->req());  // _map->next_exception()
       // skip the first one; it's already being printed
@@ -527,7 +529,10 @@
         ex->dump(1);
       }
     }
-    _map->dump(2);
+    _map->dump(Verbose ? 2 : 1);
+  }
+  if (caller() != NULL) {
+    caller()->dump_on(st);
   }
   st->print("JVMS depth=%d loc=%d stk=%d arg=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d reexecute=%s method=",
              depth(), locoff(), stkoff(), argoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci(), should_reexecute()?"true":"false");
@@ -541,9 +546,6 @@
       _method->print_codes_on(bci(), bci()+1, st);
     }
   }
-  if (caller() != NULL) {
-    caller()->dump_on(st);
-  }
 }
 
 // Extra way to dump a jvms from the debugger,
@@ -579,6 +581,15 @@
   return n;
 }
 
+/**
+ * Reset map for all callers
+ */
+void JVMState::set_map_deep(SafePointNode* map) {
+  for (JVMState* p = this; p->_caller != NULL; p = p->_caller) {
+    p->set_map(map);
+  }
+}
+
 //=============================================================================
 uint CallNode::cmp( const Node &n ) const
 { return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; }
@@ -658,17 +669,49 @@
 // Determine whether the call could modify the field of the specified
 // instance at the specified offset.
 //
-bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) {
-  const TypeOopPtr *adrInst_t  = addr_t->isa_oopptr();
-
-  // If not an OopPtr or not an instance type, assume the worst.
-  // Note: currently this method is called only for instance types.
-  if (adrInst_t == NULL || !adrInst_t->is_known_instance()) {
-    return true;
+bool CallNode::may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase) {
+  assert((t_oop != NULL), "sanity");
+  if (t_oop->is_known_instance()) {
+    // The instance_id is set only for scalar-replaceable allocations which
+    // are not passed as arguments according to Escape Analysis.
+    return false;
   }
-  // The instance_id is set only for scalar-replaceable allocations which
-  // are not passed as arguments according to Escape Analysis.
-  return false;
+  if (t_oop->is_ptr_to_boxed_value()) {
+    ciKlass* boxing_klass = t_oop->klass();
+    if (is_CallStaticJava() && as_CallStaticJava()->is_boxing_method()) {
+      // Skip unrelated boxing methods.
+      Node* proj = proj_out(TypeFunc::Parms);
+      if ((proj == NULL) || (phase->type(proj)->is_instptr()->klass() != boxing_klass)) {
+        return false;
+      }
+    }
+    if (is_CallJava() && as_CallJava()->method() != NULL) {
+      ciMethod* meth = as_CallJava()->method();
+      if (meth->is_accessor()) {
+        return false;
+      }
+      // May modify (by reflection) if a boxed object is passed
+      // as an argument or returned.
+      if (returns_pointer() && (proj_out(TypeFunc::Parms) != NULL)) {
+        Node* proj = proj_out(TypeFunc::Parms);
+        const TypeInstPtr* inst_t = phase->type(proj)->isa_instptr();
+        if ((inst_t != NULL) && (!inst_t->klass_is_exact() ||
+                                 (inst_t->klass() == boxing_klass))) {
+          return true;
+        }
+      }
+      const TypeTuple* d = tf()->domain();
+      for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+        const TypeInstPtr* inst_t = d->field_at(i)->isa_instptr();
+        if ((inst_t != NULL) && (!inst_t->klass_is_exact() ||
+                                 (inst_t->klass() == boxing_klass))) {
+          return true;
+        }
+      }
+      return false;
+    }
+  }
+  return true;
 }
 
 // Does this call have a direct reference to n other than debug information?
@@ -1016,6 +1059,7 @@
   int scloff = jvms->scloff();
   int endoff = jvms->endoff();
   assert(endoff == (int)req(), "no other states or debug info after me");
+  assert(jvms->scl_size() == 0, "parsed code should not have scalar objects");
   Node* top = Compile::current()->top();
   for (uint i = 0; i < grow_by; i++) {
     ins_req(monoff, top);
@@ -1031,6 +1075,7 @@
   const int MonitorEdges = 2;
   assert(JVMState::logMonitorEdges == exact_log2(MonitorEdges), "correct MonitorEdges");
   assert(req() == jvms()->endoff(), "correct sizing");
+  assert((jvms()->scl_size() == 0), "parsed code should not have scalar objects");
   int nextmon = jvms()->scloff();
   if (GenerateSynchronizationCode) {
     add_req(lock->box_node());
@@ -1046,6 +1091,7 @@
 
 void SafePointNode::pop_monitor() {
   // Delete last monitor from debug info
+  assert((jvms()->scl_size() == 0), "parsed code should not have scalar objects");
   debug_only(int num_before_pop = jvms()->nof_monitors());
   const int MonitorEdges = (1<<JVMState::logMonitorEdges);
   int scloff = jvms()->scloff();
@@ -1150,6 +1196,7 @@
   init_class_id(Class_Allocate);
   init_flags(Flag_is_macro);
   _is_scalar_replaceable = false;
+  _is_non_escaping = false;
   Node *topnode = C->top();
 
   init_req( TypeFunc::Control  , ctrl );
@@ -1165,8 +1212,6 @@
 }
 
 //=============================================================================
-uint AllocateArrayNode::size_of() const { return sizeof(*this); }
-
 Node* AllocateArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape))  return this;
   // Don't bother trying to transform a dead node
@@ -1231,6 +1276,8 @@
       //   - the narrow_length is 0
       //   - the narrow_length is not wider than length
       assert(narrow_length_type == TypeInt::ZERO ||
+             length_type->is_con() && narrow_length_type->is_con() &&
+                (narrow_length_type->_hi <= length_type->_lo) ||
              (narrow_length_type->_hi <= length_type->_hi &&
               narrow_length_type->_lo >= length_type->_lo),
              "narrow type must be narrower than length type");
--- a/src/share/vm/opto/callnode.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/callnode.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -50,6 +50,7 @@
 class         CallLeafNoFPNode;
 class     AllocateNode;
 class       AllocateArrayNode;
+class     BoxLockNode;
 class     LockNode;
 class     UnlockNode;
 class JVMState;
@@ -236,7 +237,6 @@
 
   int            loc_size() const { return stkoff() - locoff(); }
   int            stk_size() const { return monoff() - stkoff(); }
-  int            arg_size() const { return monoff() - argoff(); }
   int            mon_size() const { return scloff() - monoff(); }
   int            scl_size() const { return endoff() - scloff(); }
 
@@ -299,6 +299,7 @@
   // Miscellaneous utility functions
   JVMState* clone_deep(Compile* C) const;    // recursively clones caller chain
   JVMState* clone_shallow(Compile* C) const; // retains uncloned caller
+  void      set_map_deep(SafePointNode *map);// reset map for all callers
 
 #ifndef PRODUCT
   void      format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const;
@@ -472,7 +473,7 @@
   static  bool           needs_polling_address_input();
 
 #ifndef PRODUCT
-  virtual void              dump_spec(outputStream *st) const;
+  virtual void           dump_spec(outputStream *st) const;
 #endif
 };
 
@@ -587,10 +588,10 @@
   virtual bool        guaranteed_safepoint()  { return true; }
   // For macro nodes, the JVMState gets modified during expansion, so when cloning
   // the node the JVMState must be cloned.
-  virtual void        clone_jvms() { }   // default is not to clone
+  virtual void        clone_jvms(Compile* C) { }   // default is not to clone
 
   // Returns true if the call may modify n
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase);
+  virtual bool        may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase);
   // Does this node have a use of n other than in debug information?
   bool                has_non_debug_use(Node *n);
   // Returns the unique CheckCastPP of a call
@@ -663,9 +664,15 @@
   virtual uint cmp( const Node &n ) const;
   virtual uint size_of() const; // Size is bigger
 public:
-  CallStaticJavaNode(const TypeFunc* tf, address addr, ciMethod* method, int bci)
+  CallStaticJavaNode(Compile* C, const TypeFunc* tf, address addr, ciMethod* method, int bci)
     : CallJavaNode(tf, addr, method, bci), _name(NULL) {
     init_class_id(Class_CallStaticJava);
+    if (C->eliminate_boxing() && (method != NULL) && method->is_boxing_method()) {
+      init_flags(Flag_is_macro);
+      C->add_macro_node(this);
+    }
+    _is_scalar_replaceable = false;
+    _is_non_escaping = false;
   }
   CallStaticJavaNode(const TypeFunc* tf, address addr, const char* name, int bci,
                      const TypePtr* adr_type)
@@ -673,13 +680,31 @@
     init_class_id(Class_CallStaticJava);
     // This node calls a runtime stub, which often has narrow memory effects.
     _adr_type = adr_type;
+    _is_scalar_replaceable = false;
+    _is_non_escaping = false;
   }
-  const char *_name;            // Runtime wrapper name
+  const char *_name;      // Runtime wrapper name
+
+  // Result of Escape Analysis
+  bool _is_scalar_replaceable;
+  bool _is_non_escaping;
 
   // If this is an uncommon trap, return the request code, else zero.
   int uncommon_trap_request() const;
   static int extract_uncommon_trap_request(const Node* call);
 
+  bool is_boxing_method() const {
+    return is_macro() && (method() != NULL) && method()->is_boxing_method();
+  }
+  // Later inlining modifies the JVMState, so we need to clone it
+  // when the call node is cloned (because it is macro node).
+  virtual void  clone_jvms(Compile* C) {
+    if ((jvms() != NULL) && is_boxing_method()) {
+      set_jvms(jvms()->clone_deep(C));
+      jvms()->set_map_deep(this);
+    }
+  }
+
   virtual int         Opcode() const;
 #ifndef PRODUCT
   virtual void        dump_spec(outputStream *st) const;
@@ -781,12 +806,12 @@
     ParmLimit
   };
 
-  static const TypeFunc* alloc_type() {
+  static const TypeFunc* alloc_type(const Type* t) {
     const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms);
     fields[AllocSize]   = TypeInt::POS;
     fields[KlassNode]   = TypeInstPtr::NOTNULL;
     fields[InitialTest] = TypeInt::BOOL;
-    fields[ALength]     = TypeInt::INT;  // length (can be a bad length)
+    fields[ALength]     = t;  // length (can be a bad length)
 
     const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
 
@@ -799,21 +824,26 @@
     return TypeFunc::make(domain, range);
   }
 
-  bool _is_scalar_replaceable;  // Result of Escape Analysis
+  // Result of Escape Analysis
+  bool _is_scalar_replaceable;
+  bool _is_non_escaping;
 
   virtual uint size_of() const; // Size is bigger
   AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
                Node *size, Node *klass_node, Node *initial_test);
   // Expansion modifies the JVMState, so we need to clone it
-  virtual void  clone_jvms() {
-    set_jvms(jvms()->clone_deep(Compile::current()));
+  virtual void  clone_jvms(Compile* C) {
+    if (jvms() != NULL) {
+      set_jvms(jvms()->clone_deep(C));
+      jvms()->set_map_deep(this);
+    }
   }
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegP; }
   virtual bool        guaranteed_safepoint()  { return false; }
 
   // allocations do not modify their arguments
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase) { return false;}
+  virtual bool        may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase) { return false;}
 
   // Pattern-match a possible usage of AllocateNode.
   // Return null if no allocation is recognized.
@@ -848,10 +878,6 @@
   // are defined in graphKit.cpp, which sets up the bidirectional relation.)
   InitializeNode* initialization();
 
-  // Return the corresponding storestore barrier (or null if none).
-  // Walks out edges to find it...
-  MemBarStoreStoreNode* storestore();
-
   // Convenience for initialization->maybe_set_complete(phase)
   bool maybe_set_complete(PhaseGVN* phase);
 };
@@ -873,7 +899,6 @@
     set_req(AllocateNode::ALength,        count_val);
   }
   virtual int Opcode() const;
-  virtual uint size_of() const; // Size is bigger
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
 
   // Dig the length operand out of a array allocation site.
@@ -951,7 +976,7 @@
   void set_nested()      { _kind = Nested; set_eliminated_lock_counter(); }
 
   // locking does not modify its arguments
-  virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
+  virtual bool may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase){ return false;}
 
 #ifndef PRODUCT
   void create_lock_counter(JVMState* s);
@@ -998,8 +1023,11 @@
 
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   // Expansion modifies the JVMState, so we need to clone it
-  virtual void  clone_jvms() {
-    set_jvms(jvms()->clone_deep(Compile::current()));
+  virtual void  clone_jvms(Compile* C) {
+    if (jvms() != NULL) {
+      set_jvms(jvms()->clone_deep(C));
+      jvms()->set_map_deep(this);
+    }
   }
 
   bool is_nested_lock_region(); // Is this Lock nested?
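
The clone_jvms(Compile*) overrides above deep-clone the debug state whenever a macro node (now including boxing calls) is cloned, and set_map_deep() repoints every frame that has a caller at the clone. A toy model of that contract follows, with hypothetical Frame/CallLike types rather than the real JVMState/CallNode classes.

struct CallLike;

struct Frame {
  Frame*    caller;
  CallLike* map;

  Frame* clone_deep() const {                 // copy the whole caller chain
    Frame* f = new Frame(*this);
    if (f->caller != 0) f->caller = f->caller->clone_deep();
    return f;
  }
  void set_map_deep(CallLike* m) {            // as above: frames with a caller
    for (Frame* p = this; p->caller != 0; p = p->caller) p->map = m;
  }
};

struct CallLike {
  Frame* jvms;
  CallLike* clone_with_jvms() const {         // clone() followed by clone_jvms(C)
    CallLike* c = new CallLike(*this);
    c->jvms = jvms->clone_deep();
    c->jvms->set_map_deep(c);
    return c;
  }
};
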
--- a/src/share/vm/opto/cfgnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/cfgnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -806,7 +806,7 @@
       Node *in = ophi->in(i);
       if (in == NULL || igvn->type(in) == Type::TOP)
         continue;
-      Node *opt = MemNode::optimize_simple_memory_chain(in, at, igvn);
+      Node *opt = MemNode::optimize_simple_memory_chain(in, t_oop, NULL, igvn);
       PhiNode *optphi = opt->is_Phi() ? opt->as_Phi() : NULL;
       if (optphi != NULL && optphi->adr_type() == TypePtr::BOTTOM) {
         opt = node_map[optphi->_idx];
@@ -1921,7 +1921,7 @@
     const TypePtr* at = adr_type();
     for( uint i=1; i<req(); ++i ) {// For all paths in
       Node *ii = in(i);
-      Node *new_in = MemNode::optimize_memory_chain(ii, at, phase);
+      Node *new_in = MemNode::optimize_memory_chain(ii, at, NULL, phase);
       if (ii != new_in ) {
         set_req(i, new_in);
         progress = this;
--- a/src/share/vm/opto/chaitin.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/chaitin.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -40,10 +40,8 @@
 #include "opto/opcodes.hpp"
 #include "opto/rootnode.hpp"
 
-//=============================================================================
-
 #ifndef PRODUCT
-void LRG::dump( ) const {
+void LRG::dump() const {
   ttyLocker ttyl;
   tty->print("%d ",num_regs());
   _mask.dump();
@@ -94,7 +92,6 @@
 }
 #endif
 
-//------------------------------score------------------------------------------
 // Compute score from cost and area.  Low score is best to spill.
 static double raw_score( double cost, double area ) {
   return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
@@ -125,7 +122,6 @@
   return score;
 }
 
-//------------------------------LRG_List---------------------------------------
 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
   memset( _lidxs, 0, sizeof(uint)*max );
 }
@@ -145,7 +141,72 @@
 
 #define NUMBUCKS 3
 
-//------------------------------Chaitin----------------------------------------
+// Straight out of Tarjan's union-find algorithm
+uint LiveRangeMap::find_compress(uint lrg) {
+  uint cur = lrg;
+  uint next = _uf_map[cur];
+  while (next != cur) { // Scan chain of equivalences
+    assert( next < cur, "always union smaller");
+    cur = next; // until find a fixed-point
+    next = _uf_map[cur];
+  }
+
+  // Core of union-find algorithm: update chain of
+  // equivalences to be equal to the root.
+  while (lrg != next) {
+    uint tmp = _uf_map[lrg];
+    _uf_map.map(lrg, next);
+    lrg = tmp;
+  }
+  return lrg;
+}
+
+// Reset the Union-Find map to identity
+void LiveRangeMap::reset_uf_map(uint max_lrg_id) {
+  _max_lrg_id = max_lrg_id;
+  // Force the Union-Find mapping to be at least this large
+  _uf_map.extend(_max_lrg_id, 0);
+  // Initialize it to be the ID mapping.
+  for (uint i = 0; i < _max_lrg_id; ++i) {
+    _uf_map.map(i, i);
+  }
+}
+
+// Make all Nodes map directly to their final live range; no need for
+// the Union-Find mapping after this call.
+void LiveRangeMap::compress_uf_map_for_nodes() {
+  // For all Nodes, compress mapping
+  uint unique = _names.Size();
+  for (uint i = 0; i < unique; ++i) {
+    uint lrg = _names[i];
+    uint compressed_lrg = find(lrg);
+    if (lrg != compressed_lrg) {
+      _names.map(i, compressed_lrg);
+    }
+  }
+}
+
+// Like Find above, but with no path compression, so bad asymptotic behavior
+uint LiveRangeMap::find_const(uint lrg) const {
+  if (!lrg) {
+    return lrg; // Ignore the zero LRG
+  }
+
+  // Off the end?  This happens during debugging dumps when you have
+  // brand new live ranges but have not told the allocator yet.
+  if (lrg >= _max_lrg_id) {
+    return lrg;
+  }
+
+  uint next = _uf_map[lrg];
+  while (next != lrg) { // Scan chain of equivalences
+    assert(next < lrg, "always union smaller");
+    lrg = next; // until find a fixed-point
+    next = _uf_map[lrg];
+  }
+  return next;
+}
+
 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
   : PhaseRegAlloc(unique, cfg, matcher,
 #ifndef PRODUCT
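
LiveRangeMap::find_compress() and find_const() above are textbook union-find: equivalence chains always point from larger live-range ids to smaller ones, and find_compress() flattens the chain onto the root. A standalone version of the same idea follows for reference; it is not the LRG_List-backed implementation.

#include <cassert>
#include <vector>

struct UnionFind {
  std::vector<unsigned> uf;                     // uf[i] == i means i is a root

  explicit UnionFind(unsigned n) : uf(n) {
    for (unsigned i = 0; i < n; i++) uf[i] = i; // identity, like reset_uf_map()
  }

  unsigned find(unsigned x) {                   // with path compression
    unsigned root = x;
    while (uf[root] != root) {                  // scan chain of equivalences
      assert(uf[root] < root && "always union smaller");
      root = uf[root];
    }
    while (uf[x] != root) {                     // point the whole chain at root
      unsigned next = uf[x];
      uf[x] = root;
      x = next;
    }
    return root;
  }

  void join(unsigned a, unsigned b) {           // like PhaseChaitin::Union()
    unsigned ra = find(a), rb = find(b);
    if (ra != rb) uf[ra > rb ? ra : rb] = (ra < rb ? ra : rb);
  }
};
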
@@ -153,58 +214,133 @@
 #else
        NULL
 #endif
-       ),
-    _names(unique), _uf_map(unique),
-    _maxlrg(0), _live(0),
-    _spilled_once(Thread::current()->resource_area()),
-    _spilled_twice(Thread::current()->resource_area()),
-    _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0),
-    _oldphi(unique)
+       )
+  , _lrg_map(unique)
+  , _live(0)
+  , _spilled_once(Thread::current()->resource_area())
+  , _spilled_twice(Thread::current()->resource_area())
+  , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
+  , _oldphi(unique)
 #ifndef PRODUCT
   , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
 #endif
 {
   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
 
-  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
+  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 
-  uint i,j;
   // Build a list of basic blocks, sorted by frequency
-  _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+  _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
   // Experiment with sorting strategies to speed compilation
   double  cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
   Block **buckets[NUMBUCKS];             // Array of buckets
   uint    buckcnt[NUMBUCKS];             // Array of bucket counters
   double  buckval[NUMBUCKS];             // Array of bucket value cutoffs
-  for( i = 0; i < NUMBUCKS; i++ ) {
-    buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+  for (uint i = 0; i < NUMBUCKS; i++) {
+    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
     buckcnt[i] = 0;
     // Bump by three orders of magnitude each time
     cutoff *= 0.001;
     buckval[i] = cutoff;
-    for( j = 0; j < _cfg._num_blocks; j++ ) {
+    for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
       buckets[i][j] = NULL;
     }
   }
   // Sort blocks into buckets
-  for( i = 0; i < _cfg._num_blocks; i++ ) {
-    for( j = 0; j < NUMBUCKS; j++ ) {
-      if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) {
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    for (uint j = 0; j < NUMBUCKS; j++) {
+      if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {
         // Assign block to end of list for appropriate bucket
-        buckets[j][buckcnt[j]++] = _cfg._blocks[i];
-        break;                      // kick out of inner loop
+        buckets[j][buckcnt[j]++] = _cfg.get_block(i);
+        break; // kick out of inner loop
       }
     }
   }
   // Dump buckets into final block array
   uint blkcnt = 0;
-  for( i = 0; i < NUMBUCKS; i++ ) {
-    for( j = 0; j < buckcnt[i]; j++ ) {
+  for (uint i = 0; i < NUMBUCKS; i++) {
+    for (uint j = 0; j < buckcnt[i]; j++) {
       _blks[blkcnt++] = buckets[i][j];
     }
   }
 
-  assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
+  assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
+}
+
+//------------------------------Union------------------------------------------
+// union 2 sets together.
+void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
+  uint src = _lrg_map.find(src_n);
+  uint dst = _lrg_map.find(dst_n);
+  assert(src, "");
+  assert(dst, "");
+  assert(src < _lrg_map.max_lrg_id(), "oob");
+  assert(dst < _lrg_map.max_lrg_id(), "oob");
+  assert(src < dst, "always union smaller");
+  _lrg_map.uf_map(dst, src);
+}
+
+//------------------------------new_lrg----------------------------------------
+void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
+  // Make the Node->LRG mapping
+  _lrg_map.extend(x->_idx,lrg);
+  // Make the Union-Find mapping an identity function
+  _lrg_map.uf_extend(lrg, lrg);
+}
+
+
+int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
+  assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
+  DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )
+  int found_projs = 0;
+  uint cnt = orig->outcnt();
+  for (uint i = 0; i < cnt; i++) {
+    Node* proj = orig->raw_out(i);
+    if (proj->is_MachProj()) {
+      assert(proj->outcnt() == 0, "only kill projections are expected here");
+      assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
+      found_projs++;
+      // Copy kill projections after the cloned node
+      Node* kills = proj->clone();
+      kills->set_req(0, copy);
+      b->insert_node(kills, idx++);
+      _cfg.map_node_to_block(kills, b);
+      new_lrg(kills, max_lrg_id++);
+    }
+  }
+  return found_projs;
+}
+
+//------------------------------compact----------------------------------------
+// Renumber the live ranges to compact them.  Makes the IFG smaller.
+void PhaseChaitin::compact() {
+  // Currently the _uf_map contains a series of short chains which are headed
+  // by a self-cycle.  All the chains run from big numbers to little numbers.
+  // The Find() call chases the chains & shortens them for the next Find call.
+  // We are going to change this structure slightly.  Numbers above a moving
+  // wave 'i' are unchanged.  Numbers below 'j' point directly to their
+  // compacted live range with no further chaining.  There are no chains or
+  // cycles below 'i', so the Find call no longer works.
+  uint j=1;
+  uint i;
+  for (i = 1; i < _lrg_map.max_lrg_id(); i++) {
+    uint lr = _lrg_map.uf_live_range_id(i);
+    // Ignore unallocated live ranges
+    if (!lr) {
+      continue;
+    }
+    assert(lr <= i, "");
+    _lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));
+  }
+  // Now change the Node->LR mapping to reflect the compacted names
+  uint unique = _lrg_map.size();
+  for (i = 0; i < unique; i++) {
+    uint lrg_id = _lrg_map.live_range_id(i);
+    _lrg_map.map(i, _lrg_map.uf_live_range_id(lrg_id));
+  }
+
+  // Reset the Union-Find mapping
+  _lrg_map.reset_uf_map(j);
 }
 
 void PhaseChaitin::Register_Allocate() {
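
compact() above renumbers live ranges densely in a single left-to-right pass: a union-find root takes the next id j, and a non-root is redirected to its root's already-compacted id; the Node-to-live-range names are then rewritten and the union-find map is reset. A small worked example of that pass on made-up data:

#include <cstdio>
#include <vector>

// Made-up data; index 0 is the unused zero live range, and every chain
// already points at a smaller id, as the allocator guarantees.
int main() {
  //                     id:  0  1  2  3  4  5  6
  std::vector<unsigned> uf = {0, 1, 1, 3, 3, 5, 3};   // 2->1, 4->3, 6->3
  unsigned j = 1;
  for (unsigned i = 1; i < uf.size(); i++) {
    unsigned lr = uf[i];
    if (lr == 0) continue;                 // ignore unallocated live ranges
    uf[i] = (lr == i) ? j++ : uf[lr];      // roots take new ids; others follow
  }
  // Result: 1->1, 2->1, 3->2, 4->2, 5->3, 6->2  (dense ids 1..3)
  for (unsigned i = 1; i < uf.size(); i++) std::printf("%u -> %u\n", i, uf[i]);
  return 0;
}
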
@@ -231,14 +367,12 @@
   // all copy-related live ranges low and then using the max copy-related
   // live range as a cut-off for LIVE and the IFG.  In other words, I can
   // build a subset of LIVE and IFG just for copies.
-  PhaseLive live(_cfg,_names,&live_arena);
+  PhaseLive live(_cfg, _lrg_map.names(), &live_arena);
 
   // Need IFG for coalescing and coloring
-  PhaseIFG ifg( &live_arena );
+  PhaseIFG ifg(&live_arena);
   _ifg = &ifg;
 
-  if (C->unique() > _names.Size())  _names.extend(C->unique()-1, 0);
-
   // Come out of SSA world to the Named world.  Assign (virtual) registers to
   // Nodes.  Use the same register for all inputs and the output of PhiNodes
   // - effectively ending SSA form.  This requires either coalescing live
@@ -258,9 +392,9 @@
     _live = NULL;                 // Mark live as being not available
     rm.reset_to_mark();           // Reclaim working storage
     IndexSet::reset_memory(C, &live_arena);
-    ifg.init(_maxlrg);            // Empty IFG
+    ifg.init(_lrg_map.max_lrg_id()); // Empty IFG
     gather_lrg_masks( false );    // Collect LRG masks
-    live.compute( _maxlrg );      // Compute liveness
+    live.compute(_lrg_map.max_lrg_id()); // Compute liveness
     _live = &live;                // Mark LIVE as being available
   }
 
@@ -270,19 +404,19 @@
   // across any GC point where the derived value is live.  So this code looks
   // at all the GC points, and "stretches" the live range of any base pointer
   // to the GC point.
-  if( stretch_base_pointer_live_ranges(&live_arena) ) {
-    NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
+  if (stretch_base_pointer_live_ranges(&live_arena)) {
+    NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)
     // Since some live range stretched, I need to recompute live
     _live = NULL;
     rm.reset_to_mark();         // Reclaim working storage
     IndexSet::reset_memory(C, &live_arena);
-    ifg.init(_maxlrg);
-    gather_lrg_masks( false );
-    live.compute( _maxlrg );
+    ifg.init(_lrg_map.max_lrg_id());
+    gather_lrg_masks(false);
+    live.compute(_lrg_map.max_lrg_id());
     _live = &live;
   }
   // Create the interference graph using virtual copies
-  build_ifg_virtual( );  // Include stack slots this time
+  build_ifg_virtual();  // Include stack slots this time
 
   // Aggressive (but pessimistic) copy coalescing.
   // This pass works on virtual copies.  Any virtual copies which are not
@@ -296,11 +430,14 @@
     // given Node and search them for an instance, i.e., time O(#MaxLRG)).
     _ifg->SquareUp();
 
-    PhaseAggressiveCoalesce coalesce( *this );
-    coalesce.coalesce_driver( );
+    PhaseAggressiveCoalesce coalesce(*this);
+    coalesce.coalesce_driver();
     // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
     // not match the Phi itself, insert a copy.
     coalesce.insert_copies(_matcher);
+    if (C->failing()) {
+      return;
+    }
   }
 
   // After aggressive coalesce, attempt a first cut at coloring.
@@ -310,28 +447,36 @@
     _live = NULL;
     rm.reset_to_mark();           // Reclaim working storage
     IndexSet::reset_memory(C, &live_arena);
-    ifg.init(_maxlrg);
+    ifg.init(_lrg_map.max_lrg_id());
     gather_lrg_masks( true );
-    live.compute( _maxlrg );
+    live.compute(_lrg_map.max_lrg_id());
     _live = &live;
   }
 
   // Build physical interference graph
   uint must_spill = 0;
-  must_spill = build_ifg_physical( &live_arena );
+  must_spill = build_ifg_physical(&live_arena);
   // If we have a guaranteed spill, might as well spill now
-  if( must_spill ) {
-    if( !_maxlrg ) return;
+  if (must_spill) {
+    if(!_lrg_map.max_lrg_id()) {
+      return;
+    }
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
     C->check_node_count(10*must_spill, "out of nodes before split");
-    if (C->failing())  return;
-    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
+    if (C->failing()) {
+      return;
+    }
+
+    uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
+    _lrg_map.set_max_lrg_id(new_max_lrg_id);
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
     // or we failed to split
     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
-    if (C->failing())  return;
+    if (C->failing()) {
+      return;
+    }
 
-    NOT_PRODUCT( C->verify_graph_edges(); )
+    NOT_PRODUCT(C->verify_graph_edges();)
 
     compact();                  // Compact LRGs; return new lower max lrg
 
@@ -340,23 +485,23 @@
       _live = NULL;
       rm.reset_to_mark();         // Reclaim working storage
       IndexSet::reset_memory(C, &live_arena);
-      ifg.init(_maxlrg);          // Build a new interference graph
+      ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph
       gather_lrg_masks( true );   // Collect intersect mask
-      live.compute( _maxlrg );    // Compute LIVE
+      live.compute(_lrg_map.max_lrg_id()); // Compute LIVE
       _live = &live;
     }
-    build_ifg_physical( &live_arena );
+    build_ifg_physical(&live_arena);
     _ifg->SquareUp();
     _ifg->Compute_Effective_Degree();
     // Only do conservative coalescing if requested
-    if( OptoCoalesce ) {
+    if (OptoCoalesce) {
       // Conservative (and pessimistic) copy coalescing of those spills
-      PhaseConservativeCoalesce coalesce( *this );
+      PhaseConservativeCoalesce coalesce(*this);
       // If max live ranges greater than cutoff, don't color the stack.
       // This cutoff can be larger than below since it is only done once.
-      coalesce.coalesce_driver( );
+      coalesce.coalesce_driver();
     }
-    compress_uf_map_for_nodes();
+    _lrg_map.compress_uf_map_for_nodes();
 
 #ifdef ASSERT
     verify(&live_arena, true);
@@ -390,13 +535,18 @@
       }
     }
 
-    if( !_maxlrg ) return;
-    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
+    if (!_lrg_map.max_lrg_id()) {
+      return;
+    }
+    uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
+    _lrg_map.set_max_lrg_id(new_max_lrg_id);
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
-    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
-    if (C->failing())  return;
+    C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");
+    if (C->failing()) {
+      return;
+    }
 
-    compact();                  // Compact LRGs; return new lower max lrg
+    compact(); // Compact LRGs; return new lower max lrg
 
     // Nuke the live-ness and interference graph and LiveRanGe info
     {
@@ -404,26 +554,26 @@
       _live = NULL;
       rm.reset_to_mark();         // Reclaim working storage
       IndexSet::reset_memory(C, &live_arena);
-      ifg.init(_maxlrg);
+      ifg.init(_lrg_map.max_lrg_id());
 
       // Create LiveRanGe array.
       // Intersect register masks for all USEs and DEFs
-      gather_lrg_masks( true );
-      live.compute( _maxlrg );
+      gather_lrg_masks(true);
+      live.compute(_lrg_map.max_lrg_id());
       _live = &live;
     }
-    must_spill = build_ifg_physical( &live_arena );
+    must_spill = build_ifg_physical(&live_arena);
     _ifg->SquareUp();
     _ifg->Compute_Effective_Degree();
 
     // Only do conservative coalescing if requested
-    if( OptoCoalesce ) {
+    if (OptoCoalesce) {
       // Conservative (and pessimistic) copy coalescing
-      PhaseConservativeCoalesce coalesce( *this );
+      PhaseConservativeCoalesce coalesce(*this);
       // Check for few live ranges determines how aggressive coalesce is.
-      coalesce.coalesce_driver( );
+      coalesce.coalesce_driver();
     }
-    compress_uf_map_for_nodes();
+    _lrg_map.compress_uf_map_for_nodes();
 #ifdef ASSERT
     verify(&live_arena, true);
 #endif
@@ -435,7 +585,7 @@
 
     // Select colors by re-inserting LRGs back into the IFG in reverse order.
     // Return whether or not something spills.
-    spills = Select( );
+    spills = Select();
   }
 
   // Count number of Simplify-Select trips per coloring success.
@@ -455,9 +605,12 @@
 
   // max_reg is past the largest *register* used.
   // Convert that to a frame_slot number.
-  if( _max_reg <= _matcher._new_SP )
+  if (_max_reg <= _matcher._new_SP) {
     _framesize = C->out_preserve_stack_slots();
-  else _framesize = _max_reg -_matcher._new_SP;
+  }
+  else {
+    _framesize = _max_reg -_matcher._new_SP;
+  }
   assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");
 
   // This frame must preserve the required fp alignment
@@ -465,8 +618,9 @@
   assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
 #ifndef PRODUCT
   _total_framesize += _framesize;
-  if( (int)_framesize > _max_framesize )
+  if ((int)_framesize > _max_framesize) {
     _max_framesize = _framesize;
+  }
 #endif
 
   // Convert CISC spills
@@ -478,15 +632,17 @@
     log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
   }
 
-  if (C->failing())  return;
+  if (C->failing()) {
+    return;
+  }
 
-  NOT_PRODUCT( C->verify_graph_edges(); )
+  NOT_PRODUCT(C->verify_graph_edges();)
 
   // Move important info out of the live_arena to longer lasting storage.
-  alloc_node_regs(_names.Size());
-  for (uint i=0; i < _names.Size(); i++) {
-    if (_names[i]) {           // Live range associated with Node?
-      LRG &lrg = lrgs(_names[i]);
+  alloc_node_regs(_lrg_map.size());
+  for (uint i=0; i < _lrg_map.size(); i++) {
+    if (_lrg_map.live_range_id(i)) { // Live range associated with Node?
+      LRG &lrg = lrgs(_lrg_map.live_range_id(i));
       if (!lrg.alive()) {
         set_bad(i);
       } else if (lrg.num_regs() == 1) {
@@ -524,76 +680,78 @@
   C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
 }
 
-//------------------------------de_ssa-----------------------------------------
 void PhaseChaitin::de_ssa() {
   // Set initial Names for all Nodes.  Most Nodes get the virtual register
   // number.  A few get the ZERO live range number.  These do not
   // get allocated, but instead rely on correct scheduling to ensure that
   // only one instance is simultaneously live at a time.
   uint lr_counter = 1;
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    uint cnt = b->_nodes.size();
+  for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
+    Block* block = _cfg.get_block(i);
+    uint cnt = block->number_of_nodes();
 
     // Handle all the normal Nodes in the block
     for( uint j = 0; j < cnt; j++ ) {
-      Node *n = b->_nodes[j];
+      Node *n = block->get_node(j);
       // Pre-color to the zero live range, or pick virtual register
       const RegMask &rm = n->out_RegMask();
-      _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 );
+      _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
     }
   }
   // Reset the Union-Find mapping to be identity
-  reset_uf_map(lr_counter);
+  _lrg_map.reset_uf_map(lr_counter);
 }
 
 
-//------------------------------gather_lrg_masks-------------------------------
 // Gather LiveRanGe information, including register masks.  Modification of
 // cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
 void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
 
   // Nail down the frame pointer live range
-  uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr));
+  uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
   lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
 
     // For all instructions
-    for( uint j = 1; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for (uint j = 1; j < block->number_of_nodes(); j++) {
+      Node* n = block->get_node(j);
       uint input_edge_start =1; // Skip control most nodes
-      if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
+      if (n->is_Mach()) {
+        input_edge_start = n->as_Mach()->oper_input_base();
+      }
       uint idx = n->is_Copy();
 
       // Get virtual register number, same as LiveRanGe index
-      uint vreg = n2lidx(n);
-      LRG &lrg = lrgs(vreg);
-      if( vreg ) {              // No vreg means un-allocable (e.g. memory)
+      uint vreg = _lrg_map.live_range_id(n);
+      LRG& lrg = lrgs(vreg);
+      if (vreg) {              // No vreg means un-allocable (e.g. memory)
 
         // Collect has-copy bit
-        if( idx ) {
+        if (idx) {
           lrg._has_copy = 1;
-          uint clidx = n2lidx(n->in(idx));
-          LRG &copy_src = lrgs(clidx);
+          uint clidx = _lrg_map.live_range_id(n->in(idx));
+          LRG& copy_src = lrgs(clidx);
           copy_src._has_copy = 1;
         }
 
         // Check for float-vs-int live range (used in register-pressure
         // calculations)
         const Type *n_type = n->bottom_type();
-        if (n_type->is_floatingpoint())
+        if (n_type->is_floatingpoint()) {
           lrg._is_float = 1;
+        }
 
         // Check for twice prior spilling.  Once prior spilling might have
         // spilled 'soft', 2nd prior spill should have spilled 'hard' and
         // further spilling is unlikely to make progress.
-        if( _spilled_once.test(n->_idx) ) {
+        if (_spilled_once.test(n->_idx)) {
           lrg._was_spilled1 = 1;
-          if( _spilled_twice.test(n->_idx) )
+          if (_spilled_twice.test(n->_idx)) {
             lrg._was_spilled2 = 1;
+          }
         }
 
 #ifndef PRODUCT
@@ -630,16 +788,18 @@
 
         // Check for bound register masks
         const RegMask &lrgmask = lrg.mask();
-        if (lrgmask.is_bound(ireg))
+        if (lrgmask.is_bound(ireg)) {
           lrg._is_bound = 1;
+        }
 
         // Check for maximum frequency value
-        if (lrg._maxfreq < b->_freq)
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }
 
         // Check for oop-iness, or long/double
         // Check for multi-kill projection
-        switch( ireg ) {
+        switch (ireg) {
         case MachProjNode::fat_proj:
           // Fat projections have size equal to number of registers killed
           lrg.set_num_regs(rm.Size());
@@ -776,8 +936,10 @@
       }
       // Prepare register mask for each input
       for( uint k = input_edge_start; k < cnt; k++ ) {
-        uint vreg = n2lidx(n->in(k));
-        if( !vreg ) continue;
+        uint vreg = _lrg_map.live_range_id(n->in(k));
+        if (!vreg) {
+          continue;
+        }
 
         // If this instruction is CISC Spillable, add the flags
         // bit to its appropriate input
@@ -807,8 +969,7 @@
         // AggressiveCoalesce.  This effectively pre-virtual-splits
         // around uncommon uses of common defs.
         const RegMask &rm = n->in_RegMask(k);
-        if( !after_aggressive &&
-          _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) {
+        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
           // Since we are BEFORE aggressive coalesce, leave the register
           // mask untrimmed by the call.  This encourages more coalescing.
           // Later, AFTER aggressive, this live range will have to spill
@@ -852,15 +1013,16 @@
         }
 
         // Check for maximum frequency value
-        if( lrg._maxfreq < b->_freq )
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }
 
       } // End for all allocated inputs
     } // end for all instructions
   } // end for all blocks
 
   // Final per-liverange setup
-  for (uint i2=0; i2<_maxlrg; i2++) {
+  for (uint i2 = 0; i2 < _lrg_map.max_lrg_id(); i2++) {
     LRG &lrg = lrgs(i2);
     assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
     if (lrg.num_regs() > 1 && !lrg._fat_proj) {
@@ -875,14 +1037,13 @@
   }
 }
 
-//------------------------------set_was_low------------------------------------
 // Set the was-lo-degree bit.  Conservative coalescing should not change the
 // colorability of the graph.  If any live range was of low-degree before
 // coalescing, it should Simplify.  This call sets the was-lo-degree bit.
 // The bit is checked in Simplify.
 void PhaseChaitin::set_was_low() {
 #ifdef ASSERT
-  for( uint i = 1; i < _maxlrg; i++ ) {
+  for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
     int size = lrgs(i).num_regs();
     uint old_was_lo = lrgs(i)._was_lo;
     lrgs(i)._was_lo = 0;
@@ -912,11 +1073,10 @@
 
 #define REGISTER_CONSTRAINED 16
 
-//------------------------------cache_lrg_info---------------------------------
 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
 void PhaseChaitin::cache_lrg_info( ) {
 
-  for( uint i = 1; i < _maxlrg; i++ ) {
+  for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
     LRG &lrg = lrgs(i);
 
     // Check for being of low degree: means we can be trivially colored.
@@ -946,16 +1106,15 @@
   }
 }
 
-//------------------------------Pre-Simplify-----------------------------------
 // Simplify the IFG by removing LRGs of low degree that have NO copies
 void PhaseChaitin::Pre_Simplify( ) {
 
   // Warm up the lo-degree no-copy list
   int lo_no_copy = 0;
-  for( uint i = 1; i < _maxlrg; i++ ) {
-    if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
+  for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
+    if ((lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
         !lrgs(i).alive() ||
-        lrgs(i)._must_spill ) {
+        lrgs(i)._must_spill) {
       lrgs(i)._next = lo_no_copy;
       lo_no_copy = i;
     }
@@ -997,7 +1156,6 @@
   // No more lo-degree no-copy live ranges to simplify
 }
 
-//------------------------------Simplify---------------------------------------
 // Simplify the IFG by removing LRGs of low degree.
 void PhaseChaitin::Simplify( ) {
 
@@ -1134,7 +1292,6 @@
 
 }
 
-//------------------------------is_legal_reg-----------------------------------
 // Is 'reg' register legal for 'lrg'?
 static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
   if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
@@ -1161,12 +1318,11 @@
   return false;
 }
 
-//------------------------------bias_color-------------------------------------
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
 
   // Check for "at_risk" LRG's
-  uint risk_lrg = Find(lrg._risk_bias);
+  uint risk_lrg = _lrg_map.find(lrg._risk_bias);
   if( risk_lrg != 0 ) {
     // Walk the colored neighbors of the "at_risk" candidate
     // Choose a color which is both legal and already taken by a neighbor
@@ -1182,7 +1338,7 @@
     }
   }
 
-  uint copy_lrg = Find(lrg._copy_bias);
+  uint copy_lrg = _lrg_map.find(lrg._copy_bias);
   if( copy_lrg != 0 ) {
     // If he has a color,
     if( !(*(_ifg->_yanked))[copy_lrg] ) {
@@ -1223,7 +1379,6 @@
   return OptoReg::add( reg, chunk );
 }
 
-//------------------------------choose_color-----------------------------------
 // Choose a color in the current chunk
 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
   assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
@@ -1245,7 +1400,6 @@
   return lrg.mask().find_last_elem();
 }
 
-//------------------------------Select-----------------------------------------
 // Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
 // in reverse order of removal.  As long as nothing of hi-degree was yanked,
 // everything going back is guaranteed a color.  Select that color.  If some
@@ -1420,28 +1574,24 @@
   return spill_reg-LRG::SPILL_REG;      // Return number of spills
 }
 
-
-//------------------------------copy_was_spilled-------------------------------
 // Copy 'was_spilled'-edness from the source Node to the dst Node.
 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
   if( _spilled_once.test(src->_idx) ) {
     _spilled_once.set(dst->_idx);
-    lrgs(Find(dst))._was_spilled1 = 1;
+    lrgs(_lrg_map.find(dst))._was_spilled1 = 1;
     if( _spilled_twice.test(src->_idx) ) {
       _spilled_twice.set(dst->_idx);
-      lrgs(Find(dst))._was_spilled2 = 1;
+      lrgs(_lrg_map.find(dst))._was_spilled2 = 1;
     }
   }
 }
 
-//------------------------------set_was_spilled--------------------------------
 // Set the 'spilled_once' or 'spilled_twice' flag on a node.
 void PhaseChaitin::set_was_spilled( Node *n ) {
   if( _spilled_once.test_set(n->_idx) )
     _spilled_twice.set(n->_idx);
 }
 
-//------------------------------fixup_spills-----------------------------------
 // Convert Ideal spill instructions into proper FramePtr + offset Loads and
 // Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
 void PhaseChaitin::fixup_spills() {
@@ -1451,16 +1601,16 @@
   NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
 
   // Grab the Frame Pointer
-  Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
+  Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
 
     // For all instructions in block
-    uint last_inst = b->end_idx();
-    for( uint j = 1; j <= last_inst; j++ ) {
-      Node *n = b->_nodes[j];
+    uint last_inst = block->end_idx();
+    for (uint j = 1; j <= last_inst; j++) {
+      Node* n = block->get_node(j);
 
       // Dead instruction???
       assert( n->outcnt() != 0 ||// Nothing dead after post alloc
@@ -1474,7 +1624,7 @@
         MachNode *mach = n->as_Mach();
         inp = mach->operand_index(inp);
         Node *src = n->in(inp);   // Value to load or store
-        LRG &lrg_cisc = lrgs( Find_const(src) );
+        LRG &lrg_cisc = lrgs(_lrg_map.find_const(src));
         OptoReg::Name src_reg = lrg_cisc.reg();
         // Doubles record the HIGH register of an adjacent pair.
         src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());
@@ -1497,7 +1647,7 @@
             assert( cisc->oper_input_base() == 2, "Only adding one edge");
             cisc->ins_req(1,src);         // Requires a memory edge
           }
-          b->_nodes.map(j,cisc);          // Insert into basic block
+          block->map_node(cisc, j);          // Insert into basic block
           n->subsume_by(cisc, C); // Correct graph
           //
           ++_used_cisc_instructions;
@@ -1523,7 +1673,6 @@
   } // End of for all blocks
 }
 
-//------------------------------find_base_for_derived--------------------------
 // Helper to stretch above; recursively discover the base Node for a
 // given derived Node.  Easy for AddP-related machine nodes, but needs
 // to be recursive for derived Phis.
@@ -1553,23 +1702,22 @@
       // Initialize it once and make it shared:
       // set control to _root and place it into Start block
       // (where top() node is placed).
-      base->init_req(0, _cfg._root);
-      Block *startb = _cfg._bbs[C->top()->_idx];
-      startb->_nodes.insert(startb->find_node(C->top()), base );
-      _cfg._bbs.map( base->_idx, startb );
-      assert (n2lidx(base) == 0, "should not have LRG yet");
+      base->init_req(0, _cfg.get_root_node());
+      Block *startb = _cfg.get_block_for_node(C->top());
+      startb->insert_node(base, startb->find_node(C->top()));
+      _cfg.map_node_to_block(base, startb);
+      assert(_lrg_map.live_range_id(base) == 0, "should not have LRG yet");
     }
-    if (n2lidx(base) == 0) {
+    if (_lrg_map.live_range_id(base) == 0) {
       new_lrg(base, maxlrg++);
     }
-    assert(base->in(0) == _cfg._root &&
-           _cfg._bbs[base->_idx] == _cfg._bbs[C->top()->_idx], "base NULL should be shared");
+    assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
     derived_base_map[derived->_idx] = base;
     return base;
   }
 
   // Check for AddP-related opcodes
-  if( !derived->is_Phi() ) {
+  if (!derived->is_Phi()) {
     assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));
     Node *base = derived->in(AddPNode::Base);
     derived_base_map[derived->_idx] = base;
@@ -1599,12 +1747,12 @@
   base->as_Phi()->set_type(t);
 
   // Search the current block for an existing base-Phi
-  Block *b = _cfg._bbs[derived->_idx];
+  Block *b = _cfg.get_block_for_node(derived);
   for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
-    Node *phi = b->_nodes[i];
+    Node *phi = b->get_node(i);
     if( !phi->is_Phi() ) {      // Found end of Phis with no match?
-      b->_nodes.insert( i, base ); // Must insert created Phi here as base
-      _cfg._bbs.map( base->_idx, b );
+      b->insert_node(base, i); // Must insert created Phi here as base
+      _cfg.map_node_to_block(base, b);
       new_lrg(base,maxlrg++);
       break;
     }
@@ -1626,27 +1774,25 @@
   return base;
 }
 
-
-//------------------------------stretch_base_pointer_live_ranges---------------
 // At each Safepoint, insert extra debug edges for each pair of derived value/
 // base pointer that is live across the Safepoint for oopmap building.  The
 // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
 // required edge set.
-bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) {
+bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
   int must_recompute_live = false;
-  uint maxlrg = _maxlrg;
+  uint maxlrg = _lrg_map.max_lrg_id();
   Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
   memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
 
   // For all blocks in RPO do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     // Note use of deep-copy constructor.  I cannot hammer the original
     // liveout bits, because they are needed by the following coalesce pass.
-    IndexSet liveout(_live->live(b));
+    IndexSet liveout(_live->live(block));
 
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->get_node(j - 1);
 
       // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
       // like to see in the same register.  Compare uses the loop-phi and so
@@ -1660,8 +1806,8 @@
       if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
         Node *phi = n->in(1);
         if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
-          Block *phi_block = _cfg._bbs[phi->_idx];
-          if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
+          Block *phi_block = _cfg.get_block_for_node(phi);
+          if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
             const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
             Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
             insert_proj( phi_block, 1, spill, maxlrg++ );
@@ -1672,15 +1818,18 @@
       }
 
       // Get value being defined
-      uint lidx = n2lidx(n);
-      if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
+      uint lidx = _lrg_map.live_range_id(n);
+      // Ignore the occasional brand-new live range
+      if (lidx && lidx < _lrg_map.max_lrg_id()) {
         // Remove from live-out set
         liveout.remove(lidx);
 
         // Copies do not define a new value and so do not interfere.
         // Remove the copies source from the liveout set before interfering.
         uint idx = n->is_Copy();
-        if( idx ) liveout.remove( n2lidx(n->in(idx)) );
+        if (idx) {
+          liveout.remove(_lrg_map.live_range_id(n->in(idx)));
+        }
       }
 
       // Found a safepoint?
@@ -1698,21 +1847,21 @@
                   derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
           // If its an OOP with a non-zero offset, then it is derived.
           if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
-            Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
-            assert( base->_idx < _names.Size(), "" );
+            Node *base = find_base_for_derived(derived_base_map, derived, maxlrg);
+            assert(base->_idx < _lrg_map.size(), "");
             // Add reaching DEFs of derived pointer and base pointer as a
             // pair of inputs
-            n->add_req( derived );
-            n->add_req( base );
+            n->add_req(derived);
+            n->add_req(base);
 
             // See if the base pointer is already live to this point.
             // Since I'm working on the SSA form, live-ness amounts to
             // reaching def's.  So if I find the base's live range then
             // I know the base's def reaches here.
-            if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or
-                 !liveout.member( n2lidx(base) ) ) && // not live) AND
-                 (n2lidx(base) > 0)                && // not a constant
-                 _cfg._bbs[base->_idx] != b ) {     //  base not def'd in blk)
+            if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
+                 !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
+                 (_lrg_map.live_range_id(base) > 0) && // not a constant
+                 _cfg.get_block_for_node(base) != block) { // base not def'd in blk)
               // Base pointer is not currently live.  Since I stretched
               // the base pointer to here and it crosses basic-block
               // boundaries, the global live info is now incorrect.
@@ -1724,11 +1873,12 @@
       } // End of if found a GC point
 
       // Make all inputs live
-      if( !n->is_Phi() ) {      // Phi function uses come from prior block
-        for( uint k = 1; k < n->req(); k++ ) {
-          uint lidx = n2lidx(n->in(k));
-          if( lidx < _maxlrg )
-            liveout.insert( lidx );
+      if (!n->is_Phi()) {      // Phi function uses come from prior block
+        for (uint k = 1; k < n->req(); k++) {
+          uint lidx = _lrg_map.live_range_id(n->in(k));
+          if (lidx < _lrg_map.max_lrg_id()) {
+            liveout.insert(lidx);
+          }
         }
       }
 
@@ -1736,28 +1886,27 @@
     liveout.clear();  // Free the memory used by liveout.
 
   } // End of forall blocks
-  _maxlrg = maxlrg;
+  _lrg_map.set_max_lrg_id(maxlrg);
 
   // If I created a new live range I need to recompute live
-  if( maxlrg != _ifg->_maxlrg )
+  if (maxlrg != _ifg->_maxlrg) {
     must_recompute_live = true;
+  }
 
   return must_recompute_live != 0;
 }
 
 
-//------------------------------add_reference----------------------------------
 // Extend the node to LRG mapping
-void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) {
-  _names.extend( node->_idx, n2lidx(old_node) );
+void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
+  _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
-void PhaseChaitin::dump( const Node *n ) const {
-  uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0;
+void PhaseChaitin::dump(const Node *n) const {
+  uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
   tty->print("L%d",r);
-  if( r && n->Opcode() != Op_Phi ) {
+  if (r && n->Opcode() != Op_Phi) {
     if( _node_regs ) {          // Got a post-allocation copy of allocation?
       tty->print("[");
       OptoReg::Name second = get_reg_second(n);
@@ -1778,11 +1927,13 @@
   tty->print("/N%d\t",n->_idx);
   tty->print("%s === ", n->Name());
   uint k;
-  for( k = 0; k < n->req(); k++) {
+  for (k = 0; k < n->req(); k++) {
     Node *m = n->in(k);
-    if( !m ) tty->print("_ ");
+    if (!m) {
+      tty->print("_ ");
+    }
     else {
-      uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+      uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
       tty->print("L%d",r);
       // Data MultiNode's can have projections with no real registers.
       // Don't die while dumping them.
@@ -1813,8 +1964,10 @@
   if( k < n->len() && n->in(k) ) tty->print("| ");
   for( ; k < n->len(); k++ ) {
     Node *m = n->in(k);
-    if( !m ) break;
-    uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+    if (!m) {
+      break;
+    }
+    uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
     tty->print("L%d",r);
     tty->print("/N%d ",m->_idx);
   }
@@ -1828,12 +1981,12 @@
   tty->print("\n");
 }
 
-void PhaseChaitin::dump( const Block * b ) const {
-  b->dump_head( &_cfg._bbs );
+void PhaseChaitin::dump(const Block *b) const {
+  b->dump_head(&_cfg);
 
   // For all instructions
-  for( uint j = 0; j < b->_nodes.size(); j++ )
-    dump(b->_nodes[j]);
+  for (uint j = 0; j < b->number_of_nodes(); j++)
+    dump(b->get_node(j));
   // Print live-out info at end of block
   if( _live ) {
     tty->print("Liveout: ");
@@ -1842,7 +1995,7 @@
     tty->print("{");
     uint i;
     while ((i = elements.next()) != 0) {
-      tty->print("L%d ", Find_const(i));
+      tty->print("L%d ", _lrg_map.find_const(i));
     }
     tty->print_cr("}");
   }
@@ -1854,8 +2007,9 @@
               _matcher._new_SP, _framesize );
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ )
-    dump(_cfg._blocks[i]);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    dump(_cfg.get_block(i));
+  }
   // End of per-block dump
   tty->print("\n");
 
@@ -1866,10 +2020,14 @@
 
   // Dump LRG array
   tty->print("--- Live RanGe Array ---\n");
-  for(uint i2 = 1; i2 < _maxlrg; i2++ ) {
+  for (uint i2 = 1; i2 < _lrg_map.max_lrg_id(); i2++) {
     tty->print("L%d: ",i2);
-    if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( );
-    else tty->print_cr("new LRG");
+    if (i2 < _ifg->_maxlrg) {
+      lrgs(i2).dump();
+    }
+    else {
+      tty->print_cr("new LRG");
+    }
   }
   tty->cr();
 
@@ -1892,7 +2050,6 @@
   tty->cr();
 }
 
-//------------------------------dump_degree_lists------------------------------
 void PhaseChaitin::dump_degree_lists() const {
   // Dump lo-degree list
   tty->print("Lo degree: ");
@@ -1913,7 +2070,6 @@
   tty->cr();
 }
 
-//------------------------------dump_simplified--------------------------------
 void PhaseChaitin::dump_simplified() const {
   tty->print("Simplified: ");
   for( uint i = _simplified; i; i = lrgs(i)._next )
@@ -1932,7 +2088,6 @@
   return buf+strlen(buf);
 }
 
-//------------------------------dump_register----------------------------------
 // Dump a register name into a buffer.  Be intelligent if we get called
 // before allocation is complete.
 char *PhaseChaitin::dump_register( const Node *n, char *buf  ) const {
@@ -1942,7 +2097,7 @@
     // Post allocation, use direct mappings, no LRG info available
     print_reg( get_reg_first(n), this, buf );
   } else {
-    uint lidx = Find_const(n); // Grab LRG number
+    uint lidx = _lrg_map.find_const(n); // Grab LRG number
     if( !_ifg ) {
       sprintf(buf,"L%d",lidx);  // No register binding yet
     } else if( !lidx ) {        // Special, not allocated value
@@ -1966,12 +2121,11 @@
   return buf+strlen(buf);
 }
 
-//----------------------dump_for_spill_split_recycle--------------------------
 void PhaseChaitin::dump_for_spill_split_recycle() const {
   if( WizardMode && (PrintCompilation || PrintOpto) ) {
     // Display which live ranges need to be split and the allocator's state
     tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);
-    for( uint bidx = 1; bidx < _maxlrg; bidx++ ) {
+    for (uint bidx = 1; bidx < _lrg_map.max_lrg_id(); bidx++) {
       if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
         tty->print("L%d: ", bidx);
         lrgs(bidx).dump();
@@ -1982,7 +2136,6 @@
   }
 }
 
-//------------------------------dump_frame------------------------------------
 void PhaseChaitin::dump_frame() const {
   const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
   const TypeTuple *domain = C->tf()->domain();
@@ -2088,28 +2241,30 @@
   tty->print_cr("#");
 }
 
-//------------------------------dump_bb----------------------------------------
 void PhaseChaitin::dump_bb( uint pre_order ) const {
   tty->print_cr("---dump of B%d---",pre_order);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    if( b->_pre_order == pre_order )
-      dump(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    if (block->_pre_order == pre_order) {
+      dump(block);
+    }
   }
 }
 
-//------------------------------dump_lrg---------------------------------------
 void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
   tty->print_cr("---dump of L%d---",lidx);
 
-  if( _ifg ) {
-    if( lidx >= _maxlrg ) {
+  if (_ifg) {
+    if (lidx >= _lrg_map.max_lrg_id()) {
       tty->print("Attempt to print live range index beyond max live range.\n");
       return;
     }
     tty->print("L%d: ",lidx);
-    if( lidx < _ifg->_maxlrg ) lrgs(lidx).dump( );
-    else tty->print_cr("new LRG");
+    if (lidx < _ifg->_maxlrg) {
+      lrgs(lidx).dump();
+    } else {
+      tty->print_cr("new LRG");
+    }
   }
   if( _ifg && lidx < _ifg->_maxlrg) {
     tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
@@ -2117,17 +2272,17 @@
     tty->cr();
   }
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     int dump_once = 0;
 
     // For all instructions
-    for( uint j = 0; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
-      if( Find_const(n) == lidx ) {
-        if( !dump_once++ ) {
+    for (uint j = 0; j < block->number_of_nodes(); j++) {
+      Node *n = block->get_node(j);
+      if (_lrg_map.find_const(n) == lidx) {
+        if (!dump_once++) {
           tty->cr();
-          b->dump_head( &_cfg._bbs );
+          block->dump_head(&_cfg);
         }
         dump(n);
         continue;
@@ -2136,11 +2291,13 @@
         uint cnt = n->req();
         for( uint k = 1; k < cnt; k++ ) {
           Node *m = n->in(k);
-          if (!m)  continue;  // be robust in the dumper
-          if( Find_const(m) == lidx ) {
-            if( !dump_once++ ) {
+          if (!m) {
+            continue;  // be robust in the dumper
+          }
+          if (_lrg_map.find_const(m) == lidx) {
+            if (!dump_once++) {
               tty->cr();
-              b->dump_head( &_cfg._bbs );
+              block->dump_head(&_cfg);
             }
             dump(n);
           }
@@ -2152,7 +2309,6 @@
 }
 #endif // not PRODUCT
 
-//------------------------------print_chaitin_statistics-------------------------------
 int PhaseChaitin::_final_loads  = 0;
 int PhaseChaitin::_final_stores = 0;
 int PhaseChaitin::_final_memoves= 0;
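[Editor's note] For readers following the stretch_base_pointer_live_ranges() hunk above: the loop walks each block bottom-up from its liveout set, removing the live range a node defines and then inserting the live ranges its inputs use. Below is a minimal standalone sketch of that bookkeeping only, using plain STL containers and illustrative types (Instr, live_in) rather than HotSpot's.

#include <cstddef>
#include <set>
#include <vector>

// Illustrative only: ids are live range numbers, 0 means "no live range".
struct Instr {
  int def;                 // live range defined by this instruction (0 = none)
  std::vector<int> uses;   // live ranges read by this instruction
};

// Walk a block bottom-up, starting from its liveout set: a def kills its
// value, then the uses become live. The result is the block's live-in set.
std::set<int> live_in(const std::vector<Instr>& block, std::set<int> liveout) {
  for (std::vector<Instr>::const_reverse_iterator it = block.rbegin();
       it != block.rend(); ++it) {
    if (it->def != 0) {
      liveout.erase(it->def);          // the definition kills the value
    }
    for (std::size_t k = 0; k < it->uses.size(); k++) {
      if (it->uses[k] != 0) {
        liveout.insert(it->uses[k]);   // inputs become live above this point
      }
    }
  }
  return liveout;
}

The real code additionally pairs each live derived pointer with its base at safepoints, which is what can force liveness to be recomputed when a new base live range is created.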
--- a/src/share/vm/opto/chaitin.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/chaitin.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -290,18 +290,118 @@
   int effective_degree( uint lidx ) const;
 };
 
-// TEMPORARILY REPLACED WITH COMMAND LINE FLAG
+// The LiveRangeMap class is responsible for storing the node to live range id mapping.
+// Each node is mapped to a live range id (a virtual register). Nodes that are
+// not considered for register allocation are given live range id 0.
+class LiveRangeMap VALUE_OBJ_CLASS_SPEC {
+
+private:
+
+  uint _max_lrg_id;
+
+  // Union-find map.  Declared as a short for speed.
+  // Indexed by live-range number, it returns the compacted live-range number
+  LRG_List _uf_map;
+
+  // Map from Nodes to live ranges
+  LRG_List _names;
+
+  // Straight out of Tarjan's union-find algorithm
+  uint find_compress(const Node *node) {
+    uint lrg_id = find_compress(_names[node->_idx]);
+    _names.map(node->_idx, lrg_id);
+    return lrg_id;
+  }
+
+  uint find_compress(uint lrg);
+
+public:
+
+  const LRG_List& names() {
+    return _names;
+  }
+
+  uint max_lrg_id() const {
+    return _max_lrg_id;
+  }
+
+  void set_max_lrg_id(uint max_lrg_id) {
+    _max_lrg_id = max_lrg_id;
+  }
+
+  uint size() const {
+    return _names.Size();
+  }
+
+  uint live_range_id(uint idx) const {
+    return _names[idx];
+  }
+
+  uint live_range_id(const Node *node) const {
+    return _names[node->_idx];
+  }
+
+  uint uf_live_range_id(uint lrg_id) const {
+    return _uf_map[lrg_id];
+  }
 
-//// !!!!! Magic Constants need to move into ad file
-#ifdef SPARC
-//#define FLOAT_PRESSURE 30  /*     SFLT_REG_mask.Size() - 1 */
-//#define INT_PRESSURE   23  /* NOTEMP_I_REG_mask.Size() - 1 */
-#define FLOAT_INCREMENT(regs) regs
-#else
-//#define FLOAT_PRESSURE 6
-//#define INT_PRESSURE   6
-#define FLOAT_INCREMENT(regs) 1
-#endif
+  void map(uint idx, uint lrg_id) {
+    _names.map(idx, lrg_id);
+  }
+
+  void uf_map(uint dst_lrg_id, uint src_lrg_id) {
+    _uf_map.map(dst_lrg_id, src_lrg_id);
+  }
+
+  void extend(uint idx, uint lrg_id) {
+    _names.extend(idx, lrg_id);
+  }
+
+  void uf_extend(uint dst_lrg_id, uint src_lrg_id) {
+    _uf_map.extend(dst_lrg_id, src_lrg_id);
+  }
+
+  LiveRangeMap(uint unique)
+  : _names(unique)
+  , _uf_map(unique)
+  , _max_lrg_id(0) {}
+
+  uint find_id(const Node *n) {
+    uint retval = live_range_id(n);
+    assert(retval == find(n),"Invalid node to lidx mapping");
+    return retval;
+  }
+
+  // Reset the Union-Find map to identity
+  void reset_uf_map(uint max_lrg_id);
+
+  // Make all Nodes map directly to their final live range; no need for
+  // the Union-Find mapping after this call.
+  void compress_uf_map_for_nodes();
+
+  uint find(uint lidx) {
+    uint uf_lidx = _uf_map[lidx];
+    return (uf_lidx == lidx) ? uf_lidx : find_compress(lidx);
+  }
+
+  // Convert a Node into a Live Range Index - a lidx
+  uint find(const Node *node) {
+    uint lidx = live_range_id(node);
+    uint uf_lidx = _uf_map[lidx];
+    return (uf_lidx == lidx) ? uf_lidx : find_compress(node);
+  }
+
+  // Like Find above, but no path compress, so bad asymptotic behavior
+  uint find_const(uint lrg) const;
+
+  // Like Find above, but no path compress, so bad asymptotic behavior
+  uint find_const(const Node *node) const {
+    if (node->_idx >= _names.Size()) {
+      return 0; // not mapped, usual for debug dump
+    }
+    return find_const(_names[node->_idx]);
+  }
+};
 
 //------------------------------Chaitin----------------------------------------
 // Briggs-Chaitin style allocation, mostly.
@@ -311,7 +411,6 @@
   int _trip_cnt;
   int _alternate;
 
-  uint _maxlrg;                 // Max live range number
   LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
   PhaseLive *_live;             // Liveness, used in the interference graph
   PhaseIFG *_ifg;               // Interference graph (for original chunk)
@@ -319,16 +418,6 @@
   VectorSet _spilled_once;      // Nodes that have been spilled
   VectorSet _spilled_twice;     // Nodes that have been spilled twice
 
-  LRG_List _names;              // Map from Nodes to Live RanGes
-
-  // Union-find map.  Declared as a short for speed.
-  // Indexed by live-range number, it returns the compacted live-range number
-  LRG_List _uf_map;
-  // Reset the Union-Find map to identity
-  void reset_uf_map( uint maxlrg );
-  // Remove the need for the Union-Find mapping
-  void compress_uf_map_for_nodes( );
-
   // Combine the Live Range Indices for these 2 Nodes into a single live
   // range.  Future requests for any Node in either live range will
   // return the live range index for the combined live range.
@@ -347,7 +436,23 @@
   // Helper functions for Split()
   uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
   uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
-  int clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg );
+
+  //------------------------------clone_projs------------------------------------
+  // After cloning some rematerialized instruction, clone any MachProj's that
+  // follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
+  // use G3 as an address temp.
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id);
+
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) {
+    uint max_lrg_id = lrg_map.max_lrg_id();
+    int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id);
+    if (found_projs > 0) {
+      // max_lrg_id is updated during call above
+      lrg_map.set_max_lrg_id(max_lrg_id);
+    }
+    return found_projs;
+  }
+
   Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits,
                             int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
   // True if lidx is used before any real register is def'd in the block
@@ -374,20 +479,11 @@
   PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher );
   ~PhaseChaitin() {}
 
-  // Convert a Node into a Live Range Index - a lidx
-  uint Find( const Node *n ) {
-    uint lidx = n2lidx(n);
-    uint uf_lidx = _uf_map[lidx];
-    return (uf_lidx == lidx) ? uf_lidx : Find_compress(n);
-  }
-  uint Find_const( uint lrg ) const;
-  uint Find_const( const Node *n ) const;
+  LiveRangeMap _lrg_map;
 
   // Do all the real work of allocate
   void Register_Allocate();
 
-  uint n2lidx( const Node *n ) const { return _names[n->_idx]; }
-
   float high_frequency_lrg() const { return _high_frequency_lrg; }
 
 #ifndef PRODUCT
@@ -399,18 +495,6 @@
   // all inputs to a PhiNode, effectively coalescing live ranges.  Insert
   // copies as needed.
   void de_ssa();
-  uint Find_compress( const Node *n );
-  uint Find( uint lidx ) {
-    uint uf_lidx = _uf_map[lidx];
-    return (uf_lidx == lidx) ? uf_lidx : Find_compress(lidx);
-  }
-  uint Find_compress( uint lidx );
-
-  uint Find_id( const Node *n ) {
-    uint retval = n2lidx(n);
-    assert(retval == Find(n),"Invalid node to lidx mapping");
-    return retval;
-  }
 
   // Add edge between reg and everything in the vector.
   // Same as _ifg->add_vector(reg,live) EXCEPT use the RegMask
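[Editor's note] The LiveRangeMap added to this header is a thin wrapper around a Tarjan-style union-find keyed by live range id: unions always map the larger id onto the smaller one, and find_compress() flattens chains so later lookups stay cheap. A self-contained sketch of that scheme, assuming illustrative names (UnionFind, _map, join) rather than the HotSpot ones:

#include <algorithm>
#include <vector>

class UnionFind {
private:
  std::vector<unsigned> _map;                 // indexed by live range id
public:
  explicit UnionFind(unsigned max_id) : _map(max_id) {
    for (unsigned i = 0; i < max_id; i++) {
      _map[i] = i;                            // identity, like reset_uf_map()
    }
  }
  // Path-compressing lookup, analogous to find_compress().
  unsigned find(unsigned id) {
    unsigned root = id;
    while (_map[root] != root) {              // chase the chain of equivalences
      root = _map[root];
    }
    while (_map[id] != root) {                // point the whole chain at the root
      unsigned next = _map[id];
      _map[id] = root;
      id = next;
    }
    return root;
  }
  // Union, always mapping the larger id onto the smaller one.
  void join(unsigned a, unsigned b) {
    unsigned ra = find(a);
    unsigned rb = find(b);
    if (ra == rb) return;
    if (ra > rb) std::swap(ra, rb);
    _map[rb] = ra;
  }
};

find_const() in the real class is the same chain walk without the compression step, so it can be called from const dump code at the cost of worse asymptotic behavior.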
--- a/src/share/vm/opto/coalesce.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/coalesce.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,185 +35,29 @@
 #include "opto/regmask.hpp"
 
 //=============================================================================
-//------------------------------reset_uf_map-----------------------------------
-void PhaseChaitin::reset_uf_map( uint maxlrg ) {
-  _maxlrg = maxlrg;
-  // Force the Union-Find mapping to be at least this large
-  _uf_map.extend(_maxlrg,0);
-  // Initialize it to be the ID mapping.
-  for( uint i=0; i<_maxlrg; i++ )
-    _uf_map.map(i,i);
-}
-
-//------------------------------compress_uf_map--------------------------------
-// Make all Nodes map directly to their final live range; no need for
-// the Union-Find mapping after this call.
-void PhaseChaitin::compress_uf_map_for_nodes( ) {
-  // For all Nodes, compress mapping
-  uint unique = _names.Size();
-  for( uint i=0; i<unique; i++ ) {
-    uint lrg = _names[i];
-    uint compressed_lrg = Find(lrg);
-    if( lrg != compressed_lrg )
-      _names.map(i,compressed_lrg);
-  }
-}
-
-//------------------------------Find-------------------------------------------
-// Straight out of Tarjan's union-find algorithm
-uint PhaseChaitin::Find_compress( uint lrg ) {
-  uint cur = lrg;
-  uint next = _uf_map[cur];
-  while( next != cur ) {        // Scan chain of equivalences
-    assert( next < cur, "always union smaller" );
-    cur = next;                 // until find a fixed-point
-    next = _uf_map[cur];
-  }
-  // Core of union-find algorithm: update chain of
-  // equivalences to be equal to the root.
-  while( lrg != next ) {
-    uint tmp = _uf_map[lrg];
-    _uf_map.map(lrg, next);
-    lrg = tmp;
-  }
-  return lrg;
-}
-
-//------------------------------Find-------------------------------------------
-// Straight out of Tarjan's union-find algorithm
-uint PhaseChaitin::Find_compress( const Node *n ) {
-  uint lrg = Find_compress(_names[n->_idx]);
-  _names.map(n->_idx,lrg);
-  return lrg;
-}
-
-//------------------------------Find_const-------------------------------------
-// Like Find above, but no path compress, so bad asymptotic behavior
-uint PhaseChaitin::Find_const( uint lrg ) const {
-  if( !lrg ) return lrg;        // Ignore the zero LRG
-  // Off the end?  This happens during debugging dumps when you got
-  // brand new live ranges but have not told the allocator yet.
-  if( lrg >= _maxlrg ) return lrg;
-  uint next = _uf_map[lrg];
-  while( next != lrg ) {        // Scan chain of equivalences
-    assert( next < lrg, "always union smaller" );
-    lrg = next;                 // until find a fixed-point
-    next = _uf_map[lrg];
-  }
-  return next;
-}
-
-//------------------------------Find-------------------------------------------
-// Like Find above, but no path compress, so bad asymptotic behavior
-uint PhaseChaitin::Find_const( const Node *n ) const {
-  if( n->_idx >= _names.Size() ) return 0; // not mapped, usual for debug dump
-  return Find_const( _names[n->_idx] );
-}
-
-//------------------------------Union------------------------------------------
-// union 2 sets together.
-void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
-  uint src = Find(src_n);
-  uint dst = Find(dst_n);
-  assert( src, "" );
-  assert( dst, "" );
-  assert( src < _maxlrg, "oob" );
-  assert( dst < _maxlrg, "oob" );
-  assert( src < dst, "always union smaller" );
-  _uf_map.map(dst,src);
-}
-
-//------------------------------new_lrg----------------------------------------
-void PhaseChaitin::new_lrg( const Node *x, uint lrg ) {
-  // Make the Node->LRG mapping
-  _names.extend(x->_idx,lrg);
-  // Make the Union-Find mapping an identity function
-  _uf_map.extend(lrg,lrg);
-}
-
-//------------------------------clone_projs------------------------------------
-// After cloning some rematerialized instruction, clone any MachProj's that
-// follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
-// use G3 as an address temp.
-int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
-  assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
-  DEBUG_ONLY( Block* borig = _cfg._bbs[orig->_idx]; )
-  int found_projs = 0;
-  uint cnt = orig->outcnt();
-  for (uint i = 0; i < cnt; i++) {
-    Node* proj = orig->raw_out(i);
-    if (proj->is_MachProj()) {
-      assert(proj->outcnt() == 0, "only kill projections are expected here");
-      assert(_cfg._bbs[proj->_idx] == borig, "incorrect block for kill projections");
-      found_projs++;
-      // Copy kill projections after the cloned node
-      Node* kills = proj->clone();
-      kills->set_req(0, copy);
-      b->_nodes.insert(idx++, kills);
-      _cfg._bbs.map(kills->_idx, b);
-      new_lrg(kills, max_lrg_id++);
-    }
-  }
-  return found_projs;
-}
-
-//------------------------------compact----------------------------------------
-// Renumber the live ranges to compact them.  Makes the IFG smaller.
-void PhaseChaitin::compact() {
-  // Current the _uf_map contains a series of short chains which are headed
-  // by a self-cycle.  All the chains run from big numbers to little numbers.
-  // The Find() call chases the chains & shortens them for the next Find call.
-  // We are going to change this structure slightly.  Numbers above a moving
-  // wave 'i' are unchanged.  Numbers below 'j' point directly to their
-  // compacted live range with no further chaining.  There are no chains or
-  // cycles below 'i', so the Find call no longer works.
-  uint j=1;
-  uint i;
-  for( i=1; i < _maxlrg; i++ ) {
-    uint lr = _uf_map[i];
-    // Ignore unallocated live ranges
-    if( !lr ) continue;
-    assert( lr <= i, "" );
-    _uf_map.map(i, ( lr == i ) ? j++ : _uf_map[lr]);
-  }
-  if( false )                  // PrintOptoCompactLiveRanges
-    printf("Compacted %d LRs from %d\n",i-j,i);
-  // Now change the Node->LR mapping to reflect the compacted names
-  uint unique = _names.Size();
-  for( i=0; i<unique; i++ )
-    _names.map(i,_uf_map[_names[i]]);
-
-  // Reset the Union-Find mapping
-  reset_uf_map(j);
-
-}
-
-//=============================================================================
-//------------------------------Dump-------------------------------------------
 #ifndef PRODUCT
-void PhaseCoalesce::dump( Node *n ) const {
+void PhaseCoalesce::dump(Node *n) const {
   // Being a const function means I cannot use 'Find'
-  uint r = _phc.Find(n);
+  uint r = _phc._lrg_map.find(n);
   tty->print("L%d/N%d ",r,n->_idx);
 }
 
-//------------------------------dump-------------------------------------------
 void PhaseCoalesce::dump() const {
   // I know I have a block layout now, so I can print blocks in a loop
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
     uint j;
-    Block *b = _phc._cfg._blocks[i];
+    Block* b = _phc._cfg.get_block(i);
     // Print a nice block header
     tty->print("B%d: ",b->_pre_order);
     for( j=1; j<b->num_preds(); j++ )
-      tty->print("B%d ", _phc._cfg._bbs[b->pred(j)->_idx]->_pre_order);
+      tty->print("B%d ", _phc._cfg.get_block_for_node(b->pred(j))->_pre_order);
     tty->print("-> ");
     for( j=0; j<b->_num_succs; j++ )
       tty->print("B%d ",b->_succs[j]->_pre_order);
     tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
-    uint cnt = b->_nodes.size();
+    uint cnt = b->number_of_nodes();
     for( j=0; j<cnt; j++ ) {
-      Node *n = b->_nodes[j];
+      Node *n = b->get_node(j);
       dump( n );
       tty->print("\t%s\t",n->Name());
 
@@ -239,11 +83,10 @@
 }
 #endif
 
-//------------------------------combine_these_two------------------------------
 // Combine the live ranges def'd by these 2 Nodes.  N2 is an input to N1.
-void PhaseCoalesce::combine_these_two( Node *n1, Node *n2 ) {
-  uint lr1 = _phc.Find(n1);
-  uint lr2 = _phc.Find(n2);
+void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
+  uint lr1 = _phc._lrg_map.find(n1);
+  uint lr2 = _phc._lrg_map.find(n2);
   if( lr1 != lr2 &&             // Different live ranges already AND
       !_phc._ifg->test_edge_sq( lr1, lr2 ) ) {  // Do not interfere
     LRG *lrg1 = &_phc.lrgs(lr1);
@@ -281,18 +124,15 @@
   }
 }
 
-//------------------------------coalesce_driver--------------------------------
 // Copy coalescing
-void PhaseCoalesce::coalesce_driver( ) {
-
+void PhaseCoalesce::coalesce_driver() {
   verify();
   // Coalesce from high frequency to low
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ )
-    coalesce( _phc._blks[i] );
-
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
+    coalesce(_phc._blks[i]);
+  }
 }
 
-//------------------------------insert_copy_with_overlap-----------------------
 // I am inserting copies to come out of SSA form.  In the general case, I am
 // doing a parallel renaming.  I'm in the Named world now, so I can't do a
 // general parallel renaming.  All the copies now use  "names" (live-ranges)
@@ -312,14 +152,18 @@
   // I am about to clobber the dst_name, so the copy must be inserted
   // after the last use.  Last use is really first-use on a backwards scan.
   uint i = b->end_idx()-1;
-  while( 1 ) {
-    Node *n = b->_nodes[i];
+  while (1) {
+    Node *n = b->get_node(i);
     // Check for end of virtual copies; this is also the end of the
     // parallel renaming effort.
-    if( n->_idx < _unique ) break;
+    if (n->_idx < _unique) {
+      break;
+    }
     uint idx = n->is_Copy();
     assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
-    if( idx && _phc.Find(n->in(idx)) == dst_name ) break;
+    if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) {
+      break;
+    }
     i--;
   }
   uint last_use_idx = i;
@@ -330,49 +174,53 @@
   // There can be only 1 kill that exits any block and that is
   // the last kill.  Thus it is the first kill on a backwards scan.
   i = b->end_idx()-1;
-  while( 1 ) {
-    Node *n = b->_nodes[i];
+  while (1) {
+    Node *n = b->get_node(i);
     // Check for end of virtual copies; this is also the end of the
     // parallel renaming effort.
-    if( n->_idx < _unique ) break;
+    if (n->_idx < _unique) {
+      break;
+    }
     assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
-    if( _phc.Find(n) == src_name ) {
+    if (_phc._lrg_map.find(n) == src_name) {
       kill_src_idx = i;
       break;
     }
     i--;
   }
   // Need a temp?  Last use of dst comes after the kill of src?
-  if( last_use_idx >= kill_src_idx ) {
+  if (last_use_idx >= kill_src_idx) {
     // Need to break a cycle with a temp
     uint idx = copy->is_Copy();
     Node *tmp = copy->clone();
-    _phc.new_lrg(tmp,_phc._maxlrg++);
+    uint max_lrg_id = _phc._lrg_map.max_lrg_id();
+    _phc.new_lrg(tmp, max_lrg_id);
+    _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
+
     // Insert new temp between copy and source
     tmp ->set_req(idx,copy->in(idx));
     copy->set_req(idx,tmp);
     // Save source in temp early, before source is killed
-    b->_nodes.insert(kill_src_idx,tmp);
-    _phc._cfg._bbs.map( tmp->_idx, b );
+    b->insert_node(tmp, kill_src_idx);
+    _phc._cfg.map_node_to_block(tmp, b);
     last_use_idx++;
   }
 
   // Insert just after last use
-  b->_nodes.insert(last_use_idx+1,copy);
+  b->insert_node(copy, last_use_idx + 1);
 }
 
-//------------------------------insert_copies----------------------------------
 void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
   // We do LRGs compressing and fix a liveout data only here since the other
   // place in Split() is guarded by the assert which we never hit.
-  _phc.compress_uf_map_for_nodes();
+  _phc._lrg_map.compress_uf_map_for_nodes();
   // Fix block's liveout data for compressed live ranges.
-  for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) {
-    uint compressed_lrg = _phc.Find(lrg);
-    if( lrg != compressed_lrg ) {
-      for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) {
-        IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
-        if( liveout->member(lrg) ) {
+  for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
+    uint compressed_lrg = _phc._lrg_map.find(lrg);
+    if (lrg != compressed_lrg) {
+      for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
+        IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
+        if (liveout->member(lrg)) {
           liveout->remove(lrg);
           liveout->insert(compressed_lrg);
         }
@@ -384,12 +232,14 @@
   // Nodes with index less than '_unique' are original, non-virtual Nodes.
   _unique = C->unique();
 
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
-    Block *b = _phc._cfg._blocks[i];
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
+    C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
+    if (C->failing()) return;
+    Block *b = _phc._cfg.get_block(i);
     uint cnt = b->num_preds();  // Number of inputs to the Phi
 
-    for( uint l = 1; l<b->_nodes.size(); l++ ) {
-      Node *n = b->_nodes[l];
+    for( uint l = 1; l<b->number_of_nodes(); l++ ) {
+      Node *n = b->get_node(l);
 
       // Do not use removed-copies, use copied value instead
       uint ncnt = n->req();
@@ -398,8 +248,9 @@
         uint cidx = copy->is_Copy();
         if( cidx ) {
           Node *def = copy->in(cidx);
-          if( _phc.Find(copy) == _phc.Find(def) )
-            n->set_req(k,def);
+          if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) {
+            n->set_req(k, def);
+          }
         }
       }
 
@@ -407,26 +258,28 @@
       uint cidx = n->is_Copy();
       if( cidx ) {
         Node *def = n->in(cidx);
-        if( _phc.Find(n) == _phc.Find(def) ) {
+        if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) {
           n->replace_by(def);
           n->set_req(cidx,NULL);
-          b->_nodes.remove(l);
+          b->remove_node(l);
           l--;
           continue;
         }
       }
 
-      if( n->is_Phi() ) {
+      if (n->is_Phi()) {
         // Get the chosen name for the Phi
-        uint phi_name = _phc.Find( n );
+        uint phi_name = _phc._lrg_map.find(n);
         // Ignore the pre-allocated specials
-        if( !phi_name ) continue;
+        if (!phi_name) {
+          continue;
+        }
         // Check for mismatch inputs to Phi
-        for( uint j = 1; j<cnt; j++ ) {
+        for (uint j = 1; j < cnt; j++) {
           Node *m = n->in(j);
-          uint src_name = _phc.Find(m);
-          if( src_name != phi_name ) {
-            Block *pred = _phc._cfg._bbs[b->pred(j)->_idx];
+          uint src_name = _phc._lrg_map.find(m);
+          if (src_name != phi_name) {
+            Block *pred = _phc._cfg.get_block_for_node(b->pred(j));
             Node *copy;
             assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
             // Rematerialize constants instead of copying them
@@ -436,18 +289,18 @@
               // Insert the copy in the predecessor basic block
               pred->add_inst(copy);
               // Copy any flags as well
-              _phc.clone_projs( pred, pred->end_idx(), m, copy, _phc._maxlrg );
+              _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
             } else {
               const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
-              copy = new (C) MachSpillCopyNode(m,*rm,*rm);
+              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
               // Find a good place to insert.  Kinda tricky, use a subroutine
               insert_copy_with_overlap(pred,copy,phi_name,src_name);
             }
             // Insert the copy in the use-def chain
-            n->set_req( j, copy );
-            _phc._cfg._bbs.map( copy->_idx, pred );
+            n->set_req(j, copy);
+            _phc._cfg.map_node_to_block(copy, pred);
             // Extend ("register allocate") the names array for the copy.
-            _phc._names.extend( copy->_idx, phi_name );
+            _phc._lrg_map.extend(copy->_idx, phi_name);
           } // End of if Phi names do not match
         } // End of for all inputs to Phi
       } else { // End of if Phi
@@ -456,38 +309,38 @@
         uint idx;
         if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
           // Get the chosen name for the Node
-          uint name = _phc.Find( n );
-          assert( name, "no 2-address specials" );
+          uint name = _phc._lrg_map.find(n);
+          assert(name, "no 2-address specials");
           // Check for name mis-match on the 2-address input
           Node *m = n->in(idx);
-          if( _phc.Find(m) != name ) {
+          if (_phc._lrg_map.find(m) != name) {
             Node *copy;
             assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
             // At this point it is unsafe to extend live ranges (6550579).
             // Rematerialize only constants as we do for Phi above.
-            if( m->is_Mach() && m->as_Mach()->is_Con() &&
-                m->as_Mach()->rematerialize() ) {
+            if (m->is_Mach() && m->as_Mach()->is_Con() &&
+                m->as_Mach()->rematerialize()) {
               copy = m->clone();
               // Insert the copy in the basic block, just before us
-              b->_nodes.insert( l++, copy );
-              l += _phc.clone_projs(b, l, m, copy, _phc._maxlrg);
+              b->insert_node(copy, l++);
+              l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
             } else {
               const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
-              copy = new (C) MachSpillCopyNode( m, *rm, *rm );
+              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
               // Insert the copy in the basic block, just before us
-              b->_nodes.insert( l++, copy );
+              b->insert_node(copy, l++);
             }
             // Insert the copy in the use-def chain
-            n->set_req(idx, copy );
+            n->set_req(idx, copy);
             // Extend ("register allocate") the names array for the copy.
-            _phc._names.extend( copy->_idx, name );
-            _phc._cfg._bbs.map( copy->_idx, b );
+            _phc._lrg_map.extend(copy->_idx, name);
+            _phc._cfg.map_node_to_block(copy, b);
           }
 
         } // End of is two-adr
 
         // Insert a copy at a debug use for a lrg which has high frequency
-        if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) {
+        if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || _phc._cfg.is_uncommon(b)) {
           // Walk the debug inputs to the node and check for lrg freq
           JVMState* jvms = n->jvms();
           uint debug_start = jvms ? jvms->debug_start() : 999999;
@@ -495,9 +348,11 @@
           for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
             // Do not split monitors; they are only needed for debug table
             // entries and need no code.
-            if( jvms->is_monitor_use(inpidx) ) continue;
+            if (jvms->is_monitor_use(inpidx)) {
+              continue;
+            }
             Node *inp = n->in(inpidx);
-            uint nidx = _phc.n2lidx(inp);
+            uint nidx = _phc._lrg_map.live_range_id(inp);
             LRG &lrg = lrgs(nidx);
 
             // If this lrg has a high frequency use/def
@@ -522,10 +377,12 @@
               // Insert the copy in the use-def chain
               n->set_req(inpidx, copy );
               // Insert the copy in the basic block, just before us
-              b->_nodes.insert( l++, copy );
+              b->insert_node(copy, l++);
               // Extend ("register allocate") the names array for the copy.
-              _phc.new_lrg( copy, _phc._maxlrg++ );
-              _phc._cfg._bbs.map( copy->_idx, b );
+              uint max_lrg_id = _phc._lrg_map.max_lrg_id();
+              _phc.new_lrg(copy, max_lrg_id);
+              _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
+              _phc._cfg.map_node_to_block(copy, b);
               //tty->print_cr("Split a debug use in Aggressive Coalesce");
             }  // End of if high frequency use/def
           }  // End of for all debug inputs
@@ -537,8 +394,7 @@
   } // End of for all blocks
 }
 
-//=============================================================================
-//------------------------------coalesce---------------------------------------
+
 // Aggressive (but pessimistic) copy coalescing of a single block
 
 // The following coalesce pass represents a single round of aggressive
@@ -571,10 +427,13 @@
     Block *bs = b->_succs[i];
     // Find index of 'b' in 'bs' predecessors
     uint j=1;
-    while( _phc._cfg._bbs[bs->pred(j)->_idx] != b ) j++;
+    while (_phc._cfg.get_block_for_node(bs->pred(j)) != b) {
+      j++;
+    }
+
     // Visit all the Phis in successor block
-    for( uint k = 1; k<bs->_nodes.size(); k++ ) {
-      Node *n = bs->_nodes[k];
+    for (uint k = 1; k < bs->number_of_nodes(); k++) {
+      Node *n = bs->get_node(k);
       if( !n->is_Phi() ) break;
       combine_these_two( n, n->in(j) );
     }
@@ -584,31 +443,27 @@
   // Check _this_ block for 2-address instructions and copies.
   uint cnt = b->end_idx();
   for( i = 1; i<cnt; i++ ) {
-    Node *n = b->_nodes[i];
+    Node *n = b->get_node(i);
     uint idx;
     // 2-address instructions have a virtual Copy matching their input
     // to their output
-    if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
+    if (n->is_Mach() && (idx = n->as_Mach()->two_adr())) {
       MachNode *mach = n->as_Mach();
-      combine_these_two( mach, mach->in(idx) );
+      combine_these_two(mach, mach->in(idx));
     }
   } // End of for all instructions in block
 }
 
-//=============================================================================
-//------------------------------PhaseConservativeCoalesce----------------------
-PhaseConservativeCoalesce::PhaseConservativeCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {
-  _ulr.initialize(_phc._maxlrg);
+PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
+  _ulr.initialize(_phc._lrg_map.max_lrg_id());
 }
 
-//------------------------------verify-----------------------------------------
 void PhaseConservativeCoalesce::verify() {
 #ifdef ASSERT
   _phc.set_was_low();
 #endif
 }
 
-//------------------------------union_helper-----------------------------------
 void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
   // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
   // union-find tree
@@ -636,22 +491,21 @@
   dst_copy->set_req( didx, src_def );
   // Add copy to free list
   // _phc.free_spillcopy(b->_nodes[bindex]);
-  assert( b->_nodes[bindex] == dst_copy, "" );
+  assert( b->get_node(bindex) == dst_copy, "" );
   dst_copy->replace_by( dst_copy->in(didx) );
   dst_copy->set_req( didx, NULL);
-  b->_nodes.remove(bindex);
+  b->remove_node(bindex);
   if( bindex < b->_ihrp_index ) b->_ihrp_index--;
   if( bindex < b->_fhrp_index ) b->_fhrp_index--;
 
   // Stretched lr1; add it to liveness of intermediate blocks
-  Block *b2 = _phc._cfg._bbs[src_copy->_idx];
+  Block *b2 = _phc._cfg.get_block_for_node(src_copy);
   while( b != b2 ) {
-    b = _phc._cfg._bbs[b->pred(1)->_idx];
+    b = _phc._cfg.get_block_for_node(b->pred(1));
     _phc._live->live(b)->insert(lr1);
   }
 }
 
-//------------------------------compute_separating_interferences---------------
 // Factored code from copy_copy that computes extra interferences from
 // lengthening a live range by double-coalescing.
 uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
@@ -666,22 +520,26 @@
     bindex2--;                  // Chain backwards 1 instruction
     while( bindex2 == 0 ) {     // At block start, find prior block
       assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
-      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
       bindex2 = b2->end_idx()-1;
     }
     // Get prior instruction
-    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
-    Node *x = b2->_nodes[bindex2];
+    assert(bindex2 < b2->number_of_nodes(), "index out of bounds");
+    Node *x = b2->get_node(bindex2);
     if( x == prev_copy ) {      // Previous copy in copy chain?
       if( prev_copy == src_copy)// Found end of chain and all interferences
         break;                  // So break out of loop
       // Else work back one in copy chain
       prev_copy = prev_copy->in(prev_copy->is_Copy());
     } else {                    // Else collect interferences
-      uint lidx = _phc.Find(x);
+      uint lidx = _phc._lrg_map.find(x);
       // Found another def of live-range being stretched?
-      if( lidx == lr1 ) return max_juint;
-      if( lidx == lr2 ) return max_juint;
+      if (lidx == lr1) {
+        return max_juint;
+      }
+      if (lidx == lr2) {
+        return max_juint;
+      }
 
       // If we attempt to coalesce across a bound def
       if( lrgs(lidx).is_bound() ) {
@@ -713,7 +571,6 @@
   return reg_degree;
 }
 
-//------------------------------update_ifg-------------------------------------
 void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
   // Some original neighbors of lr1 might have gone away
   // because the constrained register mask prevented them.
@@ -743,7 +600,6 @@
       lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
 }
 
-//------------------------------record_bias------------------------------------
 static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
   // Tag copy bias here
   if( !ifg->lrgs(lr1)._copy_bias )
@@ -752,37 +608,46 @@
     ifg->lrgs(lr2)._copy_bias = lr1;
 }
 
-//------------------------------copy_copy--------------------------------------
 // See if I can coalesce a series of multiple copies together.  I need the
 // final dest copy and the original src copy.  They can be the same Node.
 // Compute the compatible register masks.
-bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
+bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
 
-  if( !dst_copy->is_SpillCopy() ) return false;
-  if( !src_copy->is_SpillCopy() ) return false;
+  if (!dst_copy->is_SpillCopy()) {
+    return false;
+  }
+  if (!src_copy->is_SpillCopy()) {
+    return false;
+  }
   Node *src_def = src_copy->in(src_copy->is_Copy());
-  uint lr1 = _phc.Find(dst_copy);
-  uint lr2 = _phc.Find(src_def );
+  uint lr1 = _phc._lrg_map.find(dst_copy);
+  uint lr2 = _phc._lrg_map.find(src_def);
 
   // Same live ranges already?
-  if( lr1 == lr2 ) return false;
+  if (lr1 == lr2) {
+    return false;
+  }
 
   // Interfere?
-  if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;
+  if (_phc._ifg->test_edge_sq(lr1, lr2)) {
+    return false;
+  }
 
   // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
-  if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
+  if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast
     return false;
+  }
 
   // Coalescing between an aligned live range and a mis-aligned live range?
   // No, no!  Alignment changes how we count degree.
-  if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
+  if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) {
     return false;
+  }
 
   // Sort; use smaller live-range number
   Node *lr1_node = dst_copy;
   Node *lr2_node = src_def;
-  if( lr1 > lr2 ) {
+  if (lr1 > lr2) {
     uint tmp = lr1; lr1 = lr2; lr2 = tmp;
     lr1_node = src_def;  lr2_node = dst_copy;
   }
@@ -796,8 +661,8 @@
 
   if (UseFPUForSpilling && rm.is_AllStack() ) {
     // Don't coalesce when frequency difference is large
-    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
-    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
+    Block *dst_b = _phc._cfg.get_block_for_node(dst_copy);
+    Block *src_def_b = _phc._cfg.get_block_for_node(src_def);
     if (src_def_b->_freq > 10*dst_b->_freq )
       return false;
   }
@@ -810,7 +675,7 @@
   // Another early bail-out test is when we are double-coalescing and the
   // 2 copies are separated by some control flow.
   if( dst_copy != src_copy ) {
-    Block *src_b = _phc._cfg._bbs[src_copy->_idx];
+    Block *src_b = _phc._cfg.get_block_for_node(src_copy);
     Block *b2 = b;
     while( b2 != src_b ) {
       if( b2->num_preds() > 2 ){// Found merge-point
@@ -821,7 +686,7 @@
         //record_bias( _phc._lrgs, lr1, lr2 );
         return false;           // To hard to find all interferences
       }
-      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
     }
   }
 
@@ -902,17 +767,17 @@
   return true;
 }
 
-//------------------------------coalesce---------------------------------------
 // Conservative (but pessimistic) copy coalescing of a single block
 void PhaseConservativeCoalesce::coalesce( Block *b ) {
   // Bail out on infrequent blocks
-  if( b->is_uncommon(_phc._cfg._bbs) )
+  if (_phc._cfg.is_uncommon(b)) {
     return;
+  }
   // Check this block for copies.
   for( uint i = 1; i<b->end_idx(); i++ ) {
     // Check for actual copies on inputs.  Coalesce a copy into its
     // input if use and copy's input are compatible.
-    Node *copy1 = b->_nodes[i];
+    Node *copy1 = b->get_node(i);
     uint idx1 = copy1->is_Copy();
     if( !idx1 ) continue;       // Not a copy
 
@@ -921,17 +786,5 @@
       PhaseChaitin::_conserv_coalesce++;  // Collect stats on success
       continue;
     }
-
-    /* do not attempt pairs.  About 1/2 of all pairs can be removed by
-       post-alloc.  The other set are too few to bother.
-    Node *copy2 = copy1->in(idx1);
-    uint idx2 = copy2->is_Copy();
-    if( !idx2 ) continue;
-    if( copy_copy(copy1,copy2,b,i) ) {
-      i--;                      // Retry, same location in block
-      PhaseChaitin::_conserv_coalesce_pair++; // Collect stats on success
-      continue;
-    }
-    */
   }
 }
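[Editor's note] insert_copy_with_overlap() above is careful about ordering when it sequentializes the parallel copies that come out of SSA: if the last use of the destination name comes at or after the kill of the source name, it clones the copy into a temporary first. A tiny worked example of why the temporary is needed, independent of HotSpot's data structures:

#include <cassert>

// Sequentializing the parallel copy {x <- y, y <- x} naively would clobber a
// value the second assignment still needs, so a temporary is written first.
int main() {
  int x = 1, y = 2;
  int tmp = y;   // save the source that is about to be killed
  y = x;         // now safe to overwrite y
  x = tmp;
  assert(x == 2 && y == 1);
  return 0;
}

Without the temporary, the first assignment would destroy the value the second one still reads; the same cycle-breaking idea extends to longer permutation chains.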
--- a/src/share/vm/opto/coalesce.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/coalesce.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -41,23 +41,25 @@
 
 public:
   // Coalesce copies
-  PhaseCoalesce( PhaseChaitin &chaitin ) : Phase(Coalesce), _phc(chaitin) { }
+  PhaseCoalesce(PhaseChaitin &phc)
+  : Phase(Coalesce)
+  , _phc(phc) {}
 
   virtual void verify() = 0;
 
   // Coalesce copies
-  void coalesce_driver( );
+  void coalesce_driver();
 
   // Coalesce copies in this block
-  virtual void coalesce( Block *b ) = 0;
+  virtual void coalesce(Block *b) = 0;
 
   // Attempt to coalesce live ranges defined by these 2
-  void combine_these_two( Node *n1, Node *n2 );
+  void combine_these_two(Node *n1, Node *n2);
 
-  LRG &lrgs( uint lidx ) { return _phc.lrgs(lidx); }
+  LRG &lrgs(uint lidx) { return _phc.lrgs(lidx); }
 #ifndef PRODUCT
   // Dump internally name
-  void dump( Node *n ) const;
+  void dump(Node *n) const;
   // Dump whole shebang
   void dump() const;
 #endif
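
PhaseCoalesce above is a driver plus a pure-virtual per-block hook that the concrete coalescing phases (such as PhaseConservativeCoalesce) override. A minimal sketch of that driver/hook shape, with stub types and an illustrative signature only:

#include <cstdio>
#include <vector>

struct Block { int id; };   // stub: the real Block carries the node list

// coalesce_driver() walks every block; the per-block strategy is supplied
// by a subclass, so aggressive and conservative variants share the driver.
class CoalescePhase {
 public:
  virtual ~CoalescePhase() {}
  void coalesce_driver(std::vector<Block>& blocks) {
    for (size_t i = 0; i < blocks.size(); i++) {
      coalesce(blocks[i]);               // subclass decides how hard to try
    }
  }
  virtual void coalesce(Block& b) = 0;
};

class ConservativeCoalesce : public CoalescePhase {
 public:
  virtual void coalesce(Block& b) {
    std::printf("conservatively coalescing copies in block %d\n", b.id);
  }
};

int main() {
  std::vector<Block> blocks;
  for (int i = 0; i < 3; i++) {
    Block b = { i };
    blocks.push_back(b);
  }
  ConservativeCoalesce phase;
  phase.coalesce_driver(blocks);
  return 0;
}
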
--- a/src/share/vm/opto/compile.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/compile.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -69,6 +69,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -87,6 +90,9 @@
 #ifdef  TARGET_ARCH_x86
 # include "compile_x86.hpp"
 #endif
+#ifdef  TARGET_ARCH_aarch64
+# include "compile_aarch64.hpp"
+#endif
 #ifdef  TARGET_ARCH_sparc
 # include "compile_sparc.hpp"
 #endif
@@ -97,6 +103,9 @@
 # include "compile_ppc.hpp"
 #endif
 
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
 
 // -------------------- Compile::mach_constant_base_node -----------------------
 // Constant table base node singleton.
@@ -444,6 +453,7 @@
   }
   // clean up the late inline lists
   remove_useless_late_inlines(&_string_late_inlines, useful);
+  remove_useless_late_inlines(&_boxing_late_inlines, useful);
   remove_useless_late_inlines(&_late_inlines, useful);
   debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
 }
@@ -511,6 +521,12 @@
     tty->print_cr("** Bailout: Recompile without escape analysis          **");
     tty->print_cr("*********************************************************");
   }
+  if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {
+    // Recompiling without boxing elimination
+    tty->print_cr("*********************************************************");
+    tty->print_cr("** Bailout: Recompile without boxing elimination       **");
+    tty->print_cr("*********************************************************");
+  }
   if (env()->break_at_compile()) {
     // Open the debugger when compiling this method.
     tty->print("### Breaking when compiling: ");
@@ -630,7 +646,8 @@
 // the continuation bci for on stack replacement.
 
 
-Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis )
+Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
+                  bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
                 : Phase(Compiler),
                   _env(ci_env),
                   _log(ci_env->log()),
@@ -646,6 +663,7 @@
                   _warm_calls(NULL),
                   _subsume_loads(subsume_loads),
                   _do_escape_analysis(do_escape_analysis),
+                  _eliminate_boxing(eliminate_boxing),
                   _failure_reason(NULL),
                   _code_buffer("Compile::Fill_buffer"),
                   _orig_pc_slot(0),
@@ -668,6 +686,7 @@
                   _congraph(NULL),
                   _late_inlines(comp_arena(), 2, 0, NULL),
                   _string_late_inlines(comp_arena(), 2, 0, NULL),
+                  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
                   _late_inlines_pos(0),
                   _number_of_mh_late_inlines(0),
                   _inlining_progress(false),
@@ -896,6 +915,37 @@
       _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
     }
 
+#ifdef BUILTIN_SIM
+    char *method_name = NULL;
+    AArch64Simulator *sim = NULL;
+    size_t len = 65536;
+    if (NotifySimulator) {
+      method_name = new char[len];
+    }
+    if (method_name) {
+      unsigned char *entry = code_buffer()->insts_begin();
+      stringStream st(method_name, 400);
+      if (_entry_bci != InvocationEntryBci) {
+        st.print("osr:");
+      }
+      _method->holder()->name()->print_symbol_on(&st);
+      // convert '/' separators in class name into '.' separator
+      for (unsigned i = 0; i < len; i++) {
+        if (method_name[i] == '/') {
+          method_name[i] = '.';
+        } else if (method_name[i] == '\0') {
+          break;
+        }
+      }
+      st.print(".");
+      _method->name()->print_symbol_on(&st);
+      _method->signature()->as_symbol()->print_symbol_on(&st);
+      sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+      sim->notifyCompile(method_name, entry);
+      sim->notifyRelocate(entry, 0);
+    }
+#endif
+
     env()->register_method(_method, _entry_bci,
                            &_code_offsets,
                            _orig_pc_slot_offset_in_bytes,
@@ -940,6 +990,7 @@
     _orig_pc_slot_offset_in_bytes(0),
     _subsume_loads(true),
     _do_escape_analysis(false),
+    _eliminate_boxing(false),
     _failure_reason(NULL),
     _code_buffer("Compile::Fill_buffer"),
     _has_method_handle_invokes(false),
@@ -1064,6 +1115,7 @@
   set_has_split_ifs(false);
   set_has_loops(has_method() && method()->has_loops()); // first approximation
   set_has_stringbuilder(false);
+  set_has_boxed_value(false);
   _trap_can_recompile = false;  // no traps emitted yet
   _major_progress = true; // start out assuming good things will happen
   set_has_unsafe_access(false);
@@ -1881,6 +1933,38 @@
   _string_late_inlines.trunc_to(0);
 }
 
+// Late inlining of boxing methods
+void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
+  if (_boxing_late_inlines.length() > 0) {
+    assert(has_boxed_value(), "inconsistent");
+
+    PhaseGVN* gvn = initial_gvn();
+    set_inlining_incrementally(true);
+
+    assert( igvn._worklist.size() == 0, "should be done with igvn" );
+    for_igvn()->clear();
+    gvn->replace_with(&igvn);
+
+    while (_boxing_late_inlines.length() > 0) {
+      CallGenerator* cg = _boxing_late_inlines.pop();
+      cg->do_late_inline();
+      if (failing())  return;
+    }
+    _boxing_late_inlines.trunc_to(0);
+
+    {
+      ResourceMark rm;
+      PhaseRemoveUseless pru(gvn, for_igvn());
+    }
+
+    igvn = PhaseIterGVN(gvn);
+    igvn.optimize();
+
+    set_inlining_progress(false);
+    set_inlining_incrementally(false);
+  }
+}
+
 void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
   assert(IncrementalInline, "incremental inlining should be on");
   PhaseGVN* gvn = initial_gvn();
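
Compile::inline_boxing_calls() above drains a queue (_boxing_late_inlines) that was filled while the graph was being parsed (via add_boxing_late_inline), so the valueOf call sites are only expanded in one batch after the main incremental inlining pass. A minimal sketch of that defer-then-drain pattern; the names (LateInlineQueue, defer, drain) are made up for the illustration.

#include <cstdio>
#include <functional>
#include <vector>

// Candidates are queued while the graph is being built and expanded later in
// one batch; the queue is emptied by popping, like _boxing_late_inlines.
class LateInlineQueue {
 public:
  void defer(std::function<void()> expand) { _pending.push_back(expand); }
  void drain() {
    while (!_pending.empty()) {
      std::function<void()> work = _pending.back();
      _pending.pop_back();
      work();                            // expand one deferred candidate
    }
  }
 private:
  std::vector<std::function<void()> > _pending;
};

int main() {
  LateInlineQueue boxing_late_inlines;
  boxing_late_inlines.defer([] { std::printf("late-inline Integer.valueOf\n"); });
  boxing_late_inlines.defer([] { std::printf("late-inline Long.valueOf\n"); });
  // ... run the earlier optimization phases first ...
  boxing_late_inlines.drain();           // analogous to inline_boxing_calls()
  return 0;
}
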
@@ -1905,7 +1989,7 @@
 
   {
     ResourceMark rm;
-    PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
+    PhaseRemoveUseless pru(gvn, for_igvn());
   }
 
   igvn = PhaseIterGVN(gvn);
@@ -2003,12 +2087,25 @@
 
   if (failing())  return;
 
-  inline_incrementally(igvn);
+  {
+    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
+    inline_incrementally(igvn);
+  }
 
   print_method(PHASE_INCREMENTAL_INLINE, 2);
 
   if (failing())  return;
 
+  if (eliminate_boxing()) {
+    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
+    // Inline valueOf() methods now.
+    inline_boxing_calls(igvn);
+
+    print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);
+
+    if (failing())  return;
+  }
+
   // No more new expensive nodes will be added to the list from here
   // so keep only the actual candidates for optimizations.
   cleanup_expensive_nodes(igvn);
@@ -2158,7 +2255,9 @@
 //------------------------------Code_Gen---------------------------------------
 // Given a graph, generate code for it
 void Compile::Code_Gen() {
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   // Perform instruction selection.  You might think we could reclaim Matcher
   // memory PDQ, but actually the Matcher is used in generating spill code.
@@ -2170,12 +2269,11 @@
   // nodes.  Mapping is only valid at the root of each matched subtree.
   NOT_PRODUCT( verify_graph_edges(); )
 
-  Node_List proj_list;
-  Matcher m(proj_list);
-  _matcher = &m;
+  Matcher matcher;
+  _matcher = &matcher;
   {
     TracePhase t2("matcher", &_t_matcher, true);
-    m.match();
+    matcher.match();
   }
   // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
   // nodes.  Mapping is only valid at the root of each matched subtree.
@@ -2183,49 +2281,41 @@
 
   // If you have too many nodes, or if matching has failed, bail out
   check_node_count(0, "out of nodes matching instructions");
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   // Platform dependent post matching hook (used on ppc).
   PdCompile::pd_post_matching_hook(this);
 
   // Build a proper-looking CFG
-  PhaseCFG cfg(node_arena(), root(), m);
+  PhaseCFG cfg(node_arena(), root(), matcher);
   _cfg = &cfg;
   {
     NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
-    cfg.Dominators();
-    if (failing())  return;
-
-    NOT_PRODUCT( verify_graph_edges(); )
-
-    cfg.Estimate_Block_Frequency();
-    cfg.GlobalCodeMotion(m,unique(),proj_list);
-    if (failing())  return;
+    bool success = cfg.do_global_code_motion();
+    if (!success) {
+      return;
+    }
 
     print_method(PHASE_GLOBAL_CODE_MOTION, 2);
-
     NOT_PRODUCT( verify_graph_edges(); )
-
     debug_only( cfg.verify(); )
   }
-  NOT_PRODUCT( verify_graph_edges(); )
-
-  PhaseChaitin regalloc(unique(),cfg,m);
+
+  PhaseChaitin regalloc(unique(), cfg, matcher);
   _regalloc = &regalloc;
   {
     TracePhase t2("regalloc", &_t_registerAllocation, true);
-    // Perform any platform dependent preallocation actions.  This is used,
-    // for example, to avoid taking an implicit null pointer exception
-    // using the frame pointer on win95.
-    _regalloc->pd_preallocate_hook();
-
     // Perform register allocation.  After Chaitin, use-def chains are
     // no longer accurate (at spill code) and so must be ignored.
     // Node->LRG->reg mappings are still accurate.
     _regalloc->Register_Allocate();
 
     // Bail out if the allocator builds too many nodes
-    if (failing())  return;
+    if (failing()) {
+      return;
+    }
   }
 
   // Prior to register allocation we kept empty basic blocks in case the
@@ -2234,7 +2324,7 @@
   // can now safely remove it.
   {
     NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
-    cfg.remove_empty();
+    cfg.remove_empty_blocks();
     if (do_freq_based_layout()) {
       PhaseBlockLayout layout(cfg);
     } else {
@@ -2243,9 +2333,6 @@
     cfg.fixup_flow();
   }
 
-  // Perform any platform dependent postallocation verifications.
-  debug_only( _regalloc->pd_postallocate_verify_hook(); )
-
   // Apply peephole optimizations
   if( OptoPeephole ) {
     NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
@@ -2289,38 +2376,50 @@
   _regalloc->dump_frame();
 
   Node *n = NULL;
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    if (VMThread::should_terminate()) { cut_short = true; break; }
-    Block *b = _cfg->_blocks[i];
-    if (b->is_connector() && !Verbose) continue;
-    n = b->_nodes[0];
-    if (pcs && n->_idx < pc_limit)
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    if (VMThread::should_terminate()) {
+      cut_short = true;
+      break;
+    }
+    Block* block = _cfg->get_block(i);
+    if (block->is_connector() && !Verbose) {
+      continue;
+    }
+    n = block->head();
+    if (pcs && n->_idx < pc_limit) {
       tty->print("%3.3x   ", pcs[n->_idx]);
-    else
+    } else {
       tty->print("      ");
-    b->dump_head( &_cfg->_bbs );
-    if (b->is_connector()) {
+    }
+    block->dump_head(_cfg);
+    if (block->is_connector()) {
       tty->print_cr("        # Empty connector block");
-    } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
+    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
       tty->print_cr("        # Block is sole successor of call");
     }
 
     // For all instructions
     Node *delay = NULL;
-    for( uint j = 0; j<b->_nodes.size(); j++ ) {
-      if (VMThread::should_terminate()) { cut_short = true; break; }
-      n = b->_nodes[j];
+    for (uint j = 0; j < block->number_of_nodes(); j++) {
+      if (VMThread::should_terminate()) {
+        cut_short = true;
+        break;
+      }
+      n = block->get_node(j);
       if (valid_bundle_info(n)) {
-        Bundle *bundle = node_bundling(n);
+        Bundle* bundle = node_bundling(n);
         if (bundle->used_in_unconditional_delay()) {
           delay = n;
           continue;
         }
-        if (bundle->starts_bundle())
+        if (bundle->starts_bundle()) {
           starts_bundle = '+';
+        }
       }
 
-      if (WizardMode) n->dump();
+      if (WizardMode) {
+        n->dump();
+      }
 
       if( !n->is_Region() &&    // Don't print in the Assembly
           !n->is_Phi() &&       // a few noisily useless nodes
@@ -2981,6 +3080,7 @@
     }
     break;
   case Op_MemBarStoreStore:
+  case Op_MemBarRelease:
     // Break the link with AllocateNode: it is no longer useful and
     // confuses register allocation.
     if (n->req() > MemBarNode::Precedent) {
@@ -3610,7 +3710,7 @@
 }
 
 Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
-  Block* b = Compile::current()->cfg()->_bbs[n->_idx];
+  Block* b = Compile::current()->cfg()->get_block_for_node(n);
   Constant con(type, value, b->_freq);
   add(con);
   return con;
--- a/src/share/vm/opto/compile.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/compile.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -252,6 +252,7 @@
   const bool            _save_argument_registers; // save/restore arg regs for trampolines
   const bool            _subsume_loads;         // Load can be matched as part of a larger op.
   const bool            _do_escape_analysis;    // Do escape analysis.
+  const bool            _eliminate_boxing;      // Do boxing elimination.
   ciMethod*             _method;                // The method being compiled.
   int                   _entry_bci;             // entry bci for osr methods.
   const TypeFunc*       _tf;                    // My kind of signature
@@ -277,6 +278,7 @@
   bool                  _has_split_ifs;         // True if the method _may_ have some split-if
   bool                  _has_unsafe_access;     // True if the method _may_ produce faults in unsafe loads or stores.
   bool                  _has_stringbuilder;     // True StringBuffers or StringBuilders are allocated
+  bool                  _has_boxed_value;       // True if a boxed object is allocated
   int                   _max_vector_size;       // Maximum size of generated vectors
   uint                  _trap_hist[trapHistLength];  // Cumulative traps
   bool                  _trap_can_recompile;    // Have we emitted a recompiling trap?
@@ -370,6 +372,8 @@
                                                       // main parsing has finished.
   GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations
 
+  GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations
+
   int                           _late_inlines_pos;    // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
   uint                          _number_of_mh_late_inlines; // number of method handle late inlining still pending
 
@@ -481,8 +485,12 @@
   // instructions that subsume a load may result in an unschedulable
   // instruction sequence.
   bool              subsume_loads() const       { return _subsume_loads; }
-  // Do escape analysis.
+  /** Do escape analysis. */
   bool              do_escape_analysis() const  { return _do_escape_analysis; }
+  /** Do boxing elimination. */
+  bool              eliminate_boxing() const    { return _eliminate_boxing; }
+  /** Do aggressive boxing elimination. */
+  bool              aggressive_unboxing() const { return _eliminate_boxing && AggressiveUnboxing; }
   bool              save_argument_registers() const { return _save_argument_registers; }
 
 
@@ -522,6 +530,8 @@
   void          set_has_unsafe_access(bool z)   { _has_unsafe_access = z; }
   bool              has_stringbuilder() const   { return _has_stringbuilder; }
   void          set_has_stringbuilder(bool z)   { _has_stringbuilder = z; }
+  bool              has_boxed_value() const     { return _has_boxed_value; }
+  void          set_has_boxed_value(bool z)     { _has_boxed_value = z; }
   int               max_vector_size() const     { return _max_vector_size; }
   void          set_max_vector_size(int s)      { _max_vector_size = s; }
   void          set_trap_count(uint r, uint c)  { assert(r < trapHistLength, "oob");        _trap_hist[r] = c; }
@@ -552,6 +562,10 @@
   bool          method_has_option(const char * option) {
     return method() != NULL && method()->has_option(option);
   }
+  template<typename T>
+  bool          method_has_option_value(const char * option, T& value) {
+    return method() != NULL && method()->has_option_value(option, value);
+  }
 #ifndef PRODUCT
   bool          trace_opto_output() const       { return _trace_opto_output; }
   bool              parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
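
The new method_has_option_value() template above pairs a presence check with a typed out-parameter. A rough sketch of that shape follows; the "name=value" string parsing and the option name used in main() are illustrative only, not the real CompilerOracle lookup.

#include <iostream>
#include <sstream>
#include <string>

// Return whether the option is present and, if so, parse its value into a
// typed out-parameter; callers get both answers from one call.
template <typename T>
bool has_option_value(const std::string& options, const std::string& name, T& value) {
  size_t pos = options.find(name + "=");
  if (pos == std::string::npos) {
    return false;                           // absent: leave value untouched
  }
  std::istringstream in(options.substr(pos + name.size() + 1));
  in >> value;                              // typed parse via the out-parameter
  return static_cast<bool>(in);
}

int main() {
  double ratio = 0.0;
  if (has_option_value("PrintOpto SomeOption=0.25", "SomeOption", ratio)) {
    std::cout << "SomeOption = " << ratio << "\n";
  }
  return 0;
}
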
@@ -605,12 +619,12 @@
 #endif
   }
 
-  int           macro_count()                   { return _macro_nodes->length(); }
-  int           predicate_count()               { return _predicate_opaqs->length();}
-  int           expensive_count()               { return _expensive_nodes->length(); }
-  Node*         macro_node(int idx)             { return _macro_nodes->at(idx); }
-  Node*         predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
-  Node*         expensive_node(int idx)         { return _expensive_nodes->at(idx); }
+  int           macro_count()             const { return _macro_nodes->length(); }
+  int           predicate_count()         const { return _predicate_opaqs->length();}
+  int           expensive_count()         const { return _expensive_nodes->length(); }
+  Node*         macro_node(int idx)       const { return _macro_nodes->at(idx); }
+  Node*         predicate_opaque1_node(int idx) const { return _predicate_opaqs->at(idx);}
+  Node*         expensive_node(int idx)   const { return _expensive_nodes->at(idx); }
   ConnectionGraph* congraph()                   { return _congraph;}
   void set_congraph(ConnectionGraph* congraph)  { _congraph = congraph;}
   void add_macro_node(Node * n) {
@@ -807,7 +821,12 @@
   CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_does_dispatch,
                                    JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true,
                                    bool delayed_forbidden = false);
-  bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);
+  bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
+    return should_delay_string_inlining(call_method, jvms) ||
+           should_delay_boxing_inlining(call_method, jvms);
+  }
+  bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
+  bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
 
   // Helper functions to identify inlining potential at call-site
   ciMethod* optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
@@ -867,6 +886,10 @@
     _string_late_inlines.push(cg);
   }
 
+  void              add_boxing_late_inline(CallGenerator* cg) {
+    _boxing_late_inlines.push(cg);
+  }
+
   void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);
 
   void dump_inlining();
@@ -886,6 +909,7 @@
   void inline_incrementally_one(PhaseIterGVN& igvn);
   void inline_incrementally(PhaseIterGVN& igvn);
   void inline_string_calls(bool parse_time);
+  void inline_boxing_calls(PhaseIterGVN& igvn);
 
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                       { return _cfg; }
@@ -958,7 +982,8 @@
   // replacement, entry_bci indicates the bytecode for which to compile a
   // continuation.
   Compile(ciEnv* ci_env, C2Compiler* compiler, ciMethod* target,
-          int entry_bci, bool subsume_loads, bool do_escape_analysis);
+          int entry_bci, bool subsume_loads, bool do_escape_analysis,
+          bool eliminate_boxing);
 
   // Second major entry point.  From the TypeFunc signature, generate code
   // to pass arguments from the Java calling convention to the C calling
--- a/src/share/vm/opto/doCall.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/doCall.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -177,9 +177,12 @@
           // Delay the inlining of this method to give us the
           // opportunity to perform some high level optimizations
           // first.
-          if (should_delay_inlining(callee, jvms)) {
+          if (should_delay_string_inlining(callee, jvms)) {
             assert(!delayed_forbidden, "strange");
             return CallGenerator::for_string_late_inline(callee, cg);
+          } else if (should_delay_boxing_inlining(callee, jvms)) {
+            assert(!delayed_forbidden, "strange");
+            return CallGenerator::for_boxing_late_inline(callee, cg);
           } else if ((should_delay || AlwaysIncrementalInline) && !delayed_forbidden) {
             return CallGenerator::for_late_inline(callee, cg);
           }
@@ -277,7 +280,7 @@
 
 // Return true for methods that shouldn't be inlined early so that
 // they are easier to analyze and optimize as intrinsics.
-bool Compile::should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
+bool Compile::should_delay_string_inlining(ciMethod* call_method, JVMState* jvms) {
   if (has_stringbuilder()) {
 
     if ((call_method->holder() == C->env()->StringBuilder_klass() ||
@@ -328,6 +331,13 @@
   return false;
 }
 
+bool Compile::should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms) {
+  if (eliminate_boxing() && call_method->is_boxing_method()) {
+    set_has_boxed_value(true);
+    return true;
+  }
+  return false;
+}
 
 // uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
 bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
--- a/src/share/vm/opto/domgraph.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/domgraph.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -32,9 +32,6 @@
 
 // Portions of code courtesy of Clifford Click
 
-// Optimization - Graph Style
-
-//------------------------------Tarjan-----------------------------------------
 // A data structure that holds all the information needed to find dominators.
 struct Tarjan {
   Block *_block;                // Basic block for this info
@@ -60,23 +57,21 @@
 
 };
 
-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the CFG.  The CFG must already have been
 // constructed.  This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
-void PhaseCFG::Dominators( ) {
+void PhaseCFG::build_dominator_tree() {
   // Pre-grow the blocks array, prior to the ResourceMark kicking in
-  _blocks.map(_num_blocks,0);
+  _blocks.map(number_of_blocks(), 0);
 
   ResourceMark rm;
   // Setup mappings from my Graph to Tarjan's stuff and back
   // Note: Tarjan uses 1-based arrays
-  Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
+  Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);
 
   // Tarjan's algorithm, almost verbatim:
   // Step 1:
-  _rpo_ctr = _num_blocks;
-  uint dfsnum = DFS( tarjan );
-  if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
+  uint dfsnum = do_DFS(tarjan, number_of_blocks());
+  if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
     // If the returned dfsnum does not match the number of blocks, then we
     // must have some unreachable loops.  These can be made at any time by
     // IterGVN.  They are cleaned up by CCP or the loop opts, but the last
@@ -93,20 +88,19 @@
     C->record_method_not_compilable("unreachable loop");
     return;
   }
-  _blocks._cnt = _num_blocks;
+  _blocks._cnt = number_of_blocks();
 
   // Tarjan is using 1-based arrays, so these are some initialize flags
   tarjan[0]._size = tarjan[0]._semi = 0;
   tarjan[0]._label = &tarjan[0];
 
-  uint i;
-  for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order
+  for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
     Tarjan *w = &tarjan[i];     // Get vertex from DFS
 
     // Step 2:
     Node *whead = w->_block->head();
-    for( uint j=1; j < whead->req(); j++ ) {
-      Block *b = _bbs[whead->in(j)->_idx];
+    for (uint j = 1; j < whead->req(); j++) {
+      Block* b = get_block_for_node(whead->in(j));
       Tarjan *vx = &tarjan[b->_pre_order];
       Tarjan *u = vx->EVAL();
       if( u->_semi < w->_semi )
@@ -130,19 +124,19 @@
   }
 
   // Step 4:
-  for( i=2; i <= _num_blocks; i++ ) {
+  for (uint i = 2; i <= number_of_blocks(); i++) {
     Tarjan *w = &tarjan[i];
     if( w->_dom != &tarjan[w->_semi] )
       w->_dom = w->_dom->_dom;
     w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later
   }
   // No immediate dominator for the root
-  Tarjan *w = &tarjan[_broot->_pre_order];
+  Tarjan *w = &tarjan[get_root_block()->_pre_order];
   w->_dom = NULL;
   w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later
 
   // Convert the dominator tree array into my kind of graph
-  for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
+  for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
     Tarjan *t = &tarjan[i];     // Handy access
     Tarjan *tdom = t->_dom;     // Handy access to immediate dominator
     if( tdom )  {               // Root has no immediate dominator
@@ -152,11 +146,10 @@
     } else
       t->_block->_idom = NULL;  // Root
   }
-  w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
+  w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree
 
 }
 
-//----------------------------Block_Stack--------------------------------------
 class Block_Stack {
   private:
     struct Block_Descr {
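
build_dominator_tree() above is, as its comment says, the Lengauer-Tarjan algorithm. For readers who only want the result it produces (an immediate dominator per block), here is a compact sketch of the simpler iterative algorithm of Cooper, Harvey and Kennedy, assuming blocks are already numbered in reverse post-order with the root as block 0. This is not the HotSpot code, only an illustration of what the phase computes.

#include <cstdio>
#include <vector>

struct Cfg {
  std::vector<std::vector<int>> preds;   // predecessor lists, indexed by RPO number
};

// Walk two idom chains toward the root (smaller RPO number) until they meet.
static int intersect(const std::vector<int>& idom, int a, int b) {
  while (a != b) {
    while (a > b) a = idom[a];
    while (b > a) b = idom[b];
  }
  return a;
}

std::vector<int> immediate_dominators(const Cfg& cfg) {
  const int n = (int)cfg.preds.size();
  std::vector<int> idom(n, -1);
  idom[0] = 0;                           // the root dominates itself
  bool changed = true;
  while (changed) {
    changed = false;
    for (int b = 1; b < n; b++) {        // every block except the root, in RPO
      int new_idom = -1;
      for (size_t k = 0; k < cfg.preds[b].size(); k++) {
        int p = cfg.preds[b][k];
        if (idom[p] == -1) continue;     // predecessor not processed yet
        new_idom = (new_idom == -1) ? p : intersect(idom, p, new_idom);
      }
      if (new_idom != -1 && idom[b] != new_idom) {
        idom[b] = new_idom;
        changed = true;
      }
    }
  }
  return idom;
}

int main() {
  // Diamond: 0 -> {1,2} -> 3; the idom of block 3 is 0, not 1 or 2.
  Cfg cfg;
  cfg.preds = { {}, {0}, {0}, {1, 2} };
  std::vector<int> idom = immediate_dominators(cfg);
  for (size_t b = 0; b < idom.size(); b++) {
    std::printf("idom(B%zu) = B%d\n", b, idom[b]);
  }
  return 0;
}

The iterative form is easier to follow but can revisit blocks several times; the Lengauer-Tarjan version used above runs in near-linear time, which matters on large C2 flow graphs.
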
@@ -214,26 +207,25 @@
     }
 };
 
-//-------------------------most_frequent_successor-----------------------------
 // Find the index into the b->succs[] array of the most frequent successor.
 uint Block_Stack::most_frequent_successor( Block *b ) {
   uint freq_idx = 0;
   int eidx = b->end_idx();
-  Node *n = b->_nodes[eidx];
+  Node *n = b->get_node(eidx);
   int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
   switch( op ) {
   case Op_CountedLoopEnd:
   case Op_If: {               // Split frequency amongst children
     float prob = n->as_MachIf()->_prob;
     // Is succ[0] the TRUE branch or the FALSE branch?
-    if( b->_nodes[eidx+1]->Opcode() == Op_IfFalse )
+    if( b->get_node(eidx+1)->Opcode() == Op_IfFalse )
       prob = 1.0f - prob;
     freq_idx = prob < PROB_FAIR;      // freq=1 for succ[0] < 0.5 prob
     break;
   }
   case Op_Catch:                // Split frequency amongst children
     for( freq_idx = 0; freq_idx < b->_num_succs; freq_idx++ )
-      if( b->_nodes[eidx+1+freq_idx]->as_CatchProj()->_con == CatchProjNode::fall_through_index )
+      if( b->get_node(eidx+1+freq_idx)->as_CatchProj()->_con == CatchProjNode::fall_through_index )
         break;
     // Handle case of no fall-thru (e.g., check-cast MUST throw an exception)
     if( freq_idx == b->_num_succs ) freq_idx = 0;
@@ -258,40 +250,38 @@
   return freq_idx;
 }
 
-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
-uint PhaseCFG::DFS( Tarjan *tarjan ) {
-  Block *b = _broot;
+uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
+  Block* root_block = get_root_block();
   uint pre_order = 1;
-  // Allocate stack of size _num_blocks+1 to avoid frequent realloc
-  Block_Stack bstack(tarjan, _num_blocks+1);
+  // Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
+  Block_Stack bstack(tarjan, number_of_blocks() + 1);
 
   // Push on stack the state for the first block
-  bstack.push(pre_order, b);
+  bstack.push(pre_order, root_block);
   ++pre_order;
 
   while (bstack.is_nonempty()) {
     if (!bstack.last_successor()) {
       // Walk over all successors in pre-order (DFS).
-      Block *s = bstack.next_successor();
-      if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
+      Block* next_block = bstack.next_successor();
+      if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
         // Push on stack the state of successor
-        bstack.push(pre_order, s);
+        bstack.push(pre_order, next_block);
         ++pre_order;
       }
     }
     else {
       // Build a reverse post-order in the CFG _blocks array
       Block *stack_top = bstack.pop();
-      stack_top->_rpo = --_rpo_ctr;
+      stack_top->_rpo = --rpo_counter;
       _blocks.map(stack_top->_rpo, stack_top);
     }
   }
   return pre_order;
 }
 
-//------------------------------COMPRESS---------------------------------------
 void Tarjan::COMPRESS()
 {
   assert( _ancestor != 0, "" );
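
do_DFS() above hands out pre-order numbers on the way down and fills the _blocks array in reverse post-order by counting rpo_counter down as blocks are popped. A self-contained sketch of that explicit-stack pattern; the graph type and names are made up for the example.

#include <cstdio>
#include <utility>
#include <vector>

struct Graph {
  std::vector<std::vector<int>> succs;
};

// Explicit-stack DFS: pre-order numbers on entry, RPO slots filled from the
// back as each node's successors are exhausted.
void dfs_orders(const Graph& g, int root,
                std::vector<int>& pre_order, std::vector<int>& rpo) {
  const int n = (int)g.succs.size();
  pre_order.assign(n, 0);                 // 0 means "not visited yet"
  rpo.assign(n, -1);
  int pre_counter = 1;
  int rpo_counter = n;                    // filled from the back, like _rpo above

  // Each frame remembers the node and which successor to try next.
  std::vector<std::pair<int, size_t>> stack;
  pre_order[root] = pre_counter++;
  stack.push_back(std::make_pair(root, (size_t)0));

  while (!stack.empty()) {
    int node = stack.back().first;
    size_t next_succ = stack.back().second;
    if (next_succ < g.succs[node].size()) {
      stack.back().second++;
      int next = g.succs[node][next_succ];
      if (pre_order[next] == 0) {         // not yet visited: descend
        pre_order[next] = pre_counter++;
        stack.push_back(std::make_pair(next, (size_t)0));
      }
    } else {
      rpo[node] = --rpo_counter;          // all successors done: post-order slot
      stack.pop_back();
    }
  }
}

int main() {
  Graph g;
  g.succs = { {1, 2}, {3}, {3}, {} };     // diamond 0 -> {1,2} -> 3
  std::vector<int> pre, rpo;
  dfs_orders(g, 0, pre, rpo);
  for (int b = 0; b < 4; b++) {
    std::printf("B%d: pre=%d rpo=%d\n", b, pre[b], rpo[b]);
  }
  return 0;
}
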
@@ -303,14 +293,12 @@
   }
 }
 
-//------------------------------EVAL-------------------------------------------
 Tarjan *Tarjan::EVAL() {
   if( !_ancestor ) return _label;
   COMPRESS();
   return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }
 
-//------------------------------LINK-------------------------------------------
 void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
   Tarjan *s = w;
   while( w->_label->_semi < s->_child->_label->_semi ) {
@@ -333,7 +321,6 @@
   }
 }
 
-//------------------------------setdepth---------------------------------------
 void Tarjan::setdepth( uint stack_size ) {
   Tarjan **top  = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
   Tarjan **next = top;
@@ -362,8 +349,7 @@
   } while (last < top);
 }
 
-//*********************** DOMINATORS ON THE SEA OF NODES***********************
-//------------------------------NTarjan----------------------------------------
+// Compute dominators on the Sea of Nodes form
 // A data structure that holds all the information needed to find dominators.
 struct NTarjan {
   Node *_control;               // Control node associated with this info
@@ -396,7 +382,6 @@
 #endif
 };
 
-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the sea of nodes.  This version walks all CFG
 // nodes (using the is_CFG() call) and places them in a dominator tree.  Thus,
 // it needs a count of the CFG nodes for the mapping table. This is the
@@ -517,7 +502,6 @@
   }
 }
 
-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
 int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
@@ -560,7 +544,6 @@
   return dfsnum;
 }
 
-//------------------------------COMPRESS---------------------------------------
 void NTarjan::COMPRESS()
 {
   assert( _ancestor != 0, "" );
@@ -572,14 +555,12 @@
   }
 }
 
-//------------------------------EVAL-------------------------------------------
 NTarjan *NTarjan::EVAL() {
   if( !_ancestor ) return _label;
   COMPRESS();
   return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }
 
-//------------------------------LINK-------------------------------------------
 void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
   NTarjan *s = w;
   while( w->_label->_semi < s->_child->_label->_semi ) {
@@ -602,7 +583,6 @@
   }
 }
 
-//------------------------------setdepth---------------------------------------
 void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
   NTarjan **top  = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
   NTarjan **next = top;
@@ -631,7 +611,6 @@
   } while (last < top);
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void NTarjan::dump(int offset) const {
   // Dump the data from this node
--- a/src/share/vm/opto/escape.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/escape.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -65,15 +65,19 @@
   // EA brings benefits only when the code has allocations and/or locks which
   // are represented by ideal Macro nodes.
   int cnt = C->macro_count();
-  for( int i=0; i < cnt; i++ ) {
+  for (int i = 0; i < cnt; i++) {
     Node *n = C->macro_node(i);
-    if ( n->is_Allocate() )
+    if (n->is_Allocate())
       return true;
-    if( n->is_Lock() ) {
+    if (n->is_Lock()) {
       Node* obj = n->as_Lock()->obj_node()->uncast();
-      if( !(obj->is_Parm() || obj->is_Con()) )
+      if (!(obj->is_Parm() || obj->is_Con()))
         return true;
     }
+    if (n->is_CallStaticJava() &&
+        n->as_CallStaticJava()->is_boxing_method()) {
+      return true;
+    }
   }
   return false;
 }
@@ -117,7 +121,7 @@
   { Compile::TracePhase t3("connectionGraph", &Phase::_t_connectionGraph, true);
 
   // 1. Populate Connection Graph (CG) with PointsTo nodes.
-  ideal_nodes.map(C->unique(), NULL);  // preallocate space
+  ideal_nodes.map(C->live_nodes(), NULL);  // preallocate space
   // Initialize worklist
   if (C->root() != NULL) {
     ideal_nodes.push(C->root());
@@ -160,8 +164,11 @@
       // escape status of the associated Allocate node some of them
       // may be eliminated.
       storestore_worklist.append(n);
+    } else if (n->is_MemBar() && (n->Opcode() == Op_MemBarRelease) &&
+               (n->req() > MemBarNode::Precedent)) {
+      record_for_optimizer(n);
 #ifdef ASSERT
-    } else if(n->is_AddP()) {
+    } else if (n->is_AddP()) {
       // Collect address nodes for graph verification.
       addp_worklist.append(n);
 #endif
@@ -214,8 +221,15 @@
   int non_escaped_length = non_escaped_worklist.length();
   for (int next = 0; next < non_escaped_length; next++) {
     JavaObjectNode* ptn = non_escaped_worklist.at(next);
-    if (ptn->escape_state() == PointsToNode::NoEscape &&
-        ptn->scalar_replaceable()) {
+    bool noescape = (ptn->escape_state() == PointsToNode::NoEscape);
+    Node* n = ptn->ideal_node();
+    if (n->is_Allocate()) {
+      n->as_Allocate()->_is_non_escaping = noescape;
+    }
+    if (n->is_CallStaticJava()) {
+      n->as_CallStaticJava()->_is_non_escaping = noescape;
+    }
+    if (noescape && ptn->scalar_replaceable()) {
       adjust_scalar_replaceable_state(ptn);
       if (ptn->scalar_replaceable()) {
         alloc_worklist.append(ptn->ideal_node());
@@ -338,8 +352,10 @@
       // Don't mark as processed since call's arguments have to be processed.
       delayed_worklist->push(n);
       // Check if a call returns an object.
-      if (n->as_Call()->returns_pointer() &&
-          n->as_Call()->proj_out(TypeFunc::Parms) != NULL) {
+      if ((n->as_Call()->returns_pointer() &&
+           n->as_Call()->proj_out(TypeFunc::Parms) != NULL) ||
+          (n->is_CallStaticJava() &&
+           n->as_CallStaticJava()->is_boxing_method())) {
         add_call_node(n->as_Call());
       }
     }
@@ -392,8 +408,8 @@
     case Op_ConN: {
       // assume all oop constants globally escape except for null
       PointsToNode::EscapeState es;
-      if (igvn->type(n) == TypePtr::NULL_PTR ||
-          igvn->type(n) == TypeNarrowOop::NULL_PTR) {
+      const Type* t = igvn->type(n);
+      if (t == TypePtr::NULL_PTR || t == TypeNarrowOop::NULL_PTR) {
         es = PointsToNode::NoEscape;
       } else {
         es = PointsToNode::GlobalEscape;
@@ -803,6 +819,18 @@
       // Returns a newly allocated unescaped object.
       add_java_object(call, PointsToNode::NoEscape);
       ptnode_adr(call_idx)->set_scalar_replaceable(false);
+    } else if (meth->is_boxing_method()) {
+      // Returns boxing object
+      PointsToNode::EscapeState es;
+      vmIntrinsics::ID intr = meth->intrinsic_id();
+      if (intr == vmIntrinsics::_floatValue || intr == vmIntrinsics::_doubleValue) {
+        // It does not escape if object is always allocated.
+        es = PointsToNode::NoEscape;
+      } else {
+        // It escapes globally if object could be loaded from cache.
+        es = PointsToNode::GlobalEscape;
+      }
+      add_java_object(call, es);
     } else {
       BCEscapeAnalyzer* call_analyzer = meth->get_bcea();
       call_analyzer->copy_dependencies(_compile->dependencies());
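
The new branch above assigns an escape state to the result of a boxing call: the float and double boxing methods always allocate, so their result does not escape, while the other valueOf variants may return an object from a shared cache and must be treated as globally escaping. A tiny sketch of just that decision; the enums and function name are illustrative, not the ConnectionGraph types.

#include <cstdio>

enum class EscapeState { NoEscape, ArgEscape, GlobalEscape };
enum class BoxKind { Boolean, Byte, Short, Char, Int, Long, Float, Double };

// Float/Double boxing always allocates a fresh object; the integral kinds
// may hand back a cached instance, which escapes globally.
EscapeState escape_state_for_boxing_call(BoxKind kind) {
  switch (kind) {
    case BoxKind::Float:
    case BoxKind::Double:
      return EscapeState::NoEscape;
    default:
      return EscapeState::GlobalEscape;
  }
}

int main() {
  std::printf("Integer.valueOf -> %s\n",
              escape_state_for_boxing_call(BoxKind::Int) == EscapeState::GlobalEscape
                  ? "GlobalEscape" : "NoEscape");
  std::printf("Double.valueOf  -> %s\n",
              escape_state_for_boxing_call(BoxKind::Double) == EscapeState::NoEscape
                  ? "NoEscape" : "GlobalEscape");
  return 0;
}
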
@@ -949,6 +977,9 @@
       assert((name == NULL || strcmp(name, "uncommon_trap") != 0), "normal calls only");
 #endif
       ciMethod* meth = call->as_CallJava()->method();
+      if ((meth != NULL) && meth->is_boxing_method()) {
+        break; // Boxing methods do not modify any oops.
+      }
       BCEscapeAnalyzer* call_analyzer = (meth !=NULL) ? meth->get_bcea() : NULL;
       // fall-through if not a Java method or no analyzer information
       if (call_analyzer != NULL) {
@@ -2764,6 +2795,11 @@
           // so it could be eliminated if it has no uses.
           alloc->as_Allocate()->_is_scalar_replaceable = true;
         }
+        if (alloc->is_CallStaticJava()) {
+          // Set the scalar_replaceable flag for boxing method
+          // so it could be eliminated if it has no uses.
+          alloc->as_CallStaticJava()->_is_scalar_replaceable = true;
+        }
         continue;
       }
       if (!n->is_CheckCastPP()) { // not unique CheckCastPP.
@@ -2802,6 +2838,11 @@
         // so it could be eliminated.
         alloc->as_Allocate()->_is_scalar_replaceable = true;
       }
+      if (alloc->is_CallStaticJava()) {
+        // Set the scalar_replaceable flag for boxing method
+        // so it could be eliminated.
+        alloc->as_CallStaticJava()->_is_scalar_replaceable = true;
+      }
       set_escape_state(ptnode_adr(n->_idx), es); // CheckCastPP escape state
       // in order for an object to be scalar-replaceable, it must be:
       //   - a direct allocation (not a call returning an object)
@@ -2931,7 +2972,9 @@
         // Load/store to instance's field
         memnode_worklist.append_if_missing(use);
       } else if (use->is_MemBar()) {
-        memnode_worklist.append_if_missing(use);
+        if (use->in(TypeFunc::Memory) == n) { // Ignore precedent edge
+          memnode_worklist.append_if_missing(use);
+        }
       } else if (use->is_AddP() && use->outcnt() > 0) { // No dead nodes
         Node* addp2 = find_second_addp(use, n);
         if (addp2 != NULL) {
@@ -3033,7 +3076,9 @@
           continue;
         memnode_worklist.append_if_missing(use);
       } else if (use->is_MemBar()) {
-        memnode_worklist.append_if_missing(use);
+        if (use->in(TypeFunc::Memory) == n) { // Ignore precedent edge
+          memnode_worklist.append_if_missing(use);
+        }
 #ifdef ASSERT
       } else if(use->is_Mem()) {
         assert(use->in(MemNode::Memory) != n, "EA: missing memory path");
@@ -3264,7 +3309,12 @@
     if (ptn == NULL || !ptn->is_JavaObject())
       continue;
     PointsToNode::EscapeState es = ptn->escape_state();
-    if (ptn->ideal_node()->is_Allocate() && (es == PointsToNode::NoEscape || Verbose)) {
+    if ((es != PointsToNode::NoEscape) && !Verbose) {
+      continue;
+    }
+    Node* n = ptn->ideal_node();
+    if (n->is_Allocate() || (n->is_CallStaticJava() &&
+                             n->as_CallStaticJava()->is_boxing_method())) {
       if (first) {
         tty->cr();
         tty->print("======== Connection graph for ");
--- a/src/share/vm/opto/gcm.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/gcm.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -41,6 +41,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -70,7 +73,7 @@
 // are in b also.
 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
   // Set basic block of n, Add n to b,
-  _bbs.map(n->_idx, b);
+  map_node_to_block(n, b);
   b->add_inst(n);
 
   // After Matching, nearly any old Node may have projections trailing it.
@@ -79,11 +82,12 @@
   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
     Node*  use  = n->fast_out(i);
     if (use->is_Proj()) {
-      Block* buse = _bbs[use->_idx];
+      Block* buse = get_block_for_node(use);
       if (buse != b) {              // In wrong block?
-        if (buse != NULL)
+        if (buse != NULL) {
           buse->find_remove(use);   // Remove from wrong block
-        _bbs.map(use->_idx, b);     // Re-insert in this block
+        }
+        map_node_to_block(use, b);
         b->add_inst(use);
       }
     }
@@ -101,16 +105,16 @@
   if (p != NULL && p != n) {    // Control from a block projection?
     assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
     // Find trailing Region
-    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
     uint j = 0;
     if (pb->_num_succs != 1) {  // More then 1 successor?
       // Search for successor
-      uint max = pb->_nodes.size();
+      uint max = pb->number_of_nodes();
       assert( max > 1, "" );
       uint start = max - pb->_num_succs;
       // Find which output path belongs to projection
       for (j = start; j < max; j++) {
-        if( pb->_nodes[j] == in0 )
+        if( pb->get_node(j) == in0 )
           break;
       }
       assert( j < max, "must find" );
@@ -124,26 +128,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
-        while( !input->is_block_start() )
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
+        while (!input->is_block_start()) {
           input = input->in(0);
-        Block *b = _bbs[input->_idx];  // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        }
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non-NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
@@ -153,7 +161,7 @@
 // Assert that new input b2 is dominated by all previous inputs.
 // Check this by by seeing that it is dominated by b1, the deepest
 // input observed until b2.
-static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
   if (b1 == NULL)  return;
   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
   Block* tmp = b2;
@@ -166,7 +174,7 @@
     for (uint j=0; j<n->len(); j++) { // For all inputs
       Node* inn = n->in(j); // Get input
       if (inn == NULL)  continue;  // Ignore NULL, missing inputs
-      Block* inb = bbs[inn->_idx];
+      Block* inb = cfg->get_block_for_node(inn);
       tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                  inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
       inn->dump();
@@ -178,20 +186,20 @@
 }
 #endif
 
-static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
   // Find the last input dominated by all other inputs.
   Block* deepb           = NULL;        // Deepest block so far
   int    deepb_dom_depth = 0;
   for (uint k = 0; k < n->len(); k++) { // For all inputs
     Node* inn = n->in(k);               // Get input
     if (inn == NULL)  continue;         // Ignore NULL, missing inputs
-    Block* inb = bbs[inn->_idx];
+    Block* inb = cfg->get_block_for_node(inn);
     assert(inb != NULL, "must already have scheduled this input");
     if (deepb_dom_depth < (int) inb->_dom_depth) {
       // The new inb must be dominated by the previous deepb.
       // The various inputs must be linearly ordered in the dom
       // tree, or else there will not be a unique deepest block.
-      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
       deepb = inb;                      // Save deepest block
       deepb_dom_depth = deepb->_dom_depth;
     }
@@ -207,32 +215,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
@@ -241,37 +246,47 @@
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;              // Assume all n's inputs will be processed
-      while (i < n->len()) {         // For all inputs
-        Node *in = n->in(i);         // Get input
-        ++i;
-        if (in == NULL) continue;    // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+        if (!has_block(in)) {
           if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i);         // Save parent node and next input's index.
-          nstack_top_n = in;         // Process current input now.
-          nstack_top_i = 0;
-          done = false;              // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {    // Input not yet visited?
-          roots.push(in);            // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
         } else {
-          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -280,12 +295,12 @@
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } //    if (done)
-    }   // while (true)
-  }     // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
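
schedule_early() above, through find_deepest_input(), places an unpinned node in the deepest dominator-tree block among the blocks of its inputs (the inputs' blocks are asserted to be linearly ordered by dominance). A minimal sketch of just that selection rule, with a stub Block carrying only a depth:

#include <cstdio>
#include <vector>

struct Block { int dom_depth; };

// Earliest legal placement: the deepest block among the input blocks;
// nodes with no scheduled inputs fall back to the root block.
const Block* earliest_legal_block(const std::vector<const Block*>& input_blocks,
                                  const Block* root_block) {
  const Block* deepest = root_block;
  for (size_t i = 0; i < input_blocks.size(); i++) {
    const Block* b = input_blocks[i];
    if (b != NULL && b->dom_depth > deepest->dom_depth) {
      deepest = b;                       // a deeper input block constrains us further
    }
  }
  return deepest;
}

int main() {
  Block root = { 0 }, loop_header = { 2 }, loop_body = { 3 };
  std::vector<const Block*> inputs;
  inputs.push_back(&root);
  inputs.push_back(&loop_header);
  inputs.push_back(&loop_body);
  std::printf("earliest block depth = %d\n",
              earliest_legal_block(inputs, &root)->dom_depth);   // prints 3
  return 0;
}
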
 
@@ -317,8 +332,8 @@
 // The definition must dominate the use, so move the LCA upward in the
 // dominator tree to dominate the use.  If the use is a phi, adjust
 // the LCA only with the phi input paths which actually use this def.
-static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
-  Block* buse = bbs[use->_idx];
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
+  Block* buse = cfg->get_block_for_node(use);
   if (buse == NULL)    return LCA;   // Unused killing Projs have no use block
   if (!use->is_Phi())  return buse->dom_lca(LCA);
   uint pmax = use->req();       // Number of Phi inputs
@@ -333,7 +348,7 @@
   // more than once.
   for (uint j=1; j<pmax; j++) { // For all inputs
     if (use->in(j) == def) {    // Found matching input?
-      Block* pred = bbs[buse->pred(j)->_idx];
+      Block* pred = cfg->get_block_for_node(buse->pred(j));
       LCA = pred->dom_lca(LCA);
     }
   }
@@ -346,8 +361,7 @@
 // which are marked with the given index.  Return the LCA (in the dom tree)
 // of all marked blocks.  If there are none marked, return the original
 // LCA.
-static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
-                                    Block* early, Block_Array &bbs) {
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
   Block_List worklist;
   worklist.push(LCA);
   while (worklist.size() > 0) {
@@ -370,7 +384,7 @@
     } else {
       // Keep searching through this block's predecessors.
       for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
-        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
         worklist.push(mid_parent);
       }
     }
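
raise_LCA_above_use() above folds the blocks of a def's uses together with Block::dom_lca(). A stand-alone sketch of the usual way such a dominator-tree LCA is computed, from just an idom pointer and a depth; the field names echo the diff but the type is a stub, not the HotSpot Block.

#include <cstdio>

struct Block {
  Block* _idom;       // immediate dominator (NULL for the root)
  int    _dom_depth;  // depth in the dominator tree
};

// Walk the deeper block up its idom chain until the depths match, then
// climb both chains in lock-step until they meet.
Block* dom_lca(Block* a, Block* b) {
  while (a->_dom_depth > b->_dom_depth) a = a->_idom;
  while (b->_dom_depth > a->_dom_depth) b = b->_idom;
  while (a != b) {
    a = a->_idom;
    b = b->_idom;
  }
  return a;
}

int main() {
  // root dominates both leaves, so the LCA of the two leaves is the root.
  Block root  = { NULL, 0 };
  Block left  = { &root, 1 };
  Block right = { &root, 1 };
  std::printf("lca depth = %d\n", dom_lca(&left, &right)->_dom_depth);  // prints 0
  return 0;
}
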
@@ -388,7 +402,7 @@
 // be earlier (at a shallower dom_depth) than the true schedule_early
 // point of the node. We compute this earlier block as a more permissive
 // site for anti-dependency insertion, but only if subsume_loads is enabled.
-static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
   Node* base;
   Node* index;
   Node* store = load->in(MemNode::Memory);
@@ -416,12 +430,12 @@
     Block* deepb           = NULL;        // Deepest block so far
     int    deepb_dom_depth = 0;
     for (int i = 0; i < mem_inputs_length; i++) {
-      Block* inb = bbs[mem_inputs[i]->_idx];
+      Block* inb = cfg->get_block_for_node(mem_inputs[i]);
       if (deepb_dom_depth < (int) inb->_dom_depth) {
         // The new inb must be dominated by the previous deepb.
         // The various inputs must be linearly ordered in the dom
         // tree, or else there will not be a unique deepest block.
-        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+        DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
         deepb = inb;                      // Save deepest block
         deepb_dom_depth = deepb->_dom_depth;
       }
@@ -492,14 +506,14 @@
   // and other inputs are first available.  (Computed by schedule_early.)
   // For normal loads, 'early' is the shallowest place (dom graph wise)
   // to look for anti-deps between this load and any store.
-  Block* early = _bbs[load_index];
+  Block* early = get_block_for_node(load);
 
   // If we are subsuming loads, compute an "early" block that only considers
   // memory or address inputs. This block may be different than the
   // schedule_early block in that it could be at an even shallower depth in the
   // dominator tree, and allow for a broader discovery of anti-dependences.
   if (C->subsume_loads()) {
-    early = memory_early_block(load, early, _bbs);
+    early = memory_early_block(load, early, this);
   }
 
   ResourceArea *area = Thread::current()->resource_area();
@@ -623,7 +637,7 @@
     // or else observe that 'store' is all the way up in the
     // earliest legal block for 'load'.  In the latter case,
     // immediately insert an anti-dependence edge.
-    Block* store_block = _bbs[store->_idx];
+    Block* store_block = get_block_for_node(store);
     assert(store_block != NULL, "unused killing projections skipped above");
 
     if (store->is_Phi()) {
@@ -641,7 +655,7 @@
       for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
         if (store->in(j) == mem) {   // Found matching input?
           DEBUG_ONLY(found_match = true);
-          Block* pred_block = _bbs[store_block->pred(j)->_idx];
+          Block* pred_block = get_block_for_node(store_block->pred(j));
           if (pred_block != early) {
             // If any predecessor of the Phi matches the load's "early block",
             // we do not need a precedence edge between the Phi and 'load'
@@ -715,7 +729,7 @@
   // preventing the load from sinking past any block containing
   // a store that may invalidate the memory state required by 'load'.
   if (must_raise_LCA)
-    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+    LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
   if (LCA == early)  return LCA;
 
   // Insert anti-dependence edges from 'load' to each store
@@ -724,7 +738,7 @@
   if (LCA->raise_LCA_mark() == load_index) {
     while (non_early_stores.size() > 0) {
       Node* store = non_early_stores.pop();
-      Block* store_block = _bbs[store->_idx];
+      Block* store_block = get_block_for_node(store);
       if (store_block == LCA) {
         // add anti_dependence from store to load in its own block
         assert(store != load->in(0), "dependence cycle found");
@@ -758,7 +772,7 @@
 
 public:
   // Constructor for the iterator
-  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);
 
   // Postincrement operator to iterate over the nodes
   Node *next();
@@ -766,12 +780,12 @@
 private:
   VectorSet   &_visited;
   Node_List   &_stack;
-  Block_Array &_bbs;
+  PhaseCFG &_cfg;
 };
 
 // Constructor for the Node_Backward_Iterator
-Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
-  : _visited(visited), _stack(stack), _bbs(bbs) {
+Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
+  : _visited(visited), _stack(stack), _cfg(cfg) {
   // The stack should contain exactly the root
   stack.clear();
   stack.push(root);
@@ -801,8 +815,8 @@
     _visited.set(self->_idx);
 
     // Now schedule all uses as late as possible.
-    uint src     = self->is_Proj() ? self->in(0)->_idx : self->_idx;
-    uint src_rpo = _bbs[src]->_rpo;
+    const Node* src = self->is_Proj() ? self->in(0) : self;
+    uint src_rpo = _cfg.get_block_for_node(src)->_rpo;
 
     // Schedule all nodes in a post-order visit
     Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any
@@ -818,7 +832,7 @@
 
       // do not traverse backward control edges
       Node *use = n->is_Proj() ? n->in(0) : n;
-      uint use_rpo = _bbs[use->_idx]->_rpo;
+      uint use_rpo = _cfg.get_block_for_node(use)->_rpo;
 
       if ( use_rpo < src_rpo )
         continue;
@@ -850,13 +864,13 @@
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *n;
 
   // Walk over all the nodes from last to first
@@ -873,31 +887,34 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
-  uint use_pre_order = _bbs[n->_idx]->_pre_order;
+  uint use_latency = get_latency_for_node(n);
+  uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
       continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
       def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -907,25 +924,23 @@
 #endif
 
     // If the defining block is not known, assume it is ok
-    Block *def_block = _bbs[def->_idx];
+    Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
@@ -935,10 +950,11 @@
 // Compute the latency of a specific use
 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
   // If self-reference, return no latency
-  if (use == n || use->is_Root())
+  if (use == n || use->is_Root()) {
     return 0;
+  }
 
-  uint def_pre_order = _bbs[def->_idx]->_pre_order;
+  uint def_pre_order = get_block_for_node(def)->_pre_order;
   uint latency = 0;
 
   // If the use is not a projection, then it is simple...
@@ -950,7 +966,7 @@
     }
 #endif
 
-    uint use_pre_order = _bbs[use->_idx]->_pre_order;
+    uint use_pre_order = get_block_for_node(use)->_pre_order;
 
     if (use_pre_order < def_pre_order)
       return 0;
@@ -959,7 +975,7 @@
       return 0;
 
     uint nlen = use->len();
-    uint nl = _node_latency->at_grow(use->_idx);
+    uint nl = get_latency_for_node(use);
 
     for ( uint j=0; j<nlen; j++ ) {
       if (use->in(j) == n) {
@@ -994,8 +1010,7 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
@@ -1008,7 +1023,7 @@
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1018,11 +1033,11 @@
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->head());
+  uint end_latency   = get_latency_for_node(LCA->get_node(LCA->end_idx()));
   bool in_latency    = (target <= start_latency);
-  const Block* root_block = _bbs[_root->_idx];
+  const Block* root_block = get_block_for_node(_root);
 
   // Turn off latency scheduling if scheduling is just plain off
   if (!C->do_scheduling())
@@ -1037,14 +1052,13 @@
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
       LCA->_pre_order,
-      LCA->_nodes[0]->_idx,
+      LCA->head()->_idx,
       start_latency,
-      LCA->_nodes[LCA->end_idx()]->_idx,
+      LCA->get_node(LCA->end_idx())->_idx,
       end_latency,
       least_freq);
   }
@@ -1065,14 +1079,14 @@
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->head());
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->get_node(end_idx));
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
       tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
-        LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
+        LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq);
     }
 #endif
     if (LCA_freq < least_freq              || // Better Frequency
@@ -1106,7 +1120,7 @@
       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
 
@@ -1125,12 +1139,12 @@
     tty->print("\n#---- schedule_late ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *self;
 
   // Walk over all the nodes from last to first
   while (self = iter.next()) {
-    Block* early = _bbs[self->_idx];   // Earliest legal placement
+    Block* early = get_block_for_node(self); // Earliest legal placement
 
     if (self->is_top()) {
       // Top node goes in bb #2 with other constants.
@@ -1178,7 +1192,7 @@
       for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
         // For all uses, find LCA
         Node* use = self->fast_out(i);
-        LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+        LCA = raise_LCA_above_use(LCA, use, self, this);
       }
     }  // (Hide defs of imax, i from rest of block.)
 
@@ -1186,7 +1200,7 @@
     // requirement for correctness but it reduces useless
     // interference between temps and other nodes.
     if (mach != NULL && mach->is_MachTemp()) {
-      _bbs.map(self->_idx, LCA);
+      map_node_to_block(self, LCA);
       LCA->add_inst(self);
       continue;
     }
@@ -1251,7 +1265,7 @@
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
@@ -1260,22 +1274,23 @@
   }
 #endif
 
-  // Initialize the bbs.map for things on the proj_list
-  uint i;
-  for( i=0; i < proj_list.size(); i++ )
-    _bbs.map(proj_list[i]->_idx, NULL);
+  // Initialize the node to block mapping for things on the proj_list
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
+  }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
@@ -1283,29 +1298,25 @@
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
@@ -1328,10 +1339,11 @@
     // By reversing the loop direction we get a very minor gain on mpegaudio.
     // Feel free to revert to a forward loop for clarity.
     // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val  = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      implicit_null_check(block, proj, val, allowed_reasons);
       // The implicit_null_check will only perform the transformation
       // if the null branch is truly uncommon, *and* it leads to an
       // uncommon trap.  Combined with the too_many_traps guards
@@ -1348,11 +1360,11 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!schedule_local(block, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
@@ -1362,14 +1374,17 @@
 
   // If we inserted any instructions between a Call and its CatchNode,
   // clone the instructions on all paths below the Catch.
-  for( i=0; i < _num_blocks; i++ )
-    _blocks[i]->call_catch_cleanup(_bbs, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    call_catch_cleanup(block);
+  }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
     }
   }
 #endif
@@ -1377,10 +1392,29 @@
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
@@ -1388,18 +1422,20 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
           worklist.push(pb);
         } else if (pb->num_fall_throughs() == 2) {
@@ -1421,14 +1457,14 @@
   _root_loop->scale_freq();
 
   // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
@@ -1437,7 +1473,7 @@
       Block* uct = worklist.pop();
       uct->_freq = PROB_MIN;
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
           worklist.push(pb);
         }
@@ -1446,8 +1482,8 @@
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
     assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
@@ -1471,16 +1507,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
     // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
     // Sanity check that the RPO numbering is reflected in the _blocks array.
     // It doesn't have to be for the loop tree to be built, but if it is not,
     // then the blocks have been reordered since dom graph building...which
     // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
   }
 #endif
 
@@ -1490,14 +1526,14 @@
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for (uint i = number_of_blocks() - 1; i > 0; i--) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
-      Block* tail = _bbs[tail_n->_idx];
+      Block* tail = get_block_for_node(tail_n);
 
       // Defensively filter out Loop nodes for non-single-entry loops.
       // For all reasonable loops, the head occurs before the tail in RPO.
@@ -1512,13 +1548,13 @@
         loop_head->_loop = nloop;
         // Add to nloop so push_pred() will skip over inner loops
         nloop->add_member(loop_head);
-        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);
 
         while (worklist.size() > 0) {
           Block* member = worklist.pop();
           if (member != loop_head) {
             for (uint j = 1; j < member->num_preds(); j++) {
-              nloop->push_pred(member, j, worklist, _bbs);
+              nloop->push_pred(member, j, worklist, this);
             }
           }
         }
@@ -1528,23 +1564,23 @@
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
     if (lp == NULL) {
       // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
       lp = root_loop;
     }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
     }
     if (lp != root_loop) {
       if (lp->parent() == NULL) {
         // Not a nested loop. Make it a child of the method's pseudo loop.
         root_loop->add_nested_loop(lp);
       }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
         // Add nested loop to member list of parent loop.
         lp->parent()->add_member(lp);
       }
@@ -1555,9 +1591,9 @@
 }
 
 //------------------------------push_pred--------------------------------------
-void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
   Node* pred_n = blk->pred(i);
-  Block* pred = node_to_blk[pred_n->_idx];
+  Block* pred = cfg->get_block_for_node(pred_n);
   CFGLoop *pred_loop = pred->_loop;
   if (pred_loop == NULL) {
     // Filter out blocks for non-single-entry loops.
@@ -1578,7 +1614,7 @@
       Block* pred_head = pred_loop->head();
       assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       assert(pred_head != head(), "loop head in only one loop");
-      push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+      push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
     } else {
       assert(pred_loop->_parent == this && _parent == NULL, "just checking");
     }
@@ -1691,7 +1727,7 @@
 // Determine the probability of reaching successor 'i' from the receiver block.
 float Block::succ_prob(uint i) {
   int eidx = end_idx();
-  Node *n = _nodes[eidx];  // Get ending Node
+  Node *n = get_node(eidx);  // Get ending Node
 
   int op = n->Opcode();
   if (n->is_Mach()) {
@@ -1726,7 +1762,7 @@
     float prob  = n->as_MachIf()->_prob;
     assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
     // If succ[i] is the FALSE branch, invert path info
-    if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) {
+    if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) {
       return 1.0f - prob; // not taken
     } else {
       return prob; // taken
@@ -1738,7 +1774,7 @@
     return 1.0f/_num_succs;
 
   case Op_Catch: {
-    const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+    const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
     if (ci->_con == CatchProjNode::fall_through_index) {
       // Fall-thru path gets the lion's share.
       return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
@@ -1775,7 +1811,7 @@
 // Return the number of fall-through candidates for a block
 int Block::num_fall_throughs() {
   int eidx = end_idx();
-  Node *n = _nodes[eidx];  // Get ending Node
+  Node *n = get_node(eidx);  // Get ending Node
 
   int op = n->Opcode();
   if (n->is_Mach()) {
@@ -1799,7 +1835,7 @@
 
   case Op_Catch: {
     for (uint i = 0; i < _num_succs; i++) {
-      const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+      const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
       if (ci->_con == CatchProjNode::fall_through_index) {
         return 1;
       }
@@ -1827,14 +1863,14 @@
 // Return true if a specific successor could be fall-through target.
 bool Block::succ_fall_through(uint i) {
   int eidx = end_idx();
-  Node *n = _nodes[eidx];  // Get ending Node
+  Node *n = get_node(eidx);  // Get ending Node
 
   int op = n->Opcode();
   if (n->is_Mach()) {
     if (n->is_MachNullCheck()) {
       // In theory, either side can fall-thru, for simplicity sake,
       // let's say only the false branch can now.
-      return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse;
+      return get_node(i + eidx + 1)->Opcode() == Op_IfFalse;
     }
     op = n->as_Mach()->ideal_Opcode();
   }
@@ -1848,7 +1884,7 @@
     return true;
 
   case Op_Catch: {
-    const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+    const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
     return ci->_con == CatchProjNode::fall_through_index;
   }
 
@@ -1872,7 +1908,7 @@
 // Update the probability of a two-branch to be uncommon
 void Block::update_uncommon_branch(Block* ub) {
   int eidx = end_idx();
-  Node *n = _nodes[eidx];  // Get ending Node
+  Node *n = get_node(eidx);  // Get ending Node
 
   int op = n->as_Mach()->ideal_Opcode();
 
@@ -1888,7 +1924,7 @@
 
   // If ub is the true path, make the probability small, else
   // ub is the false path, and make the probability large
-  bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse);
+  bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse);
 
   // Get existing probability
   float p = n->as_MachIf()->_prob;
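
The latency hunks above (compute_latencies_backwards / latency_to_inputs) amount to a backward max-propagation over the node graph: each def's latency becomes the maximum over its uses of (use latency + edge latency). The following is a minimal standalone sketch of that idea only, not HotSpot code; ToyNode, edge_latency and the tiny fixed graph are invented for illustration, and the real pass gets its visit order from Node_Backward_Iterator and its per-edge cost from Node::latency(j).

// Sketch: backward max-propagation of latencies over a small DAG.
#include <cstdio>
#include <vector>

struct ToyNode {
  int id;
  std::vector<int> inputs;   // indices of defining nodes
  unsigned latency = 0;      // computed cycles-to-end, like _node_latency
};

// Assumed per-edge cost; HotSpot asks the matcher via Node::latency(j).
static unsigned edge_latency(const ToyNode&, int /*input_slot*/) { return 1; }

int main() {
  // Tiny graph: nodes 0 and 1 feed 2, node 2 feeds 3 (3 is the "last" use).
  std::vector<ToyNode> g(4);
  for (int i = 0; i < 4; i++) g[i].id = i;
  g[2].inputs = {0, 1};
  g[3].inputs = {2};

  // The nodes are already topologically ordered, so visiting them from last
  // to first is the backward walk the iterator provides in the real code.
  for (int n = (int)g.size() - 1; n >= 0; n--) {
    unsigned use_latency = g[n].latency;
    for (size_t j = 0; j < g[n].inputs.size(); j++) {
      ToyNode& def = g[g[n].inputs[j]];
      unsigned candidate = use_latency + edge_latency(g[n], (int)j);
      if (def.latency < candidate) def.latency = candidate;  // keep the max
    }
  }
  for (const ToyNode& n : g)
    std::printf("node %d latency %u\n", n.id, n.latency);
  return 0;
}
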
--- a/src/share/vm/opto/generateOptoStub.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/generateOptoStub.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -100,6 +100,55 @@
 #endif /* defined(SPARC) */
 
 
+#if defined(AARCH64)
+  // The x86 code does not need to write the return pc that follows the
+  // VM callout to the thread anchor pc slot. The frame walking code
+  // can safely compute the called stub's return pc by
+  // evaluating sp[-1], where sp is the top of the stub frame saved in
+  // the anchor sp field. AArch64 cannot make this assumption because
+  // the C compiler does not have to push the link register and, even
+  // if it does, can locate it somewhere other than at the bottom of
+  // the frame.
+
+  // This poses a problem when we come to generate the opto runtime
+  // stubs. We cannot actually refer to generated code addresses in
+  // ideal code and, worse, we need the memory write to store a
+  // constant value derived from the code address of the instruction
+  // following the runtime callout, i.e. the constant we need is only
+  // determined when we plant a separate instruction later in the
+  // instruction stream.
+  //
+  // Rather than introduce a mechanism into the opto compiler to allow
+  // us to provide some sort of label and label-resolve mechanism and
+  // then backpatch the required address, we can actually do this with
+  // a minor hack. We generate a store to the anchor pc field but
+  // supply a small negative constant address (-1 or -2) as the value
+  // for the memory store. In the back end we detect this type of
+  // store and compute the desired address, substituting it in place
+  // of the supplied constant.
+  //
+  // The back end employs a special lowering rule to do this. It
+  // matches a write via the thread register with an offset which
+  // equals the thread anchor's pc slot. Of course, that sort of
+  // write only happens in these stubs! The encoding for this rule
+  // substitutes the supplied value with the current code buffer
+  // address plus an offset to the instruction following the VM
+  // callout. Of course, the offset varies according to what arguments
+  // are passed to the callout. For any given stub the arguments
+  // passed to the VM include the stub arguments, the thread and,
+  // optionally, the stub's caller's return pc. We supply -1 in the
+  // store if the call will not include the caller's return pc or -2 if it
+  // does. The back end can use this to work out exactly what the
+  // required offset is.
+
+  const TypeRawPtr *t = TypeRawPtr::make((address)(return_pc ? -2L : -1L));
+
+  Node *last_pc = new (C) ConPNode(t);
+  _gvn.set_type(last_pc, t);
+  store_to_memory(NULL, adr_last_Java_pc, last_pc, T_ADDRESS, NoAlias);
+
+#endif /* defined(AARCH64) */
+
   // Drop in the last_Java_sp.  last_Java_fp is not touched.
   // Always do this after the other "last_Java_frame" fields are set since
   // as soon as last_Java_sp != NULL the has_last_Java_frame is true and
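
The sentinel-store scheme described in the AArch64 comment above is a form of backpatching: emit a recognizable placeholder value now, remember where it landed, and overwrite it once the real address is known. Below is a toy sketch of that general pattern only, not the HotSpot/AArch64 lowering itself; ToyCodeBuffer and every value in it are invented for the example.

// Sketch: emit a sentinel, record the slot, patch it when the address is known.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct ToyCodeBuffer {
  std::vector<std::uint64_t> words;    // pretend "instruction stream"
  std::vector<std::size_t>   fixups;   // slots currently holding a sentinel

  void emit(std::uint64_t w) { words.push_back(w); }

  // Emit a store of a sentinel (-1/-2 in the real code) and record the slot.
  void emit_sentinel(std::uint64_t sentinel) {
    fixups.push_back(words.size());
    words.push_back(sentinel);
  }

  // Later, when the callout's return address is known, patch every slot.
  void backpatch(std::uint64_t real_value) {
    for (std::size_t slot : fixups) words[slot] = real_value;
    fixups.clear();
  }
};

int main() {
  ToyCodeBuffer cb;
  cb.emit(0x1111);            // some earlier "instruction"
  cb.emit_sentinel(~0ULL);    // stands in for the not-yet-known return pc
  cb.emit(0x2222);            // the "call"; the address after it is the pc
  std::uint64_t return_pc = 0xdeadbeef;   // made-up value for the demo
  cb.backpatch(return_pc);
  for (std::uint64_t w : cb.words)
    std::printf("0x%llx\n", (unsigned long long)w);
  return 0;
}
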
--- a/src/share/vm/opto/graphKit.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/graphKit.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -333,6 +333,7 @@
   assert(ex_jvms->stkoff() == phi_map->_jvms->stkoff(), "matching locals");
   assert(ex_jvms->sp() == phi_map->_jvms->sp(), "matching stack sizes");
   assert(ex_jvms->monoff() == phi_map->_jvms->monoff(), "matching JVMS");
+  assert(ex_jvms->scloff() == phi_map->_jvms->scloff(), "matching scalar replaced objects");
   assert(ex_map->req() == phi_map->req(), "matching maps");
   uint tos = ex_jvms->stkoff() + ex_jvms->sp();
   Node*         hidden_merge_mark = root();
@@ -409,7 +410,7 @@
         while (dst->req() > orig_width)  dst->del_req(dst->req()-1);
       } else {
         assert(dst->is_Phi(), "nobody else uses a hidden region");
-        phi = (PhiNode*)dst;
+        phi = dst->as_Phi();
       }
       if (add_multiple && src->in(0) == ex_control) {
         // Both are phis.
@@ -1461,7 +1462,12 @@
   } else {
     ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, sem);
   }
-  return _gvn.transform(ld);
+  ld = _gvn.transform(ld);
+  if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
+    // Improve graph before escape analysis and boxing elimination.
+    record_for_igvn(ld);
+  }
+  return ld;
 }
 
 Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
@@ -3194,7 +3200,7 @@
   set_all_memory(mem); // Create new memory state
 
   AllocateNode* alloc
-    = new (C) AllocateNode(C, AllocateNode::alloc_type(),
+    = new (C) AllocateNode(C, AllocateNode::alloc_type(Type::TOP),
                            control(), mem, i_o(),
                            size, klass_node,
                            initial_slow_test);
@@ -3358,7 +3364,7 @@
 
   // Create the AllocateArrayNode and its result projections
   AllocateArrayNode* alloc
-    = new (C) AllocateArrayNode(C, AllocateArrayNode::alloc_type(),
+    = new (C) AllocateArrayNode(C, AllocateArrayNode::alloc_type(TypeInt::INT),
                                 control(), mem, i_o(),
                                 size, klass_node,
                                 initial_slow_test,
@@ -3399,10 +3405,14 @@
   if (ptr == NULL) {     // reduce dumb test in callers
     return NULL;
   }
-  if (ptr->is_CheckCastPP()) {  // strip a raw-to-oop cast
+  if (ptr->is_CheckCastPP()) { // strip only one raw-to-oop cast
     ptr = ptr->in(1);
-    if (ptr == NULL)  return NULL;
+    if (ptr == NULL) return NULL;
   }
+  // Return NULL for allocations with several casts:
+  //   j.l.reflect.Array.newInstance(jobject, jint)
+  //   Object.clone()
+  // to keep the more precise type from the last cast.
   if (ptr->is_Proj()) {
     Node* allo = ptr->in(0);
     if (allo != NULL && allo->is_Allocate()) {
@@ -3447,19 +3457,6 @@
   return NULL;
 }
 
-// Trace Allocate -> Proj[Parm] -> MemBarStoreStore
-MemBarStoreStoreNode* AllocateNode::storestore() {
-  ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
-  if (rawoop == NULL)  return NULL;
-  for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) {
-    Node* storestore = rawoop->fast_out(i);
-    if (storestore->is_MemBarStoreStore()) {
-      return storestore->as_MemBarStoreStore();
-    }
-  }
-  return NULL;
-}
-
 //----------------------------- loop predicates ---------------------------
 
 //------------------------------add_predicate_impl----------------------------
--- a/src/share/vm/opto/idealGraphPrinter.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/idealGraphPrinter.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -413,10 +413,10 @@
     print_prop("debug_idx", node->_debug_idx);
 #endif
 
-    if(C->cfg() != NULL) {
-      Block *block = C->cfg()->_bbs[node->_idx];
-      if(block == NULL) {
-        print_prop("block", C->cfg()->_blocks[0]->_pre_order);
+    if (C->cfg() != NULL) {
+      Block* block = C->cfg()->get_block_for_node(node);
+      if (block == NULL) {
+        print_prop("block", C->cfg()->get_block(0)->_pre_order);
       } else {
         print_prop("block", block->_pre_order);
       }
@@ -616,7 +616,7 @@
       buffer[0] = 0;
       _chaitin->dump_register(node, buffer);
       print_prop("reg", buffer);
-      print_prop("lrg", _chaitin->n2lidx(node));
+      print_prop("lrg", _chaitin->_lrg_map.live_range_id(node));
     }
 
     Compile::current()->_in_dump_cnt--;
@@ -637,10 +637,10 @@
   if (C->cfg() != NULL) {
     // once we have a CFG there are some nodes that aren't really
     // reachable but are in the CFG so add them here.
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
-      for (uint s = 0; s < b->_nodes.size(); s++) {
-        nodeStack.push(b->_nodes[s]);
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
+      for (uint s = 0; s < block->number_of_nodes(); s++) {
+        nodeStack.push(block->get_node(s));
       }
     }
   }
@@ -698,24 +698,24 @@
   tail(EDGES_ELEMENT);
   if (C->cfg() != NULL) {
     head(CONTROL_FLOW_ELEMENT);
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
       begin_head(BLOCK_ELEMENT);
-      print_attr(BLOCK_NAME_PROPERTY, b->_pre_order);
+      print_attr(BLOCK_NAME_PROPERTY, block->_pre_order);
       end_head();
 
       head(SUCCESSORS_ELEMENT);
-      for (uint s = 0; s < b->_num_succs; s++) {
+      for (uint s = 0; s < block->_num_succs; s++) {
         begin_elem(SUCCESSOR_ELEMENT);
-        print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order);
+        print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order);
         end_elem();
       }
       tail(SUCCESSORS_ELEMENT);
 
       head(NODES_ELEMENT);
-      for (uint s = 0; s < b->_nodes.size(); s++) {
+      for (uint s = 0; s < block->number_of_nodes(); s++) {
         begin_elem(NODE_ELEMENT);
-        print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s]));
+        print_attr(NODE_ID_PROPERTY, get_node_id(block->get_node(s)));
         end_elem();
       }
       tail(NODES_ELEMENT);
--- a/src/share/vm/opto/ifg.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/ifg.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -39,12 +39,9 @@
 
 #define EXACT_PRESSURE 1
 
-//=============================================================================
-//------------------------------IFG--------------------------------------------
 PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
 }
 
-//------------------------------init-------------------------------------------
 void PhaseIFG::init( uint maxlrg ) {
   _maxlrg = maxlrg;
   _yanked = new (_arena) VectorSet(_arena);
@@ -61,7 +58,6 @@
   }
 }
 
-//------------------------------add--------------------------------------------
 // Add edge between vertices a & b.  These are sorted (triangular matrix),
 // then the smaller number is inserted in the larger numbered array.
 int PhaseIFG::add_edge( uint a, uint b ) {
@@ -73,7 +69,6 @@
   return _adjs[a].insert( b );
 }
 
-//------------------------------add_vector-------------------------------------
 // Add an edge between 'a' and everything in the vector.
 void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
   // IFG is triangular, so do the inserts where 'a' < 'b'.
@@ -88,7 +83,6 @@
   }
 }
 
-//------------------------------test-------------------------------------------
 // Is there an edge between a and b?
 int PhaseIFG::test_edge( uint a, uint b ) const {
   // Sort a and b, so that a is larger
@@ -97,7 +91,6 @@
   return _adjs[a].member(b);
 }
 
-//------------------------------SquareUp---------------------------------------
 // Convert triangular matrix to square matrix
 void PhaseIFG::SquareUp() {
   assert( !_is_square, "only on triangular" );
@@ -113,7 +106,6 @@
   _is_square = true;
 }
 
-//------------------------------Compute_Effective_Degree-----------------------
 // Compute effective degree in bulk
 void PhaseIFG::Compute_Effective_Degree() {
   assert( _is_square, "only on square" );
@@ -122,7 +114,6 @@
     lrgs(i).set_degree(effective_degree(i));
 }
 
-//------------------------------test_edge_sq-----------------------------------
 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
   assert( _is_square, "only on square" );
   // Swap, so that 'a' has the lesser count.  Then binary search is on
@@ -132,7 +123,6 @@
   return _adjs[a].member(b);
 }
 
-//------------------------------Union------------------------------------------
 // Union edges of B into A
 void PhaseIFG::Union( uint a, uint b ) {
   assert( _is_square, "only on square" );
@@ -148,7 +138,6 @@
   }
 }
 
-//------------------------------remove_node------------------------------------
 // Yank a Node and all connected edges from the IFG.  Return a
 // list of neighbors (edges) yanked.
 IndexSet *PhaseIFG::remove_node( uint a ) {
@@ -167,7 +156,6 @@
   return neighbors(a);
 }
 
-//------------------------------re_insert--------------------------------------
 // Re-insert a yanked Node.
 void PhaseIFG::re_insert( uint a ) {
   assert( _is_square, "only on square" );
@@ -182,7 +170,6 @@
   }
 }
 
-//------------------------------compute_degree---------------------------------
 // Compute the degree between 2 live ranges.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@@ -198,7 +185,6 @@
   return tmp;
 }
 
-//------------------------------effective_degree-------------------------------
 // Compute effective degree for this live range.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@@ -223,7 +209,6 @@
 
 
 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 void PhaseIFG::dump() const {
   tty->print_cr("-- Interference Graph --%s--",
                 _is_square ? "square" : "triangular" );
@@ -262,7 +247,6 @@
   tty->print("\n");
 }
 
-//------------------------------stats------------------------------------------
 void PhaseIFG::stats() const {
   ResourceMark rm;
   int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
@@ -278,7 +262,6 @@
   tty->cr();
 }
 
-//------------------------------verify-----------------------------------------
 void PhaseIFG::verify( const PhaseChaitin *pc ) const {
   // IFG is square, sorted and no need for Find
   for( uint i = 0; i < _maxlrg; i++ ) {
@@ -288,20 +271,18 @@
     uint idx;
     uint last = 0;
     while ((idx = elements.next()) != 0) {
-      assert( idx != i, "Must have empty diagonal");
-      assert( pc->Find_const(idx) == idx, "Must not need Find" );
-      assert( _adjs[idx].member(i), "IFG not square" );
-      assert( !(*_yanked)[idx], "No yanked neighbors" );
-      assert( last < idx, "not sorted increasing");
+      assert(idx != i, "Must have empty diagonal");
+      assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
+      assert(_adjs[idx].member(i), "IFG not square");
+      assert(!(*_yanked)[idx], "No yanked neighbors");
+      assert(last < idx, "not sorted increasing");
       last = idx;
     }
-    assert( !lrgs(i)._degree_valid ||
-            effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong" );
+    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
   }
 }
 #endif
 
-//------------------------------interfere_with_live----------------------------
 // Interfere this register with everything currently live.  Use the RegMasks
 // to trim the set of possible interferences. Return a count of register-only
 // interferences as an estimate of register pressure.
@@ -318,7 +299,6 @@
       _ifg->add_edge( r, l );
 }
 
-//------------------------------build_ifg_virtual------------------------------
 // Actually build the interference graph.  Uses virtual registers only, no
 // physical register masks.  This allows me to be very aggressive when
 // coalescing copies.  Some of this aggressiveness will have to be undone
@@ -328,9 +308,9 @@
 void PhaseChaitin::build_ifg_virtual( ) {
 
   // For all blocks (in any order) do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    IndexSet *liveout = _live->live(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    IndexSet* liveout = _live->live(block);
 
     // The IFG is built by a single reverse pass over each basic block.
     // Starting with the known live-out set, we remove things that get
@@ -340,14 +320,14 @@
     // The defined value interferes with everything currently live.  The
     // value is then removed from the live-ness set and its inputs are
     // added to the live-ness set.
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->get_node(j - 1);
 
       // Get value being defined
-      uint r = n2lidx(n);
+      uint r = _lrg_map.live_range_id(n);
 
       // Some special values do not allocate
-      if( r ) {
+      if (r) {
 
         // Remove from live-out set
         liveout->remove(r);
@@ -355,16 +335,19 @@
         // Copies do not define a new value and so do not interfere.
         // Remove the copies source from the liveout set before interfering.
         uint idx = n->is_Copy();
-        if( idx ) liveout->remove( n2lidx(n->in(idx)) );
+        if (idx) {
+          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
+        }
 
         // Interfere with everything live
-        interfere_with_live( r, liveout );
+        interfere_with_live(r, liveout);
       }
 
       // Make all inputs live
-      if( !n->is_Phi() ) {      // Phi function uses come from prior block
-        for( uint k = 1; k < n->req(); k++ )
-          liveout->insert( n2lidx(n->in(k)) );
+      if (!n->is_Phi()) {      // Phi function uses come from prior block
+        for (uint k = 1; k < n->req(); k++) {
+          liveout->insert(_lrg_map.live_range_id(n->in(k)));
+        }
       }
 
       // 2-address instructions always have the defined value live
@@ -396,18 +379,18 @@
           n->set_req( 2, tmp );
         }
         // Defined value interferes with all inputs
-        uint lidx = n2lidx(n->in(idx));
-        for( uint k = 1; k < n->req(); k++ ) {
-          uint kidx = n2lidx(n->in(k));
-          if( kidx != lidx )
-            _ifg->add_edge( r, kidx );
+        uint lidx = _lrg_map.live_range_id(n->in(idx));
+        for (uint k = 1; k < n->req(); k++) {
+          uint kidx = _lrg_map.live_range_id(n->in(k));
+          if (kidx != lidx) {
+            _ifg->add_edge(r, kidx);
+          }
         }
       }
     } // End of forall instructions in block
   } // End of forall blocks
 }
 
-//------------------------------count_int_pressure-----------------------------
 uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
   IndexSetIterator elements(liveout);
   uint lidx;
@@ -423,7 +406,6 @@
   return cnt;
 }
 
-//------------------------------count_float_pressure---------------------------
 uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
   IndexSetIterator elements(liveout);
   uint lidx;
@@ -437,7 +419,6 @@
   return cnt;
 }
 
-//------------------------------lower_pressure---------------------------------
 // Adjust register pressure down by 1.  Capture last hi-to-low transition,
 static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
   if (lrg->mask().is_UP() && lrg->mask_size()) {
@@ -467,40 +448,41 @@
   }
 }
 
-//------------------------------build_ifg_physical-----------------------------
 // Build the interference graph using physical registers when available.
 // That is, if 2 live ranges are simultaneously alive but in their acceptable
 // register sets do not overlap, then they do not interfere.
 uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
   NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
 
-  uint spill_reg = LRG::SPILL_REG;
   uint must_spill = 0;
 
   // For all blocks (in any order) do...
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     // Clone (rather than smash in place) the liveout info, so it is alive
     // for the "collect_gc_info" phase later.
-    IndexSet liveout(_live->live(b));
-    uint last_inst = b->end_idx();
+    IndexSet liveout(_live->live(block));
+    uint last_inst = block->end_idx();
     // Compute first nonphi node index
     uint first_inst;
-    for( first_inst = 1; first_inst < last_inst; first_inst++ )
-      if( !b->_nodes[first_inst]->is_Phi() )
+    for (first_inst = 1; first_inst < last_inst; first_inst++) {
+      if (!block->get_node(first_inst)->is_Phi()) {
         break;
+      }
+    }
 
     // Spills could be inserted before CreateEx node which should be
     // first instruction in block after Phis. Move CreateEx up.
-    for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
-      Node *ex = b->_nodes[insidx];
-      if( ex->is_SpillCopy() ) continue;
-      if( insidx > first_inst && ex->is_Mach() &&
-          ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+    for (uint insidx = first_inst; insidx < last_inst; insidx++) {
+      Node *ex = block->get_node(insidx);
+      if (ex->is_SpillCopy()) {
+        continue;
+      }
+      if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
         // If the CreateEx isn't above all the MachSpillCopies
         // then move it to the top.
-        b->_nodes.remove(insidx);
-        b->_nodes.insert(first_inst, ex);
+        block->remove_node(insidx);
+        block->insert_node(ex, first_inst);
       }
       // Stop once a CreateEx or any other node is found
       break;
@@ -510,12 +492,12 @@
     uint pressure[2], hrp_index[2];
     pressure[0] = pressure[1] = 0;
     hrp_index[0] = hrp_index[1] = last_inst+1;
-    b->_reg_pressure = b->_freg_pressure = 0;
+    block->_reg_pressure = block->_freg_pressure = 0;
     // Liveout things are presumed live for the whole block.  We accumulate
     // 'area' accordingly.  If they get killed in the block, we'll subtract
     // the unused part of the block from the area.
     int inst_count = last_inst - first_inst;
-    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
     assert(!(cost < 0.0), "negative spill cost" );
     IndexSetIterator elements(&liveout);
     uint lidx;
@@ -527,15 +509,17 @@
         if (lrg._is_float || lrg._is_vector) {   // Count float pressure
           pressure[1] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
-          if( pressure[1] > b->_freg_pressure )
-            b->_freg_pressure = pressure[1];
+          if (pressure[1] > block->_freg_pressure) {
+            block->_freg_pressure = pressure[1];
+          }
 #endif
           // Count int pressure, but do not count the SP, flags
-        } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+        } else if (lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
           pressure[0] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
-          if( pressure[0] > b->_reg_pressure )
-            b->_reg_pressure = pressure[0];
+          if (pressure[0] > block->_reg_pressure) {
+            block->_reg_pressure = pressure[0];
+          }
 #endif
         }
       }
@@ -552,17 +536,17 @@
     // value is then removed from the live-ness set and its inputs are added
     // to the live-ness set.
     uint j;
-    for( j = last_inst + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j - 1];
+    for (j = last_inst + 1; j > 1; j--) {
+      Node* n = block->get_node(j - 1);
 
       // Get value being defined
-      uint r = n2lidx(n);
+      uint r = _lrg_map.live_range_id(n);
 
       // Some special values do not allocate
-      if( r ) {
+      if (r) {
         // A DEF normally costs block frequency; rematerialized values are
         // removed from the DEF site, so LOWER costs here.
-        lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
+        lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq;
 
         // If it is not live, then this instruction is dead.  Probably caused
         // by spilling and rematerialization.  Who cares why, yank this baby.
@@ -570,7 +554,7 @@
           Node *def = n->in(0);
           if( !n->is_Proj() ||
               // Could also be a flags-projection of a dead ADD or such.
-              (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) {
+              (_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) {
             bool remove = true;
             if (n->is_MachProj()) {
               // Don't remove KILL projections if their "defining" nodes have
@@ -590,10 +574,12 @@
               }
             }
             if (remove) {
-              b->_nodes.remove(j - 1);
-              if( lrgs(r)._def == n ) lrgs(r)._def = 0;
+              block->remove_node(j - 1);
+              if (lrgs(r)._def == n) {
+                lrgs(r)._def = 0;
+              }
               n->disconnect_inputs(NULL, C);
-              _cfg._bbs.map(n->_idx,NULL);
+              _cfg.unmap_node_from_block(n);
               n->replace_by(C->top());
               // Since yanking a Node from block, high pressure moves up one
               hrp_index[0]--;
@@ -604,36 +590,36 @@
 
           // Fat-projections kill many registers which cannot be used to
           // hold live ranges.
-          if( lrgs(r)._fat_proj ) {
+          if (lrgs(r)._fat_proj) {
             // Count the int-only registers
             RegMask itmp = lrgs(r).mask();
             itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
             int iregs = itmp.Size();
 #ifdef EXACT_PRESSURE
-            if( pressure[0]+iregs > b->_reg_pressure )
-              b->_reg_pressure = pressure[0]+iregs;
+            if (pressure[0]+iregs > block->_reg_pressure) {
+              block->_reg_pressure = pressure[0] + iregs;
+            }
 #endif
-            if( pressure[0]       <= (uint)INTPRESSURE &&
-                pressure[0]+iregs >  (uint)INTPRESSURE ) {
+            if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) {
 #ifndef EXACT_PRESSURE
-              b->_reg_pressure = (uint)INTPRESSURE+1;
+              block->_reg_pressure = (uint)INTPRESSURE+1;
 #endif
-              hrp_index[0] = j-1;
+              hrp_index[0] = j - 1;
             }
             // Count the float-only registers
             RegMask ftmp = lrgs(r).mask();
             ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
             int fregs = ftmp.Size();
 #ifdef EXACT_PRESSURE
-            if( pressure[1]+fregs > b->_freg_pressure )
-              b->_freg_pressure = pressure[1]+fregs;
+            if (pressure[1] + fregs > block->_freg_pressure) {
+              block->_freg_pressure = pressure[1] + fregs;
+            }
 #endif
-            if( pressure[1]       <= (uint)FLOATPRESSURE &&
-                pressure[1]+fregs >  (uint)FLOATPRESSURE ) {
+            if (pressure[1] <= (uint)FLOATPRESSURE && pressure[1] + fregs > (uint)FLOATPRESSURE) {
 #ifndef EXACT_PRESSURE
-              b->_freg_pressure = (uint)FLOATPRESSURE+1;
+              block->_freg_pressure = (uint)FLOATPRESSURE+1;
 #endif
-              hrp_index[1] = j-1;
+              hrp_index[1] = j - 1;
             }
           }
 
@@ -646,7 +632,7 @@
           if( n->is_SpillCopy()
               && lrgs(r).is_singledef()        // MultiDef live range can still split
               && n->outcnt() == 1              // and use must be in this block
-              && _cfg._bbs[n->unique_out()->_idx] == b ) {
+              && _cfg.get_block_for_node(n->unique_out()) == block) {
             // All single-use MachSpillCopy(s) that immediately precede their
             // use must color early.  If a longer live range steals their
             // color, the spill copy will split and may push another spill copy
@@ -656,14 +642,16 @@
             //
 
             Node *single_use = n->unique_out();
-            assert( b->find_node(single_use) >= j, "Use must be later in block");
+            assert(block->find_node(single_use) >= j, "Use must be later in block");
             // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
 
             // Find first non SpillCopy 'm' that follows the current instruction
             // (j - 1) is index for current instruction 'n'
             Node *m = n;
-            for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
-            if( m == single_use ) {
+            for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
+              m = block->get_node(i);
+            }
+            if (m == single_use) {
               lrgs(r)._area = 0.0;
             }
           }
@@ -672,7 +660,7 @@
           if( liveout.remove(r) ) {
             // Adjust register pressure.
             // Capture last hi-to-lo pressure transition
-            lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
+            lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
             assert( pressure[0] == count_int_pressure  (&liveout), "" );
             assert( pressure[1] == count_float_pressure(&liveout), "" );
           }
@@ -680,12 +668,12 @@
           // Copies do not define a new value and so do not interfere.
           // Remove the copies source from the liveout set before interfering.
           uint idx = n->is_Copy();
-          if( idx ) {
-            uint x = n2lidx(n->in(idx));
-            if( liveout.remove( x ) ) {
+          if (idx) {
+            uint x = _lrg_map.live_range_id(n->in(idx));
+            if (liveout.remove(x)) {
               lrgs(x)._area -= cost;
               // Adjust register pressure.
-              lower_pressure( &lrgs(x), j-1, b, pressure, hrp_index );
+              lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
               assert( pressure[0] == count_int_pressure  (&liveout), "" );
               assert( pressure[1] == count_float_pressure(&liveout), "" );
             }
@@ -757,7 +745,7 @@
 
       // Area remaining in the block
       inst_count--;
-      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
 
       // Make all inputs live
       if( !n->is_Phi() ) {      // Phi function uses come from prior block
@@ -771,18 +759,21 @@
         // the flags and assumes it's dead.  This keeps the (useless)
         // flag-setting behavior alive while also keeping the (useful)
         // memory update effect.
-        for( uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++ ) {
+        for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
           Node *def = n->in(k);
-          uint x = n2lidx(def);
-          if( !x ) continue;
+          uint x = _lrg_map.live_range_id(def);
+          if (!x) {
+            continue;
+          }
           LRG &lrg = lrgs(x);
           // No use-side cost for spilling debug info
-          if( k < debug_start )
+          if (k < debug_start) {
             // A USE costs twice block frequency (once for the Load, once
             // for a Load-delay).  Rematerialized uses only cost once.
-            lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
+            lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
+          }
           // It is live now
-          if( liveout.insert( x ) ) {
+          if (liveout.insert(x)) {
             // Newly live things assumed live from here to top of block
             lrg._area += cost;
             // Adjust register pressure
@@ -790,14 +781,16 @@
               if (lrg._is_float || lrg._is_vector) {
                 pressure[1] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
-                if( pressure[1] > b->_freg_pressure )
-                  b->_freg_pressure = pressure[1];
+                if (pressure[1] > block->_freg_pressure)  {
+                  block->_freg_pressure = pressure[1];
+                }
 #endif
               } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
                 pressure[0] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
-                if( pressure[0] > b->_reg_pressure )
-                  b->_reg_pressure = pressure[0];
+                if (pressure[0] > block->_reg_pressure) {
+                  block->_reg_pressure = pressure[0];
+                }
 #endif
               }
             }
@@ -812,52 +805,55 @@
     // If we run off the top of the block with high pressure and
     // never see a hi-to-low pressure transition, just record that
     // the whole block is high pressure.
-    if( pressure[0] > (uint)INTPRESSURE   ) {
+    if (pressure[0] > (uint)INTPRESSURE) {
       hrp_index[0] = 0;
 #ifdef EXACT_PRESSURE
-      if( pressure[0] > b->_reg_pressure )
-        b->_reg_pressure = pressure[0];
+      if (pressure[0] > block->_reg_pressure) {
+        block->_reg_pressure = pressure[0];
+      }
 #else
-      b->_reg_pressure = (uint)INTPRESSURE+1;
+      block->_reg_pressure = (uint)INTPRESSURE+1;
 #endif
     }
-    if( pressure[1] > (uint)FLOATPRESSURE ) {
+    if (pressure[1] > (uint)FLOATPRESSURE) {
       hrp_index[1] = 0;
 #ifdef EXACT_PRESSURE
-      if( pressure[1] > b->_freg_pressure )
-        b->_freg_pressure = pressure[1];
+      if (pressure[1] > block->_freg_pressure) {
+        block->_freg_pressure = pressure[1];
+      }
 #else
-      b->_freg_pressure = (uint)FLOATPRESSURE+1;
+      block->_freg_pressure = (uint)FLOATPRESSURE+1;
 #endif
     }
 
     // Compute high pressure index; avoid landing in the middle of projnodes
     j = hrp_index[0];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
+      Node* cur = block->get_node(j);
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
         j--;
-        cur = b->_nodes[j];
+        cur = block->get_node(j);
       }
     }
-    b->_ihrp_index = j;
+    block->_ihrp_index = j;
     j = hrp_index[1];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
+      Node* cur = block->get_node(j);
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
         j--;
-        cur = b->_nodes[j];
+        cur = block->get_node(j);
       }
     }
-    b->_fhrp_index = j;
+    block->_fhrp_index = j;
 
 #ifndef PRODUCT
     // Gather Register Pressure Statistics
     if( PrintOptoStatistics ) {
-      if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
+      if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
         _high_pressure++;
-      else
+      } else {
         _low_pressure++;
+      }
     }
 #endif
   } // End of for all blocks
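A brief aside on the pattern the hunks above converge on: direct `b->_nodes[i]` indexing and the old `n2lidx()` lookup are replaced by the accessor style `block->get_node(i)` / `block->number_of_nodes()` and `_lrg_map.live_range_id(n)`. The fragment below is only an illustrative sketch with stand-in types (not the real HotSpot Block/Node classes); it compiles on its own and shows the bottom-up walk the refactored pressure code uses.

#include <cstdint>
#include <vector>

// Stand-in types for illustration only; the real classes live in HotSpot.
struct Node  { uint32_t _idx; };
struct Block {
  std::vector<Node*> _nodes;
  uint32_t number_of_nodes() const { return (uint32_t)_nodes.size(); }
  Node*    get_node(uint32_t i) const { return _nodes[i]; }
};

// Walk a block from its last instruction up towards the head, the same loop
// shape as the refactored register-pressure code:
//   was:  Node* n = b->_nodes[j-1];          uint lid = n2lidx(n);
//   now:  Node* n = block->get_node(j - 1);  uint lid = _lrg_map.live_range_id(n);
void walk_block_bottom_up(Block* block) {
  for (uint32_t j = block->number_of_nodes(); j > 1; j--) {
    Node* n = block->get_node(j - 1);
    (void)n;  // live-range lookup and pressure bookkeeping would go here
  }
}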
--- a/src/share/vm/opto/ifnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/ifnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -673,7 +673,7 @@
 //           /    Region
 //
 Node* IfNode::fold_compares(PhaseGVN* phase) {
-  if (!EliminateAutoBox || Opcode() != Op_If) return NULL;
+  if (!phase->C->eliminate_boxing() || Opcode() != Op_If) return NULL;
 
   Node* this_cmp = in(1)->in(1);
   if (this_cmp != NULL && this_cmp->Opcode() == Op_CmpI &&
--- a/src/share/vm/opto/lcm.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/lcm.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -36,6 +36,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -98,14 +101,14 @@
 // The proj is the control projection for the not-null case.
 // The val is the pointer being checked for nullness or
 // decodeHeapOop_not_null node if it did not fold into address.
-void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
+void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
   // Assume that if a null check is needed for offset 0 then it is always needed
   // Intel solaris doesn't support any null checks yet and no
   // mechanism exists (yet) to set the switches at an os_cpu level
   if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;
 
   // Make sure the ptr-is-null path appears to be uncommon!
-  float f = end()->as_MachIf()->_prob;
+  float f = block->end()->as_MachIf()->_prob;
   if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
   if( f > PROB_UNLIKELY_MAG(4) ) return;
 
@@ -115,13 +118,13 @@
   // Get the successor block for if the test ptr is non-null
   Block* not_null_block;  // this one goes with the proj
   Block* null_block;
-  if (_nodes[_nodes.size()-1] == proj) {
-    null_block     = _succs[0];
-    not_null_block = _succs[1];
+  if (block->get_node(block->number_of_nodes()-1) == proj) {
+    null_block     = block->_succs[0];
+    not_null_block = block->_succs[1];
   } else {
-    assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other");
-    not_null_block = _succs[0];
-    null_block     = _succs[1];
+    assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
+    not_null_block = block->_succs[0];
+    null_block     = block->_succs[1];
   }
   while (null_block->is_Empty() == Block::empty_with_goto) {
     null_block     = null_block->_succs[0];
@@ -133,8 +136,8 @@
   // detect failure of this optimization, as in 6366351.)
   {
     bool found_trap = false;
-    for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) {
-      Node* nn = null_block->_nodes[i1];
+    for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
+      Node* nn = null_block->get_node(i1);
       if (nn->is_MachCall() &&
           nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
         const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
@@ -282,20 +285,20 @@
     }
 
     // Check ctrl input to see if the null-check dominates the memory op
-    Block *cb = cfg->_bbs[mach->_idx];
+    Block *cb = get_block_for_node(mach);
     cb = cb->_idom;             // Always hoist at least 1 block
     if( !was_store ) {          // Stores can be hoisted only one block
-      while( cb->_dom_depth > (_dom_depth + 1))
+      while( cb->_dom_depth > (block->_dom_depth + 1))
         cb = cb->_idom;         // Hoist loads as far as we want
       // The non-null-block should dominate the memory op, too. Live
       // range spilling will insert a spill in the non-null-block if it
       // needs to spill the memory op for an implicit null check.
-      if (cb->_dom_depth == (_dom_depth + 1)) {
+      if (cb->_dom_depth == (block->_dom_depth + 1)) {
         if (cb != not_null_block) continue;
         cb = cb->_idom;
       }
     }
-    if( cb != this ) continue;
+    if( cb != block ) continue;
 
     // Found a memory user; see if it can be hoisted to check-block
     uint vidx = 0;              // Capture index of value into memop
@@ -307,8 +310,8 @@
         if( is_decoden ) continue;
       }
       // Block of memory-op input
-      Block *inb = cfg->_bbs[mach->in(j)->_idx];
-      Block *b = this;          // Start from nul check
+      Block *inb = get_block_for_node(mach->in(j));
+      Block *b = block;          // Start from null check
       while( b != inb && b->_dom_depth > inb->_dom_depth )
         b = b->_idom;           // search upwards for input
       // See if input dominates null check
@@ -317,28 +320,28 @@
     }
     if( j > 0 )
       continue;
-    Block *mb = cfg->_bbs[mach->_idx];
+    Block *mb = get_block_for_node(mach);
     // Hoisting stores requires more checks for the anti-dependence case.
     // Give up hoisting if we have to move the store past any load.
     if( was_store ) {
       Block *b = mb;            // Start searching here for a local load
       // mach use (faulting) trying to hoist
       // n might be blocker to hoisting
-      while( b != this ) {
+      while( b != block ) {
         uint k;
-        for( k = 1; k < b->_nodes.size(); k++ ) {
-          Node *n = b->_nodes[k];
+        for( k = 1; k < b->number_of_nodes(); k++ ) {
+          Node *n = b->get_node(k);
           if( n->needs_anti_dependence_check() &&
               n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
             break;              // Found anti-dependent load
         }
-        if( k < b->_nodes.size() )
+        if( k < b->number_of_nodes() )
           break;                // Found anti-dependent load
         // Make sure control does not do a merge (would have to check allpaths)
         if( b->num_preds() != 2 ) break;
-        b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block
+        b = get_block_for_node(b->pred(1)); // Move up to predecessor block
       }
-      if( b != this ) continue;
+      if( b != block ) continue;
     }
 
     // Make sure this memory op is not already being used for a NullCheck
@@ -348,15 +351,15 @@
 
     // Found a candidate!  Pick one with least dom depth - the highest
     // in the dom tree should be closest to the null check.
-    if( !best ||
-        cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) {
+    if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
       best = mach;
       bidx = vidx;
-
     }
   }
   // No candidate!
-  if( !best ) return;
+  if (best == NULL) {
+    return;
+  }
 
   // ---- Found an implicit null check
   extern int implicit_null_checks;
@@ -364,46 +367,45 @@
 
   if( is_decoden ) {
     // Check if we need to hoist decodeHeapOop_not_null first.
-    Block *valb = cfg->_bbs[val->_idx];
-    if( this != valb && this->_dom_depth < valb->_dom_depth ) {
+    Block *valb = get_block_for_node(val);
+    if( block != valb && block->_dom_depth < valb->_dom_depth ) {
       // Hoist it up to the end of the test block.
       valb->find_remove(val);
-      this->add_inst(val);
-      cfg->_bbs.map(val->_idx,this);
+      block->add_inst(val);
+      map_node_to_block(val, block);
       // DecodeN on x86 may kill flags. Check for flag-killing projections
       // that also need to be hoisted.
       for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
         Node* n = val->fast_out(j);
         if( n->is_MachProj() ) {
-          cfg->_bbs[n->_idx]->find_remove(n);
-          this->add_inst(n);
-          cfg->_bbs.map(n->_idx,this);
+          get_block_for_node(n)->find_remove(n);
+          block->add_inst(n);
+          map_node_to_block(n, block);
         }
       }
     }
   }
   // Hoist the memory candidate up to the end of the test block.
-  Block *old_block = cfg->_bbs[best->_idx];
+  Block *old_block = get_block_for_node(best);
   old_block->find_remove(best);
-  add_inst(best);
-  cfg->_bbs.map(best->_idx,this);
+  block->add_inst(best);
+  map_node_to_block(best, block);
 
   // Move the control dependence
-  if (best->in(0) && best->in(0) == old_block->_nodes[0])
-    best->set_req(0, _nodes[0]);
+  if (best->in(0) && best->in(0) == old_block->head())
+    best->set_req(0, block->head());
 
   // Check for flag-killing projections that also need to be hoisted
   // Should be DU safe because no edge updates.
   for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
     Node* n = best->fast_out(j);
     if( n->is_MachProj() ) {
-      cfg->_bbs[n->_idx]->find_remove(n);
-      add_inst(n);
-      cfg->_bbs.map(n->_idx,this);
+      get_block_for_node(n)->find_remove(n);
+      block->add_inst(n);
+      map_node_to_block(n, block);
     }
   }
 
-  Compile *C = cfg->C;
   // proj==Op_True --> ne test; proj==Op_False --> eq test.
   // One of two graph shapes got matched:
   //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
@@ -413,10 +415,10 @@
   // We need to flip the projections to keep the same semantics.
   if( proj->Opcode() == Op_IfTrue ) {
     // Swap order of projections in basic block to swap branch targets
-    Node *tmp1 = _nodes[end_idx()+1];
-    Node *tmp2 = _nodes[end_idx()+2];
-    _nodes.map(end_idx()+1, tmp2);
-    _nodes.map(end_idx()+2, tmp1);
+    Node *tmp1 = block->get_node(block->end_idx()+1);
+    Node *tmp2 = block->get_node(block->end_idx()+2);
+    block->map_node(tmp2, block->end_idx()+1);
+    block->map_node(tmp1, block->end_idx()+2);
     Node *tmp = new (C) Node(C->top()); // Use not NULL input
     tmp1->replace_by(tmp);
     tmp2->replace_by(tmp1);
@@ -429,8 +431,8 @@
   // it as well.
   Node *old_tst = proj->in(0);
   MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
-  _nodes.map(end_idx(),nul_chk);
-  cfg->_bbs.map(nul_chk->_idx,this);
+  block->map_node(nul_chk, block->end_idx());
+  map_node_to_block(nul_chk, block);
   // Redirect users of old_test to nul_chk
   for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
     old_tst->last_out(i2)->set_req(0, nul_chk);
@@ -438,8 +440,8 @@
   for (uint i3 = 0; i3 < old_tst->req(); i3++)
     old_tst->set_req(i3, NULL);
 
-  cfg->latency_from_uses(nul_chk);
-  cfg->latency_from_uses(best);
+  latency_from_uses(nul_chk);
+  latency_from_uses(best);
 }
 
 
@@ -453,7 +455,7 @@
 // remaining cases (most), choose the instruction with the greatest latency
 // (that is, the greatest number of pseudo-cycles required to the end of the
 // routine). If there is a tie, choose the instruction with the most inputs.
-Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
+Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
 
   // If only a single entry on the stack, use it
   uint cnt = worklist.size();
@@ -486,7 +488,7 @@
     }
 
     // Final call in a block must be adjacent to 'catch'
-    Node *e = end();
+    Node *e = block->end();
     if( e->is_Catch() && e->in(0)->in(0) == n )
       continue;
 
@@ -512,7 +514,7 @@
         Node* use = n->fast_out(j);
 
         // The use is a conditional branch, make them adjacent
-        if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) {
+        if (use->is_MachIf() && get_block_for_node(use) == block) {
           found_machif = true;
           break;
         }
@@ -545,7 +547,7 @@
       n_choice = 1;
     }
 
-    uint n_latency = cfg->_node_latency->at_grow(n->_idx);
+    uint n_latency = get_latency_for_node(n);
     uint n_score   = n->req();   // Many inputs get high score to break ties
 
     // Keep best latency found
@@ -570,13 +572,14 @@
 
 
 //------------------------------set_next_call----------------------------------
-void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) {
+void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
   if( next_call.test_set(n->_idx) ) return;
   for( uint i=0; i<n->len(); i++ ) {
     Node *m = n->in(i);
     if( !m ) continue;  // must see all nodes in block that precede call
-    if( bbs[m->_idx] == this )
-      set_next_call( m, next_call, bbs );
+    if (get_block_for_node(m) == block) {
+      set_next_call(block, m, next_call);
+    }
   }
 }
 
@@ -586,12 +589,12 @@
 // next subroutine call get priority - basically it moves things NOT needed
 // for the next call till after the call.  This prevents me from trying to
 // carry lots of stuff live across a call.
-void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) {
+void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
   // Find the next control-defining Node in this block
   Node* call = NULL;
   for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
     Node* m = this_call->fast_out(i);
-    if( bbs[m->_idx] == this && // Local-block user
+    if (get_block_for_node(m) == block && // Local-block user
         m != this_call &&       // Not self-start node
         m->is_MachCall() )
       call = m;
@@ -599,11 +602,12 @@
   }
   if (call == NULL)  return;    // No next call (e.g., block end is near)
   // Set next-call for all inputs to this call
-  set_next_call(call, next_call, bbs);
+  set_next_call(block, call, next_call);
 }
 
 //------------------------------add_call_kills-------------------------------------
-void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
+// Helper function that adds caller-save registers to a MachProjNode
+static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
   // Fill in the kill mask for the call
   for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
     if( !regs.Member(r) ) {     // Not already defined by the call
@@ -619,7 +623,7 @@
 
 
 //------------------------------sched_call-------------------------------------
-uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
+uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
   RegMask regs;
 
   // Schedule all the users of the call right now.  All the users are
@@ -632,18 +636,20 @@
     ready_cnt.at_put(n->_idx, n_cnt);
     assert( n_cnt == 0, "" );
     // Schedule next to call
-    _nodes.map(node_cnt++, n);
+    block->map_node(n, node_cnt++);
     // Collect defined registers
     regs.OR(n->out_RegMask());
     // Check for scheduling the next control-definer
     if( n->bottom_type() == Type::CONTROL )
       // Warm up next pile of heuristic bits
-      needed_for_next_call(n, next_call, bbs);
+      needed_for_next_call(block, n, next_call);
 
     // Children of projections are now all ready
     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
       Node* m = n->fast_out(j); // Get user
-      if( bbs[m->_idx] != this ) continue;
+      if(get_block_for_node(m) != block) {
+        continue;
+      }
       if( m->is_Phi() ) continue;
       int m_cnt = ready_cnt.at(m->_idx)-1;
       ready_cnt.at_put(m->_idx, m_cnt);
@@ -655,14 +661,14 @@
 
   // Act as if the call defines the Frame Pointer.
   // Certainly the FP is alive and well after the call.
-  regs.Insert(matcher.c_frame_pointer());
+  regs.Insert(_matcher.c_frame_pointer());
 
   // Set all registers killed and not already defined by the call.
   uint r_cnt = mcall->tf()->range()->cnt();
   int op = mcall->ideal_Opcode();
-  MachProjNode *proj = new (matcher.C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
-  bbs.map(proj->_idx,this);
-  _nodes.insert(node_cnt++, proj);
+  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
+  map_node_to_block(proj, block);
+  block->insert_node(proj, node_cnt++);
 
   // Select the right register save policy.
   const char * save_policy;
@@ -671,13 +677,13 @@
     case Op_CallLeaf:
     case Op_CallLeafNoFP:
       // Calling C code so use C calling convention
-      save_policy = matcher._c_reg_save_policy;
+      save_policy = _matcher._c_reg_save_policy;
       break;
 
     case Op_CallStaticJava:
     case Op_CallDynamicJava:
       // Calling Java code so use Java calling convention
-      save_policy = matcher._register_save_policy;
+      save_policy = _matcher._register_save_policy;
       break;
 
     default:
@@ -712,44 +718,46 @@
 
 //------------------------------schedule_local---------------------------------
 // Topological sort within a block.  Someday become a real scheduler.
-bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) {
+bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
   // Already "sorted" are the block start Node (as the first entry), and
   // the block-ending Node and any trailing control projections.  We leave
   // these alone.  PhiNodes and ParmNodes are made to follow the block start
   // Node.  Everything else gets topo-sorted.
 
 #ifndef PRODUCT
-    if (cfg->trace_opto_pipelining()) {
-      tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
-      for (uint i = 0;i < _nodes.size();i++) {
+    if (trace_opto_pipelining()) {
+      tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
+      for (uint i = 0;i < block->number_of_nodes(); i++) {
         tty->print("# ");
-        _nodes[i]->fast_dump();
+        block->get_node(i)->fast_dump();
       }
       tty->print_cr("#");
     }
 #endif
 
   // RootNode is already sorted
-  if( _nodes.size() == 1 ) return true;
+  if (block->number_of_nodes() == 1) {
+    return true;
+  }
 
   // Move PhiNodes and ParmNodes from 1 to cnt up to the start
-  uint node_cnt = end_idx();
+  uint node_cnt = block->end_idx();
   uint phi_cnt = 1;
   uint i;
   for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
-    Node *n = _nodes[i];
+    Node *n = block->get_node(i);
     if( n->is_Phi() ||          // Found a PhiNode or ParmNode
-        (n->is_Proj()  && n->in(0) == head()) ) {
+        (n->is_Proj()  && n->in(0) == block->head()) ) {
       // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
-      _nodes.map(i,_nodes[phi_cnt]);
-      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
+      block->map_node(block->get_node(phi_cnt), i);
+      block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
     } else {                    // All others
       // Count block-local inputs to 'n'
       uint cnt = n->len();      // Input count
       uint local = 0;
       for( uint j=0; j<cnt; j++ ) {
         Node *m = n->in(j);
-        if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
+        if( m && get_block_for_node(m) == block && !m->is_top() )
           local++;              // One more block-local input
       }
       ready_cnt.at_put(n->_idx, local); // Count em up
@@ -761,7 +769,7 @@
           for (uint prec = n->req(); prec < n->len(); prec++) {
             Node* oop_store = n->in(prec);
             if (oop_store != NULL) {
-              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
             }
           }
         }
@@ -785,16 +793,16 @@
       }
     }
   }
-  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
-    ready_cnt.at_put(_nodes[i2]->_idx, 0);
+  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
+    ready_cnt.at_put(block->get_node(i2)->_idx, 0);
 
   // All the prescheduled guys do not hold back internal nodes
   uint i3;
   for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
-    Node *n = _nodes[i3];       // Get pre-scheduled
+    Node *n = block->get_node(i3);       // Get pre-scheduled
     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
       Node* m = n->fast_out(j);
-      if( cfg->_bbs[m->_idx] ==this ) { // Local-block user
+      if (get_block_for_node(m) == block) { // Local-block user
         int m_cnt = ready_cnt.at(m->_idx)-1;
         ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
       }
@@ -805,7 +813,7 @@
   // Make a worklist
   Node_List worklist;
   for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
-    Node *m = _nodes[i4];
+    Node *m = block->get_node(i4);
     if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
       if (m->is_iteratively_computed()) {
         // Push induction variable increments last to allow other uses
@@ -827,15 +835,15 @@
   }
 
   // Warm up the 'next_call' heuristic bits
-  needed_for_next_call(_nodes[0], next_call, cfg->_bbs);
+  needed_for_next_call(block, block->head(), next_call);
 
 #ifndef PRODUCT
-    if (cfg->trace_opto_pipelining()) {
-      for (uint j=0; j<_nodes.size(); j++) {
-        Node     *n = _nodes[j];
+    if (trace_opto_pipelining()) {
+      for (uint j=0; j< block->number_of_nodes(); j++) {
+        Node     *n = block->get_node(j);
         int     idx = n->_idx;
         tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
-        tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
+        tty->print("latency:%3d  ", get_latency_for_node(n));
         tty->print("%4d: %s\n", idx, n->Name());
       }
     }
@@ -846,7 +854,7 @@
   while( worklist.size() ) {    // Worklist is not ready
 
 #ifndef PRODUCT
-    if (cfg->trace_opto_pipelining()) {
+    if (trace_opto_pipelining()) {
       tty->print("#   ready list:");
       for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
         Node *n = worklist[i];      // Get Node on worklist
@@ -857,13 +865,13 @@
 #endif
 
     // Select and pop a ready guy from worklist
-    Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
-    _nodes.map(phi_cnt++,n);    // Schedule him next
+    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
+    block->map_node(n, phi_cnt++);    // Schedule him next
 
 #ifndef PRODUCT
-    if (cfg->trace_opto_pipelining()) {
+    if (trace_opto_pipelining()) {
       tty->print("#    select %d: %s", n->_idx, n->Name());
-      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
+      tty->print(", latency:%d", get_latency_for_node(n));
       n->dump();
       if (Verbose) {
         tty->print("#   ready list:");
@@ -878,26 +886,28 @@
 #endif
     if( n->is_MachCall() ) {
       MachCallNode *mcall = n->as_MachCall();
-      phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
+      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
       continue;
     }
 
     if (n->is_Mach() && n->as_Mach()->has_call()) {
       RegMask regs;
-      regs.Insert(matcher.c_frame_pointer());
+      regs.Insert(_matcher.c_frame_pointer());
       regs.OR(n->out_RegMask());
 
-      MachProjNode *proj = new (matcher.C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
-      cfg->_bbs.map(proj->_idx,this);
-      _nodes.insert(phi_cnt++, proj);
+      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
+      map_node_to_block(proj, block);
+      block->insert_node(proj, phi_cnt++);
 
-      add_call_kills(proj, regs, matcher._c_reg_save_policy, false);
+      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
     }
 
     // Children are now all ready
     for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
       Node* m = n->fast_out(i5); // Get user
-      if( cfg->_bbs[m->_idx] != this ) continue;
+      if (get_block_for_node(m) != block) {
+        continue;
+      }
       if( m->is_Phi() ) continue;
       if (m->_idx >= max_idx) { // new node, skip it
         assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
@@ -910,9 +920,8 @@
     }
   }
 
-  if( phi_cnt != end_idx() ) {
+  if( phi_cnt != block->end_idx() ) {
     // did not schedule all.  Retry, Bailout, or Die
-    Compile* C = matcher.C;
     if (C->subsume_loads() == true && !C->failing()) {
       // Retry with subsume_loads == false
       // If this is the first failure, the sentinel string will "stick"
@@ -924,12 +933,12 @@
   }
 
 #ifndef PRODUCT
-  if (cfg->trace_opto_pipelining()) {
+  if (trace_opto_pipelining()) {
     tty->print_cr("#");
     tty->print_cr("# after schedule_local");
-    for (uint i = 0;i < _nodes.size();i++) {
+    for (uint i = 0;i < block->number_of_nodes();i++) {
       tty->print("# ");
-      _nodes[i]->fast_dump();
+      block->get_node(i)->fast_dump();
     }
     tty->cr();
   }
@@ -955,7 +964,7 @@
 }
 
 //------------------------------catch_cleanup_find_cloned_def------------------
-static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
   assert( use_blk != def_blk, "Inter-block cleanup only");
 
   // The use is in some block below the Catch.  Find and return the clone of the def
@@ -981,13 +990,14 @@
     // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
     Node_Array inputs = new Node_List(Thread::current()->resource_area());
     for(uint k = 1; k < use_blk->num_preds(); k++) {
-      inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx));
+      Block* block = get_block_for_node(use_blk->pred(k));
+      inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));
     }
 
     // Check to see if the use_blk already has an identical phi inserted.
     // If it exists, it will be at the first position since all uses of a
     // def are processed together.
-    Node *phi = use_blk->_nodes[1];
+    Node *phi = use_blk->get_node(1);
     if( phi->is_Phi() ) {
       fixup = phi;
       for (uint k = 1; k < use_blk->num_preds(); k++) {
@@ -1002,8 +1012,8 @@
     // If an existing PhiNode was not found, make a new one.
     if (fixup == NULL) {
       Node *new_phi = PhiNode::make(use_blk->head(), def);
-      use_blk->_nodes.insert(1, new_phi);
-      bbs.map(new_phi->_idx, use_blk);
+      use_blk->insert_node(new_phi, 1);
+      map_node_to_block(new_phi, use_blk);
       for (uint k = 1; k < use_blk->num_preds(); k++) {
         new_phi->set_req(k, inputs[k]);
       }
@@ -1012,7 +1022,7 @@
 
   } else {
     // Found the use just below the Catch.  Make it use the clone.
-    fixup = use_blk->_nodes[n_clone_idx];
+    fixup = use_blk->get_node(n_clone_idx);
   }
 
   return fixup;
@@ -1032,36 +1042,36 @@
   for( uint k = 0; k < blk->_num_succs; k++ ) {
     // Get clone in each successor block
     Block *sb = blk->_succs[k];
-    Node *clone = sb->_nodes[offset_idx+1];
+    Node *clone = sb->get_node(offset_idx+1);
     assert( clone->Opcode() == use->Opcode(), "" );
 
     // Make use-clone reference the def-clone
-    catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]);
+    catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx));
   }
 }
 
 //------------------------------catch_cleanup_inter_block---------------------
 // Fix all input edges in use that reference "def".  The use is in a different
 // block than the def.
-static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
   if( !use_blk ) return;        // Can happen if the use is a precedence edge
 
-  Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx);
+  Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);
   catch_cleanup_fix_all_inputs(use, def, new_def);
 }
 
 //------------------------------call_catch_cleanup-----------------------------
 // If we inserted any instructions between a Call and its CatchNode,
 // clone the instructions on all paths below the Catch.
-void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) {
+void PhaseCFG::call_catch_cleanup(Block* block) {
 
   // End of region to clone
-  uint end = end_idx();
-  if( !_nodes[end]->is_Catch() ) return;
+  uint end = block->end_idx();
+  if( !block->get_node(end)->is_Catch() ) return;
   // Start of region to clone
   uint beg = end;
-  while(!_nodes[beg-1]->is_MachProj() ||
-        !_nodes[beg-1]->in(0)->is_MachCall() ) {
+  while(!block->get_node(beg-1)->is_MachProj() ||
+        !block->get_node(beg-1)->in(0)->is_MachCall() ) {
     beg--;
     assert(beg > 0,"Catch cleanup walking beyond block boundary");
   }
@@ -1070,15 +1080,16 @@
 
   // Clone along all Catch output paths.  Clone area between the 'beg' and
   // 'end' indices.
-  for( uint i = 0; i < _num_succs; i++ ) {
-    Block *sb = _succs[i];
+  for( uint i = 0; i < block->_num_succs; i++ ) {
+    Block *sb = block->_succs[i];
     // Clone the entire area; ignoring the edge fixup for now.
     for( uint j = end; j > beg; j-- ) {
-      // It is safe here to clone a node with anti_dependence
-      // since clones dominate on each path.
-      Node *clone = _nodes[j-1]->clone();
-      sb->_nodes.insert( 1, clone );
-      bbs.map(clone->_idx,sb);
+      Node *clone = block->get_node(j-1)->clone();
+      sb->insert_node(clone, 1);
+      map_node_to_block(clone, sb);
+      if (clone->needs_anti_dependence_check()) {
+        insert_anti_dependences(sb, clone);
+      }
     }
   }
 
@@ -1086,7 +1097,7 @@
   // Fixup edges.  Check the def-use info per cloned Node
   for(uint i2 = beg; i2 < end; i2++ ) {
     uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
-    Node *n = _nodes[i2];        // Node that got cloned
+    Node *n = block->get_node(i2);        // Node that got cloned
     // Need DU safe iterator because of edge manipulation in calls.
     Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
     for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
@@ -1095,18 +1106,19 @@
     uint max = out->size();
     for (uint j = 0; j < max; j++) {// For all users
       Node *use = out->pop();
-      Block *buse = bbs[use->_idx];
+      Block *buse = get_block_for_node(use);
       if( use->is_Phi() ) {
         for( uint k = 1; k < use->req(); k++ )
           if( use->in(k) == n ) {
-            Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
+            Block* b = get_block_for_node(buse->pred(k));
+            Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);
             use->set_req(k, fixup);
           }
       } else {
-        if (this == buse) {
-          catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
+        if (block == buse) {
+          catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);
         } else {
-          catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
+          catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);
         }
       }
     } // End for all users
@@ -1115,30 +1127,30 @@
 
   // Remove the now-dead cloned ops
   for(uint i3 = beg; i3 < end; i3++ ) {
-    _nodes[beg]->disconnect_inputs(NULL, C);
-    _nodes.remove(beg);
+    block->get_node(beg)->disconnect_inputs(NULL, C);
+    block->remove_node(beg);
   }
 
   // If the successor blocks have a CreateEx node, move it back to the top
-  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
-    Block *sb = _succs[i4];
+  for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {
+    Block *sb = block->_succs[i4];
     uint new_cnt = end - beg;
     // Remove any newly created, but dead, nodes.
     for( uint j = new_cnt; j > 0; j-- ) {
-      Node *n = sb->_nodes[j];
+      Node *n = sb->get_node(j);
       if (n->outcnt() == 0 &&
           (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
         n->disconnect_inputs(NULL, C);
-        sb->_nodes.remove(j);
+        sb->remove_node(j);
         new_cnt--;
       }
     }
     // If any newly created nodes remain, move the CreateEx node to the top
     if (new_cnt > 0) {
-      Node *cex = sb->_nodes[1+new_cnt];
+      Node *cex = sb->get_node(1+new_cnt);
       if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
-        sb->_nodes.remove(1+new_cnt);
-        sb->_nodes.insert(1,cex);
+        sb->remove_node(1+new_cnt);
+        sb->insert_node(cex, 1);
       }
     }
   }
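The lcm.cpp hunks move the local-scheduling helpers from Block onto PhaseCFG, so block membership is now asked of the CFG via `get_block_for_node(n)` instead of being read from the removed `cfg->_bbs` array. A self-contained sketch of that recurring membership test, using toy stand-ins rather than the real HotSpot classes:

#include <unordered_map>

struct Node  {};   // stand-in only
struct Block {};   // stand-in only

// Toy CFG keeping a node-to-block map, mirroring the accessor pair the diff
// uses: map_node_to_block() when a node is placed, get_block_for_node() to query.
class PhaseCFG {
  std::unordered_map<const Node*, Block*> _node_to_block;
public:
  void   map_node_to_block(const Node* n, Block* b) { _node_to_block[n] = b; }
  Block* get_block_for_node(const Node* n) const {
    auto it = _node_to_block.find(n);
    return it == _node_to_block.end() ? nullptr : it->second;
  }
  // Recurring pattern from the hunks above:
  //   was:  if (cfg->_bbs[use->_idx] != this) continue;
  //   now:  if (get_block_for_node(use) != block) continue;
  bool in_block(const Node* use, Block* block) const {
    return get_block_for_node(use) == block;
  }
};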
--- a/src/share/vm/opto/library_call.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/library_call.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -3717,7 +3717,7 @@
   CallJavaNode* slow_call;
   if (is_static) {
     assert(!is_virtual, "");
-    slow_call = new(C) CallStaticJavaNode(tf,
+    slow_call = new(C) CallStaticJavaNode(C, tf,
                            SharedRuntime::get_resolve_static_call_stub(),
                            method, bci());
   } else if (is_virtual) {
@@ -3736,7 +3736,7 @@
                           method, vtable_index, bci());
   } else {  // neither virtual nor static:  opt_virtual
     null_check_receiver();
-    slow_call = new(C) CallStaticJavaNode(tf,
+    slow_call = new(C) CallStaticJavaNode(C, tf,
                                 SharedRuntime::get_resolve_opt_virtual_call_stub(),
                                 method, bci());
     slow_call->set_optimized_virtual(true);
--- a/src/share/vm/opto/live.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/live.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,9 +30,6 @@
 #include "opto/machnode.hpp"
 
 
-
-//=============================================================================
-//------------------------------PhaseLive--------------------------------------
 // Compute live-in/live-out.  We use a totally incremental algorithm.  The LIVE
 // problem is monotonic.  The steady-state solution looks like this: pull a
 // block from the worklist.  It has a set of deltas - values which are newly
@@ -44,7 +41,7 @@
 // block is put on the worklist.
 //   The locally live-in stuff is computed once and added to predecessor
 // live-out sets.  This separate computation is done in the outer loop below.
-PhaseLive::PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) {
+PhaseLive::PhaseLive( const PhaseCFG &cfg, const LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) {
 }
 
 void PhaseLive::compute(uint maxlrg) {
@@ -53,9 +50,9 @@
 
   // Init the sparse live arrays.  This data is live on exit from here!
   // The _live info is the live-out info.
-  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
+  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
   uint i;
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _live[i].initialize(_maxlrg);
   }
 
@@ -65,14 +62,14 @@
   // Does the memory used by _defs and _deltas get reclaimed?  Does it matter?  TT
 
   // Array of values defined locally in blocks
-  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks());
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _defs[i].initialize(_maxlrg);
   }
 
   // Array of delta-set pointers, indexed by block pre_order-1.
-  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
-  memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
+  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks());
+  memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks());
 
   _free_IndexSet = NULL;
 
@@ -80,31 +77,32 @@
   VectorSet first_pass(Thread::current()->resource_area());
 
   // Outer loop: must compute local live-in sets and push into predecessors.
-  uint iters = _cfg._num_blocks;        // stat counters
-  for( uint j=_cfg._num_blocks; j>0; j-- ) {
-    Block *b = _cfg._blocks[j-1];
+  for (uint j = _cfg.number_of_blocks(); j > 0; j--) {
+    Block* block = _cfg.get_block(j - 1);
 
     // Compute the local live-in set.  Start with any new live-out bits.
-    IndexSet *use = getset( b );
-    IndexSet *def = &_defs[b->_pre_order-1];
+    IndexSet* use = getset(block);
+    IndexSet* def = &_defs[block->_pre_order-1];
     DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
     uint i;
-    for( i=b->_nodes.size(); i>1; i-- ) {
-      Node *n = b->_nodes[i-1];
-      if( n->is_Phi() ) break;
+    for (i = block->number_of_nodes(); i > 1; i--) {
+      Node* n = block->get_node(i-1);
+      if (n->is_Phi()) {
+        break;
+      }
 
       uint r = _names[n->_idx];
       assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
       def->insert( r );
       use->remove( r );
       uint cnt = n->req();
-      for( uint k=1; k<cnt; k++ ) {
+      for (uint k = 1; k < cnt; k++) {
         Node *nk = n->in(k);
         uint nkidx = nk->_idx;
-        if( _cfg._bbs[nkidx] != b ) {
+        if (_cfg.get_block_for_node(nk) != block) {
           uint u = _names[nkidx];
-          use->insert( u );
-          DEBUG_ONLY(def_outside->insert( u );)
+          use->insert(u);
+          DEBUG_ONLY(def_outside->insert(u);)
         }
       }
     }
@@ -113,39 +111,38 @@
     _free_IndexSet = def_outside;     // Drop onto free list
 #endif
     // Remove anything defined by Phis and the block start instruction
-    for( uint k=i; k>0; k-- ) {
-      uint r = _names[b->_nodes[k-1]->_idx];
-      def->insert( r );
-      use->remove( r );
+    for (uint k = i; k > 0; k--) {
+      uint r = _names[block->get_node(k - 1)->_idx];
+      def->insert(r);
+      use->remove(r);
     }
 
     // Push these live-in things to predecessors
-    for( uint l=1; l<b->num_preds(); l++ ) {
-      Block *p = _cfg._bbs[b->pred(l)->_idx];
-      add_liveout( p, use, first_pass );
+    for (uint l = 1; l < block->num_preds(); l++) {
+      Block* p = _cfg.get_block_for_node(block->pred(l));
+      add_liveout(p, use, first_pass);
 
       // PhiNode uses go in the live-out set of prior blocks.
-      for( uint k=i; k>0; k-- )
-        add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
+      for (uint k = i; k > 0; k--) {
+        add_liveout(p, _names[block->get_node(k-1)->in(l)->_idx], first_pass);
+      }
     }
-    freeset( b );
-    first_pass.set(b->_pre_order);
+    freeset(block);
+    first_pass.set(block->_pre_order);
 
     // Inner loop: blocks that picked up new live-out values to be propagated
-    while( _worklist->size() ) {
-        // !!!!!
-// #ifdef ASSERT
-      iters++;
-// #endif
-      Block *b = _worklist->pop();
-      IndexSet *delta = getset(b);
+    while (_worklist->size()) {
+      Block* block = _worklist->pop();
+      IndexSet *delta = getset(block);
       assert( delta->count(), "missing delta set" );
 
       // Add new-live-in to predecessors live-out sets
-      for( uint l=1; l<b->num_preds(); l++ )
-        add_liveout( _cfg._bbs[b->pred(l)->_idx], delta, first_pass );
+      for (uint l = 1; l < block->num_preds(); l++) {
+        Block* predecessor = _cfg.get_block_for_node(block->pred(l));
+        add_liveout(predecessor, delta, first_pass);
+      }
 
-      freeset(b);
+      freeset(block);
     } // End of while-worklist-not-empty
 
   } // End of for-all-blocks-outer-loop
@@ -153,7 +150,7 @@
   // We explicitly clear all of the IndexSets which we are about to release.
   // This allows us to recycle their internal memory into IndexSet's free list.
 
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _defs[i].clear();
     if (_deltas[i]) {
       // Is this always true?
@@ -169,13 +166,11 @@
 
 }
 
-//------------------------------stats------------------------------------------
 #ifndef PRODUCT
 void PhaseLive::stats(uint iters) const {
 }
 #endif
 
-//------------------------------getset-----------------------------------------
 // Get an IndexSet for a block.  Return existing one, if any.  Make a new
 // empty one if a prior one does not exist.
 IndexSet *PhaseLive::getset( Block *p ) {
@@ -186,7 +181,6 @@
   return delta;                 // Return set of new live-out items
 }
 
-//------------------------------getfreeset-------------------------------------
 // Pull from free list, or allocate.  Internal allocation on the returned set
 // is always from thread local storage.
 IndexSet *PhaseLive::getfreeset( ) {
@@ -205,7 +199,6 @@
   return f;
 }
 
-//------------------------------freeset----------------------------------------
 // Free an IndexSet from a block.
 void PhaseLive::freeset( const Block *p ) {
   IndexSet *f = _deltas[p->_pre_order-1];
@@ -214,7 +207,6 @@
   _deltas[p->_pre_order-1] = NULL;
 }
 
-//------------------------------add_liveout------------------------------------
 // Add a live-out value to a given block's live-out set.  If it is new, then
 // also add it to the delta set and stick the block on the worklist.
 void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
@@ -231,8 +223,6 @@
   }
 }
 
-
-//------------------------------add_liveout------------------------------------
 // Add a vector of live-out values to a given block's live-out set.
 void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
   IndexSet *live = &_live[p->_pre_order-1];
@@ -260,31 +250,31 @@
 }
 
 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 // Dump the live-out set for a block
 void PhaseLive::dump( const Block *b ) const {
   tty->print("Block %d: ",b->_pre_order);
   tty->print("LiveOut: ");  _live[b->_pre_order-1].dump();
-  uint cnt = b->_nodes.size();
+  uint cnt = b->number_of_nodes();
   for( uint i=0; i<cnt; i++ ) {
-    tty->print("L%d/", _names[b->_nodes[i]->_idx] );
-    b->_nodes[i]->dump();
+    tty->print("L%d/", _names[b->get_node(i)->_idx] );
+    b->get_node(i)->dump();
   }
   tty->print("\n");
 }
 
-//------------------------------verify_base_ptrs-------------------------------
 // Verify that base pointers and derived pointers are still sane.
 void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
 #ifdef ASSERT
   Unique_Node_List worklist(a);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
-      if( n->is_Phi() ) break;
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->get_node(j-1);
+      if (n->is_Phi()) {
+        break;
+      }
       // Found a safepoint?
-      if( n->is_MachSafePoint() ) {
+      if (n->is_MachSafePoint()) {
         MachSafePointNode *sfpt = n->as_MachSafePoint();
         JVMState* jvms = sfpt->jvms();
         if (jvms != NULL) {
@@ -355,7 +345,6 @@
 #endif
 }
 
-//------------------------------verify-------------------------------------
 // Verify that graphs and base pointers are still sane.
 void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
 #ifdef ASSERT
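Apart from the accessor renames, live.cpp keeps the worklist scheme its header comment describes: a block's newly live-out values (its delta) are filtered through the block's definitions and pushed into each predecessor's live-out set, and any predecessor that actually grew is re-queued. A minimal, self-contained sketch of that fixed-point loop with plain std containers (not HotSpot's IndexSet/Block_List, and with the Phi-input handling omitted):

#include <cstdint>
#include <deque>
#include <set>
#include <vector>

// Stand-in per-block data; the real code uses IndexSet and Block_List.
struct BlockInfo {
  std::vector<int>   preds;     // predecessor block ids
  std::set<uint32_t> defs;      // live ranges defined (killed) in this block
  std::set<uint32_t> live_out;  // live-out set; only ever grows (monotone)
  std::set<uint32_t> delta;     // newly arrived live-out values, not yet pushed
};

void propagate(std::vector<BlockInfo>& blocks, std::deque<int>& worklist) {
  while (!worklist.empty()) {
    int b = worklist.front(); worklist.pop_front();
    std::set<uint32_t> delta;
    delta.swap(blocks[b].delta);                      // take and clear this block's delta
    for (uint32_t v : delta) {
      if (blocks[b].defs.count(v)) continue;          // killed here, so not live-in
      for (int p : blocks[b].preds) {                 // live-in flows to pred live-out
        if (blocks[p].live_out.insert(v).second) {    // genuinely new at that pred?
          if (blocks[p].delta.empty()) worklist.push_back(p);
          blocks[p].delta.insert(v);
        }
      }
    }
  }
}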
--- a/src/share/vm/opto/live.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/live.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -56,7 +56,7 @@
   Block_List *_worklist;        // Worklist for iterative solution
 
   const PhaseCFG &_cfg;         // Basic blocks
-  LRG_List &_names;             // Mapping from Nodes to live ranges
+  const LRG_List &_names;       // Mapping from Nodes to live ranges
   uint _maxlrg;                 // Largest live-range number
   Arena *_arena;
 
@@ -67,7 +67,7 @@
   void add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass );
 
 public:
-  PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena );
+  PhaseLive(const PhaseCFG &cfg, const LRG_List &names, Arena *arena);
   ~PhaseLive() {}
   // Compute liveness info
   void compute(uint maxlrg);
--- a/src/share/vm/opto/locknode.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/locknode.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
--- a/src/share/vm/opto/loopPredicate.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/loopPredicate.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -855,8 +855,8 @@
         loop->dump_head();
       }
 #endif
-    } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
-      assert(proj->_con == predicate_proj->_con, "must match");
+    } else if ((cl != NULL) && (proj->_con == predicate_proj->_con) &&
+               loop->is_range_check_if(iff, this, invar)) {
 
       // Range check for counted loops
       const Node*    cmp    = bol->in(1)->as_Cmp();
--- a/src/share/vm/opto/loopTransform.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/loopTransform.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1291,8 +1291,8 @@
           limit = new (C) Opaque2Node( C, limit );
           register_new_node( limit, opaq_ctrl );
         }
-        if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
-                   stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
+        if (stride_con > 0 && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo) ||
+            stride_con < 0 && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi)) {
           // No underflow.
           new_limit = new (C) SubINode(limit, stride);
         } else {
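The loopTransform.cpp change swaps the plain `limit_type->_lo - stride_con` probes for `java_subtract`, so the underflow test itself cannot trip C++ signed-overflow undefined behaviour. A hedged, self-contained illustration of the idea; `wrapping_sub` below is a local stand-in written for this sketch, not HotSpot's actual helper:

#include <cstdint>

// Stand-in for a wrap-around (Java int semantics) subtraction: do the arithmetic
// in unsigned space, where overflow is defined, then reinterpret as signed.
static inline int32_t wrapping_sub(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a - (uint32_t)b);
}

// Shape of the new condition for a positive stride: if lo - stride is still
// below lo, the subtraction did not wrap, so the adjusted limit will not underflow.
bool no_underflow_for_positive_stride(int32_t lo, int32_t stride_con) {
  return wrapping_sub(lo, stride_con) < lo;
}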
--- a/src/share/vm/opto/macro.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/macro.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -659,7 +659,7 @@
         alloc->dump();
       else
         res->dump();
-    } else {
+    } else if (alloc->_is_scalar_replaceable) {
       tty->print("NotScalar (%s)", fail_eliminate);
       if (res == NULL)
         alloc->dump();
@@ -838,18 +838,14 @@
     // to the allocated object with "sobj"
     int start = jvms->debug_start();
     int end   = jvms->debug_end();
-    for (int i = start; i < end; i++) {
-      if (sfpt->in(i) == res) {
-        sfpt->set_req(i, sobj);
-      }
-    }
+    sfpt->replace_edges_in_range(res, sobj, start, end);
     safepoints_done.append_if_missing(sfpt); // keep it for rollback
   }
   return true;
 }
 
 // Process users of eliminated allocation.
-void PhaseMacroExpand::process_users_of_allocation(AllocateNode *alloc) {
+void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
   Node* res = alloc->result_cast();
   if (res != NULL) {
     for (DUIterator_Last jmin, j = res->last_outs(jmin); j >= jmin; ) {
@@ -892,6 +888,17 @@
   // Process other users of allocation's projections
   //
   if (_resproj != NULL && _resproj->outcnt() != 0) {
+    // First disconnect stores captured by the Initialize node.
+    // If the Initialize node is eliminated first in the following code,
+    // it will kill such stores and DUIterator_Last will assert.
+    for (DUIterator_Fast jmax, j = _resproj->fast_outs(jmax);  j < jmax; j++) {
+      Node *use = _resproj->fast_out(j);
+      if (use->is_AddP()) {
+        // raw memory addresses used only by the initialization
+        _igvn.replace_node(use, C->top());
+        --j; --jmax;
+      }
+    }
     for (DUIterator_Last jmin, j = _resproj->last_outs(jmin); j >= jmin; ) {
       Node *use = _resproj->last_out(j);
       uint oc1 = _resproj->outcnt();
@@ -916,9 +923,6 @@
 #endif
           _igvn.replace_node(mem_proj, mem);
         }
-      } else if (use->is_AddP()) {
-        // raw memory addresses used only by the initialization
-        _igvn.replace_node(use, C->top());
       } else  {
         assert(false, "only Initialize or AddP expected");
       }
@@ -946,8 +950,18 @@
 }
 
 bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) {
-
-  if (!EliminateAllocations || !alloc->_is_scalar_replaceable) {
+  if (!EliminateAllocations || !alloc->_is_non_escaping) {
+    return false;
+  }
+  Node* klass = alloc->in(AllocateNode::KlassNode);
+  const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr();
+  Node* res = alloc->result_cast();
+  // Eliminate boxing allocations which are not used
+  // regardless of scalar replaceable status.
+  bool boxing_alloc = C->eliminate_boxing() &&
+                      tklass->klass()->is_instance_klass()  &&
+                      tklass->klass()->as_instance_klass()->is_box_klass();
+  if (!alloc->_is_scalar_replaceable && (!boxing_alloc || (res != NULL))) {
     return false;
   }
 
@@ -958,14 +972,22 @@
     return false;
   }
 
+  if (!alloc->_is_scalar_replaceable) {
+    assert(res == NULL, "sanity");
+    // We can only eliminate allocation if all debug info references
+    // are already replaced with SafePointScalarObject because
+    // we can't search for a field's value without instance_id.
+    if (safepoints.length() > 0) {
+      return false;
+    }
+  }
+
   if (!scalar_replacement(alloc, safepoints)) {
     return false;
   }
 
   CompileLog* log = C->log();
   if (log != NULL) {
-    Node* klass = alloc->in(AllocateNode::KlassNode);
-    const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr();
     log->head("eliminate_allocation type='%d'",
               log->identify(tklass->klass()));
     JVMState* p = alloc->jvms();
@@ -990,6 +1012,43 @@
   return true;
 }
 
+bool PhaseMacroExpand::eliminate_boxing_node(CallStaticJavaNode *boxing) {
+  // EA should remove all uses of a non-escaping boxing node.
+  if (!C->eliminate_boxing() || boxing->proj_out(TypeFunc::Parms) != NULL) {
+    return false;
+  }
+
+  extract_call_projections(boxing);
+
+  const TypeTuple* r = boxing->tf()->range();
+  assert(r->cnt() > TypeFunc::Parms, "sanity");
+  const TypeInstPtr* t = r->field_at(TypeFunc::Parms)->isa_instptr();
+  assert(t != NULL, "sanity");
+
+  CompileLog* log = C->log();
+  if (log != NULL) {
+    log->head("eliminate_boxing type='%d'",
+              log->identify(t->klass()));
+    JVMState* p = boxing->jvms();
+    while (p != NULL) {
+      log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
+      p = p->caller();
+    }
+    log->tail("eliminate_boxing");
+  }
+
+  process_users_of_allocation(boxing);
+
+#ifndef PRODUCT
+  if (PrintEliminateAllocations) {
+    tty->print("++++ Eliminated: %d ", boxing->_idx);
+    boxing->method()->print_short_name(tty);
+    tty->cr();
+  }
+#endif
+
+  return true;
+}
 
 //---------------------------set_eden_pointers-------------------------
 void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr) {
@@ -2377,6 +2436,9 @@
       case Node::Class_AllocateArray:
         success = eliminate_allocate_node(n->as_Allocate());
         break;
+      case Node::Class_CallStaticJava:
+        success = eliminate_boxing_node(n->as_CallStaticJava());
+        break;
       case Node::Class_Lock:
       case Node::Class_Unlock:
         assert(!n->as_AbstractLock()->is_eliminated(), "sanity");
@@ -2417,6 +2479,11 @@
         C->remove_macro_node(n);
         _igvn._worklist.push(n);
         success = true;
+      } else if (n->Opcode() == Op_CallStaticJava) {
+        // Remove it from macro list and put on IGVN worklist to optimize.
+        C->remove_macro_node(n);
+        _igvn._worklist.push(n);
+        success = true;
       } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
         _igvn.replace_node(n, n->in(1));
         success = true;
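Taken together, the new guard in eliminate_allocate_node and the new eliminate_boxing_node path amount to one eligibility rule: a non-escaping allocation is removable when it is scalar-replaceable, or when it is a boxing allocation (with boxing elimination enabled) whose result is never used. The stand-in predicate below restates that rule with plain booleans purely for illustration; it is hypothetical, not HotSpot code.

// Illustrative restatement of the guards added above (parameter names mirror the
// fields and queries in the diff; the function itself is hypothetical).
bool may_eliminate_allocation(bool eliminate_allocations_flag, // EliminateAllocations
                              bool is_non_escaping,            // alloc->_is_non_escaping
                              bool is_scalar_replaceable,      // alloc->_is_scalar_replaceable
                              bool boxing_elimination_on,      // C->eliminate_boxing()
                              bool klass_is_box,               // e.g. java.lang.Integer
                              bool result_unused) {            // alloc->result_cast() == NULL
  if (!eliminate_allocations_flag || !is_non_escaping) {
    return false;
  }
  bool boxing_alloc = boxing_elimination_on && klass_is_box;
  // Either the object is fully scalar-replaceable, or it is an unused box.
  return is_scalar_replaceable || (boxing_alloc && result_unused);
}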
--- a/src/share/vm/opto/macro.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/macro.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -86,10 +86,11 @@
   Node *value_from_mem(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc);
   Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc, Node_Stack *value_phis, int level);
 
+  bool eliminate_boxing_node(CallStaticJavaNode *boxing);
   bool eliminate_allocate_node(AllocateNode *alloc);
   bool can_eliminate_allocation(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints);
   bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints_done);
-  void process_users_of_allocation(AllocateNode *alloc);
+  void process_users_of_allocation(CallNode *alloc);
 
   void eliminate_card_mark(Node *cm);
   void mark_eliminated_box(Node* box, Node* obj);
--- a/src/share/vm/opto/matcher.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/matcher.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -44,6 +44,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -70,8 +73,8 @@
 const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;
 
 //---------------------------Matcher-------------------------------------------
-Matcher::Matcher( Node_List &proj_list ) :
-  PhaseTransform( Phase::Ins_Select ),
+Matcher::Matcher()
+: PhaseTransform( Phase::Ins_Select ),
 #ifdef ASSERT
   _old2new_map(C->comp_arena()),
   _new2old_map(C->comp_arena()),
@@ -81,7 +84,7 @@
   _swallowed(swallowed),
   _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
   _end_inst_chain_rule(_END_INST_CHAIN_RULE),
-  _must_clone(must_clone), _proj_list(proj_list),
+  _must_clone(must_clone),
   _register_save_policy(register_save_policy),
   _c_reg_save_policy(c_reg_save_policy),
   _register_save_type(register_save_type),
@@ -1007,6 +1010,8 @@
   mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root
 
   while (mstack.is_nonempty()) {
+    C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions");
+    if (C->failing()) return NULL;
     n = mstack.node();          // Leave node on stack
     Node_State nstate = mstack.state();
     if (nstate == Visit) {
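
Aside: the two added lines above make the matching loop bail out of the compilation once the live node count approaches the limit, instead of overrunning it. Below is a minimal standalone sketch of that budget-and-bail pattern; ToyCompiler, node_budget-style numbers and the messages are illustrative only, not HotSpot code.

```cpp
#include <cstdio>
#include <vector>

// Toy stand-in for C2's node budget: walk a worklist, but abort the whole
// pass as soon as the projected node count would exceed the limit, the way
// check_node_count()/failing() guard the matching loop above.
struct ToyCompiler {
  int  live_nodes = 0;
  int  node_limit = 80000;   // invented limit for the sketch
  bool failed     = false;

  bool check_node_count(int fudge, const char* reason) {
    if (live_nodes + fudge > node_limit) {
      std::fprintf(stderr, "bailout: %s\n", reason);
      failed = true;
      return false;
    }
    return true;
  }
};

int main() {
  ToyCompiler C;
  std::vector<int> worklist(100, 0);        // each item may expand into new nodes
  while (!worklist.empty()) {
    if (!C.check_node_count(2000, "too many nodes matching instructions")) {
      return 1;                             // give up cleanly instead of overflowing
    }
    worklist.pop_back();
    C.live_nodes += 1000;                   // pretend matching created machine nodes
  }
  std::printf("matched everything, %d nodes live\n", C.live_nodes);
  return 0;
}
```
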
@@ -1334,8 +1339,9 @@
       for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
         proj->_rout.Insert(OptoReg::Name(i));
     }
-    if( proj->_rout.is_NotEmpty() )
-      _proj_list.push(proj);
+    if (proj->_rout.is_NotEmpty()) {
+      push_projection(proj);
+    }
   }
   // Transfer the safepoint information from the call to the mcall
   // Move the JVMState list
@@ -1714,14 +1720,15 @@
   }
 
   // If the _leaf is an AddP, insert the base edge
-  if( leaf->is_AddP() )
+  if (leaf->is_AddP()) {
     mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
+  }
 
-  uint num_proj = _proj_list.size();
+  uint number_of_projections_prior = number_of_projections();
 
   // Perform any 1-to-many expansions required
-  MachNode *ex = mach->Expand(s,_proj_list, mem);
-  if( ex != mach ) {
+  MachNode *ex = mach->Expand(s, _projection_list, mem);
+  if (ex != mach) {
     assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
     if( ex->in(1)->is_Con() )
       ex->in(1)->set_req(0, C->root());
@@ -1742,7 +1749,7 @@
   // generated belatedly during spill code generation.
   if (_allocation_started) {
     guarantee(ex == mach, "no expand rules during spill generation");
-    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
+    guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
   }
 
   if (leaf->is_Con() || leaf->is_DecodeN()) {
--- a/src/share/vm/opto/matcher.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/matcher.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -88,7 +88,7 @@
 
   Node *transform( Node *dummy );
 
-  Node_List &_proj_list;        // For Machine nodes killing many values
+  Node_List _projection_list;        // For Machine nodes killing many values
 
   Node_Array _shared_nodes;
 
@@ -184,10 +184,30 @@
   void collect_null_checks( Node *proj, Node *orig_proj );
   void validate_null_checks( );
 
-  Matcher( Node_List &proj_list );
+  Matcher();
+
+  // Get a projection node at position pos
+  Node* get_projection(uint pos) {
+    return _projection_list[pos];
+  }
+
+  // Push a projection node onto the projection list
+  void push_projection(Node* node) {
+    _projection_list.push(node);
+  }
+
+  Node* pop_projection() {
+    return _projection_list.pop();
+  }
+
+  // Number of nodes in the projection list
+  uint number_of_projections() const {
+    return _projection_list.size();
+  }
 
   // Select instructions for entire method
-  void  match( );
+  void match();
+
   // Helper for match
   OptoReg::Name warp_incoming_stk_arg( VMReg reg );
 
--- a/src/share/vm/opto/memnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/memnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -103,11 +103,15 @@
 
 #endif
 
-Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) {
-  const TypeOopPtr *tinst = t_adr->isa_oopptr();
-  if (tinst == NULL || !tinst->is_known_instance_field())
+Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) {
+  assert((t_oop != NULL), "sanity");
+  bool is_instance = t_oop->is_known_instance_field();
+  bool is_boxed_value_load = t_oop->is_ptr_to_boxed_value() &&
+                             (load != NULL) && load->is_Load() &&
+                             (phase->is_IterGVN() != NULL);
+  if (!(is_instance || is_boxed_value_load))
     return mchain;  // don't try to optimize non-instance types
-  uint instance_id = tinst->instance_id();
+  uint instance_id = t_oop->instance_id();
   Node *start_mem = phase->C->start()->proj_out(TypeFunc::Memory);
   Node *prev = NULL;
   Node *result = mchain;
@@ -122,15 +126,24 @@
         break;  // hit one of our sentinels
       } else if (proj_in->is_Call()) {
         CallNode *call = proj_in->as_Call();
-        if (!call->may_modify(t_adr, phase)) {
+        if (!call->may_modify(t_oop, phase)) { // returns false for instances
           result = call->in(TypeFunc::Memory);
         }
       } else if (proj_in->is_Initialize()) {
         AllocateNode* alloc = proj_in->as_Initialize()->allocation();
         // Stop if this is the initialization for the object instance which
         // which contains this memory slice, otherwise skip over it.
-        if (alloc != NULL && alloc->_idx != instance_id) {
+        if ((alloc == NULL) || (alloc->_idx == instance_id)) {
+          break;
+        }
+        if (is_instance) {
           result = proj_in->in(TypeFunc::Memory);
+        } else if (is_boxed_value_load) {
+          Node* klass = alloc->in(AllocateNode::KlassNode);
+          const TypeKlassPtr* tklass = phase->type(klass)->is_klassptr();
+          if (tklass->klass_is_exact() && !tklass->klass()->equals(t_oop->klass())) {
+            result = proj_in->in(TypeFunc::Memory); // not related allocation
+          }
         }
       } else if (proj_in->is_MemBar()) {
         result = proj_in->in(TypeFunc::Memory);
@@ -138,25 +151,26 @@
         assert(false, "unexpected projection");
       }
     } else if (result->is_ClearArray()) {
-      if (!ClearArrayNode::step_through(&result, instance_id, phase)) {
+      if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) {
         // Can not bypass initialization of the instance
         // we are looking for.
         break;
       }
       // Otherwise skip it (the call updated 'result' value).
     } else if (result->is_MergeMem()) {
-      result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty);
+      result = step_through_mergemem(phase, result->as_MergeMem(), t_oop, NULL, tty);
     }
   }
   return result;
 }
 
-Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) {
-  const TypeOopPtr *t_oop = t_adr->isa_oopptr();
-  bool is_instance = (t_oop != NULL) && t_oop->is_known_instance_field();
+Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase) {
+  const TypeOopPtr* t_oop = t_adr->isa_oopptr();
+  if (t_oop == NULL)
+    return mchain;  // don't try to optimize non-oop types
+  Node* result = optimize_simple_memory_chain(mchain, t_oop, load, phase);
+  bool is_instance = t_oop->is_known_instance_field();
   PhaseIterGVN *igvn = phase->is_IterGVN();
-  Node *result = mchain;
-  result = optimize_simple_memory_chain(result, t_adr, phase);
   if (is_instance && igvn != NULL  && result->is_Phi()) {
     PhiNode *mphi = result->as_Phi();
     assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
@@ -366,7 +380,7 @@
   // Or Region for the check in LoadNode::Ideal();
   // 'sub' should have sub->in(0) != NULL.
   assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start() ||
-         sub->is_Region(), "expecting only these nodes");
+         sub->is_Region() || sub->is_Call(), "expecting only these nodes");
 
   // Get control edge of 'sub'.
   Node* orig_sub = sub;
@@ -937,11 +951,14 @@
 // of aliasing.
 Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
   Node* ld_adr = in(MemNode::Address);
-
+  intptr_t ld_off = 0;
+  AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
   const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr();
-  Compile::AliasType* atp = tp != NULL ? phase->C->alias_type(tp) : NULL;
-  if (EliminateAutoBox && atp != NULL && atp->index() >= Compile::AliasIdxRaw &&
-      atp->field() != NULL && !atp->field()->is_volatile()) {
+  Compile::AliasType* atp = (tp != NULL) ? phase->C->alias_type(tp) : NULL;
+  // This is more general than load from boxing objects.
+  if (phase->C->eliminate_boxing() && (atp != NULL) &&
+      (atp->index() >= Compile::AliasIdxRaw) &&
+      (atp->field() != NULL) && !atp->field()->is_volatile()) {
     uint alias_idx = atp->index();
     bool final = atp->field()->is_final();
     Node* result = NULL;
@@ -963,7 +980,7 @@
           Node* new_st = merge->memory_at(alias_idx);
           if (new_st == merge->base_memory()) {
             // Keep searching
-            current = merge->base_memory();
+            current = new_st;
             continue;
           }
           // Save the new memory state for the slice and fall through
@@ -990,9 +1007,7 @@
         intptr_t st_off = 0;
         AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
         if (alloc == NULL)       return NULL;
-        intptr_t ld_off = 0;
-        AllocateNode* allo2 = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
-        if (alloc != allo2)      return NULL;
+        if (alloc != ld_alloc)   return NULL;
         if (ld_off != st_off)    return NULL;
         // At this point we have proven something like this setup:
         //  A = Allocate(...)
@@ -1009,14 +1024,12 @@
       return st->in(MemNode::ValueIn);
     }
 
-    intptr_t offset = 0;  // scratch
-
     // A load from a freshly-created object always returns zero.
     // (This can happen after LoadNode::Ideal resets the load's memory input
     // to find_captured_store, which returned InitializeNode::zero_memory.)
     if (st->is_Proj() && st->in(0)->is_Allocate() &&
-        st->in(0) == AllocateNode::Ideal_allocation(ld_adr, phase, offset) &&
-        offset >= st->in(0)->as_Allocate()->minimum_header_size()) {
+        (st->in(0) == ld_alloc) &&
+        (ld_off >= st->in(0)->as_Allocate()->minimum_header_size())) {
       // return a zero value for the load's basic type
       // (This is one of the few places where a generic PhaseTransform
       // can create new nodes.  Think of it as lazily manifesting
@@ -1028,15 +1041,27 @@
     if (st->is_Proj() && st->in(0)->is_Initialize()) {
       InitializeNode* init = st->in(0)->as_Initialize();
       AllocateNode* alloc = init->allocation();
-      if (alloc != NULL &&
-          alloc == AllocateNode::Ideal_allocation(ld_adr, phase, offset)) {
+      if ((alloc != NULL) && (alloc == ld_alloc)) {
         // examine a captured store value
-        st = init->find_captured_store(offset, memory_size(), phase);
+        st = init->find_captured_store(ld_off, memory_size(), phase);
         if (st != NULL)
           continue;             // take one more trip around
       }
     }
 
+    // A boxed value loaded from the result of a valueOf() call is the call's input parameter.
+    if (this->is_Load() && ld_adr->is_AddP() &&
+        (tp != NULL) && tp->is_ptr_to_boxed_value()) {
+      intptr_t ignore = 0;
+      Node* base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ignore);
+      if (base != NULL && base->is_Proj() &&
+          base->as_Proj()->_con == TypeFunc::Parms &&
+          base->in(0)->is_CallStaticJava() &&
+          base->in(0)->as_CallStaticJava()->is_boxing_method()) {
+        return base->in(0)->in(TypeFunc::Parms);
+      }
+    }
+
     break;
   }
 
@@ -1045,11 +1070,13 @@
 
 //----------------------is_instance_field_load_with_local_phi------------------
 bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) {
-  if( in(MemNode::Memory)->is_Phi() && in(MemNode::Memory)->in(0) == ctrl &&
-      in(MemNode::Address)->is_AddP() ) {
-    const TypeOopPtr* t_oop = in(MemNode::Address)->bottom_type()->isa_oopptr();
-    // Only instances.
-    if( t_oop != NULL && t_oop->is_known_instance_field() &&
+  if( in(Memory)->is_Phi() && in(Memory)->in(0) == ctrl &&
+      in(Address)->is_AddP() ) {
+    const TypeOopPtr* t_oop = in(Address)->bottom_type()->isa_oopptr();
+    // Only instances and boxed values.
+    if( t_oop != NULL &&
+        (t_oop->is_ptr_to_boxed_value() ||
+         t_oop->is_known_instance_field()) &&
         t_oop->offset() != Type::OffsetBot &&
         t_oop->offset() != Type::OffsetTop) {
       return true;
@@ -1063,7 +1090,7 @@
 Node *LoadNode::Identity( PhaseTransform *phase ) {
   // If the previous store-maker is the right kind of Store, and the store is
   // to the same address, then we are equal to the value stored.
-  Node* mem = in(MemNode::Memory);
+  Node* mem = in(Memory);
   Node* value = can_see_stored_value(mem, phase);
   if( value ) {
     // byte, short & char stores truncate naturally.
@@ -1085,15 +1112,22 @@
   // instance's field to avoid infinite generation of phis in a loop.
   Node *region = mem->in(0);
   if (is_instance_field_load_with_local_phi(region)) {
-    const TypePtr *addr_t = in(MemNode::Address)->bottom_type()->isa_ptr();
+    const TypeOopPtr *addr_t = in(Address)->bottom_type()->isa_oopptr();
     int this_index  = phase->C->get_alias_index(addr_t);
     int this_offset = addr_t->offset();
-    int this_id    = addr_t->is_oopptr()->instance_id();
+    int this_iid    = addr_t->instance_id();
+    if (!addr_t->is_known_instance() &&
+         addr_t->is_ptr_to_boxed_value()) {
+      // Use _idx of address base (could be Phi node) for boxed values.
+      intptr_t   ignore = 0;
+      Node*      base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
+      this_iid = base->_idx;
+    }
     const Type* this_type = bottom_type();
     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
       Node* phi = region->fast_out(i);
       if (phi->is_Phi() && phi != mem &&
-          phi->as_Phi()->is_same_inst_field(this_type, this_id, this_index, this_offset)) {
+          phi->as_Phi()->is_same_inst_field(this_type, this_iid, this_index, this_offset)) {
         return phi;
       }
     }
@@ -1102,170 +1136,106 @@
   return this;
 }
 
-
-// Returns true if the AliasType refers to the field that holds the
-// cached box array.  Currently only handles the IntegerCache case.
-static bool is_autobox_cache(Compile::AliasType* atp) {
-  if (atp != NULL && atp->field() != NULL) {
-    ciField* field = atp->field();
-    ciSymbol* klass = field->holder()->name();
-    if (field->name() == ciSymbol::cache_field_name() &&
-        field->holder()->uses_default_loader() &&
-        klass == ciSymbol::java_lang_Integer_IntegerCache()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// Fetch the base value in the autobox array
-static bool fetch_autobox_base(Compile::AliasType* atp, int& cache_offset) {
-  if (atp != NULL && atp->field() != NULL) {
-    ciField* field = atp->field();
-    ciSymbol* klass = field->holder()->name();
-    if (field->name() == ciSymbol::cache_field_name() &&
-        field->holder()->uses_default_loader() &&
-        klass == ciSymbol::java_lang_Integer_IntegerCache()) {
-      assert(field->is_constant(), "what?");
-      ciObjArray* array = field->constant_value().as_object()->as_obj_array();
-      // Fetch the box object at the base of the array and get its value
-      ciInstance* box = array->obj_at(0)->as_instance();
-      ciInstanceKlass* ik = box->klass()->as_instance_klass();
-      if (ik->nof_nonstatic_fields() == 1) {
-        // This should be true nonstatic_field_at requires calling
-        // nof_nonstatic_fields so check it anyway
-        ciConstant c = box->field_value(ik->nonstatic_field_at(0));
-        cache_offset = c.as_int();
-      }
-      return true;
-    }
-  }
-  return false;
-}
-
-// Returns true if the AliasType refers to the value field of an
-// autobox object.  Currently only handles Integer.
-static bool is_autobox_object(Compile::AliasType* atp) {
-  if (atp != NULL && atp->field() != NULL) {
-    ciField* field = atp->field();
-    ciSymbol* klass = field->holder()->name();
-    if (field->name() == ciSymbol::value_name() &&
-        field->holder()->uses_default_loader() &&
-        klass == ciSymbol::java_lang_Integer()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-
 // We're loading from an object which has autobox behaviour.
 // If this object is result of a valueOf call we'll have a phi
 // merging a newly allocated object and a load from the cache.
 // We want to replace this load with the original incoming
 // argument to the valueOf call.
 Node* LoadNode::eliminate_autobox(PhaseGVN* phase) {
-  Node* base = in(Address)->in(AddPNode::Base);
-  if (base->is_Phi() && base->req() == 3) {
-    AllocateNode* allocation = NULL;
-    int allocation_index = -1;
-    int load_index = -1;
-    for (uint i = 1; i < base->req(); i++) {
-      allocation = AllocateNode::Ideal_allocation(base->in(i), phase);
-      if (allocation != NULL) {
-        allocation_index = i;
-        load_index = 3 - allocation_index;
-        break;
-      }
-    }
-    bool has_load = ( allocation != NULL &&
-                      (base->in(load_index)->is_Load() ||
-                       base->in(load_index)->is_DecodeN() &&
-                       base->in(load_index)->in(1)->is_Load()) );
-    if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
-      // Push the loads from the phi that comes from valueOf up
-      // through it to allow elimination of the loads and the recovery
-      // of the original value.
-      Node* mem_phi = in(Memory);
-      Node* offset = in(Address)->in(AddPNode::Offset);
-      Node* region = base->in(0);
-
-      Node* in1 = clone();
-      Node* in1_addr = in1->in(Address)->clone();
-      in1_addr->set_req(AddPNode::Base, base->in(allocation_index));
-      in1_addr->set_req(AddPNode::Address, base->in(allocation_index));
-      in1_addr->set_req(AddPNode::Offset, offset);
-      in1->set_req(0, region->in(allocation_index));
-      in1->set_req(Address, in1_addr);
-      in1->set_req(Memory, mem_phi->in(allocation_index));
-
-      Node* in2 = clone();
-      Node* in2_addr = in2->in(Address)->clone();
-      in2_addr->set_req(AddPNode::Base, base->in(load_index));
-      in2_addr->set_req(AddPNode::Address, base->in(load_index));
-      in2_addr->set_req(AddPNode::Offset, offset);
-      in2->set_req(0, region->in(load_index));
-      in2->set_req(Address, in2_addr);
-      in2->set_req(Memory, mem_phi->in(load_index));
-
-      in1_addr = phase->transform(in1_addr);
-      in1 =      phase->transform(in1);
-      in2_addr = phase->transform(in2_addr);
-      in2 =      phase->transform(in2);
-
-      PhiNode* result = PhiNode::make_blank(region, this);
-      result->set_req(allocation_index, in1);
-      result->set_req(load_index, in2);
-      return result;
-    }
+  assert(phase->C->eliminate_boxing(), "sanity");
+  intptr_t ignore = 0;
+  Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
+  if ((base == NULL) || base->is_Phi()) {
+    // Push the loads from the phi that comes from valueOf up
+    // through it to allow elimination of the loads and the recovery
+    // of the original value. It is done in split_through_phi().
+    return NULL;
   } else if (base->is_Load() ||
              base->is_DecodeN() && base->in(1)->is_Load()) {
-    if (base->is_DecodeN()) {
-      // Get LoadN node which loads cached Integer object
-      base = base->in(1);
-    }
-    // Eliminate the load of Integer.value for integers from the cache
+    // Eliminate the load of boxed value for integer types from the cache
     // array by deriving the value from the index into the array.
     // Capture the offset of the load and then reverse the computation.
-    Node* load_base = base->in(Address)->in(AddPNode::Base);
-    if (load_base->is_DecodeN()) {
-      // Get LoadN node which loads IntegerCache.cache field
-      load_base = load_base->in(1);
+
+    // Get LoadN node which loads a boxing object from 'cache' array.
+    if (base->is_DecodeN()) {
+      base = base->in(1);
+    }
+    if (!base->in(Address)->is_AddP()) {
+      return NULL; // Complex address
     }
-    if (load_base != NULL) {
-      Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type());
-      intptr_t cache_offset;
-      int shift = -1;
-      Node* cache = NULL;
-      if (is_autobox_cache(atp)) {
-        shift  = exact_log2(type2aelembytes(T_OBJECT));
-        cache = AddPNode::Ideal_base_and_offset(load_base->in(Address), phase, cache_offset);
-      }
-      if (cache != NULL && base->in(Address)->is_AddP()) {
+    AddPNode* address = base->in(Address)->as_AddP();
+    Node* cache_base = address->in(AddPNode::Base);
+    if ((cache_base != NULL) && cache_base->is_DecodeN()) {
+      // Get ConP node which is static 'cache' field.
+      cache_base = cache_base->in(1);
+    }
+    if ((cache_base != NULL) && cache_base->is_Con()) {
+      const TypeAryPtr* base_type = cache_base->bottom_type()->isa_aryptr();
+      if ((base_type != NULL) && base_type->is_autobox_cache()) {
         Node* elements[4];
-        int count = base->in(Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
-        int cache_low;
-        if (count > 0 && fetch_autobox_base(atp, cache_low)) {
-          int offset = arrayOopDesc::base_offset_in_bytes(memory_type()) - (cache_low << shift);
-          // Add up all the offsets making of the address of the load
-          Node* result = elements[0];
-          for (int i = 1; i < count; i++) {
-            result = phase->transform(new (phase->C) AddXNode(result, elements[i]));
+        int shift = exact_log2(type2aelembytes(T_OBJECT));
+        int count = address->unpack_offsets(elements, ARRAY_SIZE(elements));
+        if ((count >  0) && elements[0]->is_Con() &&
+            ((count == 1) ||
+             (count == 2) && elements[1]->Opcode() == Op_LShiftX &&
+                             elements[1]->in(2) == phase->intcon(shift))) {
+          ciObjArray* array = base_type->const_oop()->as_obj_array();
+          // Fetch the box object cache[0] at the base of the array and get its value
+          ciInstance* box = array->obj_at(0)->as_instance();
+          ciInstanceKlass* ik = box->klass()->as_instance_klass();
+          assert(ik->is_box_klass(), "sanity");
+          assert(ik->nof_nonstatic_fields() == 1, "change following code");
+          if (ik->nof_nonstatic_fields() == 1) {
+            // This should be true: nonstatic_field_at requires calling
+            // nof_nonstatic_fields, so check it anyway.
+            ciConstant c = box->field_value(ik->nonstatic_field_at(0));
+            BasicType bt = c.basic_type();
+            // Only integer types have boxing cache.
+            assert(bt == T_BOOLEAN || bt == T_CHAR  ||
+                   bt == T_BYTE    || bt == T_SHORT ||
+                   bt == T_INT     || bt == T_LONG, err_msg_res("wrong type = %s", type2name(bt)));
+            jlong cache_low = (bt == T_LONG) ? c.as_long() : c.as_int();
+            if (cache_low != (int)cache_low) {
+              return NULL; // should not happen since cache is array indexed by value
+            }
+            jlong offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT) - (cache_low << shift);
+            if (offset != (int)offset) {
+              return NULL; // should not happen since cache is array indexed by value
+            }
+            // Add up all the offsets making up the address of the load
+            Node* result = elements[0];
+            for (int i = 1; i < count; i++) {
+              result = phase->transform(new (phase->C) AddXNode(result, elements[i]));
+            }
+            // Remove the constant offset from the address and then
+            result = phase->transform(new (phase->C) AddXNode(result, phase->MakeConX(-(int)offset)));
+            // remove the scaling of the offset to recover the original index.
+            if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) {
+              // Peel the shift off directly but wrap it in a dummy node
+              // since Ideal can't return existing nodes
+              result = new (phase->C) RShiftXNode(result->in(1), phase->intcon(0));
+            } else if (result->is_Add() && result->in(2)->is_Con() &&
+                       result->in(1)->Opcode() == Op_LShiftX &&
+                       result->in(1)->in(2) == phase->intcon(shift)) {
+              // We can't do the general optimization ((X<<Z) + Y) >> Z ==> X + (Y>>Z),
+              // but for boxing cache access we know that X<<Z will not overflow
+              // (there is a range check), so we do this optimization by hand here.
+              Node* add_con = new (phase->C) RShiftXNode(result->in(2), phase->intcon(shift));
+              result = new (phase->C) AddXNode(result->in(1)->in(1), phase->transform(add_con));
+            } else {
+              result = new (phase->C) RShiftXNode(result, phase->intcon(shift));
+            }
+#ifdef _LP64
+            if (bt != T_LONG) {
+              result = new (phase->C) ConvL2INode(phase->transform(result));
+            }
+#else
+            if (bt == T_LONG) {
+              result = new (phase->C) ConvI2LNode(phase->transform(result));
+            }
+#endif
+            return result;
           }
-          // Remove the constant offset from the address and then
-          // remove the scaling of the offset to recover the original index.
-          result = phase->transform(new (phase->C) AddXNode(result, phase->MakeConX(-offset)));
-          if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) {
-            // Peel the shift off directly but wrap it in a dummy node
-            // since Ideal can't return existing nodes
-            result = new (phase->C) RShiftXNode(result->in(1), phase->intcon(0));
-          } else {
-            result = new (phase->C) RShiftXNode(result, phase->intcon(shift));
-          }
-#ifdef _LP64
-          result = new (phase->C) ConvL2INode(phase->transform(result));
-#endif
-          return result;
         }
       }
     }
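
Aside: the rewritten eliminate_autobox() recovers the primitive value purely from the address arithmetic of a load out of the boxing cache array. An element's byte offset is base + ((value - cache_low) << shift), so the value is ((offset - base) >> shift) + cache_low, which is what the AddX/RShiftX graph built above computes. A standalone arithmetic check follows, with made-up layout constants (base 16, 8-byte references, cache_low -128), not taken from a real VM.

```cpp
#include <cassert>
#include <cstdint>

// Recover the boxed primitive value from the byte offset of a load out of a
// boxing cache array:
//   offset = base + ((value - cache_low) << shift)
//   value  = ((offset - base) >> shift) + cache_low
int64_t recover_value(int64_t offset, int64_t base, int shift, int64_t cache_low) {
  int64_t index = (offset - base) >> shift;   // undo the element-size scaling
  return index + cache_low;                   // undo the cache bias
}

int main() {
  const int64_t base      = 16;    // assumed array base offset in bytes
  const int     shift     = 3;     // assumed 8-byte object references
  const int64_t cache_low = -128;  // Integer-cache style lower bound

  for (int64_t v = cache_low; v <= 127; v++) {
    int64_t offset = base + ((v - cache_low) << shift);   // offset the load sees
    assert(recover_value(offset, base, shift, cache_low) == v);
  }
  return 0;
}
```
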
@@ -1273,65 +1243,131 @@
   return NULL;
 }
 
-//------------------------------split_through_phi------------------------------
-// Split instance field load through Phi.
-Node *LoadNode::split_through_phi(PhaseGVN *phase) {
-  Node* mem     = in(MemNode::Memory);
-  Node* address = in(MemNode::Address);
-  const TypePtr *addr_t = phase->type(address)->isa_ptr();
-  const TypeOopPtr *t_oop = addr_t->isa_oopptr();
-
-  assert(mem->is_Phi() && (t_oop != NULL) &&
-         t_oop->is_known_instance_field(), "invalide conditions");
-
-  Node *region = mem->in(0);
+static bool stable_phi(PhiNode* phi, PhaseGVN *phase) {
+  Node* region = phi->in(0);
   if (region == NULL) {
-    return NULL; // Wait stable graph
+    return false; // Wait stable graph
   }
-  uint cnt = mem->req();
+  uint cnt = phi->req();
   for (uint i = 1; i < cnt; i++) {
     Node* rc = region->in(i);
     if (rc == NULL || phase->type(rc) == Type::TOP)
-      return NULL; // Wait stable graph
-    Node *in = mem->in(i);
-    if (in == NULL) {
+      return false; // Wait stable graph
+    Node* in = phi->in(i);
+    if (in == NULL || phase->type(in) == Type::TOP)
+      return false; // Wait stable graph
+  }
+  return true;
+}
+//------------------------------split_through_phi------------------------------
+// Split instance or boxed field load through Phi.
+Node *LoadNode::split_through_phi(PhaseGVN *phase) {
+  Node* mem     = in(Memory);
+  Node* address = in(Address);
+  const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr();
+
+  assert((t_oop != NULL) &&
+         (t_oop->is_known_instance_field() ||
+          t_oop->is_ptr_to_boxed_value()), "invalid conditions");
+
+  Compile* C = phase->C;
+  intptr_t ignore = 0;
+  Node*    base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
+  bool base_is_phi = (base != NULL) && base->is_Phi();
+  bool load_boxed_values = t_oop->is_ptr_to_boxed_value() && C->aggressive_unboxing() &&
+                           (base != NULL) && (base == address->in(AddPNode::Base)) &&
+                           phase->type(base)->higher_equal(TypePtr::NOTNULL);
+
+  if (!((mem->is_Phi() || base_is_phi) &&
+        (load_boxed_values || t_oop->is_known_instance_field()))) {
+    return NULL; // memory is not Phi
+  }
+
+  if (mem->is_Phi()) {
+    if (!stable_phi(mem->as_Phi(), phase)) {
       return NULL; // Wait stable graph
     }
-  }
-  // Check for loop invariant.
-  if (cnt == 3) {
-    for (uint i = 1; i < cnt; i++) {
-      Node *in = mem->in(i);
-      Node* m = MemNode::optimize_memory_chain(in, addr_t, phase);
-      if (m == mem) {
-        set_req(MemNode::Memory, mem->in(cnt - i)); // Skip this phi.
-        return this;
+    uint cnt = mem->req();
+    // Check for loop invariant memory.
+    if (cnt == 3) {
+      for (uint i = 1; i < cnt; i++) {
+        Node* in = mem->in(i);
+        Node*  m = optimize_memory_chain(in, t_oop, this, phase);
+        if (m == mem) {
+          set_req(Memory, mem->in(cnt - i));
+          return this; // made change
+        }
       }
     }
   }
+  if (base_is_phi) {
+    if (!stable_phi(base->as_Phi(), phase)) {
+      return NULL; // Wait stable graph
+    }
+    uint cnt = base->req();
+    // Check for loop invariant memory.
+    if (cnt == 3) {
+      for (uint i = 1; i < cnt; i++) {
+        if (base->in(i) == base) {
+          return NULL; // Wait stable graph
+        }
+      }
+    }
+  }
+
+  bool load_boxed_phi = load_boxed_values && base_is_phi && (base->in(0) == mem->in(0));
+
   // Split through Phi (see original code in loopopts.cpp).
-  assert(phase->C->have_alias_type(addr_t), "instance should have alias type");
+  assert(C->have_alias_type(t_oop), "instance should have alias type");
 
   // Do nothing here if Identity will find a value
   // (to avoid infinite chain of value phis generation).
   if (!phase->eqv(this, this->Identity(phase)))
     return NULL;
 
-  // Skip the split if the region dominates some control edge of the address.
-  if (!MemNode::all_controls_dominate(address, region))
-    return NULL;
+  // Select Region to split through.
+  Node* region;
+  if (!base_is_phi) {
+    assert(mem->is_Phi(), "sanity");
+    region = mem->in(0);
+    // Skip if the region dominates some control edge of the address.
+    if (!MemNode::all_controls_dominate(address, region))
+      return NULL;
+  } else if (!mem->is_Phi()) {
+    assert(base_is_phi, "sanity");
+    region = base->in(0);
+    // Skip if the region dominates some control edge of the memory.
+    if (!MemNode::all_controls_dominate(mem, region))
+      return NULL;
+  } else if (base->in(0) != mem->in(0)) {
+    assert(base_is_phi && mem->is_Phi(), "sanity");
+    if (MemNode::all_controls_dominate(mem, base->in(0))) {
+      region = base->in(0);
+    } else if (MemNode::all_controls_dominate(address, mem->in(0))) {
+      region = mem->in(0);
+    } else {
+      return NULL; // complex graph
+    }
+  } else {
+    assert(base->in(0) == mem->in(0), "sanity");
+    region = mem->in(0);
+  }
 
   const Type* this_type = this->bottom_type();
-  int this_index  = phase->C->get_alias_index(addr_t);
-  int this_offset = addr_t->offset();
-  int this_iid    = addr_t->is_oopptr()->instance_id();
-  PhaseIterGVN *igvn = phase->is_IterGVN();
-  Node *phi = new (igvn->C) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset);
+  int this_index  = C->get_alias_index(t_oop);
+  int this_offset = t_oop->offset();
+  int this_iid    = t_oop->instance_id();
+  if (!t_oop->is_known_instance() && load_boxed_values) {
+    // Use _idx of address base for boxed values.
+    this_iid = base->_idx;
+  }
+  PhaseIterGVN* igvn = phase->is_IterGVN();
+  Node* phi = new (C) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset);
   for (uint i = 1; i < region->req(); i++) {
-    Node *x;
+    Node* x;
     Node* the_clone = NULL;
-    if (region->in(i) == phase->C->top()) {
-      x = phase->C->top();      // Dead path?  Use a dead data op
+    if (region->in(i) == C->top()) {
+      x = C->top();      // Dead path?  Use a dead data op
     } else {
       x = this->clone();        // Else clone up the data op
       the_clone = x;            // Remember for possible deletion.
@@ -1341,10 +1377,16 @@
       } else {
         x->set_req(0, NULL);
       }
-      for (uint j = 1; j < this->req(); j++) {
-        Node *in = this->in(j);
-        if (in->is_Phi() && in->in(0) == region)
-          x->set_req(j, in->in(i)); // Use pre-Phi input for the clone
+      if (mem->is_Phi() && (mem->in(0) == region)) {
+        x->set_req(Memory, mem->in(i)); // Use pre-Phi input for the clone.
+      }
+      if (address->is_Phi() && address->in(0) == region) {
+        x->set_req(Address, address->in(i)); // Use pre-Phi input for the clone
+      }
+      if (base_is_phi && (base->in(0) == region)) {
+        Node* base_x = base->in(i); // Clone address for loads from boxed objects.
+        Node* adr_x = phase->transform(new (C) AddPNode(base_x,base_x,address->in(AddPNode::Offset)));
+        x->set_req(Address, adr_x);
       }
     }
     // Check for a 'win' on some paths
@@ -1374,7 +1416,7 @@
       if (y != x) {
         x = y;
       } else {
-        y = igvn->hash_find(x);
+        y = igvn->hash_find_insert(x);
         if (y) {
           x = y;
         } else {
@@ -1385,8 +1427,9 @@
         }
       }
     }
-    if (x != the_clone && the_clone != NULL)
+    if (x != the_clone && the_clone != NULL) {
       igvn->remove_dead_node(the_clone);
+    }
     phi->set_req(i, x);
   }
   // Record Phi
@@ -1425,31 +1468,23 @@
       // A method-invariant, non-null address (constant or 'this' argument).
       set_req(MemNode::Control, NULL);
     }
-
-    if (EliminateAutoBox && can_reshape) {
-      assert(!phase->type(base)->higher_equal(TypePtr::NULL_PTR), "the autobox pointer should be non-null");
-      Compile::AliasType* atp = phase->C->alias_type(adr_type());
-      if (is_autobox_object(atp)) {
-        Node* result = eliminate_autobox(phase);
-        if (result != NULL) return result;
-      }
-    }
   }
 
   Node* mem = in(MemNode::Memory);
   const TypePtr *addr_t = phase->type(address)->isa_ptr();
 
-  if (addr_t != NULL) {
+  if (can_reshape && (addr_t != NULL)) {
     // try to optimize our memory input
-    Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, phase);
+    Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, this, phase);
     if (opt_mem != mem) {
       set_req(MemNode::Memory, opt_mem);
       if (phase->type( opt_mem ) == Type::TOP) return NULL;
       return this;
     }
     const TypeOopPtr *t_oop = addr_t->isa_oopptr();
-    if (can_reshape && opt_mem->is_Phi() &&
-        (t_oop != NULL) && t_oop->is_known_instance_field()) {
+    if ((t_oop != NULL) &&
+        (t_oop->is_known_instance_field() ||
+         t_oop->is_ptr_to_boxed_value())) {
       PhaseIterGVN *igvn = phase->is_IterGVN();
       if (igvn != NULL && igvn->_worklist.member(opt_mem)) {
         // Delay this transformation until memory Phi is processed.
@@ -1459,6 +1494,11 @@
       // Split instance field load through Phi.
       Node* result = split_through_phi(phase);
       if (result != NULL) return result;
+
+      if (t_oop->is_ptr_to_boxed_value()) {
+        Node* result = eliminate_autobox(phase);
+        if (result != NULL) return result;
+      }
     }
   }
 
@@ -1567,18 +1607,23 @@
           // This can happen if a interface-typed array narrows to a class type.
           jt = _type;
         }
-
-        if (EliminateAutoBox && adr->is_AddP()) {
+#ifdef ASSERT
+        if (phase->C->eliminate_boxing() && adr->is_AddP()) {
           // The pointers in the autobox arrays are always non-null
           Node* base = adr->in(AddPNode::Base);
-          if (base != NULL &&
-              !phase->type(base)->higher_equal(TypePtr::NULL_PTR)) {
-            Compile::AliasType* atp = C->alias_type(base->adr_type());
-            if (is_autobox_cache(atp)) {
-              return jt->join(TypePtr::NOTNULL)->is_ptr();
+          if ((base != NULL) && base->is_DecodeN()) {
+            // Get LoadN node which loads IntegerCache.cache field
+            base = base->in(1);
+          }
+          if ((base != NULL) && base->is_Con()) {
+            const TypeAryPtr* base_type = base->bottom_type()->isa_aryptr();
+            if ((base_type != NULL) && base_type->is_autobox_cache()) {
+              // It could be narrow oop
+              assert(jt->make_ptr()->ptr() == TypePtr::NotNull,"sanity");
             }
           }
         }
+#endif
         return jt;
       }
     }
@@ -1618,6 +1663,10 @@
     // Optimizations for constant objects
     ciObject* const_oop = tinst->const_oop();
     if (const_oop != NULL) {
+      // For constant Boxed value treat the target field as a compile time constant.
+      if (tinst->is_ptr_to_boxed_value()) {
+        return tinst->get_const_boxed_value();
+      } else
       // For constant CallSites treat the target field as a compile time constant.
       if (const_oop->is_call_site()) {
         ciCallSite* call_site = const_oop->as_call_site();
@@ -1739,7 +1788,8 @@
   // (Also allow a variable load from a fresh array to produce zero.)
   const TypeOopPtr *tinst = tp->isa_oopptr();
   bool is_instance = (tinst != NULL) && tinst->is_known_instance_field();
-  if (ReduceFieldZeroing || is_instance) {
+  bool is_boxed_value = (tinst != NULL) && tinst->is_ptr_to_boxed_value();
+  if (ReduceFieldZeroing || is_instance || is_boxed_value) {
     Node* value = can_see_stored_value(mem,phase);
     if (value != NULL && value->is_Con()) {
       assert(value->bottom_type()->higher_equal(_type),"sanity");
@@ -2843,41 +2893,54 @@
   if (in(0) && in(0)->is_top())  return NULL;
 
   // Eliminate volatile MemBars for scalar replaced objects.
-  int opc = Opcode();
-  if (can_reshape && req() == (Precedent + 1) &&
-      (opc == Op_MemBarAcquire || opc == Op_MemBarVolatile)) {
-    // Volatile field loads and stores.
-    Node* my_mem = in(MemBarNode::Precedent);
-    // The MembarAquire may keep an unused LoadNode alive through the Precedent edge
-    if ((my_mem != NULL) && (opc == Op_MemBarAcquire) && (my_mem->outcnt() == 1)) {
-      // if the Precedent is a decodeN and its input (a Load) is used at more than one place,
-      // replace this Precedent (decodeN) with the Load instead.
-      if ((my_mem->Opcode() == Op_DecodeN) && (my_mem->in(1)->outcnt() > 1))  {
-        Node* load_node = my_mem->in(1);
-        set_req(MemBarNode::Precedent, load_node);
-        phase->is_IterGVN()->_worklist.push(my_mem);
-        my_mem = load_node;
-      } else{
-        assert(my_mem->unique_out() == this, "sanity");
-        del_req(Precedent);
-        phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later
-        my_mem = NULL;
+  if (can_reshape && req() == (Precedent+1)) {
+    bool eliminate = false;
+    int opc = Opcode();
+    if ((opc == Op_MemBarAcquire || opc == Op_MemBarVolatile)) {
+      // Volatile field loads and stores.
+      Node* my_mem = in(MemBarNode::Precedent);
+      // The MemBarAcquire may keep an unused LoadNode alive through the Precedent edge
+      if ((my_mem != NULL) && (opc == Op_MemBarAcquire) && (my_mem->outcnt() == 1)) {
+        // if the Precedent is a decodeN and its input (a Load) is used at more than one place,
+        // replace this Precedent (decodeN) with the Load instead.
+        if ((my_mem->Opcode() == Op_DecodeN) && (my_mem->in(1)->outcnt() > 1)) {
+          Node* load_node = my_mem->in(1);
+          set_req(MemBarNode::Precedent, load_node);
+          phase->is_IterGVN()->_worklist.push(my_mem);
+          my_mem = load_node;
+        } else {
+          assert(my_mem->unique_out() == this, "sanity");
+          del_req(Precedent);
+          phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later
+          my_mem = NULL;
+        }
+      }
+      if (my_mem != NULL && my_mem->is_Mem()) {
+        const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
+        // Check for scalar replaced object reference.
+        if( t_oop != NULL && t_oop->is_known_instance_field() &&
+            t_oop->offset() != Type::OffsetBot &&
+            t_oop->offset() != Type::OffsetTop) {
+          eliminate = true;
+        }
+      }
+    } else if (opc == Op_MemBarRelease) {
+      // Final field stores.
+      Node* alloc = AllocateNode::Ideal_allocation(in(MemBarNode::Precedent), phase);
+      if ((alloc != NULL) && alloc->is_Allocate() &&
+          alloc->as_Allocate()->_is_non_escaping) {
+        // The allocated object does not escape.
+        eliminate = true;
       }
     }
-    if (my_mem != NULL && my_mem->is_Mem()) {
-      const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
-      // Check for scalar replaced object reference.
-      if( t_oop != NULL && t_oop->is_known_instance_field() &&
-          t_oop->offset() != Type::OffsetBot &&
-          t_oop->offset() != Type::OffsetTop) {
-        // Replace MemBar projections by its inputs.
-        PhaseIterGVN* igvn = phase->is_IterGVN();
-        igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
-        igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
-        // Must return either the original node (now dead) or a new node
-        // (Do not return a top here, since that would break the uniqueness of top.)
-        return new (phase->C) ConINode(TypeInt::ZERO);
-      }
+    if (eliminate) {
+      // Replace the MemBar's projections with its inputs.
+      PhaseIterGVN* igvn = phase->is_IterGVN();
+      igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
+      igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
+      // Must return either the original node (now dead) or a new node
+      // (Do not return a top here, since that would break the uniqueness of top.)
+      return new (phase->C) ConINode(TypeInt::ZERO);
     }
   }
   return NULL;
@@ -3090,9 +3153,7 @@
 // within the initialization without creating a vicious cycle, such as:
 //     { Foo p = new Foo(); p.next = p; }
 // True for constants and parameters and small combinations thereof.
-bool InitializeNode::detect_init_independence(Node* n,
-                                              bool st_is_pinned,
-                                              int& count) {
+bool InitializeNode::detect_init_independence(Node* n, int& count) {
   if (n == NULL)      return true;   // (can this really happen?)
   if (n->is_Proj())   n = n->in(0);
   if (n == this)      return false;  // found a cycle
@@ -3112,7 +3173,6 @@
     // a store is never pinned *before* the availability of its inputs.
     if (!MemNode::all_controls_dominate(n, this))
       return false;                  // failed to prove a good control
-
   }
 
   // Check data edges for possible dependencies on 'this'.
@@ -3122,7 +3182,7 @@
     if (m == NULL || m == n || m->is_top())  continue;
     uint first_i = n->find_edge(m);
     if (i != first_i)  continue;  // process duplicate edge just once
-    if (!detect_init_independence(m, st_is_pinned, count)) {
+    if (!detect_init_independence(m, count)) {
       return false;
     }
   }
@@ -3153,7 +3213,7 @@
     return FAIL;                // wrong allocation!  (store needs to float up)
   Node* val = st->in(MemNode::ValueIn);
   int complexity_count = 0;
-  if (!detect_init_independence(val, true, complexity_count))
+  if (!detect_init_independence(val, complexity_count))
     return FAIL;                // stored value must be 'simple enough'
 
   // The Store can be captured only if nothing after the allocation
--- a/src/share/vm/opto/memnode.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/memnode.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -75,8 +75,8 @@
                                       PhaseTransform* phase);
   static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast);
 
-  static Node *optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase);
-  static Node *optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase);
+  static Node *optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase);
+  static Node *optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase);
   // This one should probably be a phase-specific function:
   static bool all_controls_dominate(Node* dom, Node* sub);
 
@@ -1118,7 +1118,7 @@
 
   Node* make_raw_address(intptr_t offset, PhaseTransform* phase);
 
-  bool detect_init_independence(Node* n, bool st_is_pinned, int& count);
+  bool detect_init_independence(Node* n, int& count);
 
   void coalesce_subword_stores(intptr_t header_size, Node* size_in_bytes,
                                PhaseGVN* phase);
--- a/src/share/vm/opto/mulnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/mulnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -244,13 +244,13 @@
   double d = (double)hi1;
 
   // Compute all endpoints & check for overflow
-  int32 A = lo0*lo1;
+  int32 A = java_multiply(lo0, lo1);
   if( (double)A != a*c ) return TypeInt::INT; // Overflow?
-  int32 B = lo0*hi1;
+  int32 B = java_multiply(lo0, hi1);
   if( (double)B != a*d ) return TypeInt::INT; // Overflow?
-  int32 C = hi0*lo1;
+  int32 C = java_multiply(hi0, lo1);
   if( (double)C != b*c ) return TypeInt::INT; // Overflow?
-  int32 D = hi0*hi1;
+  int32 D = java_multiply(hi0, hi1);
   if( (double)D != b*d ) return TypeInt::INT; // Overflow?
 
   if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
@@ -340,13 +340,13 @@
   double d = (double)hi1;
 
   // Compute all endpoints & check for overflow
-  jlong A = lo0*lo1;
+  jlong A = java_multiply(lo0, lo1);
   if( (double)A != a*c ) return TypeLong::LONG; // Overflow?
-  jlong B = lo0*hi1;
+  jlong B = java_multiply(lo0, hi1);
   if( (double)B != a*d ) return TypeLong::LONG; // Overflow?
-  jlong C = hi0*lo1;
+  jlong C = java_multiply(hi0, lo1);
   if( (double)C != b*c ) return TypeLong::LONG; // Overflow?
-  jlong D = hi0*hi1;
+  jlong D = java_multiply(hi0, hi1);
   if( (double)D != b*d ) return TypeLong::LONG; // Overflow?
 
   if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
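
Aside: the java_multiply() calls replace raw signed multiplications whose overflow is undefined behaviour in C++; the overflow check against the double product still follows, but the multiply itself now has defined wrap-around semantics. A hedged sketch of the underlying technique, multiplying in the unsigned domain and converting back; the helper names here are local to the sketch, not the real HotSpot declarations.

```cpp
#include <cstdint>
#include <cstdio>

// Two's-complement (Java-style) multiply: do the arithmetic on unsigned
// integers, where overflow is well defined, then convert back.
static inline int32_t wrap_mul32(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a * (uint32_t)b);
}
static inline int64_t wrap_mul64(int64_t a, int64_t b) {
  return (int64_t)((uint64_t)a * (uint64_t)b);
}

int main() {
  // 2^30 * 4 overflows a 32-bit signed multiply (undefined behaviour in C++);
  // with wrap-around semantics the product is 0, as Java's int multiply gives.
  std::printf("%d\n", wrap_mul32(1 << 30, 4));
  std::printf("%lld\n", (long long)wrap_mul64(INT64_C(1) << 62, 4));
  return 0;
}
```
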
@@ -571,7 +571,8 @@
     // Masking off high bits which are always zero is useless.
     const TypeLong* t1 = phase->type( in(1) )->isa_long();
     if (t1 != NULL && t1->_lo >= 0) {
-      jlong t1_support = ((jlong)1 << (1 + log2_long(t1->_hi))) - 1;
+      int bit_count = log2_long(t1->_hi) + 1;
+      jlong t1_support = jlong(max_julong >> (BitsPerJavaLong - bit_count));
       if ((t1_support & con) == t1_support)
         return usr;
     }
@@ -799,7 +800,7 @@
 
   // Check for ((x & ((CONST64(1)<<(64-c0))-1)) << c0) which ANDs off high bits
   // before shifting them away.
-  const jlong bits_mask = ((jlong)CONST64(1) << (jlong)(BitsPerJavaLong - con)) - CONST64(1);
+  const jlong bits_mask = jlong(max_julong >> con);
   if( add1_op == Op_AndL &&
       phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
     return new (phase->C) LShiftLNode( add1->in(1), in(2) );
@@ -1250,7 +1251,7 @@
   if ( con == 0 ) return NULL;  // let Identity() handle a 0 shift count
                               // note: mask computation below does not work for 0 shift count
   // We'll be wanting the right-shift amount as a mask of that many bits
-  const jlong mask = (((jlong)CONST64(1) << (jlong)(BitsPerJavaLong - con)) -1);
+  const jlong mask = jlong(max_julong >> con);
 
   // Check for ((x << z) + Y) >>> z.  Replace with x + con>>>z
   // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
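
Aside: the new mask expressions sidestep the undefined shift-by-64 that ((jlong)1 << (BitsPerJavaLong - con)) - 1 performs when con is 0 (or when bit_count is 64): right-shifting an all-ones unsigned value by con produces the same low-bit mask for every legal count. A small standalone check of that equivalence, with max_julong modelled here as ~0ULL.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t max_julong = ~UINT64_C(0);   // stand-in for HotSpot's max_julong

  // For shift counts where the old formula is defined (1..63) the two forms
  // agree; for con == 0 only the unsigned right shift is defined, and it
  // yields the full 64-bit mask as desired.
  for (int con = 1; con < 64; con++) {
    uint64_t old_mask = (UINT64_C(1) << (64 - con)) - 1;  // low (64 - con) bits set
    uint64_t new_mask = max_julong >> con;
    assert(old_mask == new_mask);
  }
  assert((max_julong >> 0) == max_julong);
  return 0;
}
```
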
--- a/src/share/vm/opto/multnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/multnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -75,13 +75,26 @@
   return (_con == TypeFunc::Control && def->is_CFG());
 }
 
+const Type* ProjNode::proj_type(const Type* t) const {
+  if (t == Type::TOP) {
+    return Type::TOP;
+  }
+  if (t == Type::BOTTOM) {
+    return Type::BOTTOM;
+  }
+  t = t->is_tuple()->field_at(_con);
+  Node* n = in(0);
+  if ((_con == TypeFunc::Parms) &&
+      n->is_CallStaticJava() && n->as_CallStaticJava()->is_boxing_method()) {
+    // The result of autoboxing is always non-null on normal path.
+    t = t->join(TypePtr::NOTNULL);
+  }
+  return t;
+}
+
 const Type *ProjNode::bottom_type() const {
-  if (in(0) == NULL)  return Type::TOP;
-  const Type *tb = in(0)->bottom_type();
-  if( tb == Type::TOP ) return Type::TOP;
-  if( tb == Type::BOTTOM ) return Type::BOTTOM;
-  const TypeTuple *t = tb->is_tuple();
-  return t->field_at(_con);
+  if (in(0) == NULL) return Type::TOP;
+  return proj_type(in(0)->bottom_type());
 }
 
 const TypePtr *ProjNode::adr_type() const {
@@ -117,11 +130,8 @@
 
 //------------------------------Value------------------------------------------
 const Type *ProjNode::Value( PhaseTransform *phase ) const {
-  if( !in(0) ) return Type::TOP;
-  const Type *t = phase->type(in(0));
-  if( t == Type::TOP ) return t;
-  if( t == Type::BOTTOM ) return t;
-  return t->is_tuple()->field_at(_con);
+  if (in(0) == NULL) return Type::TOP;
+  return proj_type(phase->type(in(0)));
 }
 
 //------------------------------out_RegMask------------------------------------
--- a/src/share/vm/opto/multnode.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/multnode.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -60,6 +60,7 @@
   virtual uint cmp( const Node &n ) const;
   virtual uint size_of() const;
   void check_con() const;       // Called from constructor.
+  const Type* proj_type(const Type* t) const;
 
 public:
   ProjNode( Node *src, uint con, bool io_use = false )
@@ -83,6 +84,7 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual uint ideal_reg() const;
   virtual const RegMask &out_RegMask() const;
+
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
--- a/src/share/vm/opto/node.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/node.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -68,7 +68,8 @@
   }
   Compile::set_debug_idx(new_debug_idx);
   set_debug_idx( new_debug_idx );
-  assert(Compile::current()->unique() < (UINT_MAX - 1), "Node limit exceeded UINT_MAX");
+  assert(Compile::current()->unique() < (INT_MAX - 1), "Node limit exceeded INT_MAX");
+  assert(Compile::current()->live_nodes() < (uint)MaxNodeLimit, "Live Node limit exceeded limit");
   if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) {
     tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx);
     BREAKPOINT;
@@ -472,9 +473,9 @@
 //------------------------------clone------------------------------------------
 // Clone a Node.
 Node *Node::clone() const {
-  Compile *compile = Compile::current();
+  Compile* C = Compile::current();
   uint s = size_of();           // Size of inherited Node
-  Node *n = (Node*)compile->node_arena()->Amalloc_D(size_of() + _max*sizeof(Node*));
+  Node *n = (Node*)C->node_arena()->Amalloc_D(size_of() + _max*sizeof(Node*));
   Copy::conjoint_words_to_lower((HeapWord*)this, (HeapWord*)n, s);
   // Set the new input pointer array
   n->_in = (Node**)(((char*)n)+s);
@@ -493,23 +494,23 @@
     if (x != NULL) x->add_out(n);
   }
   if (is_macro())
-    compile->add_macro_node(n);
+    C->add_macro_node(n);
   if (is_expensive())
-    compile->add_expensive_node(n);
+    C->add_expensive_node(n);
   // If the cloned node is a range check dependent CastII, add it to the list.
   CastIINode* cast = n->isa_CastII();
   if (cast != NULL && cast->has_range_check()) {
-    compile->add_range_check_cast(cast);
+    C->add_range_check_cast(cast);
   }
 
-  n->set_idx(compile->next_unique()); // Get new unique index as well
+  n->set_idx(C->next_unique()); // Get new unique index as well
   debug_only( n->verify_construction() );
   NOT_PRODUCT(nodes_created++);
   // Do not patch over the debug_idx of a clone, because it makes it
   // impossible to break on the clone's moment of creation.
   //debug_only( n->set_debug_idx( debug_idx() ) );
 
-  compile->copy_node_notes_to(n, (Node*) this);
+  C->copy_node_notes_to(n, (Node*) this);
 
   // MachNode clone
   uint nopnds;
@@ -524,13 +525,12 @@
                                   (const void*)(&mthis->_opnds), 1));
     mach->_opnds = to;
     for ( uint i = 0; i < nopnds; ++i ) {
-      to[i] = from[i]->clone(compile);
+      to[i] = from[i]->clone(C);
     }
   }
   // cloning CallNode may need to clone JVMState
   if (n->is_Call()) {
-    CallNode *call = n->as_Call();
-    call->clone_jvms();
+    n->as_Call()->clone_jvms(C);
   }
   if (n->is_SafePoint()) {
     n->as_SafePoint()->clone_replaced_nodes();
@@ -828,6 +828,21 @@
   return nrep;
 }
 
+/**
+ * Replace input edges in the range [start, end) that point to the 'old' node with 'neww'.
+ */
+int Node::replace_edges_in_range(Node* old, Node* neww, int start, int end) {
+  if (old == neww)  return 0;  // nothing to do
+  uint nrep = 0;
+  for (int i = start; i < end; i++) {
+    if (in(i) == old) {
+      set_req(i, neww);
+      nrep++;
+    }
+  }
+  return nrep;
+}
+
 //-------------------------disconnect_inputs-----------------------------------
 // NULL out all inputs to eliminate incoming Def-Use edges.
 // Return the number of edges between 'n' and 'this'
--- a/src/share/vm/opto/node.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/node.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -42,7 +42,6 @@
 class AllocateArrayNode;
 class AllocateNode;
 class Block;
-class Block_Array;
 class BoolNode;
 class BoxLockNode;
 class CMoveNode;
@@ -414,6 +413,7 @@
   // Find first occurrence of n among my edges:
   int find_edge(Node* n);
   int replace_edge(Node* old, Node* neww);
+  int replace_edges_in_range(Node* old, Node* neww, int start, int end);
   // NULL out all inputs to eliminate incoming Def-Use edges.
   // Return the number of edges between 'n' and 'this'
   int  disconnect_inputs(Node *n, Compile *c);
--- a/src/share/vm/opto/output.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/output.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -55,11 +55,10 @@
 extern int emit_exception_handler(CodeBuffer &cbuf);
 extern int emit_deopt_handler(CodeBuffer &cbuf);
 
-//------------------------------Output-----------------------------------------
 // Convert Nodes to instruction bits and pass off to the VM
 void Compile::Output() {
   // RootNode goes
-  assert( _cfg->_broot->_nodes.size() == 0, "" );
+  assert( _cfg->get_root_block()->number_of_nodes() == 0, "" );
 
   // The number of new nodes (mostly MachNop) is proportional to
   // the number of java calls and inner loops which are aligned.
@@ -69,17 +68,16 @@
     return;
   }
   // Make sure I can find the Start Node
-  Block_Array& bbs = _cfg->_bbs;
-  Block *entry = _cfg->_blocks[1];
-  Block *broot = _cfg->_broot;
-
-  const StartNode *start = entry->_nodes[0]->as_Start();
+  Block *entry = _cfg->get_block(1);
+  Block *broot = _cfg->get_root_block();
+
+  const StartNode *start = entry->head()->as_Start();
 
   // Replace StartNode with prolog
   MachPrologNode *prolog = new (this) MachPrologNode();
-  entry->_nodes.map( 0, prolog );
-  bbs.map( prolog->_idx, entry );
-  bbs.map( start->_idx, NULL ); // start is no longer in any block
+  entry->map_node(prolog, 0);
+  _cfg->map_node_to_block(prolog, entry);
+  _cfg->unmap_node_from_block(start); // start is no longer in any block
 
   // Virtual methods need an unverified entry point
 
@@ -111,41 +109,44 @@
   }
 
   // Insert epilogs before every return
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
-    if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
-      Node *m = b->end();
-      if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
-        MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
-        b->add_inst( epilog );
-        bbs.map(epilog->_idx, b);
-        //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    Block* block = _cfg->get_block(i);
+    if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
+      Node* m = block->end();
+      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
+        MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
+        block->add_inst(epilog);
+        _cfg->map_node_to_block(epilog, block);
       }
     }
   }
 
 # ifdef ENABLE_ZAP_DEAD_LOCALS
-  if ( ZapDeadCompiledLocals )  Insert_zap_nodes();
+  if (ZapDeadCompiledLocals) {
+    Insert_zap_nodes();
+  }
 # endif
 
-  uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
-  blk_starts[0]    = 0;
+  uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
+  blk_starts[0] = 0;
 
   // Initialize code buffer and process short branches.
   CodeBuffer* cb = init_buffer(blk_starts);
 
-  if (cb == NULL || failing())  return;
+  if (cb == NULL || failing()) {
+    return;
+  }
 
   ScheduleAndBundle();
 
 #ifndef PRODUCT
   if (trace_opto_output()) {
     tty->print("\n---- After ScheduleAndBundle ----\n");
-    for (uint i = 0; i < _cfg->_num_blocks; i++) {
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
       tty->print("\nBB#%03d:\n", i);
-      Block *bb = _cfg->_blocks[i];
-      for (uint j = 0; j < bb->_nodes.size(); j++) {
-        Node *n = bb->_nodes[j];
+      Block* block = _cfg->get_block(i);
+      for (uint j = 0; j < block->number_of_nodes(); j++) {
+        Node* n = block->get_node(j);
         OptoReg::Name reg = _regalloc->get_reg_first(n);
         tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
         n->dump();
@@ -154,11 +155,15 @@
   }
 #endif
 
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   BuildOopMaps();
 
-  if (failing())  return;
+  if (failing())  {
+    return;
+  }
 
   fill_buffer(cb, blk_starts);
 }
@@ -220,10 +225,10 @@
    return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
 
   // Insert call to zap runtime stub before every node with an oop map
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
-    for ( uint j = 0;  j < b->_nodes.size();  ++j ) {
-      Node *n = b->_nodes[j];
+  for( uint i=0; i<_cfg->number_of_blocks(); i++ ) {
+    Block *b = _cfg->get_block(i);
+    for ( uint j = 0;  j < b->number_of_nodes();  ++j ) {
+      Node *n = b->get_node(j);
 
       // Determining if we should insert a zap-a-lot node in output.
      // We do that for all nodes that have oopmap info, except for calls
@@ -252,8 +257,8 @@
         }
         if (insert) {
           Node *zap = call_zap_node(n->as_MachSafePoint(), i);
-          b->_nodes.insert( j, zap );
-          _cfg->_bbs.map( zap->_idx, b );
+          b->insert_node(zap, j);
+          _cfg->map_node_to_block(zap, b);
           ++j;
         }
       }
@@ -278,7 +283,6 @@
   return _matcher->match_sfpt(ideal_node);
 }
 
-//------------------------------is_node_getting_a_safepoint--------------------
 bool Compile::is_node_getting_a_safepoint( Node* n) {
   // This code duplicates the logic prior to the call of add_safepoint
   // below in this file.
@@ -288,7 +292,6 @@
 
 # endif // ENABLE_ZAP_DEAD_LOCALS
 
-//------------------------------compute_loop_first_inst_sizes------------------
 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top
 // of a loop. When aligning a loop we need to provide enough instructions
 // in cpu's fetch buffer to feed decoders. The loop alignment could be
@@ -305,42 +308,39 @@
   // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
   // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
   // equal to 11 bytes which is the largest address NOP instruction.
-  if( MaxLoopPad < OptoLoopAlignment-1 ) {
-    uint last_block = _cfg->_num_blocks-1;
-    for( uint i=1; i <= last_block; i++ ) {
-      Block *b = _cfg->_blocks[i];
+  if (MaxLoopPad < OptoLoopAlignment - 1) {
+    uint last_block = _cfg->number_of_blocks() - 1;
+    for (uint i = 1; i <= last_block; i++) {
+      Block* block = _cfg->get_block(i);
       // Check the first loop's block which requires an alignment.
-      if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
+      if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
         uint sum_size = 0;
         uint inst_cnt = NumberOfLoopInstrToAlign;
-        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
+        inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
 
         // Check subsequent fallthrough blocks if the loop's first
         // block(s) does not have enough instructions.
-        Block *nb = b;
-        while( inst_cnt > 0 &&
-               i < last_block &&
-               !_cfg->_blocks[i+1]->has_loop_alignment() &&
-               !nb->has_successor(b) ) {
+        Block *nb = block;
+        while(inst_cnt > 0 &&
+              i < last_block &&
+              !_cfg->get_block(i + 1)->has_loop_alignment() &&
+              !nb->has_successor(block)) {
           i++;
-          nb = _cfg->_blocks[i];
+          nb = _cfg->get_block(i);
           inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
         } // while( inst_cnt > 0 && i < last_block  )
 
-        b->set_first_inst_size(sum_size);
+        block->set_first_inst_size(sum_size);
      } // if( b->head()->is_Loop() )
     } // for( i <= last_block )
   } // if( MaxLoopPad < OptoLoopAlignment-1 )
 }
 
-//----------------------shorten_branches---------------------------------------
 // The architecture description provides short branch variants for some long
 // branch instructions. Replace eligible long branches with short branches.
 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
-
-  // ------------------
   // Compute size of each block, method size, and relocation information size
-  uint nblocks  = _cfg->_num_blocks;
+  uint nblocks  = _cfg->number_of_blocks();
 
   uint*      jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
   uint*      jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
@@ -372,7 +372,7 @@
   uint last_avoid_back_to_back_adr = max_uint;
   uint nop_size = (new (this) MachNopNode())->size(_regalloc);
   for (uint i = 0; i < nblocks; i++) { // For all blocks
-    Block *b = _cfg->_blocks[i];
+    Block* block = _cfg->get_block(i);
 
     // During short branch replacement, we store the relative (to blk_starts)
     // offset of jump in jmp_offset, rather than the absolute offset of jump.
@@ -385,10 +385,10 @@
     DEBUG_ONLY( jmp_rule[i]   = 0; )
 
     // Sum all instruction sizes to compute block size
-    uint last_inst = b->_nodes.size();
+    uint last_inst = block->number_of_nodes();
     uint blk_size = 0;
     for (uint j = 0; j < last_inst; j++) {
-      Node* nj = b->_nodes[j];
+      Node* nj = block->get_node(j);
       // Handle machine instruction nodes
       if (nj->is_Mach()) {
         MachNode *mach = nj->as_Mach();
@@ -449,8 +449,8 @@
     // When the next block starts a loop, we may insert pad NOP
     // instructions.  Since we cannot know our future alignment,
     // assume the worst.
-    if (i< nblocks-1) {
-      Block *nb = _cfg->_blocks[i+1];
+    if (i < nblocks - 1) {
+      Block* nb = _cfg->get_block(i + 1);
       int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
       if (max_loop_pad > 0) {
         assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
@@ -482,26 +482,26 @@
     has_short_branch_candidate = false;
     int adjust_block_start = 0;
     for (uint i = 0; i < nblocks; i++) {
-      Block *b = _cfg->_blocks[i];
+      Block* block = _cfg->get_block(i);
       int idx = jmp_nidx[i];
-      MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
+      MachNode* mach = (idx == -1) ? NULL: block->get_node(idx)->as_Mach();
       if (mach != NULL && mach->may_be_short_branch()) {
 #ifdef ASSERT
         assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
         int j;
         // Find the branch; ignore trailing NOPs.
-        for (j = b->_nodes.size()-1; j>=0; j--) {
-          Node* n = b->_nodes[j];
+        for (j = block->number_of_nodes()-1; j>=0; j--) {
+          Node* n = block->get_node(j);
           if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
             break;
         }
-        assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
+        assert(j >= 0 && j == idx && block->get_node(j) == (Node*)mach, "sanity");
 #endif
         int br_size = jmp_size[i];
         int br_offs = blk_starts[i] + jmp_offset[i];
 
         // This requires the TRUE branch target be in succs[0]
-        uint bnum = b->non_connector_successor(0)->_pre_order;
+        uint bnum = block->non_connector_successor(0)->_pre_order;
         int offset = blk_starts[bnum] - br_offs;
         if (bnum > i) { // adjust following block's offset
           offset -= adjust_block_start;
@@ -536,7 +536,7 @@
             diff -= nop_size;
           }
           adjust_block_start += diff;
-          b->_nodes.map(idx, replacement);
+          block->map_node(replacement, idx);
           mach->subsume_by(replacement, C);
           mach = replacement;
           progress = true;
@@ -1116,11 +1116,11 @@
     uint add_size = 0;
     // Fill the constant table.
     // Note:  This must happen before shorten_branches.
-    for (uint i = 0; i < _cfg->_num_blocks; i++) {
-      Block* b = _cfg->_blocks[i];
-
-      for (uint j = 0; j < b->_nodes.size(); j++) {
-        Node* n = b->_nodes[j];
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+      Block* b = _cfg->get_block(i);
+
+      for (uint j = 0; j < b->number_of_nodes(); j++) {
+        Node* n = b->get_node(j);
 
         // If the node is a MachConstantNode evaluate the constant
         // value section.
@@ -1211,7 +1211,7 @@
   // !!!!! This preserves old handling of oopmaps for now
   debug_info()->set_oopmaps(_oop_map_set);
 
-  uint nblocks  = _cfg->_num_blocks;
+  uint nblocks  = _cfg->number_of_blocks();
   // Count and start of implicit null check instructions
   uint inct_cnt = 0;
   uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);
@@ -1259,21 +1259,21 @@
   // Now fill in the code buffer
   Node *delay_slot = NULL;
 
-  for (uint i=0; i < nblocks; i++) {
-    Block *b = _cfg->_blocks[i];
-
-    Node *head = b->head();
+  for (uint i = 0; i < nblocks; i++) {
+    Block* block = _cfg->get_block(i);
+    Node* head = block->head();
 
    // If this block needs to start aligned (i.e., can be reached other
     // than by falling-thru from the previous block), then force the
     // start of a new bundle.
-    if (Pipeline::requires_bundling() && starts_bundle(head))
+    if (Pipeline::requires_bundling() && starts_bundle(head)) {
       cb->flush_bundle(true);
+    }
 
 #ifdef ASSERT
-    if (!b->is_connector()) {
+    if (!block->is_connector()) {
       stringStream st;
-      b->dump_head(&_cfg->_bbs, &st);
+      block->dump_head(_cfg, &st);
       MacroAssembler(cb).block_comment(st.as_string());
     }
     jmp_target[i] = 0;
@@ -1284,16 +1284,16 @@
     int blk_offset = current_offset;
 
     // Define the label at the beginning of the basic block
-    MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
-
-    uint last_inst = b->_nodes.size();
+    MacroAssembler(cb).bind(blk_labels[block->_pre_order]);
+
+    uint last_inst = block->number_of_nodes();
 
     // Emit block normally, except for last instruction.
     // Emit means "dump code bits into code buffer".
     for (uint j = 0; j<last_inst; j++) {
 
       // Get the node
-      Node* n = b->_nodes[j];
+      Node* n = block->get_node(j);
 
       // See if delay slots are supported
       if (valid_bundle_info(n) &&
@@ -1347,9 +1347,9 @@
           assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
           int nops_cnt = padding / nop_size;
           MachNode *nop = new (this) MachNopNode(nops_cnt);
-          b->_nodes.insert(j++, nop);
+          block->insert_node(nop, j++);
           last_inst++;
-          _cfg->_bbs.map( nop->_idx, b );
+          _cfg->map_node_to_block(nop, block);
           nop->emit(*cb, _regalloc);
           cb->flush_bundle(true);
           current_offset = cb->insts_size();
@@ -1363,7 +1363,7 @@
           mcall->method_set((intptr_t)mcall->entry_point());
 
           // Save the return address
-          call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
+          call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset();
 
           if (mcall->is_MachCallLeaf()) {
             is_mcall = false;
@@ -1400,7 +1400,7 @@
         // If this is a branch, then fill in the label with the target BB's label
         else if (mach->is_MachBranch()) {
           // This requires the TRUE branch target be in succs[0]
-          uint block_num = b->non_connector_successor(0)->_pre_order;
+          uint block_num = block->non_connector_successor(0)->_pre_order;
 
           // Try to replace long branch if delay slot is not used,
           // it is mostly for back branches since forward branch's
@@ -1433,8 +1433,8 @@
               // Insert padding between avoid_back_to_back branches.
               if (needs_padding && replacement->avoid_back_to_back()) {
                 MachNode *nop = new (this) MachNopNode();
-                b->_nodes.insert(j++, nop);
-                _cfg->_bbs.map(nop->_idx, b);
+                block->insert_node(nop, j++);
+                _cfg->map_node_to_block(nop, block);
                 last_inst++;
                 nop->emit(*cb, _regalloc);
                 cb->flush_bundle(true);
@@ -1446,7 +1446,7 @@
               jmp_size[i]   = new_size;
               jmp_rule[i]   = mach->rule();
 #endif
-              b->_nodes.map(j, replacement);
+              block->map_node(replacement, j);
               mach->subsume_by(replacement, C);
               n    = replacement;
               mach = replacement;
@@ -1454,8 +1454,8 @@
           }
           mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
         } else if (mach->ideal_Opcode() == Op_Jump) {
-          for (uint h = 0; h < b->_num_succs; h++) {
-            Block* succs_block = b->_succs[h];
+          for (uint h = 0; h < block->_num_succs; h++) {
+            Block* succs_block = block->_succs[h];
             for (uint j = 1; j < succs_block->num_preds(); j++) {
               Node* jpn = succs_block->pred(j);
               if (jpn->is_JumpProj() && jpn->in(0) == mach) {
@@ -1466,7 +1466,6 @@
             }
           }
         }
-
 #ifdef ASSERT
         // Check that oop-store precedes the card-mark
         else if (mach->ideal_Opcode() == Op_StoreCM) {
@@ -1477,17 +1476,18 @@
             if (oop_store == NULL) continue;
             count++;
             uint i4;
-            for( i4 = 0; i4 < last_inst; ++i4 ) {
-              if( b->_nodes[i4] == oop_store ) break;
+            for (i4 = 0; i4 < last_inst; ++i4) {
+              if (block->get_node(i4) == oop_store) {
+                break;
+              }
             }
             // Note: This test can provide a false failure if other precedence
             // edges have been added to the storeCMNode.
-            assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+            assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
           }
           assert(count > 0, "storeCM expects at least one precedence edge");
         }
 #endif
-
         else if (!n->is_Proj()) {
           // Remember the beginning of the previous instruction, in case
           // it's followed by a flag-kill and a null-check.  Happens on
@@ -1589,12 +1589,12 @@
     // If the next block is the top of a loop, pad this block out to align
     // the loop top a little. Helps prevent pipe stalls at loop back branches.
     if (i < nblocks-1) {
-      Block *nb = _cfg->_blocks[i+1];
+      Block *nb = _cfg->get_block(i + 1);
       int padding = nb->alignment_padding(current_offset);
       if( padding > 0 ) {
         MachNode *nop = new (this) MachNopNode(padding / nop_size);
-        b->_nodes.insert( b->_nodes.size(), nop );
-        _cfg->_bbs.map( nop->_idx, b );
+        block->insert_node(nop, block->number_of_nodes());
+        _cfg->map_node_to_block(nop, block);
         nop->emit(*cb, _regalloc);
         current_offset = cb->insts_size();
       }
@@ -1634,8 +1634,6 @@
   }
 #endif
 
-  // ------------------
-
 #ifndef PRODUCT
   // Information on the size of the method, without the extraneous code
   Scheduling::increment_method_size(cb->insts_size());
@@ -1697,52 +1695,55 @@
   _inc_table.set_size(cnt);
 
   uint inct_cnt = 0;
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    Block* block = _cfg->get_block(i);
     Node *n = NULL;
     int j;
 
     // Find the branch; ignore trailing NOPs.
-    for( j = b->_nodes.size()-1; j>=0; j-- ) {
-      n = b->_nodes[j];
-      if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
+    for (j = block->number_of_nodes() - 1; j >= 0; j--) {
+      n = block->get_node(j);
+      if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) {
         break;
+      }
     }
 
     // If we didn't find anything, continue
-    if( j < 0 ) continue;
+    if (j < 0) {
+      continue;
+    }
 
     // Compute ExceptionHandlerTable subtable entry and add it
     // (skip empty blocks)
-    if( n->is_Catch() ) {
+    if (n->is_Catch()) {
 
       // Get the offset of the return from the call
-      uint call_return = call_returns[b->_pre_order];
+      uint call_return = call_returns[block->_pre_order];
 #ifdef ASSERT
       assert( call_return > 0, "no call seen for this basic block" );
-      while( b->_nodes[--j]->is_MachProj() ) ;
-      assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
+      while (block->get_node(--j)->is_MachProj()) ;
+      assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call");
 #endif
      // last instruction is a CatchNode, find its CatchProjNodes
-      int nof_succs = b->_num_succs;
+      int nof_succs = block->_num_succs;
       // allocate space
       GrowableArray<intptr_t> handler_bcis(nof_succs);
       GrowableArray<intptr_t> handler_pcos(nof_succs);
       // iterate through all successors
       for (int j = 0; j < nof_succs; j++) {
-        Block* s = b->_succs[j];
+        Block* s = block->_succs[j];
         bool found_p = false;
-        for( uint k = 1; k < s->num_preds(); k++ ) {
-          Node *pk = s->pred(k);
-          if( pk->is_CatchProj() && pk->in(0) == n ) {
+        for (uint k = 1; k < s->num_preds(); k++) {
+          Node* pk = s->pred(k);
+          if (pk->is_CatchProj() && pk->in(0) == n) {
             const CatchProjNode* p = pk->as_CatchProj();
             found_p = true;
             // add the corresponding handler bci & pco information
-            if( p->_con != CatchProjNode::fall_through_index ) {
+            if (p->_con != CatchProjNode::fall_through_index) {
               // p leads to an exception handler (and is not fall through)
-              assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
+              assert(s == _cfg->get_block(s->_pre_order), "bad numbering");
               // no duplicates, please
-              if( !handler_bcis.contains(p->handler_bci()) ) {
+              if (!handler_bcis.contains(p->handler_bci())) {
                 uint block_num = s->non_connector()->_pre_order;
                 handler_bcis.append(p->handler_bci());
                 handler_pcos.append(blk_labels[block_num].loc_pos());
@@ -1761,14 +1762,14 @@
     }
 
     // Handle implicit null exception table updates
-    if( n->is_MachNullCheck() ) {
-      uint block_num = b->non_connector_successor(0)->_pre_order;
-      _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
+    if (n->is_MachNullCheck()) {
+      uint block_num = block->non_connector_successor(0)->_pre_order;
+      _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
       continue;
     }
     // Handle implicit exception table updates: trap instructions.
     if (n->is_TrapBasedCheckNode()) {
-      uint block_num = b->non_connector_successor(0)->_pre_order;
+      uint block_num = block->non_connector_successor(0)->_pre_order;
       _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
       continue;
     }
@@ -1789,7 +1790,6 @@
 Scheduling::Scheduling(Arena *arena, Compile &compile)
   : _arena(arena),
     _cfg(compile.cfg()),
-    _bbs(compile.cfg()->_bbs),
     _regalloc(compile.regalloc()),
     _reg_node(arena),
     _bundle_instr_count(0),
@@ -1829,14 +1829,12 @@
   memset(_current_latency,    0, node_max * sizeof(unsigned short));
 
   // Clear the bundling information
-  memcpy(_bundle_use_elements,
-    Pipeline_Use::elaborated_elements,
-    sizeof(Pipeline_Use::elaborated_elements));
+  memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements));
 
   // Get the last node
-  Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
-
-  _next_node = bb->_nodes[bb->_nodes.size()-1];
+  Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1);
+
+  _next_node = block->get_node(block->number_of_nodes() - 1);
 }
 
 #ifndef PRODUCT
@@ -1886,7 +1884,6 @@
     sizeof(Pipeline_Use::elaborated_elements));
 }
 
-//------------------------------ScheduleAndBundle------------------------------
 // Perform instruction scheduling and bundling over the sequence of
 // instructions in backwards order.
 void Compile::ScheduleAndBundle() {
@@ -1913,7 +1910,6 @@
   scheduling.DoScheduling();
 }
 
-//------------------------------ComputeLocalLatenciesForward-------------------
 // Compute the latency of all the instructions.  This is fairly simple,
 // because we already have a legal ordering.  Walk over the instructions
 // from first to last, and compute the latency of the instruction based
@@ -1931,7 +1927,7 @@
     // Used to allow latency 0 to force an instruction to the beginning
     // of the bb
     uint latency = 1;
-    Node *use = bb->_nodes[j];
+    Node *use = bb->get_node(j);
     uint nlen = use->len();
 
     // Walk over all the inputs
@@ -2083,7 +2079,6 @@
   return _available[0];
 }
 
-//------------------------------AddNodeToAvailableList-------------------------
 void Scheduling::AddNodeToAvailableList(Node *n) {
   assert( !n->is_Proj(), "projections never directly made available" );
 #ifndef PRODUCT
@@ -2130,7 +2125,6 @@
 #endif
 }
 
-//------------------------------DecrementUseCounts-----------------------------
 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
   for ( uint i=0; i < n->len(); i++ ) {
     Node *def = n->in(i);
@@ -2138,8 +2132,9 @@
     if( def->is_Proj() )        // If this is a machine projection, then
       def = def->in(0);         // propagate usage thru to the base instruction
 
-    if( _bbs[def->_idx] != bb ) // Ignore if not block-local
+    if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local
       continue;
+    }
 
     // Compute the latency
     uint l = _bundle_cycle_number + n->latency(i);
@@ -2152,7 +2147,6 @@
   }
 }
 
-//------------------------------AddNodeToBundle--------------------------------
 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
 #ifndef PRODUCT
   if (_cfg->C->trace_opto_output()) {
@@ -2345,7 +2339,7 @@
        (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
 
     // Push any trailing projections
-    if( bb->_nodes[bb->_nodes.size()-1] != n ) {
+    if( bb->get_node(bb->number_of_nodes()-1) != n ) {
       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
         Node *foi = n->fast_out(i);
         if( foi->is_Proj() )
@@ -2367,7 +2361,6 @@
   DecrementUseCounts(n,bb);
 }
 
-//------------------------------ComputeUseCount--------------------------------
 // This method sets the use count within a basic block.  We will ignore all
 // uses outside the current basic block.  As we are doing a backwards walk,
 // any node we reach that has a use count of 0 may be scheduled.  This also
@@ -2389,21 +2382,21 @@
   _unconditional_delay_slot = NULL;
 
 #ifdef ASSERT
-  for( uint i=0; i < bb->_nodes.size(); i++ )
-    assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
+  for( uint i=0; i < bb->number_of_nodes(); i++ )
+    assert( _uses[bb->get_node(i)->_idx] == 0, "_use array not clean" );
 #endif
 
  // Force the _uses count to never go to zero for unschedulable pieces
   // of the block
   for( uint k = 0; k < _bb_start; k++ )
-    _uses[bb->_nodes[k]->_idx] = 1;
-  for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
-    _uses[bb->_nodes[l]->_idx] = 1;
+    _uses[bb->get_node(k)->_idx] = 1;
+  for( uint l = _bb_end; l < bb->number_of_nodes(); l++ )
+    _uses[bb->get_node(l)->_idx] = 1;
 
   // Iterate backwards over the instructions in the block.  Don't count the
   // branch projections at end or the block header instructions.
   for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
-    Node *n = bb->_nodes[j];
+    Node *n = bb->get_node(j);
     if( n->is_Proj() ) continue; // Projections handled another way
 
     // Account for all uses
@@ -2411,9 +2404,10 @@
       Node *inp = n->in(k);
       if (!inp) continue;
       assert(inp != n, "no cycles allowed" );
-      if( _bbs[inp->_idx] == bb ) { // Block-local use?
-        if( inp->is_Proj() )    // Skip through Proj's
+      if (_cfg->get_block_for_node(inp) == bb) { // Block-local use?
+        if (inp->is_Proj()) { // Skip through Proj's
           inp = inp->in(0);
+        }
         ++_uses[inp->_idx];     // Count 1 block-local use
       }
     }
@@ -2451,20 +2445,22 @@
   Block *bb;
 
   // Walk over all the basic blocks in reverse order
-  for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
-    bb = _cfg->_blocks[i];
+  for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) {
+    bb = _cfg->get_block(i);
 
 #ifndef PRODUCT
     if (_cfg->C->trace_opto_output()) {
       tty->print("#  Schedule BB#%03d (initial)\n", i);
-      for (uint j = 0; j < bb->_nodes.size(); j++)
-        bb->_nodes[j]->dump();
+      for (uint j = 0; j < bb->number_of_nodes(); j++) {
+        bb->get_node(j)->dump();
+      }
     }
 #endif
 
     // On the head node, skip processing
-    if( bb == _cfg->_broot )
+    if (bb == _cfg->get_root_block()) {
       continue;
+    }
 
     // Skip empty, connector blocks
     if (bb->is_connector())
@@ -2483,10 +2479,10 @@
     }
 
     // Leave untouched the starting instruction, any Phis, a CreateEx node
-    // or Top.  bb->_nodes[_bb_start] is the first schedulable instruction.
-    _bb_end = bb->_nodes.size()-1;
+    // or Top.  bb->get_node(_bb_start) is the first schedulable instruction.
+    _bb_end = bb->number_of_nodes()-1;
     for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
-      Node *n = bb->_nodes[_bb_start];
+      Node *n = bb->get_node(_bb_start);
       // Things not matched, like Phinodes and ProjNodes don't get scheduled.
       // Also, MachIdealNodes do not get scheduled
       if( !n->is_Mach() ) continue;     // Skip non-machine nodes
@@ -2506,19 +2502,19 @@
     // in the block), because they have delay slots we can fill.  Calls all
     // have their delay slots filled in the template expansions, so we don't
     // bother scheduling them.
-    Node *last = bb->_nodes[_bb_end];
+    Node *last = bb->get_node(_bb_end);
     // Ignore trailing NOPs.
     while (_bb_end > 0 && last->is_Mach() &&
            last->as_Mach()->ideal_Opcode() == Op_Con) {
-      last = bb->_nodes[--_bb_end];
+      last = bb->get_node(--_bb_end);
     }
     assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
     if( last->is_Catch() ||
        // Exclude unreachable path case when Halt node is in a separate block.
        (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
       // There must be a prior call.  Skip it.
-      while( !bb->_nodes[--_bb_end]->is_MachCall() ) {
-        assert( bb->_nodes[_bb_end]->is_MachProj(), "skipping projections after expected call" );
+      while( !bb->get_node(--_bb_end)->is_MachCall() ) {
+        assert( bb->get_node(_bb_end)->is_MachProj(), "skipping projections after expected call" );
       }
     } else if( last->is_MachNullCheck() ) {
       // Backup so the last null-checked memory instruction is
@@ -2527,7 +2523,7 @@
       Node *mem = last->in(1);
       do {
         _bb_end--;
-      } while (mem != bb->_nodes[_bb_end]);
+      } while (mem != bb->get_node(_bb_end));
     } else {
       // Set _bb_end to point after last schedulable inst.
       _bb_end++;
@@ -2555,7 +2551,7 @@
     assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
 #ifdef ASSERT
     for( uint l = _bb_start; l < _bb_end; l++ ) {
-      Node *n = bb->_nodes[l];
+      Node *n = bb->get_node(l);
       uint m;
       for( m = 0; m < _bb_end-_bb_start; m++ )
         if( _scheduled[m] == n )
@@ -2566,14 +2562,14 @@
 
     // Now copy the instructions (in reverse order) back to the block
     for ( uint k = _bb_start; k < _bb_end; k++ )
-      bb->_nodes.map(k, _scheduled[_bb_end-k-1]);
+      bb->map_node(_scheduled[_bb_end-k-1], k);
 
 #ifndef PRODUCT
     if (_cfg->C->trace_opto_output()) {
       tty->print("#  Schedule BB#%03d (final)\n", i);
       uint current = 0;
-      for (uint j = 0; j < bb->_nodes.size(); j++) {
-        Node *n = bb->_nodes[j];
+      for (uint j = 0; j < bb->number_of_nodes(); j++) {
+        Node *n = bb->get_node(j);
         if( valid_bundle_info(n) ) {
           Bundle *bundle = node_bundling(n);
           if (bundle->instr_count() > 0 || bundle->flags() > 0) {
@@ -2600,7 +2596,6 @@
 
 } // end DoScheduling
 
-//------------------------------verify_good_schedule---------------------------
 // Verify that no live-range used in the block is killed in the block by a
 // wrong DEF.  This doesn't verify live-ranges that span blocks.
 
@@ -2613,7 +2608,6 @@
 }
 
 #ifdef ASSERT
-//------------------------------verify_do_def----------------------------------
 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
   // Check for bad kills
   if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
@@ -2629,7 +2623,6 @@
   }
 }
 
-//------------------------------verify_good_schedule---------------------------
 void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
 
   // Zap to something reasonable for the verify code
@@ -2638,8 +2631,8 @@
   // Walk over the block backwards.  Check to make sure each DEF doesn't
   // kill a live value (other than the one it's supposed to).  Add each
   // USE to the live set.
-  for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) {
-    Node *n = b->_nodes[i];
+  for( uint i = b->number_of_nodes()-1; i >= _bb_start; i-- ) {
+    Node *n = b->get_node(i);
     int n_op = n->Opcode();
     if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
       // Fat-proj kills a slew of registers
@@ -2689,13 +2682,12 @@
     from->add_prec(to);
 }
 
-//------------------------------anti_do_def------------------------------------
 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
   if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
     return;
 
   Node *pinch = _reg_node[def_reg]; // Get pinch point
-  if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
+  if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet?
       is_def ) {    // Check for a true def (not a kill)
     _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
     return;
@@ -2721,7 +2713,7 @@
       _cfg->C->record_method_not_compilable("too many D-U pinch points");
       return;
     }
-    _bbs.map(pinch->_idx,b);      // Pretend it's valid in this block (lazy init)
+    _cfg->map_node_to_block(pinch, b);      // Pretend it's valid in this block (lazy init)
     _reg_node.map(def_reg,pinch); // Record pinch-point
     //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
     if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
@@ -2759,20 +2751,19 @@
   add_prec_edge_from_to(kill,pinch);
 }
 
-//------------------------------anti_do_use------------------------------------
 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
   if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
     return;
   Node *pinch = _reg_node[use_reg]; // Get pinch point
   // Check for no later def_reg/kill in block
-  if( pinch && _bbs[pinch->_idx] == b &&
+  if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b &&
       // Use has to be block-local as well
-      _bbs[use->_idx] == b ) {
+      _cfg->get_block_for_node(use) == b) {
     if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
         pinch->req() == 1 ) {   // pinch not yet in block?
       pinch->del_req(0);        // yank pointer to later-def, also set flag
       // Insert the pinch-point in the block just after the last use
-      b->_nodes.insert(b->find_node(use)+1,pinch);
+      b->insert_node(pinch, b->find_node(use) + 1);
       _bb_end++;                // Increase size scheduled region in block
     }
 
@@ -2780,7 +2771,6 @@
   }
 }
 
-//------------------------------ComputeRegisterAntidependences-----------------
 // We insert antidependences between the reads and following write of
 // allocated registers to prevent illegal code motion. Hopefully, the
 // number of added references should be fairly small, especially as we
@@ -2825,10 +2815,10 @@
   // it being in the current block.
   bool fat_proj_seen = false;
   uint last_safept = _bb_end-1;
-  Node* end_node         = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
+  Node* end_node         = (_bb_end-1 >= _bb_start) ? b->get_node(last_safept) : NULL;
   Node* last_safept_node = end_node;
   for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
-    Node *n = b->_nodes[i];
+    Node *n = b->get_node(i);
     int is_def = n->outcnt();   // def if some uses prior to adding precedence edges
     if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) {
       // Fat-proj kills a slew of registers
@@ -2877,7 +2867,7 @@
     // Do not allow defs of new derived values to float above GC
     // points unless the base is definitely available at the GC point.
 
-    Node *m = b->_nodes[i];
+    Node *m = b->get_node(i);
 
     // Add precedence edge from following safepoint to use of derived pointer
     if( last_safept_node != end_node &&
@@ -2894,11 +2884,11 @@
 
     if( n->jvms() ) {           // Precedence edge from derived to safept
       // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
-      if( b->_nodes[last_safept] != last_safept_node ) {
+      if( b->get_node(last_safept) != last_safept_node ) {
         last_safept = b->find_node(last_safept_node);
       }
       for( uint j=last_safept; j > i; j-- ) {
-        Node *mach = b->_nodes[j];
+        Node *mach = b->get_node(j);
         if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
           mach->add_prec( n );
       }
@@ -2914,8 +2904,6 @@
   }
 }
 
-//------------------------------garbage_collect_pinch_nodes-------------------------------
-
 // Garbage collect pinch nodes for reuse by other blocks.
 //
 // The block scheduler's insertion of anti-dependence
@@ -2947,7 +2935,7 @@
     int trace_cnt = 0;
     for (uint k = 0; k < _reg_node.Size(); k++) {
       Node* pinch = _reg_node[k];
-      if (pinch != NULL && pinch->Opcode() == Op_Node &&
+      if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
           // no precedence input edges
           (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
         cleanup_pinch(pinch);
@@ -2990,7 +2978,6 @@
   pinch->set_req(0, NULL);
 }
 
-//------------------------------print_statistics-------------------------------
 #ifndef PRODUCT
 
 void Scheduling::dump_available() const {
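
Much of the output.cpp churn in this changeset is mechanical: direct pokes at
CFG internals (_cfg->_blocks[i], _cfg->_bbs, b->_nodes[j]) become accessor calls
(get_block(i), get_block_for_node(n), get_node(j), map_node_to_block(...)). A
much-simplified, standalone sketch of that encapsulation pattern follows; the
Toy* names are invented and deliberately far simpler than the real PhaseCFG and
Block classes:

// Toy model only -- not part of the patch. Shows the shape of the accessors
// the patch switches to, backed here by plain standard containers.
#include <cstddef>
#include <unordered_map>
#include <vector>

struct ToyNode { int idx; };

class ToyBlock {
  std::vector<ToyNode*> _nodes;               // previously touched as b->_nodes
 public:
  size_t   number_of_nodes() const            { return _nodes.size(); }
  ToyNode* get_node(size_t i) const           { return _nodes[i]; }
  void     map_node(ToyNode* n, size_t i)     { _nodes[i] = n; }
  void     insert_node(ToyNode* n, size_t i)  { _nodes.insert(_nodes.begin() + i, n); }
};

class ToyCFG {
  std::vector<ToyBlock*> _blocks;                      // previously _cfg->_blocks
  std::unordered_map<int, ToyBlock*> _node_to_block;   // previously _cfg->_bbs
 public:
  size_t    number_of_blocks() const       { return _blocks.size(); }
  ToyBlock* get_block(size_t i) const      { return _blocks[i]; }
  ToyBlock* get_block_for_node(const ToyNode* n) const {
    std::unordered_map<int, ToyBlock*>::const_iterator it = _node_to_block.find(n->idx);
    return it == _node_to_block.end() ? NULL : it->second;
  }
  void map_node_to_block(ToyNode* n, ToyBlock* b)  { _node_to_block[n->idx] = b; }
  void unmap_node_from_block(ToyNode* n)           { _node_to_block.erase(n->idx); }
};

The point, as far as the diff shows, is encapsulation only: callers such as
Compile::Output() and Scheduling no longer depend on the container types behind
the CFG, which is why the Block_Array& _bbs member disappears from Scheduling
in the output.hpp hunk below.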
--- a/src/share/vm/opto/output.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/output.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -99,9 +102,6 @@
   // List of nodes currently available for choosing for scheduling
   Node_List _available;
 
-  // Mapping from node (index) to basic block
-  Block_Array& _bbs;
-
   // For each instruction beginning a bundle, the number of following
   // nodes to be bundled with it.
   Bundle *_node_bundling_base;
--- a/src/share/vm/opto/parse.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/parse.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -332,6 +332,7 @@
   PPC64_ONLY(bool _wrote_volatile;)
   bool          _count_invocations; // update and test invocation counter
   bool          _method_data_update; // update method data oop
+  Node*         _alloc_with_final;   // An allocation node with final field
 
   // Variables which track Java semantics during bytecode parsing:
 
@@ -380,6 +381,11 @@
 #endif
   bool          count_invocations() const  { return _count_invocations; }
   bool          method_data_update() const { return _method_data_update; }
+  Node*    alloc_with_final() const   { return _alloc_with_final; }
+  void set_alloc_with_final(Node* n)  {
+    assert((_alloc_with_final == NULL) || (_alloc_with_final == n), "different init objects?");
+    _alloc_with_final = n;
+  }
 
   Block*             block()    const { return _block; }
   ciBytecodeStream&  iter()           { return _iter; }
@@ -522,7 +528,7 @@
 
   // loading from a constant field or the constant pool
   // returns false if push failed (non-perm field constants only, not ldcs)
-  bool push_constant(ciConstant con, bool require_constant = false);
+  bool push_constant(ciConstant con, bool require_constant = false, bool is_autobox_cache = false);
 
   // implementation of object creation bytecodes
   void emit_guard_for_new(ciInstanceKlass* klass);
--- a/src/share/vm/opto/parse1.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/parse1.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -392,6 +392,7 @@
   _wrote_final = false;
   // Add MemBarRelease for constructors which write volatile field (PPC64).
   PPC64_ONLY(_wrote_volatile = false;)
+  _alloc_with_final = NULL;
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
   _block = NULL;
@@ -749,6 +750,8 @@
   // Note:  iophi and memphi are not transformed until do_exits.
   Node* iophi  = new (C) PhiNode(region, Type::ABIO);
   Node* memphi = new (C) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM);
+  gvn().set_type_bottom(iophi);
+  gvn().set_type_bottom(memphi);
   _exits.set_i_o(iophi);
   _exits.set_all_memory(memphi);
 
@@ -774,6 +777,7 @@
     }
     int         ret_size = type2size[ret_type->basic_type()];
     Node*       ret_phi  = new (C) PhiNode(region, ret_type);
+    gvn().set_type_bottom(ret_phi);
     _exits.ensure_stack(ret_size);
     assert((int)(tf()->range()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
     assert(method()->return_type()->size() == ret_size, "tf agrees w/ method");
@@ -960,7 +964,7 @@
     // such unusual early publications.  But no barrier is needed on
     // exceptional returns, since they cannot publish normally.
     //
-    _exits.insert_mem_bar(Op_MemBarRelease);
+    _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
 #ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name();
--- a/src/share/vm/opto/parse2.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/parse2.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -955,7 +955,7 @@
     uncommon_trap(Deoptimization::Reason_unreached,
                   Deoptimization::Action_reinterpret,
                   NULL, "cold");
-    if (EliminateAutoBox) {
+    if (C->eliminate_boxing()) {
       // Mark the successor blocks as parsed
       branch_block->next_path_num();
       next_block->next_path_num();
@@ -980,7 +980,7 @@
 
     if (stopped()) {            // Path is dead?
       explicit_null_checks_elided++;
-      if (EliminateAutoBox) {
+      if (C->eliminate_boxing()) {
         // Mark the successor block as parsed
         branch_block->next_path_num();
       }
@@ -1000,7 +1000,7 @@
 
   if (stopped()) {              // Path is dead?
     explicit_null_checks_elided++;
-    if (EliminateAutoBox) {
+    if (C->eliminate_boxing()) {
       // Mark the successor block as parsed
       next_block->next_path_num();
     }
@@ -1037,7 +1037,7 @@
     uncommon_trap(Deoptimization::Reason_unreached,
                   Deoptimization::Action_reinterpret,
                   NULL, "cold");
-    if (EliminateAutoBox) {
+    if (C->eliminate_boxing()) {
       // Mark the successor blocks as parsed
       branch_block->next_path_num();
       next_block->next_path_num();
@@ -1103,7 +1103,7 @@
     set_control(taken_branch);
 
     if (stopped()) {
-      if (EliminateAutoBox) {
+      if (C->eliminate_boxing()) {
         // Mark the successor block as parsed
         branch_block->next_path_num();
       }
@@ -1122,7 +1122,7 @@
 
   // Branch not taken.
   if (stopped()) {
-    if (EliminateAutoBox) {
+    if (C->eliminate_boxing()) {
       // Mark the successor block as parsed
       next_block->next_path_num();
     }
--- a/src/share/vm/opto/parse3.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/parse3.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -150,6 +150,23 @@
     // final field
     if (field->is_static()) {
       // final static field
+      if (C->eliminate_boxing()) {
+        // The pointers in the autobox arrays are always non-null.
+        ciSymbol* klass_name = field->holder()->name();
+        if (field->name() == ciSymbol::cache_field_name() &&
+            field->holder()->uses_default_loader() &&
+            (klass_name == ciSymbol::java_lang_Character_CharacterCache() ||
+             klass_name == ciSymbol::java_lang_Byte_ByteCache() ||
+             klass_name == ciSymbol::java_lang_Short_ShortCache() ||
+             klass_name == ciSymbol::java_lang_Integer_IntegerCache() ||
+             klass_name == ciSymbol::java_lang_Long_LongCache())) {
+          bool require_const = true;
+          bool autobox_cache = true;
+          if (push_constant(field->constant_value(), require_const, autobox_cache)) {
+            return;
+          }
+        }
+      }
       if (push_constant(field->constant_value()))
         return;
     }
@@ -315,6 +332,13 @@
   // out of the constructor.
   if (is_field && field->is_final()) {
     set_wrote_final(true);
+    // Preserve allocation ptr to create precedent edge to it in membar
+    // generated on exit from constructor.
+    if (C->eliminate_boxing() &&
+        adr_type->isa_oopptr() && adr_type->is_oopptr()->is_ptr_to_boxed_value() &&
+        AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
+      set_alloc_with_final(obj);
+    }
   }
 
 #ifdef PPC64
@@ -326,7 +350,7 @@
 }
 
 
-bool Parse::push_constant(ciConstant constant, bool require_constant) {
+bool Parse::push_constant(ciConstant constant, bool require_constant, bool is_autobox_cache) {
   switch (constant.basic_type()) {
   case T_BOOLEAN:  push( intcon(constant.as_boolean()) ); break;
   case T_INT:      push( intcon(constant.as_int())     ); break;
@@ -347,7 +371,7 @@
       push( zerocon(T_OBJECT) );
       break;
     } else if (require_constant || oop_constant->should_be_constant()) {
-      push( makecon(TypeOopPtr::make_from_constant(oop_constant, require_constant)) );
+      push( makecon(TypeOopPtr::make_from_constant(oop_constant, require_constant, is_autobox_cache)) );
       break;
     } else {
       // we cannot inline the oop, but we can use it later to narrow a type
--- a/src/share/vm/opto/parseHelper.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/parseHelper.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -284,6 +284,11 @@
        klass == C->env()->StringBuffer_klass())) {
     C->set_has_stringbuilder(true);
   }
+
+  // Keep track of boxed values for EliminateAutoBox optimizations.
+  if (C->eliminate_boxing() && klass->is_box_klass()) {
+    C->set_has_boxed_value(true);
+  }
 }
 
 #ifndef PRODUCT
--- a/src/share/vm/opto/phase.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/phase.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -64,6 +64,7 @@
 // Subtimers for _t_optimizer
 elapsedTimer   Phase::_t_iterGVN;
 elapsedTimer   Phase::_t_iterGVN2;
+elapsedTimer   Phase::_t_incrInline;
 
 // Subtimers for _t_registerAllocation
 elapsedTimer   Phase::_t_ctorChaitin;
@@ -111,6 +112,7 @@
       tty->print_cr ("      macroEliminate : %3.3f sec", Phase::_t_macroEliminate.seconds());
     }
     tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
+    tty->print_cr ("      incrInline     : %3.3f sec", Phase::_t_incrInline.seconds());
     tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
     tty->print_cr ("      idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
     tty->print_cr ("      ccp            : %3.3f sec", Phase::_t_ccp.seconds());
--- a/src/share/vm/opto/phase.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/phase.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -100,6 +100,7 @@
 // Subtimers for _t_optimizer
   static elapsedTimer   _t_iterGVN;
   static elapsedTimer   _t_iterGVN2;
+  static elapsedTimer   _t_incrInline;
 
 // Subtimers for _t_registerAllocation
   static elapsedTimer   _t_ctorChaitin;
--- a/src/share/vm/opto/phaseX.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/phaseX.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -882,7 +882,7 @@
       return;
     }
     Node *n  = _worklist.pop();
-    if (++loop_count >= K * C->unique()) {
+    if (++loop_count >= K * C->live_nodes()) {
       debug_only(n->dump(4);)
       assert(false, "infinite loop in PhaseIterGVN::optimize");
       C->record_method_not_compilable("infinite loop in PhaseIterGVN::optimize");
@@ -1676,15 +1676,15 @@
   bool method_name_not_printed = true;
 
   // Examine each basic block
-  for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) {
-    Block *block = _cfg._blocks[block_number];
+  for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) {
+    Block* block = _cfg.get_block(block_number);
     bool block_not_printed = true;
 
     // and each instruction within a block
-    uint end_index = block->_nodes.size();
+    uint end_index = block->number_of_nodes();
     // block->end_idx() not valid after PhaseRegAlloc
     for( uint instruction_index = 1; instruction_index < end_index; ++instruction_index ) {
-      Node     *n = block->_nodes.at(instruction_index);
+      Node     *n = block->get_node(instruction_index);
       if( n->is_Mach() ) {
         MachNode *m = n->as_Mach();
         int deleted_count = 0;
@@ -1706,7 +1706,7 @@
             }
             // Print instructions being deleted
             for( int i = (deleted_count - 1); i >= 0; --i ) {
-              block->_nodes.at(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
+              block->get_node(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
             }
             tty->print_cr("replaced with");
             // Print new instruction
@@ -1720,11 +1720,11 @@
           //  the node index to live range mappings.)
           uint safe_instruction_index = (instruction_index - deleted_count);
           for( ; (instruction_index > safe_instruction_index); --instruction_index ) {
-            block->_nodes.remove( instruction_index );
+            block->remove_node( instruction_index );
           }
           // install new node after safe_instruction_index
-          block->_nodes.insert( safe_instruction_index + 1, m2 );
-          end_index = block->_nodes.size() - 1; // Recompute new block size
+          block->insert_node(m2, safe_instruction_index + 1);
+          end_index = block->number_of_nodes() - 1; // Recompute new block size
           NOT_PRODUCT( inc_peepholes(); )
         }
       }
--- a/src/share/vm/opto/phasetype.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/phasetype.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,7 +31,6 @@
   PHASE_BEFORE_REMOVEUSELESS,
   PHASE_AFTER_PARSING,
   PHASE_ITER_GVN1,
-  PHASE_INCREMENTAL_INLINE,
   PHASE_PHASEIDEAL_BEFORE_EA,
   PHASE_ITER_GVN_AFTER_EA,
   PHASE_ITER_GVN_AFTER_ELIMINATION,
@@ -50,6 +49,8 @@
   PHASE_BEFORE_BEAUTIFY_LOOPS,
   PHASE_AFTER_BEAUTIFY_LOOPS,
   PHASE_BEFORE_MATCHING,
+  PHASE_INCREMENTAL_INLINE,
+  PHASE_INCREMENTAL_BOXING_INLINE,
   PHASE_END,
   PHASE_FAILURE,
 
@@ -65,7 +66,6 @@
       case PHASE_BEFORE_REMOVEUSELESS:       return "Before RemoveUseless";
       case PHASE_AFTER_PARSING:              return "After Parsing";
       case PHASE_ITER_GVN1:                  return "Iter GVN 1";
-      case PHASE_INCREMENTAL_INLINE:         return "Incremental Inline";
       case PHASE_PHASEIDEAL_BEFORE_EA:       return "PhaseIdealLoop before EA";
       case PHASE_ITER_GVN_AFTER_EA:          return "Iter GVN after EA";
       case PHASE_ITER_GVN_AFTER_ELIMINATION: return "Iter GVN after eliminating allocations and locks";
@@ -84,6 +84,8 @@
       case PHASE_BEFORE_BEAUTIFY_LOOPS:      return "Before beautify loops";
       case PHASE_AFTER_BEAUTIFY_LOOPS:       return "After beautify loops";
       case PHASE_BEFORE_MATCHING:            return "Before Matching";
+      case PHASE_INCREMENTAL_INLINE:         return "Incremental Inline";
+      case PHASE_INCREMENTAL_BOXING_INLINE:  return "Incremental Boxing Inline";
       case PHASE_END:                        return "End";
       case PHASE_FAILURE:                    return "Failure";
       default:
--- a/src/share/vm/opto/postaloc.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/postaloc.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -56,7 +56,7 @@
   int i;
   for( i=0; i < limit; i++ ) {
     if( def->is_Proj() && def->in(0)->is_Start() &&
-        _matcher.is_save_on_entry(lrgs(n2lidx(def)).reg()) )
+        _matcher.is_save_on_entry(lrgs(_lrg_map.live_range_id(def)).reg()))
       return true;              // Direct use of callee-save proj
     if( def->is_Copy() )        // Copies carry value through
       def = def->in(def->is_Copy());
@@ -78,12 +78,14 @@
 // Helper function for yank_if_dead
 int PhaseChaitin::yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
   int blk_adjust=0;
-  Block *oldb = _cfg._bbs[old->_idx];
+  Block *oldb = _cfg.get_block_for_node(old);
   oldb->find_remove(old);
   // Count 1 if deleting an instruction from the current block
-  if( oldb == current_block ) blk_adjust++;
-  _cfg._bbs.map(old->_idx,NULL);
-  OptoReg::Name old_reg = lrgs(n2lidx(old)).reg();
+  if (oldb == current_block) {
+    blk_adjust++;
+  }
+  _cfg.unmap_node_from_block(old);
+  OptoReg::Name old_reg = lrgs(_lrg_map.live_range_id(old)).reg();
   if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available?
     value->map(old_reg,NULL);  // Yank from value/regnd maps
     regnd->map(old_reg,NULL);  // This register's value is now unknown
@@ -167,7 +169,7 @@
   // Not every pair of physical registers are assignment compatible,
   // e.g. on sparc floating point registers are not assignable to integer
   // registers.
-  const LRG &def_lrg = lrgs(n2lidx(def));
+  const LRG &def_lrg = lrgs(_lrg_map.live_range_id(def));
   OptoReg::Name def_reg = def_lrg.reg();
   const RegMask &use_mask = n->in_RegMask(idx);
   bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
@@ -212,11 +214,12 @@
 // Skip through any number of copies (that don't mod oop-i-ness)
 Node *PhaseChaitin::skip_copies( Node *c ) {
   int idx = c->is_Copy();
-  uint is_oop = lrgs(n2lidx(c))._is_oop;
+  uint is_oop = lrgs(_lrg_map.live_range_id(c))._is_oop;
   while (idx != 0) {
     guarantee(c->in(idx) != NULL, "must not resurrect dead copy");
-    if (lrgs(n2lidx(c->in(idx)))._is_oop != is_oop)
+    if (lrgs(_lrg_map.live_range_id(c->in(idx)))._is_oop != is_oop) {
       break;  // casting copy, not the same value
+    }
     c = c->in(idx);
     idx = c->is_Copy();
   }
@@ -228,8 +231,8 @@
 int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs ) {
   int blk_adjust = 0;
 
-  uint nk_idx = n2lidx(n->in(k));
-  OptoReg::Name nk_reg = lrgs(nk_idx ).reg();
+  uint nk_idx = _lrg_map.live_range_id(n->in(k));
+  OptoReg::Name nk_reg = lrgs(nk_idx).reg();
 
   // Remove obvious same-register copies
   Node *x = n->in(k);
@@ -237,9 +240,13 @@
   while( (idx=x->is_Copy()) != 0 ) {
     Node *copy = x->in(idx);
     guarantee(copy != NULL, "must not resurrect dead copy");
-    if( lrgs(n2lidx(copy)).reg() != nk_reg ) break;
+    if(lrgs(_lrg_map.live_range_id(copy)).reg() != nk_reg) {
+      break;
+    }
     blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
-    if( n->in(k) != copy ) break; // Failed for some cutout?
+    if (n->in(k) != copy) {
+      break; // Failed for some cutout?
+    }
     x = copy;                   // Progress, try again
   }
 
@@ -256,10 +263,24 @@
   // intermediate copies might be illegal, i.e., value is stored down to stack
   // then reloaded BUT survives in a register the whole way.
   Node *val = skip_copies(n->in(k));
+
+  if (val == x && nk_idx != 0 &&
+      regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
+      _lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) {
+    // When rematerialzing nodes and stretching lifetimes, the
+    // allocator will reuse the original def for multidef LRG instead
+    // of the current reaching def because it can't know it's safe to
+    // do so.  After allocation completes if they are in the same LRG
+    // then it should use the current reaching def instead.
+    n->set_req(k, regnd[nk_reg]);
+    blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
+    val = skip_copies(n->in(k));
+  }
+
   if (val == x) return blk_adjust; // No progress?
 
   int n_regs = RegMask::num_registers(val->ideal_reg());
-  uint val_idx = n2lidx(val);
+  uint val_idx = _lrg_map.live_range_id(val);
   OptoReg::Name val_reg = lrgs(val_idx).reg();
 
   // See if it happens to already be in the correct register!
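
The block added to elide_copy() above handles the corner case its comment
describes: after rematerialization a use can still name the original def of a
multidef live range even though the register now holds a later def of the same
live range. A toy sketch of that "same live range id, so prefer the reaching
def" rewiring (types and names invented; the real code also yanks the now-dead
input via yank_if_dead() and then re-runs skip_copies()):

// Toy model only -- not part of the patch.
#include <cassert>
#include <cstddef>
#include <vector>

struct ToyNode { int lrg; };   // live range id, standing in for _lrg_map.live_range_id()

// 'regnd' maps a register number to the node whose value currently occupies it.
void prefer_reaching_def(std::vector<ToyNode*>& inputs, size_t k,
                         const std::vector<ToyNode*>& regnd, int reg) {
  ToyNode* cur      = inputs[k];
  ToyNode* reaching = regnd[reg];
  if (reaching != NULL && reaching != cur && cur->lrg == reaching->lrg) {
    inputs[k] = reaching;      // plays the role of n->set_req(k, regnd[nk_reg])
  }
}

int main() {
  ToyNode original_def = {7};
  ToyNode later_def    = {7};                        // same live range, different def
  std::vector<ToyNode*> inputs(1, &original_def);    // the use's input edge
  std::vector<ToyNode*> regnd(1, &later_def);        // register 0 holds later_def
  prefer_reaching_def(inputs, 0, regnd, 0);
  assert(inputs[0] == &later_def);
  return 0;
}
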
@@ -375,10 +396,10 @@
   ResourceMark rm;
   // Keep track of the defs seen in registers and collect their uses in the block.
   RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
-  for (uint i = 0; i < _cfg._num_blocks; i++) {
-    Block* block = _cfg._blocks[i];
-    for (uint j = 1; j < block->_nodes.size(); j++) {
-      Node* n = block->_nodes[j];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    for (uint j = 1; j < block->number_of_nodes(); j++) {
+      Node* n = block->get_node(j);
       if (n->is_Phi()) continue;
       for (uint k = 1; k < n->req(); k++) {
         j += possibly_merge_multidef(n, k, block, reg2defuse);
@@ -389,7 +410,7 @@
       // the base register only and ignore other effects of multi-register lrgs and fat projections.
       // It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
       // those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
-      uint lrg = n2lidx(n);
+      uint lrg = _lrg_map.live_range_id(n);
       if (lrg > 0 && lrgs(lrg).is_multidef()) {
         OptoReg::Name reg = lrgs(lrg).reg();
         reg2defuse.at(reg).clear();
@@ -405,12 +426,12 @@
 int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
   int blk_adjust = 0;
 
-  uint lrg = n2lidx(n->in(k));
+  uint lrg = _lrg_map.live_range_id(n->in(k));
   if (lrg > 0 && lrgs(lrg).is_multidef()) {
     OptoReg::Name reg = lrgs(lrg).reg();
 
     Node* def = reg2defuse.at(reg).def();
-    if (def != NULL && lrg == n2lidx(def) && def != n->in(k)) {
+    if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
       // Same lrg but different node, we have to merge.
       MachMergeNode* merge;
       if (def->is_MachMerge()) { // is it already a merge?
@@ -420,16 +441,16 @@
 
         // Insert the merge node into the block before the first use.
         uint use_index = block->find_node(reg2defuse.at(reg).first_use());
-        block->_nodes.insert(use_index++, merge);
+        block->insert_node(merge, use_index++);
 
         // Let the allocator know about the new node, use the same lrg
-        _names.extend(merge->_idx, lrg);
+        _lrg_map.extend(merge->_idx, lrg);
         blk_adjust++;
 
         // Fixup all the uses (there is at least one) that happened between the first
         // use and before the current one.
-        for (; use_index < block->_nodes.size(); use_index++) {
-          Node* use = block->_nodes[use_index];
+        for (; use_index < block->number_of_nodes(); use_index++) {
+          Node* use = block->get_node(use_index);
           if (use == n) {
             break;
           }
@@ -475,28 +496,29 @@
 
   // Need a mapping from basic block Node_Lists.  We need a Node_List to
   // map from register number to value-producing Node.
-  Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
-  memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+  Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
+  memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
   // Need a mapping from basic block Node_Lists.  We need a Node_List to
   // map from register number to register-defining Node.
-  Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
-  memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+  Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
+  memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
 
   // We keep unused Node_Lists on a free_list to avoid wasting
   // memory.
   GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
     uint j;
-    Block *b = _cfg._blocks[i];
+    Block* block = _cfg.get_block(i);
 
     // Count of Phis in block
     uint phi_dex;
-    for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) {
-      Node *phi = b->_nodes[phi_dex];
-      if( !phi->is_Phi() )
+    for (phi_dex = 1; phi_dex < block->number_of_nodes(); phi_dex++) {
+      Node* phi = block->get_node(phi_dex);
+      if (!phi->is_Phi()) {
         break;
+      }
     }
 
     // If any predecessor has not been visited, we do not know the state
@@ -504,21 +526,23 @@
     // along Phi input edges
     bool missing_some_inputs = false;
     Block *freed = NULL;
-    for( j = 1; j < b->num_preds(); j++ ) {
-      Block *pb = _cfg._bbs[b->pred(j)->_idx];
+    for (j = 1; j < block->num_preds(); j++) {
+      Block* pb = _cfg.get_block_for_node(block->pred(j));
       // Remove copies along phi edges
-      for( uint k=1; k<phi_dex; k++ )
-        elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false );
-      if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge?
+      for (uint k = 1; k < phi_dex; k++) {
+        elide_copy(block->get_node(k), j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false);
+      }
+      if (blk2value[pb->_pre_order]) { // Have a mapping on this edge?
         // See if this predecessor's mappings have been used by everybody
         // who wants them.  If so, free 'em.
         uint k;
-        for( k=0; k<pb->_num_succs; k++ ) {
-          Block *pbsucc = pb->_succs[k];
-          if( !blk2value[pbsucc->_pre_order] && pbsucc != b )
+        for (k = 0; k < pb->_num_succs; k++) {
+          Block* pbsucc = pb->_succs[k];
+          if (!blk2value[pbsucc->_pre_order] && pbsucc != block) {
             break;              // Found a future user
+          }
         }
-        if( k >= pb->_num_succs ) { // No more uses, free!
+        if (k >= pb->_num_succs) { // No more uses, free!
           freed = pb;           // Record last block freed
           free_list.push(blk2value[pb->_pre_order]);
           free_list.push(blk2regnd[pb->_pre_order]);
@@ -537,20 +561,20 @@
     value.map(_max_reg,NULL);
     regnd.map(_max_reg,NULL);
     // Set mappings as OUR mappings
-    blk2value[b->_pre_order] = &value;
-    blk2regnd[b->_pre_order] = &regnd;
+    blk2value[block->_pre_order] = &value;
+    blk2regnd[block->_pre_order] = &regnd;
 
     // Initialize value & regnd for this block
-    if( missing_some_inputs ) {
+    if (missing_some_inputs) {
       // Some predecessor has not yet been visited; zap map to empty
-      for( uint k = 0; k < (uint)_max_reg; k++ ) {
+      for (uint k = 0; k < (uint)_max_reg; k++) {
         value.map(k,NULL);
         regnd.map(k,NULL);
       }
     } else {
       if( !freed ) {            // Didn't get a freebie prior block
         // Must clone some data
-        freed = _cfg._bbs[b->pred(1)->_idx];
+        freed = _cfg.get_block_for_node(block->pred(1));
         Node_List &f_value = *blk2value[freed->_pre_order];
         Node_List &f_regnd = *blk2regnd[freed->_pre_order];
         for( uint k = 0; k < (uint)_max_reg; k++ ) {
@@ -559,9 +583,11 @@
         }
       }
       // Merge all inputs together, setting to NULL any conflicts.
-      for( j = 1; j < b->num_preds(); j++ ) {
-        Block *pb = _cfg._bbs[b->pred(j)->_idx];
-        if( pb == freed ) continue; // Did self already via freelist
+      for (j = 1; j < block->num_preds(); j++) {
+        Block* pb = _cfg.get_block_for_node(block->pred(j));
+        if (pb == freed) {
+          continue; // Did self already via freelist
+        }
         Node_List &p_regnd = *blk2regnd[pb->_pre_order];
         for( uint k = 0; k < (uint)_max_reg; k++ ) {
           if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
@@ -573,22 +599,23 @@
     }
 
     // For all Phi's
-    for( j = 1; j < phi_dex; j++ ) {
+    for (j = 1; j < phi_dex; j++) {
       uint k;
-      Node *phi = b->_nodes[j];
-      uint pidx = n2lidx(phi);
-      OptoReg::Name preg = lrgs(n2lidx(phi)).reg();
+      Node *phi = block->get_node(j);
+      uint pidx = _lrg_map.live_range_id(phi);
+      OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
 
       // Remove copies remaining on edges.  Check for junk phi.
       Node *u = NULL;
-      for( k=1; k<phi->req(); k++ ) {
+      for (k = 1; k < phi->req(); k++) {
         Node *x = phi->in(k);
         if( phi != x && u != x ) // Found a different input
           u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
       }
-      if( u != NodeSentinel ) {    // Junk Phi.  Remove
-        b->_nodes.remove(j--); phi_dex--;
-        _cfg._bbs.map(phi->_idx,NULL);
+      if (u != NodeSentinel) {    // Junk Phi.  Remove
+        block->remove_node(j--);
+        phi_dex--;
+        _cfg.unmap_node_from_block(phi);
         phi->replace_by(u);
         phi->disconnect_inputs(NULL, C);
         continue;
@@ -616,13 +643,13 @@
     }
 
     // For all remaining instructions
-    for( j = phi_dex; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for (j = phi_dex; j < block->number_of_nodes(); j++) {
+      Node* n = block->get_node(j);
 
-      if( n->outcnt() == 0 &&   // Dead?
-          n != C->top() &&      // (ignore TOP, it has no du info)
-          !n->is_Proj() ) {     // fat-proj kills
-        j -= yank_if_dead(n,b,&value,&regnd);
+      if(n->outcnt() == 0 &&   // Dead?
+         n != C->top() &&      // (ignore TOP, it has no du info)
+         !n->is_Proj() ) {     // fat-proj kills
+        j -= yank_if_dead(n, block, &value, &regnd);
         continue;
       }
 
@@ -632,10 +659,10 @@
       // alive and well at the use (or else the allocator fubar'd).  Take
       // advantage of this info to set a reaching def for the use-reg.
       uint k;
-      for( k = 1; k < n->req(); k++ ) {
+      for (k = 1; k < n->req(); k++) {
         Node *def = n->in(k);   // n->in(k) is a USE; def is the DEF for this USE
         guarantee(def != NULL, "no disconnected nodes at this point");
-        uint useidx = n2lidx(def); // useidx is the live range index for this USE
+        uint useidx = _lrg_map.live_range_id(def); // useidx is the live range index for this USE
 
         if( useidx ) {
           OptoReg::Name ureg = lrgs(useidx).reg();
@@ -643,7 +670,7 @@
             int idx;            // Skip occasional useless copy
             while( (idx=def->is_Copy()) != 0 &&
                    def->in(idx) != NULL &&  // NULL should not happen
-                   ureg == lrgs(n2lidx(def->in(idx))).reg() )
+                   ureg == lrgs(_lrg_map.live_range_id(def->in(idx))).reg())
               def = def->in(idx);
             Node *valdef = skip_copies(def); // tighten up val through non-useless copies
             value.map(ureg,valdef); // record improved reaching-def info
@@ -667,12 +694,15 @@
       const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
 
       // Remove copies along input edges
-      for( k = 1; k < n->req(); k++ )
-        j -= elide_copy( n, k, b, value, regnd, two_adr!=k );
+      for (k = 1; k < n->req(); k++) {
+        j -= elide_copy(n, k, block, value, regnd, two_adr != k);
+      }
 
       // Unallocated Nodes define no registers
-      uint lidx = n2lidx(n);
-      if( !lidx ) continue;
+      uint lidx = _lrg_map.live_range_id(n);
+      if (!lidx) {
+        continue;
+      }
 
       // Update the register defined by this instruction
       OptoReg::Name nreg = lrgs(lidx).reg();
@@ -697,8 +727,8 @@
         // then 'n' is a useless copy.  Do not update the register->node
         // mapping so 'n' will go dead.
         if( value[nreg] != val ) {
-          if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) {
-            j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+          if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) {
+            j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
           } else {
             // Update the mapping: record new Node defined by the register
             regnd.map(nreg,n);
@@ -707,8 +737,8 @@
             value.map(nreg,val);
           }
         } else if( !may_be_copy_of_callee(n) ) {
-          assert( n->is_Copy(), "" );
-          j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+          assert(n->is_Copy(), "");
+          j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
         }
       } else if (RegMask::is_vector(n_ideal_reg)) {
         // If Node 'n' does not change the value mapped by the register,
@@ -727,7 +757,7 @@
           }
         } else if (n->is_Copy()) {
           // Note: vector can't be constant and can't be copy of calee.
-          j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+          j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
         }
       } else {
         // If the value occupies a register pair, record same info
@@ -741,18 +771,18 @@
           tmp.Remove(nreg);
           nreg_lo = tmp.find_first_elem();
         }
-        if( value[nreg] != val || value[nreg_lo] != val ) {
-          if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) {
-            j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+        if (value[nreg] != val || value[nreg_lo] != val) {
+          if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) {
+            j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
           } else {
             regnd.map(nreg   , n );
             regnd.map(nreg_lo, n );
             value.map(nreg   ,val);
             value.map(nreg_lo,val);
           }
-        } else if( !may_be_copy_of_callee(n) ) {
-          assert( n->is_Copy(), "" );
-          j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+        } else if (!may_be_copy_of_callee(n)) {
+          assert(n->is_Copy(), "");
+          j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
         }
       }
 
--- a/src/share/vm/opto/reg_split.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/reg_split.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -103,17 +103,17 @@
 void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
   // Skip intervening ProjNodes.  Do not insert between a ProjNode and
   // its definer.
-  while( i < b->_nodes.size() &&
-         (b->_nodes[i]->is_Proj() ||
-          b->_nodes[i]->is_Phi() ) )
+  while( i < b->number_of_nodes() &&
+         (b->get_node(i)->is_Proj() ||
+          b->get_node(i)->is_Phi() ) )
     i++;
 
   // Do not insert between a call and his Catch
-  if( b->_nodes[i]->is_Catch() ) {
+  if( b->get_node(i)->is_Catch() ) {
     // Put the instruction at the top of the fall-thru block.
     // Find the fall-thru projection
     while( 1 ) {
-      const CatchProjNode *cp = b->_nodes[++i]->as_CatchProj();
+      const CatchProjNode *cp = b->get_node(++i)->as_CatchProj();
       if( cp->_con == CatchProjNode::fall_through_index )
         break;
     }
@@ -122,8 +122,8 @@
     i = 1;                      // Right at start of block
   }
 
-  b->_nodes.insert(i,spill);    // Insert node in block
-  _cfg._bbs.map(spill->_idx,b); // Update node->block mapping to reflect
+  b->insert_node(spill, i);    // Insert node in block
+  _cfg.map_node_to_block(spill,  b); // Update node->block mapping to reflect
   // Adjust the point where we go hi-pressure
   if( i <= b->_ihrp_index ) b->_ihrp_index++;
   if( i <= b->_fhrp_index ) b->_fhrp_index++;
@@ -151,9 +151,9 @@
   // (The implicit_null_check function ensures the use is also dominated
   // by the branch-not-taken block.)
   Node *be = b->end();
-  if( be->is_MachNullCheck() && be->in(1) == def && def == b->_nodes[loc] ) {
+  if( be->is_MachNullCheck() && be->in(1) == def && def == b->get_node(loc)) {
     // Spill goes in the branch-not-taken block
-    b = b->_succs[b->_nodes[b->end_idx()+1]->Opcode() == Op_IfTrue];
+    b = b->_succs[b->get_node(b->end_idx()+1)->Opcode() == Op_IfTrue];
     loc = 0;                    // Just past the Region
   }
   assert( loc >= 0, "must insert past block head" );
@@ -210,7 +210,7 @@
         use->set_req(useidx, def);
       } else {
         // Block and index where the use occurs.
-        Block *b = _cfg._bbs[use->_idx];
+        Block *b = _cfg.get_block_for_node(use);
         // Put the clone just prior to use
         int bindex = b->find_node(use);
         // DEF is UP, so must copy it DOWN and hook in USE
@@ -261,7 +261,7 @@
   int bindex;
   // Phi input spill-copys belong at the end of the prior block
   if( use->is_Phi() ) {
-    b = _cfg._bbs[b->pred(useidx)->_idx];
+    b = _cfg.get_block_for_node(b->pred(useidx));
     bindex = b->end_idx();
   } else {
     // Put the clone just prior to use
@@ -317,15 +317,15 @@
   if( def->req() > 1 ) {
     for( uint i = 1; i < def->req(); i++ ) {
       Node *in = def->in(i);
-      uint lidx = n2lidx(in);
+      uint lidx = _lrg_map.live_range_id(in);
       // We do not need this for live ranges that are only defined once.
       // However, this is not true for spill copies that are added in this
       // Split() pass, since they might get coalesced later on in this pass.
-      if (lidx < _maxlrg && lrgs(lidx).is_singledef()) {
-         continue;
-       }
+      if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_singledef()) {
+        continue;
+      }
 
-      Block *b_def = _cfg._bbs[def->_idx];
+      Block *b_def = _cfg.get_block_for_node(def);
       int idx_def = b_def->find_node(def);
       Node *in_spill = get_spillcopy_wide( in, def, i );
       if( !in_spill ) return 0; // Bailed out
@@ -347,26 +347,28 @@
   if( spill->req() > 1 ) {
     for( uint i = 1; i < spill->req(); i++ ) {
       Node *in = spill->in(i);
-      uint lidx = Find_id(in);
+      uint lidx = _lrg_map.find_id(in);
 
       // Walk backwards thru spill copy node intermediates
       if (walkThru) {
-        while ( in->is_SpillCopy() && lidx >= _maxlrg ) {
+        while (in->is_SpillCopy() && lidx >= _lrg_map.max_lrg_id()) {
           in = in->in(1);
-          lidx = Find_id(in);
+          lidx = _lrg_map.find_id(in);
         }
 
-        if (lidx < _maxlrg && lrgs(lidx).is_multidef()) {
+        if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_multidef()) {
           // walkThru found a multidef LRG, which is unsafe to use, so
           // just keep the original def used in the clone.
           in = spill->in(i);
-          lidx = Find_id(in);
+          lidx = _lrg_map.find_id(in);
         }
       }
 
-      if( lidx < _maxlrg && lrgs(lidx).reg() >= LRG::SPILL_REG ) {
+      if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).reg() >= LRG::SPILL_REG) {
         Node *rdef = Reachblock[lrg2reach[lidx]];
-        if( rdef ) spill->set_req(i,rdef);
+        if (rdef) {
+          spill->set_req(i, rdef);
+        }
       }
     }
   }
@@ -432,17 +434,25 @@
 //------------------------------prompt_use---------------------------------
 // True if lidx is used before any real register is def'd in the block
 bool PhaseChaitin::prompt_use( Block *b, uint lidx ) {
-  if( lrgs(lidx)._was_spilled2 ) return false;
+  if (lrgs(lidx)._was_spilled2) {
+    return false;
+  }
 
   // Scan block for 1st use.
   for( uint i = 1; i <= b->end_idx(); i++ ) {
-    Node *n = b->_nodes[i];
+    Node *n = b->get_node(i);
     // Ignore PHI use, these can be up or down
-    if( n->is_Phi() ) continue;
-    for( uint j = 1; j < n->req(); j++ )
-      if( Find_id(n->in(j)) == lidx )
+    if (n->is_Phi()) {
+      continue;
+    }
+    for (uint j = 1; j < n->req(); j++) {
+      if (_lrg_map.find_id(n->in(j)) == lidx) {
         return true;          // Found 1st use!
-    if( n->out_RegMask().is_NotEmpty() ) return false;
+      }
+    }
+    if (n->out_RegMask().is_NotEmpty()) {
+      return false;
+    }
   }
   return false;
 }
@@ -472,23 +482,23 @@
   bool                 u1, u2, u3;
   Block               *b, *pred;
   PhiNode             *phi;
-  GrowableArray<uint>  lidxs(split_arena, _maxlrg, 0, 0);
+  GrowableArray<uint>  lidxs(split_arena, maxlrg, 0, 0);
 
   // Array of counters to count splits per live range
-  GrowableArray<uint>  splits(split_arena, _maxlrg, 0, 0);
+  GrowableArray<uint>  splits(split_arena, maxlrg, 0, 0);
 
 #define NEW_SPLIT_ARRAY(type, size)\
   (type*) split_arena->allocate_bytes((size) * sizeof(type))
 
   //----------Setup Code----------
   // Create a convenient mapping from lrg numbers to reaches/leaves indices
-  uint *lrg2reach = NEW_SPLIT_ARRAY( uint, _maxlrg );
+  uint *lrg2reach = NEW_SPLIT_ARRAY(uint, maxlrg);
   // Keep track of DEFS & Phis for later passes
   defs = new Node_List();
   phis = new Node_List();
   // Gather info on which LRG's are spilling, and build maps
-  for( bidx = 1; bidx < _maxlrg; bidx++ ) {
-    if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
+  for (bidx = 1; bidx < maxlrg; bidx++) {
+    if (lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG) {
       assert(!lrgs(bidx).mask().is_AllStack(),"AllStack should color");
       lrg2reach[bidx] = spill_cnt;
       spill_cnt++;
@@ -514,13 +524,13 @@
   // a Def is UP or DOWN.  UP means that it should get a register (ie -
   // it is always in LRP regions), and DOWN means that it is probably
   // on the stack (ie - it crosses HRP regions).
-  Node ***Reaches     = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 );
-  bool  **UP          = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 );
+  Node ***Reaches     = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1);
+  bool  **UP          = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1);
   Node  **debug_defs  = NEW_SPLIT_ARRAY( Node*, spill_cnt );
   VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
 
   // Initialize Reaches & UP
-  for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
+  for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) {
     Reaches[bidx]     = NEW_SPLIT_ARRAY( Node*, spill_cnt );
     UP[bidx]          = NEW_SPLIT_ARRAY( bool, spill_cnt );
     Node **Reachblock = Reaches[bidx];
@@ -540,13 +550,13 @@
   //----------PASS 1----------
   //----------Propagation & Node Insertion Code----------
   // Walk the Blocks in RPO for DEF & USE info
-  for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
+  for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) {
 
     if (C->check_node_count(spill_cnt, out_of_nodes)) {
       return 0;
     }
 
-    b  = _cfg._blocks[bidx];
+    b  = _cfg.get_block(bidx);
     // Reaches & UP arrays for this block
     Reachblock = Reaches[b->_pre_order];
     UPblock    = UP[b->_pre_order];
@@ -574,7 +584,7 @@
         UPblock[slidx] = true;
         // Record following instruction in case 'n' rematerializes and
         // kills flags
-        Block *pred1 = _cfg._bbs[b->pred(1)->_idx];
+        Block *pred1 = _cfg.get_block_for_node(b->pred(1));
         continue;
       }
 
@@ -586,7 +596,7 @@
       // Grab predecessor block header
       n1 = b->pred(1);
       // Grab the appropriate reaching def info for inpidx
-      pred = _cfg._bbs[n1->_idx];
+      pred = _cfg.get_block_for_node(n1);
       pidx = pred->_pre_order;
       Node **Ltmp = Reaches[pidx];
       bool  *Utmp = UP[pidx];
@@ -601,7 +611,7 @@
         // Grab predecessor block headers
         n2 = b->pred(inpidx);
         // Grab the appropriate reaching def info for inpidx
-        pred = _cfg._bbs[n2->_idx];
+        pred = _cfg.get_block_for_node(n2);
         pidx = pred->_pre_order;
         Ltmp = Reaches[pidx];
         Utmp = UP[pidx];
@@ -627,7 +637,7 @@
 
       // check block for appropriate phinode & update edges
       for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
-        n1 = b->_nodes[insidx];
+        n1 = b->get_node(insidx);
         // bail if this is not a phi
         phi = n1->is_Phi() ? n1->as_Phi() : NULL;
         if( phi == NULL ) {
@@ -637,7 +647,7 @@
           break;
         }
         // must be looking at a phi
-        if( Find_id(n1) == lidxs.at(slidx) ) {
+        if (_lrg_map.find_id(n1) == lidxs.at(slidx)) {
           // found the necessary phi
           needs_phi = false;
           has_phi = true;
@@ -659,11 +669,11 @@
           Reachblock[slidx] = phi;
 
           // add node to block & node_to_block mapping
-          insert_proj( b, insidx++, phi, maxlrg++ );
+          insert_proj(b, insidx++, phi, maxlrg++);
           non_phi++;
           // Reset new phi's mapping to be the spilling live range
-          _names.map(phi->_idx, lidx);
-          assert(Find_id(phi) == lidx,"Bad update on Union-Find mapping");
+          _lrg_map.map(phi->_idx, lidx);
+          assert(_lrg_map.find_id(phi) == lidx, "Bad update on Union-Find mapping");
         }  // end if not found correct phi
         // Here you have either found or created the Phi, so record it
         assert(phi != NULL,"Must have a Phi Node here");
@@ -686,7 +696,7 @@
         // Grab predecessor block header
         n1 = b->pred(1);
         // Grab the appropriate reaching def info for k
-        pred = _cfg._bbs[n1->_idx];
+        pred = _cfg.get_block_for_node(n1);
         pidx = pred->_pre_order;
         Node **Ltmp = Reaches[pidx];
         bool  *Utmp = UP[pidx];
@@ -727,14 +737,14 @@
     //----------Walk Instructions in the Block and Split----------
     // For all non-phi instructions in the block
     for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
-      Node *n = b->_nodes[insidx];
+      Node *n = b->get_node(insidx);
       // Find the defining Node's live range index
-      uint defidx = Find_id(n);
+      uint defidx = _lrg_map.find_id(n);
       uint cnt = n->req();
 
-      if( n->is_Phi() ) {
+      if (n->is_Phi()) {
         // Skip phi nodes after removing dead copies.
-        if( defidx < _maxlrg ) {
+        if (defidx < _lrg_map.max_lrg_id()) {
           // Check for useless Phis.  These appear if we spill, then
           // coalesce away copies.  Dont touch Phis in spilling live
           // ranges; they are busy getting modifed in this pass.
@@ -752,11 +762,11 @@
               }
             }
             assert( u, "at least 1 valid input expected" );
-            if( i >= cnt ) {    // Found one unique input
-              assert(Find_id(n) == Find_id(u), "should be the same lrg");
+            if (i >= cnt) {    // Found one unique input
+              assert(_lrg_map.find_id(n) == _lrg_map.find_id(u), "should be the same lrg");
               n->replace_by(u); // Then replace with unique input
               n->disconnect_inputs(NULL, C);
-              b->_nodes.remove(insidx);
+              b->remove_node(insidx);
               insidx--;
               b->_ihrp_index--;
               b->_fhrp_index--;
@@ -769,12 +779,12 @@
               (b->_reg_pressure < (uint)INTPRESSURE) ||
               b->_ihrp_index > 4000000 ||
               b->_ihrp_index >= b->end_idx() ||
-              !b->_nodes[b->_ihrp_index]->is_Proj(), "" );
+              !b->get_node(b->_ihrp_index)->is_Proj(), "" );
       assert( insidx > b->_fhrp_index ||
               (b->_freg_pressure < (uint)FLOATPRESSURE) ||
               b->_fhrp_index > 4000000 ||
               b->_fhrp_index >= b->end_idx() ||
-              !b->_nodes[b->_fhrp_index]->is_Proj(), "" );
+              !b->get_node(b->_fhrp_index)->is_Proj(), "" );
 
       // ********** Handle Crossing HRP Boundry **********
       if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
@@ -799,18 +809,26 @@
                 // Insert point is just past last use or def in the block
                 int insert_point = insidx-1;
                 while( insert_point > 0 ) {
-                  Node *n = b->_nodes[insert_point];
+                  Node *n = b->get_node(insert_point);
                   // Hit top of block?  Quit going backwards
-                  if( n->is_Phi() ) break;
+                  if (n->is_Phi()) {
+                    break;
+                  }
                   // Found a def?  Better split after it.
-                  if( n2lidx(n) == lidx ) break;
+                  if (_lrg_map.live_range_id(n) == lidx) {
+                    break;
+                  }
                   // Look for a use
                   uint i;
-                  for( i = 1; i < n->req(); i++ )
-                    if( n2lidx(n->in(i)) == lidx )
+                  for( i = 1; i < n->req(); i++ ) {
+                    if (_lrg_map.live_range_id(n->in(i)) == lidx) {
                       break;
+                    }
+                  }
                   // Found a use?  Better split after it.
-                  if( i < n->req() ) break;
+                  if (i < n->req()) {
+                    break;
+                  }
                   insert_point--;
                 }
                 uint orig_eidx = b->end_idx();
@@ -820,8 +838,9 @@
                   return 0;
                 }
                 // Spill of NULL check mem op goes into the following block.
-                if (b->end_idx() > orig_eidx)
+                if (b->end_idx() > orig_eidx) {
                   insidx++;
+                }
               }
               // This is a new DEF, so update UP
               UPblock[slidx] = false;
@@ -836,20 +855,20 @@
             }
           }  // end if LRG is UP
         }  // end for all spilling live ranges
-        assert( b->_nodes[insidx] == n, "got insidx set incorrectly" );
+        assert( b->get_node(insidx) == n, "got insidx set incorrectly" );
       }  // end if crossing HRP Boundry
 
       // If the LRG index is oob, then this is a new spillcopy, skip it.
-      if( defidx >= _maxlrg ) {
+      if (defidx >= _lrg_map.max_lrg_id()) {
         continue;
       }
       LRG &deflrg = lrgs(defidx);
       uint copyidx = n->is_Copy();
       // Remove coalesced copy from CFG
-      if( copyidx && defidx == n2lidx(n->in(copyidx)) ) {
+      if (copyidx && defidx == _lrg_map.live_range_id(n->in(copyidx))) {
         n->replace_by( n->in(copyidx) );
         n->set_req( copyidx, NULL );
-        b->_nodes.remove(insidx--);
+        b->remove_node(insidx--);
         b->_ihrp_index--; // Adjust the point where we go hi-pressure
         b->_fhrp_index--;
         continue;
@@ -872,13 +891,13 @@
           // If inpidx > old_last, then one of these new inputs is being
           // handled. Skip the derived part of the pair, but process
           // the base like any other input.
-          if( inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED ) {
+          if (inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED) {
             continue;  // skip derived_debug added below
           }
           // Get lidx of input
-          uint useidx = Find_id(n->in(inpidx));
+          uint useidx = _lrg_map.find_id(n->in(inpidx));
           // Not a brand-new split, and it is a spill use
-          if( useidx < _maxlrg && lrgs(useidx).reg() >= LRG::SPILL_REG ) {
+          if (useidx < _lrg_map.max_lrg_id() && lrgs(useidx).reg() >= LRG::SPILL_REG) {
             // Check for valid reaching DEF
             slidx = lrg2reach[useidx];
             Node *def = Reachblock[slidx];
@@ -894,8 +913,8 @@
               if (def == NULL || C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
                 return 0;
               }
-              _names.extend(def->_idx,0);
-              _cfg._bbs.map(def->_idx,b);
+              _lrg_map.extend(def->_idx, 0);
+              _cfg.map_node_to_block(def, b);
               n->set_req(inpidx, def);
               continue;
             }
@@ -903,10 +922,10 @@
             // Rematerializable?  Then clone def at use site instead
             // of store/load
             if( def->rematerialize() ) {
-              int old_size = b->_nodes.size();
+              int old_size = b->number_of_nodes();
               def = split_Rematerialize( def, b, insidx, maxlrg, splits, slidx, lrg2reach, Reachblock, true );
               if( !def ) return 0; // Bail out
-              insidx += b->_nodes.size()-old_size;
+              insidx += b->number_of_nodes()-old_size;
             }
 
             MachNode *mach = n->is_Mach() ? n->as_Mach() : NULL;
@@ -1194,10 +1213,10 @@
       // ********** Split Left Over Mem-Mem Moves **********
       // Check for mem-mem copies and split them now.  Do not do this
       // to copies about to be spilled; they will be Split shortly.
-      if( copyidx ) {
+      if (copyidx) {
         Node *use = n->in(copyidx);
-        uint useidx = Find_id(use);
-        if( useidx < _maxlrg &&       // This is not a new split
+        uint useidx = _lrg_map.find_id(use);
+        if (useidx < _lrg_map.max_lrg_id() &&       // This is not a new split
             OptoReg::is_stack(deflrg.reg()) &&
             deflrg.reg() < LRG::SPILL_REG ) { // And DEF is from stack
           LRG &uselrg = lrgs(useidx);
@@ -1236,7 +1255,7 @@
         uint member;
         IndexSetIterator isi(liveout);
         while ((member = isi.next()) != 0) {
-          assert(defidx != Find_const(member), "Live out member has not been compressed");
+          assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed");
         }
 #endif
         Reachblock[slidx] = NULL;
@@ -1267,9 +1286,9 @@
   for( insidx = 0; insidx < phis->size(); insidx++ ) {
     Node *phi = phis->at(insidx);
     assert(phi->is_Phi(),"This list must only contain Phi Nodes");
-    Block *b = _cfg._bbs[phi->_idx];
+    Block *b = _cfg.get_block_for_node(phi);
     // Grab the live range number
-    uint lidx = Find_id(phi);
+    uint lidx = _lrg_map.find_id(phi);
     uint slidx = lrg2reach[lidx];
     // Update node to lidx map
     new_lrg(phi, maxlrg++);
@@ -1291,7 +1310,7 @@
     // DEF has the wrong UP/DOWN value.
     for( uint i = 1; i < b->num_preds(); i++ ) {
       // Get predecessor block pre-order number
-      Block *pred = _cfg._bbs[b->pred(i)->_idx];
+      Block *pred = _cfg.get_block_for_node(b->pred(i));
       pidx = pred->_pre_order;
       // Grab reaching def
       Node *def = Reaches[pidx][slidx];
@@ -1303,12 +1322,14 @@
         // so look at the node before it.
         int insert = pred->end_idx();
         while (insert >= 1 &&
-               pred->_nodes[insert - 1]->is_SpillCopy() &&
-               Find(pred->_nodes[insert - 1]) >= lrgs_before_phi_split) {
+               pred->get_node(insert - 1)->is_SpillCopy() &&
+               _lrg_map.find(pred->get_node(insert - 1)) >= lrgs_before_phi_split) {
           insert--;
         }
-        def = split_Rematerialize( def, pred, insert, maxlrg, splits, slidx, lrg2reach, Reachblock, false );
-        if( !def ) return 0;    // Bail out
+        def = split_Rematerialize(def, pred, insert, maxlrg, splits, slidx, lrg2reach, Reachblock, false);
+        if (!def) {
+          return 0;    // Bail out
+        }
       }
       // Update the Phi's input edge array
       phi->set_req(i,def);
@@ -1324,7 +1345,7 @@
     }  // End for all inputs to the Phi
   }  // End for all Phi Nodes
   // Update _maxlrg to save Union asserts
-  _maxlrg = maxlrg;
+  _lrg_map.set_max_lrg_id(maxlrg);
 
 
   //----------PASS 3----------
@@ -1336,47 +1357,51 @@
     for( uint i = 1; i < phi->req(); i++ ) {
       // Grab the input node
       Node *n = phi->in(i);
-      assert( n, "" );
-      uint lidx = Find(n);
-      uint pidx = Find(phi);
-      if( lidx < pidx )
+      assert(n, "node should exist");
+      uint lidx = _lrg_map.find(n);
+      uint pidx = _lrg_map.find(phi);
+      if (lidx < pidx) {
         Union(n, phi);
-      else if( lidx > pidx )
+      }
+      else if(lidx > pidx) {
         Union(phi, n);
+      }
     }  // End for all inputs to the Phi Node
   }  // End for all Phi Nodes
   // Now union all two address instructions
-  for( insidx = 0; insidx < defs->size(); insidx++ ) {
+  for (insidx = 0; insidx < defs->size(); insidx++) {
     // Grab the def
     n1 = defs->at(insidx);
     // Set new lidx for DEF & handle 2-addr instructions
-    if( n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0) ) {
-      assert( Find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
+    if (n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0)) {
+      assert(_lrg_map.find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
       // Union the input and output live ranges
-      uint lr1 = Find(n1);
-      uint lr2 = Find(n1->in(twoidx));
-      if( lr1 < lr2 )
+      uint lr1 = _lrg_map.find(n1);
+      uint lr2 = _lrg_map.find(n1->in(twoidx));
+      if (lr1 < lr2) {
         Union(n1, n1->in(twoidx));
-      else if( lr1 > lr2 )
+      }
+      else if (lr1 > lr2) {
         Union(n1->in(twoidx), n1);
+      }
     }  // End if two address
   }  // End for all defs
   // DEBUG
 #ifdef ASSERT
   // Validate all live range index assignments
-  for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
-    b  = _cfg._blocks[bidx];
-    for( insidx = 0; insidx <= b->end_idx(); insidx++ ) {
-      Node *n = b->_nodes[insidx];
-      uint defidx = Find(n);
-      assert(defidx < _maxlrg,"Bad live range index in Split");
+  for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) {
+    b  = _cfg.get_block(bidx);
+    for (insidx = 0; insidx <= b->end_idx(); insidx++) {
+      Node *n = b->get_node(insidx);
+      uint defidx = _lrg_map.find(n);
+      assert(defidx < _lrg_map.max_lrg_id(), "Bad live range index in Split");
       assert(defidx < maxlrg,"Bad live range index in Split");
     }
   }
   // Issue a warning if splitting made no progress
   int noprogress = 0;
-  for( slidx = 0; slidx < spill_cnt; slidx++ ) {
-    if( PrintOpto && WizardMode && splits.at(slidx) == 0 ) {
+  for (slidx = 0; slidx < spill_cnt; slidx++) {
+    if (PrintOpto && WizardMode && splits.at(slidx) == 0) {
       tty->print_cr("Failed to split live range %d", lidxs.at(slidx));
       //BREAKPOINT;
     }
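Split() above keeps every node mapped to a canonical live-range id through _lrg_map.find() and Union() while it introduces spill copies and new phis: phi inputs are unioned with the phi, and the def of a two-address instruction is unioned with its tied input. A minimal standalone union-find sketch of that idea (LrgUnionFind and union_into are illustrative names, not the HotSpot API):

// Standalone sketch, not HotSpot code: union-find over live range ids.
#include <cstdio>
#include <vector>

struct LrgUnionFind {
  std::vector<unsigned> parent;
  explicit LrgUnionFind(unsigned n) : parent(n) {
    for (unsigned i = 0; i < n; i++) parent[i] = i;
  }
  unsigned find(unsigned x) {                        // path-halving find
    while (parent[x] != x) {
      parent[x] = parent[parent[x]];
      x = parent[x];
    }
    return x;
  }
  void union_into(unsigned child, unsigned root) {   // like Union(n, phi)
    parent[find(child)] = find(root);
  }
};

int main() {
  LrgUnionFind lrgs(8);
  // A phi in live range 2 with inputs in live ranges 5 and 6:
  lrgs.union_into(5, 2);
  lrgs.union_into(6, 2);
  // A two-address instruction whose def (7) must share its input's range:
  lrgs.union_into(7, 5);
  printf("%u %u %u\n", lrgs.find(5), lrgs.find(6), lrgs.find(7));   // 2 2 2
  return 0;
}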
--- a/src/share/vm/opto/regalloc.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/regalloc.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -113,7 +113,7 @@
   OptoReg::Name offset2reg( int stk_offset ) const;
 
   // Get the register encoding associated with the Node
-  int get_encode( const Node *n ) const {
+  int get_encode(const Node *n) const {
     assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
     OptoReg::Name first = _node_regs[n->_idx].first();
     OptoReg::Name second = _node_regs[n->_idx].second();
@@ -122,15 +122,6 @@
     return Matcher::_regEncode[first];
   }
 
-  // Platform dependent hook for actions prior to allocation
-  void  pd_preallocate_hook();
-
-#ifdef ASSERT
-  // Platform dependent hook for verification after allocation.  Will
-  // only get called when compiling with asserts.
-  void  pd_postallocate_verify_hook();
-#endif
-
 #ifndef PRODUCT
   static int _total_framesize;
   static int _max_framesize;
--- a/src/share/vm/opto/regmask.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/regmask.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
--- a/src/share/vm/opto/regmask.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/regmask.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/adGlobals_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/adGlobals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/adGlobals_sparc.hpp"
 #endif
--- a/src/share/vm/opto/runtime.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/runtime.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -74,6 +74,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
@@ -1328,9 +1331,9 @@
   }
   NamedCounter* c;
   if (tag == NamedCounter::BiasedLockingCounter) {
-    c = new BiasedLockingNamedCounter(strdup(st.as_string()));
+    c = new BiasedLockingNamedCounter(st.as_string());
   } else {
-    c = new NamedCounter(strdup(st.as_string()), tag);
+    c = new NamedCounter(st.as_string(), tag);
   }
 
   // atomically add the new counter to the head of the list.  We only
--- a/src/share/vm/opto/runtime.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/runtime.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -72,11 +72,17 @@
 
  public:
   NamedCounter(const char *n, CounterTag tag = NoTag):
-    _name(n),
+    _name(n == NULL ? NULL : os::strdup(n)),
     _count(0),
     _next(NULL),
     _tag(tag) {}
 
+  ~NamedCounter() {
+    if (_name != NULL) {
+      os::free((void*)_name);
+    }
+  }
+
   const char * name() const     { return _name; }
   int count() const             { return _count; }
   address addr()                { return (address)&_count; }
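The runtime.cpp/runtime.hpp pair above moves name ownership into NamedCounter itself: the constructor now os::strdup()s the string it is given and the new destructor frees it, so the call sites stop strdup()ing the resource-arena string themselves. A standalone sketch of the same ownership pattern (NamedThing is an illustrative class, not the HotSpot type; copy operations are omitted for brevity):

// Standalone sketch, not HotSpot code: the object owns a private copy of
// the name passed to its constructor and releases it in its destructor.
#include <cstdio>
#include <cstring>

class NamedThing {
 private:
  char* _name;   // owned copy, freed in the destructor
 public:
  explicit NamedThing(const char* n) : _name(NULL) {
    if (n != NULL) {
      _name = new char[strlen(n) + 1];
      strcpy(_name, n);
    }
  }
  ~NamedThing() { delete[] _name; }
  const char* name() const { return _name; }
};

int main() {
  char transient[32];
  snprintf(transient, sizeof(transient), "counter-%d", 42);
  NamedThing c(transient);    // safe even after 'transient' is overwritten
  snprintf(transient, sizeof(transient), "something else");
  printf("%s\n", c.name());   // still prints counter-42
  return 0;
}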
--- a/src/share/vm/opto/stringopts.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/stringopts.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1487,6 +1487,12 @@
       kit.store_String_length(kit.control(), result, length);
     }
     kit.store_String_value(kit.control(), result, char_array);
+
+    // The value field is final. Emit a barrier here to ensure that the effect
+    // of the initialization is committed to memory before any code publishes
+    // a reference to the newly constructed object (see Parse::do_exits()).
+    assert(AllocateNode::Ideal_allocation(result, _gvn) != NULL, "should be newly allocated");
+    kit.insert_mem_bar(Op_MemBarRelease, result);
   } else {
     result = C->top();
   }
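The stringopts.cpp hunk emits a MemBarRelease after the final String value field is stored, so the initializing stores cannot be reordered past the point where a reference to the new String escapes to other threads. The C++11 sketch below (not HotSpot code) shows the same publication discipline using release/acquire ordering on an atomic pointer:

// Standalone C++11 sketch, not HotSpot code: initialize the object fully,
// then publish the pointer with release ordering; readers that load the
// pointer with acquire ordering are guaranteed to see the initialization.
#include <atomic>
#include <cstdio>
#include <thread>

struct Payload {
  int value;
};

static std::atomic<Payload*> published(nullptr);

static void producer() {
  Payload* p = new Payload();
  p->value = 123;                                  // the "final field" store
  published.store(p, std::memory_order_release);   // publication barrier
}

static void consumer() {
  Payload* p;
  while ((p = published.load(std::memory_order_acquire)) == nullptr) {
    // spin until the object is published
  }
  printf("%d\n", p->value);                        // always prints 123
  delete p;
}

int main() {
  std::thread t1(producer);
  std::thread t2(consumer);
  t1.join();
  t2.join();
  return 0;
}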
--- a/src/share/vm/opto/subnode.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/subnode.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -242,8 +242,8 @@
 const Type *SubINode::sub( const Type *t1, const Type *t2 ) const {
   const TypeInt *r0 = t1->is_int(); // Handy access
   const TypeInt *r1 = t2->is_int();
-  int32 lo = r0->_lo - r1->_hi;
-  int32 hi = r0->_hi - r1->_lo;
+  int32 lo = java_subtract(r0->_lo, r1->_hi);
+  int32 hi = java_subtract(r0->_hi, r1->_lo);
 
   // We next check for 32-bit overflow.
   // If that happens, we just assume all integers are possible.
@@ -351,8 +351,8 @@
 const Type *SubLNode::sub( const Type *t1, const Type *t2 ) const {
   const TypeLong *r0 = t1->is_long(); // Handy access
   const TypeLong *r1 = t2->is_long();
-  jlong lo = r0->_lo - r1->_hi;
-  jlong hi = r0->_hi - r1->_lo;
+  jlong lo = java_subtract(r0->_lo, r1->_hi);
+  jlong hi = java_subtract(r0->_hi, r1->_lo);
 
   // We next check for 32-bit overflow.
   // If that happens, we just assume all integers are possible.
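Both SubINode::sub and SubLNode::sub now compute the new bounds with java_subtract() instead of raw signed subtraction: overflowing a signed int or jlong is undefined behaviour in C++, while the lattice code only needs Java's well-defined wrap-around result (which the following overflow check then detects). A standalone sketch of the technique (wrapping_sub_i32/i64 are illustrative helpers, not the HotSpot functions; the cast back to the signed type is implementation-defined before C++20 but two's-complement on the platforms involved):

// Standalone sketch, not HotSpot code: do the arithmetic on the unsigned
// type, where wrap-around is well defined, then convert back.
#include <cstdint>
#include <cstdio>

static int32_t wrapping_sub_i32(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a - (uint32_t)b);
}

static int64_t wrapping_sub_i64(int64_t a, int64_t b) {
  return (int64_t)((uint64_t)a - (uint64_t)b);
}

int main() {
  // INT32_MIN - 1 overflows a plain signed subtraction; the wrapping
  // version yields the two's-complement result INT32_MAX.
  printf("%d\n", wrapping_sub_i32(INT32_MIN, 1));
  printf("%lld\n", (long long)wrapping_sub_i64(INT64_MIN, 1));
  return 0;
}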
--- a/src/share/vm/opto/type.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/type.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1238,8 +1238,8 @@
 
   // The new type narrows the old type, so look for a "death march".
   // See comments on PhaseTransform::saturate.
-  juint nrange = _hi - _lo;
-  juint orange = ohi - olo;
+  juint nrange = (juint)_hi - _lo;
+  juint orange = (juint)ohi - olo;
   if (nrange < max_juint - 1 && nrange > (orange >> 1) + (SMALLINT*2)) {
     // Use the new type only if the range shrinks a lot.
     // We do not want the optimizer computing 2^31 point by point.
@@ -1272,7 +1272,7 @@
 //------------------------------hash-------------------------------------------
 // Type-specific hashing function.
 int TypeInt::hash(void) const {
-  return _lo+_hi+_widen+(int)Type::Int;
+  return java_add(java_add(_lo, _hi), java_add(_widen, (int)Type::Int));
 }
 
 //------------------------------is_finite--------------------------------------
@@ -1450,7 +1450,7 @@
         // If neither endpoint is extremal yet, push out the endpoint
         // which is closer to its respective limit.
         if (_lo >= 0 ||                 // easy common case
-            (julong)(_lo - min) >= (julong)(max - _hi)) {
+            ((julong)_lo - min) >= ((julong)max - _hi)) {
           // Try to widen to an unsigned range type of 32/63 bits:
           if (max >= max_juint && _hi < max_juint)
             return make(_lo, max_juint, WidenMax);
@@ -2202,7 +2202,7 @@
 //------------------------------hash-------------------------------------------
 // Type-specific hashing function.
 int TypePtr::hash(void) const {
-  return _ptr + _offset;
+  return java_add(_ptr, _offset);
 }
 
 //------------------------------dump2------------------------------------------
@@ -2375,7 +2375,12 @@
     _const_oop(o), _klass(k),
     _klass_is_exact(xk),
     _is_ptr_to_narrowoop(false),
+    _is_ptr_to_boxed_value(false),
     _instance_id(instance_id) {
+  if (Compile::current()->eliminate_boxing() && (t == InstPtr) &&
+      (offset > 0) && xk && (k != 0) && k->is_instance_klass()) {
+    _is_ptr_to_boxed_value = k->as_instance_klass()->is_boxed_value_offset(offset);
+  }
 #ifdef _LP64
   if (UseCompressedOops && _offset != 0) {
     if (klass() == NULL) {
@@ -2615,7 +2620,9 @@
 
 //------------------------------make_from_constant-----------------------------
 // Make a java pointer from an oop constant
-const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o, bool require_constant) {
+const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o,
+                                                 bool require_constant,
+                                                 bool is_autobox_cache) {
   if (o->is_method_data() || o->is_method()) {
     // Treat much like a typeArray of bytes, like below, but fake the type...
     const BasicType bt = T_BYTE;
@@ -2646,7 +2653,12 @@
       return TypeInstPtr::make(o);
     } else if (klass->is_obj_array_klass()) {
       // Element is an object array. Recursively call ourself.
-      const Type *etype = make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
+      const TypeOopPtr *etype =
+	TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
+      if (is_autobox_cache) {
+	  // The pointers in the autobox arrays are always non-null.
+	  etype = etype->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr();
+      }
       const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
       // We used to pass NotNull in here, asserting that the sub-arrays
       // are all not-null.  This is not true in generally, as code can
@@ -2656,7 +2668,7 @@
       } else if (!o->should_be_constant()) {
         return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
       }
-      return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+      return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, is_autobox_cache);
     } else if (klass->is_type_array_klass()) {
       // Element is an typeArray
       const Type* etype = get_const_basic_type(klass->as_type_array_klass()->element_type());
@@ -2772,6 +2784,8 @@
     _klass_is_exact +
     _instance_id +
     TypePtr::hash();
+    java_add(java_add(const_oop() ? const_oop()->hash() : 0, _klass_is_exact),
+             java_add(_instance_id, TypePtr::hash()));
 }
 
 //------------------------------dump2------------------------------------------
@@ -2874,6 +2888,28 @@
   return result;
 }
 
+/**
+ *  Create constant type for a constant boxed value
+ */
+const Type* TypeInstPtr::get_const_boxed_value() const {
+  assert(is_ptr_to_boxed_value(), "should be called only for boxed value");
+  assert((const_oop() != NULL), "should be called only for constant object");
+  ciConstant constant = const_oop()->as_instance()->field_value_by_offset(offset());
+  BasicType bt = constant.basic_type();
+  switch (bt) {
+    case T_BOOLEAN:  return TypeInt::make(constant.as_boolean());
+    case T_INT:      return TypeInt::make(constant.as_int());
+    case T_CHAR:     return TypeInt::make(constant.as_char());
+    case T_BYTE:     return TypeInt::make(constant.as_byte());
+    case T_SHORT:    return TypeInt::make(constant.as_short());
+    case T_FLOAT:    return TypeF::make(constant.as_float());
+    case T_DOUBLE:   return TypeD::make(constant.as_double());
+    case T_LONG:     return TypeLong::make(constant.as_long());
+    default:         break;
+  }
+  fatal(err_msg_res("Invalid boxed value type '%s'", type2name(bt)));
+  return NULL;
+}
 
 //------------------------------cast_to_ptr_type-------------------------------
 const Type *TypeInstPtr::cast_to_ptr_type(PTR ptr) const {
@@ -3279,7 +3315,7 @@
 //------------------------------hash-------------------------------------------
 // Type-specific hashing function.
 int TypeInstPtr::hash(void) const {
-  int hash = klass()->hash() + TypeOopPtr::hash();
+  int hash = java_add(klass()->hash(), TypeOopPtr::hash());
   return hash;
 }
 
@@ -3348,18 +3384,18 @@
   if (!xk)  xk = ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false))->hashcons();
 }
 
 //------------------------------make-------------------------------------------
-const TypeAryPtr *TypeAryPtr::make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) {
+const TypeAryPtr *TypeAryPtr::make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, bool is_autobox_cache) {
   assert(!(k == NULL && ary->_elem->isa_int()),
          "integral arrays must be pre-equipped with a class");
   assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" );
   if (!xk)  xk = (o != NULL) || ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache))->hashcons();
 }
 
 //------------------------------cast_to_ptr_type-------------------------------
@@ -3414,8 +3450,20 @@
   jint max_hi = max_array_length(elem()->basic_type());
   //if (index_not_size)  --max_hi;     // type of a valid array index, FTR
   bool chg = false;
-  if (lo < min_lo) { lo = min_lo; chg = true; }
-  if (hi > max_hi) { hi = max_hi; chg = true; }
+  if (lo < min_lo) {
+    lo = min_lo;
+    if (size->is_con()) {
+      hi = lo;
+    }
+    chg = true;
+  }
+  if (hi > max_hi) {
+    hi = max_hi;
+    if (size->is_con()) {
+      lo = hi;
+    }
+    chg = true;
+  }
   // Negative length arrays will produce weird intermediate dead fast-path code
   if (lo > hi)
     return TypeInt::ZERO;
@@ -3648,7 +3696,7 @@
 //------------------------------xdual------------------------------------------
 // Dual: compute field-by-field dual
 const Type *TypeAryPtr::xdual() const {
-  return new TypeAryPtr( dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id() );
+  return new TypeAryPtr( dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache() );
 }
 
 //----------------------interface_vs_oop---------------------------------------
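Two themes run through the type.cpp hunks above: the hash functions and bound computations switch to java_add() and unsigned arithmetic so that signed overflow cannot occur, and narrow_size_type() now keeps a constant array length constant when it has to be clamped (previously a constant outside the valid range could widen into a full interval). A standalone sketch of the clamping rule (IntRange and clamp_array_size are illustrative, not the HotSpot types):

// Standalone sketch, not HotSpot code: clamp a [lo, hi] array-length range
// while preserving the "is a constant" property of the input.
#include <cstdio>

struct IntRange {
  int lo;
  int hi;
};

static IntRange clamp_array_size(IntRange r, int min_lo, int max_hi) {
  bool is_con = (r.lo == r.hi);          // single-value (constant) range?
  if (r.lo < min_lo) {
    r.lo = min_lo;
    if (is_con) r.hi = r.lo;             // keep a constant a constant
  }
  if (r.hi > max_hi) {
    r.hi = max_hi;
    if (is_con) r.lo = r.hi;
  }
  return r;
}

int main() {
  IntRange neg = { -5, -5 };             // impossible constant length
  IntRange c = clamp_array_size(neg, 0, 1 << 28);
  printf("[%d, %d]\n", c.lo, c.hi);      // stays a constant: [0, 0]
  return 0;
}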
--- a/src/share/vm/opto/type.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/opto/type.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -217,6 +217,9 @@
   // compressed oop references.
   bool is_ptr_to_narrowoop() const;
 
+  bool is_ptr_to_boxing_obj() const;
+
+
   // Convenience access
   float getf() const;
   double getd() const;
@@ -770,6 +773,7 @@
   // Does the type exclude subclasses of the klass?  (Inexact == polymorphic.)
   bool          _klass_is_exact;
   bool          _is_ptr_to_narrowoop;
+  bool          _is_ptr_to_boxed_value;
 
   // If not InstanceTop or InstanceBot, indicates that this is
   // a particular instance of this type which is distinct.
@@ -802,7 +806,9 @@
   // If the object cannot be rendered as a constant,
   // may return a non-singleton type.
   // If require_constant, produce a NULL if a singleton is not possible.
-  static const TypeOopPtr* make_from_constant(ciObject* o, bool require_constant = false);
+  static const TypeOopPtr* make_from_constant(ciObject* o,
+                                              bool require_constant = false,
+                                              bool not_null_elements = false);
 
   // Make a generic (unclassed) pointer to an oop.
   static const TypeOopPtr* make(PTR ptr, int offset, int instance_id);
@@ -814,7 +820,7 @@
   // Returns true if this pointer points at memory which contains a
   // compressed oop references.
   bool is_ptr_to_narrowoop_nv() const { return _is_ptr_to_narrowoop; }
-
+  bool is_ptr_to_boxed_value()   const { return _is_ptr_to_boxed_value; }
   bool is_known_instance()       const { return _instance_id > 0; }
   int  instance_id()             const { return _instance_id; }
   bool is_known_instance_field() const { return is_known_instance() && _offset >= 0; }
@@ -888,6 +894,9 @@
   // Make a pointer to an oop.
   static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot );
 
+  /** Create constant type for a constant boxed value */
+  const Type* get_const_boxed_value() const;
+
   // If this is a java.lang.Class constant, return the type for it or NULL.
   // Pass to Type::get_const_type to turn it to a type, which will usually
   // be a TypeInstPtr, but may also be a TypeInt::INT for int.class, etc.
@@ -919,7 +928,12 @@
 //------------------------------TypeAryPtr-------------------------------------
 // Class of Java array pointers
 class TypeAryPtr : public TypeOopPtr {
-  TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id), _ary(ary) {
+  TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk,
+              int offset, int instance_id, bool is_autobox_cache )
+  : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id),
+    _ary(ary),
+    _is_autobox_cache(is_autobox_cache)
+ {
 #ifdef ASSERT
     if (k != NULL) {
       // Verify that specified klass and TypeAryPtr::klass() follow the same rules.
@@ -940,6 +954,7 @@
   virtual bool eq( const Type *t ) const;
   virtual int hash() const;     // Type specific hashing
   const TypeAry *_ary;          // Array we point into
+  const bool     _is_autobox_cache;
 
   ciKlass* compute_klass(DEBUG_ONLY(bool verify = false)) const;
 
@@ -950,9 +965,11 @@
   const Type*    elem() const { return _ary->_elem; }
   const TypeInt* size() const { return _ary->_size; }
 
+  bool is_autobox_cache() const { return _is_autobox_cache; }
+
   static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot);
   // Constant pointer to array
-  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot);
+  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, bool is_autobox_cache = false);
 
   // Return a 'ptr' version of this type
   virtual const Type *cast_to_ptr_type(PTR ptr) const;
@@ -1303,6 +1320,13 @@
   return false;
 }
 
+inline bool Type::is_ptr_to_boxing_obj() const {
+  const TypeInstPtr* tp = isa_instptr();
+  return (tp != NULL) && (tp->offset() == 0) &&
+         tp->klass()->is_instance_klass()  &&
+         tp->klass()->as_instance_klass()->is_box_klass();
+}
+
 
 // ===============================================================
 // Things that need to be 64-bits in the 64-bit build but
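The type.hpp additions record whether a pointer is known to refer to a boxed value (_is_ptr_to_boxed_value, is_autobox_cache, is_ptr_to_boxing_obj), which lets the optimizer fold an unboxing load from a constant box directly to the primitive constant via get_const_boxed_value(). A loose standalone sketch of that folding step (ConstBox and fold_unbox are made-up illustrations, not the compiler's data structures):

// Standalone sketch, not HotSpot code: when the receiver of an unboxing
// load is a compile-time constant box, replace the load with the constant.
#include <cstdio>

enum PrimTag { TAG_INT, TAG_LONG, TAG_UNKNOWN };

struct ConstBox {            // stands in for a constant boxed object
  PrimTag   tag;
  long long value;
};

static bool fold_unbox(const ConstBox* box, long long* out) {
  if (box == NULL || box->tag == TAG_UNKNOWN) {
    return false;            // not a known constant box: keep the load
  }
  *out = box->value;         // fold the field load to a constant
  return true;
}

int main() {
  ConstBox cached_int = { TAG_INT, 42 };
  long long v = 0;
  if (fold_unbox(&cached_int, &v)) {
    printf("folded to constant %lld\n", v);
  }
  return 0;
}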
--- a/src/share/vm/prims/jni.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jni.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -3269,7 +3269,11 @@
   HOTSPOT_JNI_GETSTRINGLENGTH_ENTRY(
                                     env, string);
 #endif /* USDT2 */
-  jsize ret = java_lang_String::length(JNIHandles::resolve_non_null(string));
+  jsize ret = 0;
+  oop s = JNIHandles::resolve_non_null(string);
+  if (java_lang_String::value(s) != NULL) {
+    ret = java_lang_String::length(s);
+  }
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, GetStringLength__return, ret);
 #else /* USDT2 */
@@ -3289,19 +3293,25 @@
  HOTSPOT_JNI_GETSTRINGCHARS_ENTRY(
                                   env, string, (uintptr_t *) isCopy);
 #endif /* USDT2 */
-  //%note jni_5
-  if (isCopy != NULL) {
-    *isCopy = JNI_TRUE;
-  }
+  jchar* buf = NULL;
   oop s = JNIHandles::resolve_non_null(string);
-  int s_len = java_lang_String::length(s);
   typeArrayOop s_value = java_lang_String::value(s);
-  int s_offset = java_lang_String::offset(s);
-  jchar* buf = NEW_C_HEAP_ARRAY(jchar, s_len + 1, mtInternal);  // add one for zero termination
-  if (s_len > 0) {
-    memcpy(buf, s_value->char_at_addr(s_offset), sizeof(jchar)*s_len);
+  if (s_value != NULL) {
+    int s_len = java_lang_String::length(s);
+    int s_offset = java_lang_String::offset(s);
+    buf = NEW_C_HEAP_ARRAY_RETURN_NULL(jchar, s_len + 1, mtInternal);  // add one for zero termination
+    /* JNI Specification states return NULL on OOM */
+    if (buf != NULL) {
+      if (s_len > 0) {
+        memcpy(buf, s_value->char_at_addr(s_offset), sizeof(jchar)*s_len);
+      }
+      buf[s_len] = 0;
+      //%note jni_5
+      if (isCopy != NULL) {
+        *isCopy = JNI_TRUE;
+      }
+    }
   }
-  buf[s_len] = 0;
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, GetStringChars__return, buf);
 #else /* USDT2 */
@@ -3369,7 +3379,11 @@
  HOTSPOT_JNI_GETSTRINGUTFLENGTH_ENTRY(
                                       env, string);
 #endif /* USDT2 */
-  jsize ret = java_lang_String::utf8_length(JNIHandles::resolve_non_null(string));
+  jsize ret = 0;
+  oop java_string = JNIHandles::resolve_non_null(string);
+  if (java_lang_String::value(java_string) != NULL) {
+    ret = java_lang_String::utf8_length(java_string);
+  }
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, GetStringUTFLength__return, ret);
 #else /* USDT2 */
@@ -3388,11 +3402,19 @@
  HOTSPOT_JNI_GETSTRINGUTFCHARS_ENTRY(
                                      env, string, (uintptr_t *) isCopy);
 #endif /* USDT2 */
+  char* result = NULL;
   oop java_string = JNIHandles::resolve_non_null(string);
-  size_t length = java_lang_String::utf8_length(java_string);
-  char* result = AllocateHeap(length + 1, mtInternal);
-  java_lang_String::as_utf8_string(java_string, result, (int) length + 1);
-  if (isCopy != NULL) *isCopy = JNI_TRUE;
+  if (java_lang_String::value(java_string) != NULL) {
+    size_t length = java_lang_String::utf8_length(java_string);
+    /* JNI Specification states return NULL on OOM */
+    result = AllocateHeap(length + 1, mtInternal, 0, AllocFailStrategy::RETURN_NULL);
+    if (result != NULL) {
+      java_lang_String::as_utf8_string(java_string, result, (int) length + 1);
+      if (isCopy != NULL) {
+        *isCopy = JNI_TRUE;
+      }
+    }
+  }
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, GetStringUTFChars__return, result);
 #else /* USDT2 */
@@ -3646,11 +3668,16 @@
      * Avoid asserts in typeArrayOop. */ \
     result = (ElementType*)get_bad_address(); \
   } else { \
-    result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \
-    /* copy the array to the c chunk */ \
-    memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \
+    /* JNI Specification states return NULL on OOM */                    \
+    result = NEW_C_HEAP_ARRAY_RETURN_NULL(ElementType, len, mtInternal); \
+    if (result != NULL) {                                                \
+      /* copy the array to the c chunk */                                \
+      memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len);      \
+      if (isCopy) {                                                      \
+        *isCopy = JNI_TRUE;                                              \
+      }                                                                  \
+    }                                                                    \
   } \
-  if (isCopy) *isCopy = JNI_TRUE; \
   DTRACE_PROBE1(hotspot_jni, Get##Result##ArrayElements__return, result);\
   return result; \
 JNI_END
@@ -3683,11 +3710,16 @@
      * Avoid asserts in typeArrayOop. */ \
     result = (ElementType*)get_bad_address(); \
   } else { \
-    result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \
-    /* copy the array to the c chunk */ \
-    memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \
+    /* JNI Specification states return NULL on OOM */                    \
+    result = NEW_C_HEAP_ARRAY_RETURN_NULL(ElementType, len, mtInternal); \
+    if (result != NULL) {                                                \
+      /* copy the array to the c chunk */                                \
+      memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len);      \
+      if (isCopy) {                                                      \
+        *isCopy = JNI_TRUE;                                              \
+      }                                                                  \
+    }                                                                    \
   } \
-  if (isCopy) *isCopy = JNI_TRUE; \
   ReturnProbe; \
   return result; \
 JNI_END
@@ -4515,7 +4547,7 @@
   if (!directBufferSupportInitializeEnded) {
     if (!initializeDirectBufferSupport(env, thread)) {
 #ifndef USDT2
-      DTRACE_PROBE1(hotspot_jni, NewDirectByteBuffer__return, NULL);
+      DTRACE_PROBE1(hotspot_jni, NewDirectByteBuffer__return, (uintptr_t) NULL);
 #else /* USDT2 */
       HOTSPOT_JNI_NEWDIRECTBYTEBUFFER_RETURN(
                                              NULL);
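The string and primitive-array hunks above make GetStringChars, GetStringUTFChars and the Get<Type>ArrayElements macros return NULL when the native copy cannot be allocated, as the JNI specification permits, instead of aborting the VM; they also tolerate a String whose value field is still null. A minimal caller-side sketch, with the helper name and usage assumed rather than taken from this changeset:

#include <stdio.h>
#include <jni.h>

static void print_java_string(JNIEnv* env, jstring str) {
  const char* utf = env->GetStringUTFChars(str, NULL);
  if (utf == NULL) {
    return;  // allocation failed: with this change the VM returns NULL rather than aborting
  }
  printf("%s\n", utf);
  env->ReleaseStringUTFChars(str, utf);
}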
--- a/src/share/vm/prims/jniCheck.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jniCheck.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -39,6 +39,9 @@
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "jniTypes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "jniTypes_sparc.hpp"
 #endif
@@ -1304,18 +1307,19 @@
     IN_VM(
       checkString(thr, str);
     )
+    jchar* newResult = NULL;
     const jchar *result = UNCHECKED()->GetStringChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringChars didn't return a copy as expected");
-
-    size_t len = UNCHECKED()->GetStringLength(env,str) + 1; // + 1 for NULL termination
-    jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), mtInternal);
-    *tagLocation = STRING_TAG;
-    jchar* newResult = (jchar*) (tagLocation + 1);
-    memcpy(newResult, result, len * sizeof(jchar));
-    // Avoiding call to UNCHECKED()->ReleaseStringChars() since that will fire unexpected dtrace probes
-    // Note that the dtrace arguments for the allocated memory will not match up with this solution.
-    FreeHeap((char*)result);
-
+    if (result != NULL) {
+      size_t len = UNCHECKED()->GetStringLength(env,str) + 1; // + 1 for NULL termination
+      jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), mtInternal);
+      *tagLocation = STRING_TAG;
+      newResult = (jchar*) (tagLocation + 1);
+      memcpy(newResult, result, len * sizeof(jchar));
+      // Avoiding call to UNCHECKED()->ReleaseStringChars() since that will fire unexpected dtrace probes
+      // Note that the dtrace arguments for the allocated memory will not match up with this solution.
+      FreeHeap((char*)result);
+    }
     functionExit(env);
     return newResult;
 JNI_END
@@ -1374,18 +1378,19 @@
     IN_VM(
       checkString(thr, str);
     )
+    char* newResult = NULL;
     const char *result = UNCHECKED()->GetStringUTFChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringUTFChars didn't return a copy as expected");
-
-    size_t len = strlen(result) + 1; // + 1 for NULL termination
-    jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), mtInternal);
-    *tagLocation = STRING_UTF_TAG;
-    char* newResult = (char*) (tagLocation + 1);
-    strcpy(newResult, result);
-    // Avoiding call to UNCHECKED()->ReleaseStringUTFChars() since that will fire unexpected dtrace probes
-    // Note that the dtrace arguments for the allocated memory will not match up with this solution.
-    FreeHeap((char*)result, mtInternal);
-
+    if (result != NULL) {
+      size_t len = strlen(result) + 1; // + 1 for NULL termination
+      jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), mtInternal);
+      *tagLocation = STRING_UTF_TAG;
+      newResult = (char*) (tagLocation + 1);
+      strcpy(newResult, result);
+      // Avoiding call to UNCHECKED()->ReleaseStringUTFChars() since that will fire unexpected dtrace probes
+      // Note that the dtrace arguments for the allocated memory will not match up with this solution.
+      FreeHeap((char*)result, mtInternal);
+    }
     functionExit(env);
     return newResult;
 JNI_END
--- a/src/share/vm/prims/jni_md.h	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jni_md.h	Mon Apr 13 16:44:26 2020 +0100
@@ -27,6 +27,9 @@
 #ifdef TARGET_ARCH_x86
 # include "jni_x86.h"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "jni_aarch64.h"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "jni_sparc.h"
 #endif
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -31,6 +31,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -2792,6 +2792,9 @@
   (*entry_count_ptr) = num_entries;
   (*table_ptr) = jvmti_table;
 
+  if (num_entries == 0)
+    return JVMTI_ERROR_ABSENT_INFORMATION;
+
   return JVMTI_ERROR_NONE;
 } /* end GetLineNumberTable */
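With the check above, a method whose line number table is empty now reports JVMTI_ERROR_ABSENT_INFORMATION instead of success with zero entries. A hedged agent-side sketch (helper name assumed, not part of this changeset) of how a caller distinguishes the two outcomes:

#include <jvmti.h>

static jint first_line_of(jvmtiEnv* jvmti, jmethodID method) {
  jint count = 0;
  jvmtiLineNumberEntry* table = NULL;
  jvmtiError err = jvmti->GetLineNumberTable(method, &count, &table);
  if (err != JVMTI_ERROR_NONE) {
    // includes JVMTI_ERROR_ABSENT_INFORMATION; the hunk above still fills the out
    // parameters before returning that error, so release the table if it was set
    if (table != NULL) jvmti->Deallocate((unsigned char*)table);
    return -1;
  }
  jint line = table[0].line_number;  // JVMTI_ERROR_NONE now implies count > 0
  jvmti->Deallocate((unsigned char*)table);
  return line;
}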
 
--- a/src/share/vm/prims/jvmtiExport.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jvmtiExport.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1765,6 +1765,47 @@
   }
 }
 
+#if defined(ZERO) && defined(ARM)
+
+// special compiled_method_load notify API for thumb2 compiler
+
+void JvmtiExport::post_compiled_method_load(const methodOop method, const jint length,
+                                            const void *code_begin, const jint map_length,
+                                            const jvmtiAddrLocationMap* map,
+                                            const void *compile_info)
+{
+  JavaThread* thread = JavaThread::current();
+  jmethodID methodId = method->jmethod_id();
+
+  EVT_TRIG_TRACE(JVMTI_EVENT_COMPILED_METHOD_LOAD,
+                 ("JVMTI [%s] method compile load event triggered (by thumb2_compile)",
+                 JvmtiTrace::safe_get_thread_name(thread)));
+
+  JvmtiEnvIterator it;
+  for (JvmtiEnv* env = it.first(); env != NULL; env = it.next(env)) {
+    if (env->is_enabled(JVMTI_EVENT_COMPILED_METHOD_LOAD)) {
+
+      EVT_TRACE(JVMTI_EVENT_COMPILED_METHOD_LOAD,
+                ("JVMTI [%s] class compile method load event sent %s.%s   (by thumb2_compile)",
+                JvmtiTrace::safe_get_thread_name(thread),
+                method->klass_name()->as_C_string(),
+                method->name()->as_C_string()));
+
+      JvmtiEventMark jem(thread);
+      JvmtiJavaThreadEventTransition jet(thread);
+      jvmtiEventCompiledMethodLoad callback = env->callbacks()->CompiledMethodLoad;
+
+      if (callback != NULL) {
+        (*callback)(env->jvmti_external(), methodId,
+                    length, code_begin, map_length,
+                    map, compile_info);
+      }
+    }
+  }
+}
+
+#endif // defined(ZERO) && defined(ARM)
+
 
 // post a COMPILED_METHOD_LOAD event for a given environment
 void JvmtiExport::post_compiled_method_load(JvmtiEnv* env, const jmethodID method, const jint length,
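The Zero/ARM-specific entry point added above feeds the same JVMTI_EVENT_COMPILED_METHOD_LOAD machinery as the generic path that follows. For reference, a hedged agent-side sketch of the callback these posts end up invoking; names are assumed, and it presumes the agent already acquired the can_generate_compiled_method_load_events capability:

#include <string.h>
#include <jvmti.h>

static void JNICALL on_compiled_method_load(jvmtiEnv* jvmti, jmethodID method,
                                            jint code_size, const void* code_addr,
                                            jint map_length, const jvmtiAddrLocationMap* map,
                                            const void* compile_info) {
  // code_addr/code_size describe the freshly installed native code for 'method'
}

static jvmtiError enable_compiled_method_load(jvmtiEnv* jvmti) {
  jvmtiEventCallbacks callbacks;
  memset(&callbacks, 0, sizeof(callbacks));
  callbacks.CompiledMethodLoad = &on_compiled_method_load;
  jvmtiError err = jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));
  if (err != JVMTI_ERROR_NONE) return err;
  return jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
}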
--- a/src/share/vm/prims/jvmtiExport.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jvmtiExport.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -134,6 +134,13 @@
   // internal implementation.  Also called from JvmtiDeferredEvent::post()
   static void post_dynamic_code_generated_internal(const char *name, const void *code_begin, const void *code_end);
 
+#ifdef __arm__
+  static void post_compiled_method_load(const methodOop method, const jint length,
+                                        const void *code_begin, const jint map_length,
+                                        const jvmtiAddrLocationMap* map,
+                                        const void *compile_info);
+#endif // __arm__
+
  private:
 
   // GenerateEvents support to allow posting of CompiledMethodLoad and
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -152,7 +152,8 @@
     size_t s = initial_size * sizeof(JvmtiTagHashmapEntry*);
     _table = (JvmtiTagHashmapEntry**)os::malloc(s, mtInternal);
     if (_table == NULL) {
-      vm_exit_out_of_memory(s, "unable to allocate initial hashtable for jvmti object tags");
+      vm_exit_out_of_memory(s, OOM_MALLOC_ERROR,
+        "unable to allocate initial hashtable for jvmti object tags");
     }
     for (int i=0; i<initial_size; i++) {
       _table[i] = NULL;
@@ -1069,10 +1070,16 @@
 {
   assert(str->klass() == SystemDictionary::String_klass(), "not a string");
 
+  typeArrayOop s_value = java_lang_String::value(str);
+
+  // JDK-6584008: the value field may be null if a String instance is
+  // partially constructed.
+  if (s_value == NULL) {
+    return 0;
+  }
   // get the string value and length
   // (string value may be offset from the base)
   int s_len = java_lang_String::length(str);
-  typeArrayOop s_value = java_lang_String::value(str);
   int s_offset = java_lang_String::offset(str);
   jchar* value;
   if (s_len > 0) {
--- a/src/share/vm/prims/methodHandles.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/methodHandles.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -68,7 +68,8 @@
   TraceTime timer("MethodHandles adapters generation", TraceStartupTime);
   _adapter_code = MethodHandlesAdapterBlob::create(MethodHandles::adapter_code_size());
   if (_adapter_code == NULL)
-    vm_exit_out_of_memory(MethodHandles::adapter_code_size(), "CodeCache: no room for MethodHandles adapters");
+    vm_exit_out_of_memory(MethodHandles::adapter_code_size(), OOM_MALLOC_ERROR,
+                          "CodeCache: no room for MethodHandles adapters");
   {
     CodeBuffer code(_adapter_code);
     MethodHandlesAdapterGenerator g(&code);
--- a/src/share/vm/prims/methodHandles.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/methodHandles.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -187,6 +187,9 @@
 #ifdef TARGET_ARCH_x86
 # include "methodHandles_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "methodHandles_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "methodHandles_sparc.hpp"
 #endif
--- a/src/share/vm/prims/unsafe.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/unsafe.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -369,6 +369,24 @@
 
 #endif // not SPARC and not X86
 
+UNSAFE_ENTRY(jboolean, Unsafe_isBigEndian0(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_IsBigEndian0");
+  {
+#ifdef VM_LITTLE_ENDIAN
+    return false;
+#else
+    return true;
+#endif
+  }
+UNSAFE_END
+
+UNSAFE_ENTRY(jint, Unsafe_unalignedAccess0(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_UnalignedAccess0");
+  {
+    return UseUnalignedAccesses;
+  }
+UNSAFE_END
+
 #define DEFINE_GETSETOOP(jboolean, Boolean) \
  \
 UNSAFE_ENTRY(jboolean, Unsafe_Get##Boolean##140(JNIEnv *env, jobject unsafe, jobject obj, jint offset)) \
@@ -685,6 +703,36 @@
   Copy::conjoint_memory_atomic(src, dst, sz);
 UNSAFE_END
 
+// This function is a leaf since if the source and destination are both in native memory
+// the copy may potentially be very large, and we don't want to disable GC if we can avoid it.
+// If either source or destination (or both) are on the heap, the function will enter VM using
+// JVM_ENTRY_FROM_LEAF
+JVM_LEAF(void, Unsafe_CopySwapMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size, jlong elemSize)) {
+  UnsafeWrapper("Unsafe_CopySwapMemory0");
+
+  size_t sz = (size_t)size;
+  size_t esz = (size_t)elemSize;
+
+  if (srcObj == NULL && dstObj == NULL) {
+    // Both src & dst are in native memory
+    address src = (address)srcOffset;
+    address dst = (address)dstOffset;
+
+    Copy::conjoint_swap(src, dst, sz, esz);
+  } else {
+    // At least one of src/dst are on heap, transition to VM to access raw pointers
+
+    JVM_ENTRY_FROM_LEAF(env, void, Unsafe_CopySwapMemory0) {
+      oop srcp = JNIHandles::resolve(srcObj);
+      oop dstp = JNIHandles::resolve(dstObj);
+
+      address src = (address)index_oop_from_field_offset_long(srcp, srcOffset);
+      address dst = (address)index_oop_from_field_offset_long(dstp, dstOffset);
+
+      Copy::conjoint_swap(src, dst, sz, esz);
+    } JVM_END
+  }
+} JVM_END
 
 ////// Random queries
 
@@ -1593,6 +1641,7 @@
 
 JNINativeMethod memcopy_methods[] = {
     {CC"copyMemory",         CC"("OBJ"J"OBJ"JJ)V",       FN_PTR(Unsafe_CopyMemory2)},
+    {CC"copySwapMemory0",    CC "(" OBJ "J" OBJ "JJJ)V", FN_PTR(Unsafe_CopySwapMemory0)},
     {CC"setMemory",          CC"("OBJ"JJB)V",            FN_PTR(Unsafe_SetMemory2)}
 };
 
@@ -1609,6 +1658,11 @@
     {CC"shouldBeInitialized",CC"("CLS")Z",               FN_PTR(Unsafe_ShouldBeInitialized)},
 };
 
+JNINativeMethod machine_methods[] = {
+    {CC"isBigEndian0",       CC"()Z",                    FN_PTR(Unsafe_isBigEndian0)},
+    {CC"unalignedAccess0",   CC"()Z",                    FN_PTR(Unsafe_unalignedAccess0)}
+};
+
 #undef CC
 #undef FN_PTR
 
@@ -1670,6 +1724,15 @@
         }
       }
     }
+    {
+      env->RegisterNatives(unsafecls, machine_methods, sizeof(machine_methods)/sizeof(JNINativeMethod));
+      if (env->ExceptionOccurred()) {
+        if (PrintMiscellaneous && (Verbose || WizardMode)) {
+          tty->print_cr("Warning:  SDK 1.7 Unsafe.isBigEndian0/unalignedAccess0 not found.");
+        }
+        env->ExceptionClear();
+      }
+    }
     if (EnableInvokeDynamic) {
       env->RegisterNatives(unsafecls, anonk_methods, sizeof(anonk_methods)/sizeof(JNINativeMethod));
       if (env->ExceptionOccurred()) {
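Unsafe_CopySwapMemory0 above delegates to Copy::conjoint_swap, which is expected to copy sz bytes while reversing the byte order of each esz-sized element; isBigEndian0/unalignedAccess0 simply expose VM_LITTLE_ENDIAN and the new UseUnalignedAccesses flag to the Java side. A standalone illustration of the element-wise swap for a 4-byte element size, using assumed test values and not calling the VM code:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main() {
  uint32_t src[2] = { 0x11223344u, 0xAABBCCDDu };
  uint32_t dst[2];
  // emulate a copy-swap with elemSize == 4: each element is copied with its bytes reversed
  for (int i = 0; i < 2; i++) {
    uint8_t b[4];
    memcpy(b, &src[i], sizeof(b));
    uint8_t r[4] = { b[3], b[2], b[1], b[0] };
    memcpy(&dst[i], r, sizeof(r));
  }
  assert(dst[0] == 0x44332211u && dst[1] == 0xDDCCBBAAu);  // holds on either endianness
  return 0;
}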
--- a/src/share/vm/prims/whitebox.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/prims/whitebox.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -181,7 +181,7 @@
   if (res == NULL) {
     tty->print_cr("Invalid layout of %s at %s", ik->external_name(),
         name_symbol->as_C_string());
-    fatal("Invalid layout of preloaded class");
+    vm_exit_during_initialization("Invalid layout of preloaded class: use -XX:+TraceClassLoading to see the origin of the problem class");
   }
 
   //fetch the field at the offset we've found
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -56,7 +56,7 @@
   set_c2_count(MAX2(count - count / 3, 1));
 
   // Some inlining tuning
-#ifdef X86
+#if defined(X86) || defined(AARCH64)
   if (FLAG_IS_DEFAULT(InlineSmallCode)) {
     FLAG_SET_DEFAULT(InlineSmallCode, 2000);
   }
@@ -125,7 +125,8 @@
 }
 
 double AdvancedThresholdPolicy::weight(methodOop method) {
-  return (method->rate() + 1) * ((method->invocation_count() + 1) *  (method->backedge_count() + 1));
+  return (double)(method->rate() + 1) *
+    (method->invocation_count() + 1) * (method->backedge_count() + 1);
 }
 
 // Apply heuristics and return true if x should be compiled before y
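The cast to double in weight() above matters because the invocation and backedge counters are plain ints, so their product can exceed INT_MAX for a hot method; promoting the first factor keeps the whole expression in floating point. A tiny standalone illustration with assumed counter values:

#include <limits.h>
#include <stdio.h>

int main() {
  int invocations = 100000, backedges = 50000;                 // assumed hot-method counters
  double term = (double)(invocations + 1) * (backedges + 1);   // evaluation order used by the fix
  printf("term = %.0f vs INT_MAX = %d\n", term, INT_MAX);      // ~5.0e9, far beyond 32-bit int range
  return 0;
}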
--- a/src/share/vm/runtime/arguments.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -60,10 +60,31 @@
 #include "gc_implementation/parallelScavenge/parallelScavengeHeap.hpp"
 #endif
 
-// Note: This is a special bug reporting site for the JVM
-#define DEFAULT_VENDOR_URL_BUG "http://bugreport.java.com/bugreport/crash.jsp"
+#define DEFAULT_VENDOR_URL_BUG "http://icedtea.classpath.org/bugzilla"
 #define DEFAULT_JAVA_LAUNCHER  "generic"
 
+// Disable options not supported in this release, with a warning if they
+// were explicitly requested on the command-line
+#define UNSUPPORTED_OPTION(opt, description)                    \
+do {                                                            \
+  if (opt) {                                                    \
+    if (FLAG_IS_CMDLINE(opt)) {                                 \
+      warning(description " is disabled in this release.");     \
+    }                                                           \
+    FLAG_SET_DEFAULT(opt, false);                               \
+  }                                                             \
+} while(0)
+
+#define UNSUPPORTED_GC_OPTION(gc)                                     \
+do {                                                                  \
+  if (gc) {                                                           \
+    if (FLAG_IS_CMDLINE(gc)) {                                        \
+      warning(#gc " is not supported in this VM.  Using Serial GC."); \
+    }                                                                 \
+    FLAG_SET_DEFAULT(gc, false);                                      \
+  }                                                                   \
+} while(0)
+
 char**  Arguments::_jvm_flags_array             = NULL;
 int     Arguments::_num_jvm_flags               = 0;
 char**  Arguments::_jvm_args_array              = NULL;
@@ -771,7 +792,7 @@
   } else {
     *bldarray = REALLOC_C_HEAP_ARRAY(char*, *bldarray, *count, mtInternal);
   }
-  (*bldarray)[index] = strdup(arg);
+  (*bldarray)[index] = os::strdup_check_oom(arg);
 }
 
 void Arguments::build_jvm_args(const char* arg) {
@@ -1074,6 +1095,10 @@
     }
     break;
   }
+  if (!UseInterpreter) { // -Xcomp
+    Tier3InvokeNotifyFreqLog = 0;
+    Tier4InvocationThreshold = 0;
+  }
 }
 
 // Conflict: required to use shared spaces (-Xshare:on), but
@@ -1100,7 +1125,9 @@
   }
   // Increase the code cache size - tiered compiles a lot more.
   if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) {
-    FLAG_SET_DEFAULT(ReservedCodeCacheSize, ReservedCodeCacheSize * 2);
+    NOT_AARCH64(FLAG_SET_DEFAULT(ReservedCodeCacheSize, ReservedCodeCacheSize * 2));
+    AARCH64_ONLY(FLAG_SET_DEFAULT(ReservedCodeCacheSize,
+                                  MIN2(CODE_CACHE_DEFAULT_LIMIT, ReservedCodeCacheSize * 2)));
   }
 }
 
@@ -1217,7 +1244,7 @@
     // NewSize was set on the command line and it is larger than
     // preferred_max_new_size.
     if (!FLAG_IS_DEFAULT(NewSize)) {   // NewSize explicitly set at command-line
-      FLAG_SET_ERGO(uintx, MaxNewSize, MAX2(NewSize, preferred_max_new_size));
+      FLAG_SET_ERGO(uintx, MaxNewSize, MAX2((size_t) NewSize, preferred_max_new_size));
     } else {
       FLAG_SET_ERGO(uintx, MaxNewSize, preferred_max_new_size);
     }
@@ -1242,8 +1269,8 @@
       // Unless explicitly requested otherwise, make young gen
       // at least min_new, and at most preferred_max_new_size.
       if (FLAG_IS_DEFAULT(NewSize)) {
-        FLAG_SET_ERGO(uintx, NewSize, MAX2(NewSize, min_new));
-        FLAG_SET_ERGO(uintx, NewSize, MIN2(preferred_max_new_size, NewSize));
+        FLAG_SET_ERGO(uintx, NewSize, MAX2((size_t) NewSize, min_new));
+        FLAG_SET_ERGO(uintx, NewSize, MIN2(preferred_max_new_size, (size_t) NewSize));
         if (PrintGCDetails && Verbose) {
           // Too early to use gclog_or_tty
           tty->print_cr("CMS ergo set NewSize: " SIZE_FORMAT, NewSize);
@@ -1253,7 +1280,7 @@
       // so it's NewRatio x of NewSize.
       if (FLAG_IS_DEFAULT(OldSize)) {
         if (max_heap > NewSize) {
-          FLAG_SET_ERGO(uintx, OldSize, MIN2(NewRatio*NewSize, max_heap - NewSize));
+          FLAG_SET_ERGO(uintx, OldSize, MIN2((size_t)(NewRatio*NewSize), max_heap - NewSize));
           if (PrintGCDetails && Verbose) {
             // Too early to use gclog_or_tty
             tty->print_cr("CMS ergo set OldSize: " SIZE_FORMAT, OldSize);
@@ -1726,6 +1753,20 @@
 // Aggressive optimization flags  -XX:+AggressiveOpts
 void Arguments::set_aggressive_opts_flags() {
 #ifdef COMPILER2
+  if (AggressiveUnboxing) {
+    if (FLAG_IS_DEFAULT(EliminateAutoBox)) {
+      FLAG_SET_DEFAULT(EliminateAutoBox, true);
+    } else if (!EliminateAutoBox) {
+      // warning("AggressiveUnboxing is disabled because EliminateAutoBox is disabled");
+      AggressiveUnboxing = false;
+    }
+    if (FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
+      FLAG_SET_DEFAULT(DoEscapeAnalysis, true);
+    } else if (!DoEscapeAnalysis) {
+      // warning("AggressiveUnboxing is disabled because DoEscapeAnalysis is disabled");
+      AggressiveUnboxing = false;
+    }
+  }
   if (AggressiveOpts || !FLAG_IS_DEFAULT(AutoBoxCacheMax)) {
     // EliminateAutoBox code is broken in C2
     if (FLAG_IS_DEFAULT(EliminateAutoBox)) {
@@ -1769,7 +1810,7 @@
 }
 
 void Arguments::process_java_launcher_argument(const char* launcher, void* extra_info) {
-  _sun_java_launcher = strdup(launcher);
+  _sun_java_launcher = os::strdup_check_oom(launcher);
   if (strcmp("gamma", _sun_java_launcher) == 0) {
     _created_by_gamma_launcher = true;
   }
@@ -2662,7 +2703,7 @@
       // Redirect GC output to the file. -Xloggc:<filename>
       // ostream_init_log(), when called will use this filename
       // to initialize a fileStream.
-      _gc_log_filename = strdup(tail);
+      _gc_log_filename = os::strdup_check_oom(tail);
      if (!is_filename_valid(_gc_log_filename)) {
        jio_fprintf(defaultStream::output_stream(),
                   "Invalid file name for use with -Xloggc: Filename can only contain the "
@@ -3153,14 +3194,17 @@
     SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false));
   }
 
-  // Tiered compilation is undefined with C1.
-  TieredCompilation = false;
 #else
   if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
     FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
   }
 #endif
 
+#ifndef TIERED
+  // Tiered compilation is undefined.
+  UNSUPPORTED_OPTION(TieredCompilation, "TieredCompilation");
+#endif
+
   // If we are running in a headless jre, force java.awt.headless property
   // to be true unless the property has already been set.
   // Also allow the OS environment variable JAVA_AWT_HEADLESS to set headless state.
@@ -3318,38 +3362,34 @@
   }
 }
 
-// Disable options not supported in this release, with a warning if they
-// were explicitly requested on the command-line
-#define UNSUPPORTED_OPTION(opt, description)                    \
-do {                                                            \
-  if (opt) {                                                    \
-    if (FLAG_IS_CMDLINE(opt)) {                                 \
-      warning(description " is disabled in this release.");     \
-    }                                                           \
-    FLAG_SET_DEFAULT(opt, false);                               \
-  }                                                             \
-} while(0)
+
+// Sharing support
+// Construct the path to the archive
+static char* get_shared_archive_path() {
+  char *shared_archive_path;
+  if (SharedArchiveFile == NULL) {
+    char jvm_path[JVM_MAXPATHLEN];
+    os::jvm_path(jvm_path, sizeof(jvm_path));
+    char *end = strrchr(jvm_path, *os::file_separator());
+    if (end != NULL) *end = '\0';
+    size_t jvm_path_len = strlen(jvm_path);
+    size_t file_sep_len = strlen(os::file_separator());
+    const size_t len = jvm_path_len + file_sep_len + 20;
+    shared_archive_path = NEW_C_HEAP_ARRAY(char, len, mtInternal);
+    if (shared_archive_path != NULL) {
+      jio_snprintf(shared_archive_path, len, "%s%sclasses%s.jsa",
+                   jvm_path, os::file_separator(), DEBUG_ONLY("_g") NOT_DEBUG(""));
+    }
+  } else {
+    shared_archive_path = os::strdup_check_oom(SharedArchiveFile, mtInternal);
+  }
+  return shared_archive_path;
+}
 
 // Parse entry point called from JNI_CreateJavaVM
 
 jint Arguments::parse(const JavaVMInitArgs* args) {
 
-  // Sharing support
-  // Construct the path to the archive
-  char jvm_path[JVM_MAXPATHLEN];
-  os::jvm_path(jvm_path, sizeof(jvm_path));
-  char *end = strrchr(jvm_path, *os::file_separator());
-  if (end != NULL) *end = '\0';
-  char *shared_archive_path = NEW_C_HEAP_ARRAY(char, strlen(jvm_path) +
-      strlen(os::file_separator()) + 20, mtInternal);
-  if (shared_archive_path == NULL) return JNI_ENOMEM;
-  strcpy(shared_archive_path, jvm_path);
-  strcat(shared_archive_path, os::file_separator());
-  strcat(shared_archive_path, "classes");
-  DEBUG_ONLY(strcat(shared_archive_path, "_g");)
-  strcat(shared_archive_path, ".jsa");
-  SharedArchivePath = shared_archive_path;
-
   // Remaining part of option string
   const char* tail;
 
@@ -3434,6 +3474,12 @@
     return result;
   }
 
+  // Call get_shared_archive_path() here, after possible SharedArchiveFile option got parsed.
+  SharedArchivePath = get_shared_archive_path();
+  if (SharedArchivePath == NULL) {
+    return JNI_ENOMEM;
+  }
+
   // Delay warning until here so that we've had a chance to process
   // the -XX:-PrintWarnings flag
   if (needs_hotspotrc_warning) {
@@ -3543,6 +3589,14 @@
     set_g1_gc_flags();
   }
 
+  if (AssumeMP && !UseSerialGC) {
+    if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) {
+      warning("If the number of processors is expected to increase from one, then"
+              " you should configure the number of parallel GC threads appropriately"
+              " using -XX:ParallelGCThreads=N");
+    }
+  }
+
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
 #endif // SERIALGC
--- a/src/share/vm/runtime/atomic.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/atomic.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -42,6 +42,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "atomic_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "atomic_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "atomic_linux_sparc.inline.hpp"
 #endif
--- a/src/share/vm/runtime/deoptimization.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/deoptimization.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -53,6 +53,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmreg_sparc.inline.hpp"
 #endif
@@ -72,6 +75,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/ad_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/ad_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/ad_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/dtraceJSDT.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/dtraceJSDT.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/fprofiler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/fprofiler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -618,10 +618,16 @@
   }
 
   vmNode(const char* name, const TickPosition where) : ProfilerNode() {
-    _name = name;
+    _name = os::strdup(name);
     update(where);
   }
 
+  ~vmNode() {
+    if (_name != NULL) {
+      os::free((void*)_name);
+    }
+  }
+
   const char *name()    const { return _name; }
   bool is_compiled()    const { return true; }
 
@@ -773,7 +779,7 @@
   assert(index >= 0, "Must be positive");
   // Note that we call strdup below since the symbol may be resource allocated
   if (!table[index]) {
-    table[index] = new (this) vmNode(os::strdup(name), where);
+    table[index] = new (this) vmNode(name, where);
   } else {
     ProfilerNode* prev = table[index];
     for(ProfilerNode* node = prev; node; node = node->next()) {
@@ -783,7 +789,7 @@
       }
       prev = node;
     }
-    prev->set_next(new (this) vmNode(os::strdup(name), where));
+    prev->set_next(new (this) vmNode(name, where));
   }
 }
 
--- a/src/share/vm/runtime/frame.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/frame.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -48,6 +48,9 @@
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/frame.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/frame.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -38,6 +38,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/adGlobals_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/adGlobals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/adGlobals_sparc.hpp"
 #endif
@@ -483,6 +486,9 @@
 #ifdef TARGET_ARCH_x86
 # include "frame_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "frame_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "frame_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/frame.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/frame.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "jniTypes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "jniTypes_sparc.hpp"
 #endif
@@ -99,6 +102,9 @@
 #ifdef TARGET_ARCH_x86
 # include "frame_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "frame_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "frame_sparc.inline.hpp"
 #endif
--- a/src/share/vm/runtime/globals.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/globals.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -40,6 +40,9 @@
 #ifdef TARGET_ARCH_x86
 # include "globals_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "globals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "globals_sparc.hpp"
 #endif
@@ -70,6 +73,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "globals_linux_x86.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "globals_linux_aarch64.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "globals_linux_sparc.hpp"
 #endif
@@ -104,6 +110,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c1_globals_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c1_globals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c1_globals_sparc.hpp"
 #endif
@@ -133,6 +142,9 @@
 #ifdef TARGET_ARCH_x86
 # include "c2_globals_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "c2_globals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "c2_globals_sparc.hpp"
 #endif
@@ -391,7 +403,7 @@
 // notproduct flags are settable / visible only during development and are not declared in the PRODUCT version
 
 // A flag must be declared with one of the following types:
-// bool, intx, uintx, ccstr.
+// bool, intx, uintx, ccstr, double, or uint64_t.
 // The type "ccstr" is an alias for "const char*" and is used
 // only in this file, because the macrology requires single-token type names.
 
@@ -468,6 +480,9 @@
   lp64_product(intx, ObjectAlignmentInBytes, 8,                             \
           "Default object alignment in bytes, 8 is minimum")                \
                                                                             \
+  product(bool, AssumeMP, false,                                            \
+          "Instruct the VM to assume multiple processors are available")    \
+                                                                            \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
    * been re-added (see 6401008) */                                         \
@@ -652,6 +667,12 @@
   product(bool, UseAESIntrinsics, false,                                    \
           "use intrinsics for AES versions of crypto")                      \
                                                                             \
+  AARCH64_ONLY                                                              \
+  (                                                                         \
+  product(bool, UseCRC32Intrinsics, false,                                  \
+          "use intrinsics for java.util.zip.CRC32")                         \
+  )                                                                         \
+                                                                            \
   develop(bool, TraceCallFixup, false,                                      \
           "traces all call fixups")                                         \
                                                                             \
@@ -1226,6 +1247,13 @@
           "in perm.  This purely intended to allow debugging issues"        \
           "in production.")                                                 \
                                                                             \
+  product(bool, ExitOnOutOfMemoryError, false,                              \
+          "JVM exits on the first occurrence of an out-of-memory error")    \
+                                                                            \
+  product(bool, CrashOnOutOfMemoryError, false,                             \
+          "JVM aborts, producing an error log and core/mini dump, on the "  \
+          "first occurrence of an out-of-memory error")                     \
+                                                                            \
   /* tracing */                                                             \
                                                                             \
   notproduct(bool, TraceRuntimeCalls, false,                                \
@@ -3015,7 +3043,7 @@
   product(uintx, InitialHeapSize, 0,                                        \
           "Initial heap size (in bytes); zero means use ergonomics")        \
                                                                             \
-  product(uintx, MaxHeapSize, ScaleForWordSize(96*M),                       \
+  product(uintx, MaxHeapSize, ScaleForWordSize(512*M),                      \
           "Maximum heap size (in bytes)")                                   \
                                                                             \
   product(uintx, OldSize, ScaleForWordSize(4*M),                            \
@@ -3694,6 +3722,9 @@
   product(bool, PrintGCCause, true,                                         \
           "Include GC cause in GC logging")                                 \
                                                                             \
+  diagnostic(ccstr, SharedArchiveFile, NULL,                                \
+          "Override the default location of the CDS archive file")          \
+                                                                            \
   experimental(uintx, ArrayAllocatorMallocLimit,                            \
           SOLARIS_ONLY(64*K) NOT_SOLARIS(max_uintx),                        \
           "Allocation less than this value will be allocated "              \
@@ -3702,7 +3733,11 @@
   product(bool, EnableTracing, false,                                       \
                   "Enable event-based tracing")                             \
   product(bool, UseLockedTracing, false,                                    \
-          "Use locked-tracing when doing event-based tracing")
+          "Use locked-tracing when doing event-based tracing")              \
+                                                                            \
+  diagnostic(bool, UseUnalignedAccesses, false,                             \
+          "Use unaligned memory accesses in sun.misc.Unsafe")               \
+                                                                            \
 
 /*
  *  Macros for factoring of globals
--- a/src/share/vm/runtime/icache.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/icache.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -71,6 +71,9 @@
 #ifdef TARGET_ARCH_x86
 # include "icache_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "icache_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "icache_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/java.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/java.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -67,6 +67,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vm_version_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vm_version_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vm_version_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/javaCalls.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/javaCalls.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -33,6 +33,9 @@
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "jniTypes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "jniTypes_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/javaFrameAnchor.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/javaFrameAnchor.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "orderAccess_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "orderAccess_linux_sparc.inline.hpp"
 #endif
@@ -112,6 +115,9 @@
 #ifdef TARGET_ARCH_x86
 # include "javaFrameAnchor_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "javaFrameAnchor_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "javaFrameAnchor_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/objectMonitor.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/objectMonitor.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,7 +224,8 @@
 //
 // * The monitor entry list operations avoid locks, but strictly speaking
 //   they're not lock-free.  Enter is lock-free, exit is not.
-//   See http://j2se.east/~dice/PERSIST/040825-LockFreeQueues.html
+//   For a description of 'Methods and apparatus providing non-blocking access
+//   to a resource,' see U.S. Pat. No. 7844973.
 //
 // * The cxq can have multiple concurrent "pushers" but only one concurrent
 //   detaching thread.  This mechanism is immune from the ABA corruption.
@@ -378,6 +379,8 @@
   { // Change java thread status to indicate blocked on monitor enter.
     JavaThreadBlockedOnMonitorEnterState jtbmes(jt, this);
 
+    Self->set_current_pending_monitor(this);
+
     DTRACE_MONITOR_PROBE(contended__enter, this, object(), jt);
     if (JvmtiExport::should_post_monitor_contended_enter()) {
       JvmtiExport::post_monitor_contended_enter(jt, this);
@@ -392,8 +395,6 @@
     OSThreadContendState osts(Self->osthread());
     ThreadBlockInVM tbivm(jt);
 
-    Self->set_current_pending_monitor(this);
-
     // TODO-FIXME: change the following for(;;) loop to straight-line code.
     for (;;) {
       jt->set_suspend_equivalent();
@@ -1656,6 +1657,33 @@
        node._event->unpark();
      }
 
+     // Without the fix for 8028280, it is possible for the above call:
+     //
+     //   Thread::SpinAcquire (&_WaitSetLock, "WaitSet - unlink") ;
+     //
+     // to consume the unpark() that was done when the successor was set.
+     // The solution for this very rare possibility is to redo the unpark()
+     // outside of the JvmtiExport::should_post_monitor_waited() check.
+     //
+     if (node._notified != 0 && _succ == Self) {
+       // In this part of the monitor wait-notify-reenter protocol it
+       // is possible (and normal) for another thread to do a fastpath
+       // monitor enter-exit while this thread is still trying to get
+       // to the reenter portion of the protocol.
+       //
+       // The ObjectMonitor was notified and the current thread is
+       // the successor which also means that an unpark() has already
+       // been done. The JVMTI_EVENT_MONITOR_WAITED event handler can
+       // consume the unpark() that was done when the successor was
+       // set because the same ParkEvent is shared between Java
+       // monitors and JVM/TI RawMonitors (for now).
+       //
+       // We redo the unpark() to ensure forward progress, i.e., we
+       // don't want all pending threads hanging (parked) with none
+       // entering the unlocked monitor.
+       node._event->unpark();
+     }
+
      if (event.should_commit()) {
        post_monitor_wait_event(&event, node._notifier_tid, millis, ret == OS_TIMEOUT);
      }
@@ -1980,7 +2008,8 @@
 // (duration) or we can fix the count at approximately the duration of
 // a context switch and vary the frequency.   Of course we could also
 // vary both satisfying K == Frequency * Duration, where K is adaptive by monitor.
-// See http://j2se.east/~dice/PERSIST/040824-AdaptiveSpinning.html.
+// For a description of 'Adaptive spin-then-block mutual exclusion in
+// multi-threaded processing,' see U.S. Pat. No. 8046758.
 //
 // This implementation varies the duration "D", where D varies with
 // the success rate of recent spin attempts. (D is capped at approximately
@@ -2513,7 +2542,7 @@
   size_t sz = strlen (SyncKnobs) ;
   char * knobs = (char *) malloc (sz + 2) ;
   if (knobs == NULL) {
-     vm_exit_out_of_memory (sz + 2, "Parse SyncKnobs") ;
+     vm_exit_out_of_memory (sz + 2, OOM_MALLOC_ERROR, "Parse SyncKnobs") ;
      guarantee (0, "invariant") ;
   }
   strcpy (knobs, SyncKnobs) ;
--- a/src/share/vm/runtime/os.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/os.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -454,6 +454,13 @@
 }
 
 
+char* os::strdup_check_oom(const char* str, MEMFLAGS flags) {
+  char* p = os::strdup(str, flags);
+  if (p == NULL) {
+    vm_exit_out_of_memory(strlen(str) + 1, OOM_MALLOC_ERROR, "os::strdup_check_oom");
+  }
+  return p;
+}
 
 #ifdef ASSERT
 #define space_before             (MallocCushion + sizeof(double))
@@ -544,7 +551,6 @@
   }
 }
 
-
 void report_heap_error(void* memblock, void* bad, const char* where) {
   tty->print_cr("## nof_mallocs = " UINT64_FORMAT ", nof_frees = " UINT64_FORMAT, os::num_mallocs, os::num_frees);
   tty->print_cr("## memory stomp: byte at " PTR_FORMAT " %s object " PTR_FORMAT, bad, where, memblock);
@@ -1167,6 +1173,7 @@
         "%/lib/jsse.jar:"
         "%/lib/jce.jar:"
         "%/lib/charsets.jar:"
+        "%/lib/rhino.jar:"
         "%/lib/jfr.jar:"
         "%/classes";
     char* sysclasspath = format_boot_path(classpath_format, home, home_len, fileSep, pathSep);
@@ -1392,44 +1399,6 @@
   return result;
 }
 
-// Read file line by line, if line is longer than bsize,
-// skip rest of line.
-int os::get_line_chars(int fd, char* buf, const size_t bsize){
-  size_t sz, i = 0;
-
-  // read until EOF, EOL or buf is full
-  while ((sz = (int) read(fd, &buf[i], 1)) == 1 && i < (bsize-2) && buf[i] != '\n') {
-     ++i;
-  }
-
-  if (buf[i] == '\n') {
-    // EOL reached so ignore EOL character and return
-
-    buf[i] = 0;
-    return (int) i;
-  }
-
-  buf[i+1] = 0;
-
-  if (sz != 1) {
-    // EOF reached. if we read chars before EOF return them and
-    // return EOF on next call otherwise return EOF
-
-    return (i == 0) ? -1 : (int) i;
-  }
-
-  // line is longer than size of buf, skip to EOL
-  char ch;
-  while (read(fd, &ch, 1) == 1 && ch != '\n') {
-    // Do nothing
-  }
-
-  // return initial part of line that fits in buf.
-  // If we reached EOF, it will be returned on next call.
-
-  return (int) i;
-}
-
 void os::initialize_initial_active_processor_count() {
   assert(_initial_active_processor_count == 0, "Initial active processor count already set.");
   _initial_active_processor_count = active_processor_count();
--- a/src/share/vm/runtime/os.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/os.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -202,7 +202,7 @@
   // Interface for detecting multiprocessor system
   static inline bool is_MP() {
     assert(_processor_count > 0, "invalid processor count");
-    return _processor_count > 1;
+    return _processor_count > 1 || AssumeMP;
   }
   static julong available_memory();
   static julong physical_memory();
@@ -650,6 +650,8 @@
   static void  free    (void *memblock, MEMFLAGS flags = mtNone);
   static bool  check_heap(bool force = false);      // verify C heap integrity
   static char* strdup(const char *, MEMFLAGS flags = mtInternal);  // Like strdup
+  // Like strdup, but exit VM when strdup() returns NULL
+  static char* strdup_check_oom(const char*, MEMFLAGS flags = mtInternal);
 
 #ifndef PRODUCT
   static julong num_mallocs;         // # of calls to malloc/realloc
@@ -745,10 +747,6 @@
   // Hook for os specific jvm options that we don't want to abort on seeing
   static bool obsolete_option(const JavaVMOption *option);
 
-  // Read file line by line. If line is longer than bsize,
-  // rest of line is skipped. Returns number of bytes read or -1 on EOF
-  static int get_line_chars(int fd, char *buf, const size_t bsize);
-
   // Extensions
 #include "runtime/os_ext.hpp"
 
@@ -781,6 +779,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "os_linux_x86.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "os_linux_aarch64.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "os_linux_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/registerMap.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/registerMap.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,6 +30,9 @@
 #ifdef TARGET_ARCH_x86
 # include "register_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "register_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "register_sparc.hpp"
 #endif
@@ -138,6 +141,9 @@
 #ifdef TARGET_ARCH_x86
 # include "registerMap_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "registerMap_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "registerMap_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/relocator.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/relocator.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,6 +30,9 @@
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "bytes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "bytes_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/safepoint.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/safepoint.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -55,6 +55,10 @@
 # include "nativeInst_x86.hpp"
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 # include "vmreg_sparc.inline.hpp"
--- a/src/share/vm/runtime/sharedRuntime.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -60,6 +60,10 @@
 # include "nativeInst_x86.hpp"
 # include "vmreg_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+# include "vmreg_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 # include "vmreg_sparc.inline.hpp"
@@ -80,6 +84,10 @@
 #include "c1/c1_Runtime1.hpp"
 #endif
 
+#ifdef BUILTIN_SIM
+#include "../../../../../../simulator/simulator.hpp"
+#endif
+
 // Shared stub locations
 RuntimeStub*        SharedRuntime::_wrong_method_blob;
 RuntimeStub*        SharedRuntime::_ic_miss_blob;
@@ -2467,7 +2475,25 @@
       CompileBroker::handle_full_code_cache();
       return NULL; // Out of CodeCache space
     }
+#ifdef BUILTIN_SIM
+    address old_i2c = entry->get_i2c_entry();
+    address old_c2i = entry->get_c2i_entry();
+    AArch64Simulator *sim =  (NotifySimulator ? AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck) : NULL);
+#endif
+
     entry->relocate(B->content_begin());
+
+#ifdef BUILTIN_SIM
+    if (NotifySimulator) {
+      address new_i2c = entry->get_i2c_entry();
+      address new_c2i = entry->get_c2i_entry();
+      long offset = new_i2c - old_i2c;
+      sim->notifyRelocate(old_i2c, offset);
+      offset = new_c2i - old_c2i;
+      sim->notifyRelocate(old_c2i, offset);
+    }
+#endif
+
 #ifndef PRODUCT
     // debugging suppport
     if (PrintAdapterHandlers || PrintStubCode) {
--- a/src/share/vm/runtime/stackValueCollection.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/stackValueCollection.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -27,6 +27,9 @@
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "jniTypes_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "jniTypes_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/statSampler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/statSampler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -36,6 +36,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vm_version_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vm_version_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vm_version_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/stubCodeGenerator.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/stubCodeGenerator.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,6 +30,9 @@
 #ifdef TARGET_ARCH_x86
 # include "assembler_x86.inline.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "assembler_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "assembler_sparc.inline.hpp"
 #endif
--- a/src/share/vm/runtime/stubRoutines.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/stubRoutines.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -156,7 +156,7 @@
     TraceTime timer("StubRoutines generation 1", TraceStartupTime);
     _code1 = BufferBlob::create("StubRoutines (1)", code_size1);
     if (_code1 == NULL) {
-      vm_exit_out_of_memory(code_size1, "CodeCache: no room for StubRoutines (1)");
+      vm_exit_out_of_memory(code_size1, OOM_MALLOC_ERROR, "CodeCache: no room for StubRoutines (1)");
     }
     CodeBuffer buffer(_code1);
     StubGenerator_generate(&buffer, false);
@@ -208,12 +208,16 @@
     TraceTime timer("StubRoutines generation 2", TraceStartupTime);
     _code2 = BufferBlob::create("StubRoutines (2)", code_size2);
     if (_code2 == NULL) {
-      vm_exit_out_of_memory(code_size2, "CodeCache: no room for StubRoutines (2)");
+      vm_exit_out_of_memory(code_size2, OOM_MALLOC_ERROR, "CodeCache: no room for StubRoutines (2)");
     }
     CodeBuffer buffer(_code2);
     StubGenerator_generate(&buffer, true);
   }
 
+  // this test cannot be run for AArch64 when using the builtin sim
+  // because the generated code is AArch64 called from AArch64 and
+  // with the builtin sim this test tries to call it as x86 code
+#ifndef BUILTIN_SIM
 #ifdef ASSERT
 
 #define TEST_ARRAYCOPY(type)                                                    \
@@ -290,6 +294,7 @@
   test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::aligned_disjoint_words), sizeof(jlong));
 
 #endif
+#endif
 }
 
 
--- a/src/share/vm/runtime/stubRoutines.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/stubRoutines.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -34,6 +34,9 @@
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "nativeInst_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "nativeInst_sparc.hpp"
 #endif
@@ -105,6 +108,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "stubRoutines_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "stubRoutines_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "stubRoutines_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/synchronizer.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/synchronizer.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1019,7 +1019,8 @@
         // We might be able to induce a STW safepoint and scavenge enough
         // objectMonitors to permit progress.
         if (temp == NULL) {
-            vm_exit_out_of_memory (sizeof (ObjectMonitor[_BLOCKSIZE]), "Allocate ObjectMonitors") ;
+            vm_exit_out_of_memory (sizeof (ObjectMonitor[_BLOCKSIZE]), OOM_MALLOC_ERROR,
+                                   "Allocate ObjectMonitors");
         }
 
         // Format the block.
--- a/src/share/vm/runtime/thread.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/thread.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1034,7 +1034,7 @@
   address last_Java_pc(void)                         { return _anchor.last_Java_pc(); }
 
   // Safepoint support
-#if defined(PPC64)
+#if (defined(PPC64) || defined(AARCH64))
   // Use membars when accessing volatile _thread_state. See
   // Threads::create_vm() for size checks.
   JavaThreadState thread_state() const           {
@@ -1699,6 +1699,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "thread_linux_x86.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "thread_linux_aarch64.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "thread_linux_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/threadLocalStorage.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/threadLocalStorage.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -47,6 +47,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "threadLS_linux_x86.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "threadLS_linux_aarch64.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "threadLS_linux_sparc.hpp"
 #endif
--- a/src/share/vm/runtime/vframe.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/vframe.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -146,8 +146,7 @@
   if (obj.not_null()) {
     st->print("\t- %s <" INTPTR_FORMAT "> ", lock_state, (address)obj());
     if (obj->klass() == SystemDictionary::Class_klass()) {
-      klassOop target_klass = java_lang_Class::as_klassOop(obj());
-      st->print_cr("(a java.lang.Class for %s)", instanceKlass::cast(target_klass)->external_name());
+      st->print_cr("(a java.lang.Class for %s)", java_lang_Class::as_external_name(obj()));
     } else {
       Klass* k = Klass::cast(obj->klass());
       st->print_cr("(a %s)", k->external_name());
--- a/src/share/vm/runtime/vframeArray.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/vframeArray.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -471,7 +471,7 @@
   // Copy registers for callee-saved registers
   if (reg_map != NULL) {
     for(int i = 0; i < RegisterMap::reg_count; i++) {
-#ifdef AMD64
+#if defined(AMD64) || defined(AARCH64)
       // The register map has one entry for every int (32-bit value), so
       // 64-bit physical registers have two entries in the map, one for
       // each half.  Ignore the high halves of 64-bit registers, just like
--- a/src/share/vm/runtime/vmStructs.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/vmStructs.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -51,6 +51,7 @@
 #include "interpreter/bytecodeInterpreter.hpp"
 #include "interpreter/bytecodes.hpp"
 #include "interpreter/interpreter.hpp"
+#include "memory/allocation.inline.hpp"
 #include "memory/cardTableRS.hpp"
 #include "memory/compactPermGen.hpp"
 #include "memory/defNewGeneration.hpp"
@@ -102,6 +103,7 @@
 #include "runtime/globals.hpp"
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
+#include "runtime/os.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/serviceThread.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -113,6 +115,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vmStructs_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vmStructs_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vmStructs_sparc.hpp"
 #endif
@@ -143,6 +148,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "vmStructs_linux_x86.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "vmStructs_linux_aarch64.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "vmStructs_linux_sparc.hpp"
 #endif
@@ -215,6 +223,9 @@
 #ifdef TARGET_ARCH_MODEL_x86_64
 # include "adfiles/adGlobals_x86_64.hpp"
 #endif
+#ifdef TARGET_ARCH_MODEL_aarch64
+# include "adfiles/adGlobals_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_MODEL_sparc
 # include "adfiles/adGlobals_sparc.hpp"
 #endif
@@ -834,6 +845,10 @@
   /***********************************/                                                                                              \
                                                                                                                                      \
      static_field(StubRoutines,                _call_stub_return_address,                     address)                               \
+  AARCH64_ONLY(NOT_ZERO(                                                                                                             \
+     static_field(StubRoutines,                _updateBytesCRC32,                             address)                               \
+     static_field(StubRoutines,                _crc_table_adr,                                address)                               \
+  ))                                                                                                                                 \
                                                                                                                                      \
   /***************************************/                                                                                          \
   /* PcDesc and other compiled code info */                                                                                          \
@@ -1115,6 +1130,7 @@
   c2_nonstatic_field(Compile,            _save_argument_registers, const bool)                                                       \
   c2_nonstatic_field(Compile,            _subsume_loads,           const bool)                                                       \
   c2_nonstatic_field(Compile,            _do_escape_analysis,      const bool)                                                       \
+  c2_nonstatic_field(Compile,            _eliminate_boxing,        const bool)                                                       \
   c2_nonstatic_field(Compile,            _ilt,                     InlineTree*)                                                      \
                                                                                                                                      \
   c2_nonstatic_field(InlineTree,         _caller_jvms,             JVMState*)                                                        \
@@ -1160,10 +1176,10 @@
                                                                                                                                      \
   c2_nonstatic_field(MachCallRuntimeNode,  _name,                  const char*)                                                      \
                                                                                                                                      \
-  c2_nonstatic_field(PhaseCFG,           _num_blocks,              uint)                                                             \
+  c2_nonstatic_field(PhaseCFG,           _number_of_blocks,        uint)                                                             \
   c2_nonstatic_field(PhaseCFG,           _blocks,                  Block_List)                                                       \
-  c2_nonstatic_field(PhaseCFG,           _bbs,                     Block_Array)                                                      \
-  c2_nonstatic_field(PhaseCFG,           _broot,                   Block*)                                                           \
+  c2_nonstatic_field(PhaseCFG,           _node_to_block_mapping,   Block_Array)                                                      \
+  c2_nonstatic_field(PhaseCFG,           _root_block,              Block*)                                                           \
                                                                                                                                      \
   c2_nonstatic_field(PhaseRegAlloc,      _node_regs,               OptoRegPair*)                                                     \
   c2_nonstatic_field(PhaseRegAlloc,      _node_regs_max_index,     uint)                                                             \
@@ -1176,7 +1192,6 @@
   c2_nonstatic_field(PhaseChaitin,       _lo_stk_degree,           uint)                                                             \
   c2_nonstatic_field(PhaseChaitin,       _hi_degree,               uint)                                                             \
   c2_nonstatic_field(PhaseChaitin,       _simplified,              uint)                                                             \
-  c2_nonstatic_field(PhaseChaitin,       _maxlrg,                  uint)                                                             \
                                                                                                                                      \
   c2_nonstatic_field(Block,              _nodes,                   Node_List)                                                        \
   c2_nonstatic_field(Block,              _succs,                   Block_Array)                                                      \
@@ -3186,10 +3201,10 @@
     s[len-1] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
-      delete s;
+      delete [] s;
       return 1;
     }
-    delete s;
+    delete [] s;
   }
   if (strstr(typeName, "GrowableArray<") == typeName) {
     const char * start = typeName + strlen("GrowableArray<");
@@ -3200,10 +3215,10 @@
     s[len-1] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
-      delete s;
+      delete [] s;
       return 1;
     }
-    delete s;
+    delete [] s;
   }
   if (strstr(typeName, "const ") == typeName) {
     const char * s = typeName + strlen("const ");
@@ -3213,12 +3228,14 @@
     }
   }
   if (strstr(typeName, " const") == typeName + len - 6) {
-    char * s = strdup(typeName);
+    char * s = os::strdup_check_oom(typeName);
     s[len - 6] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
+      os::free(s);
       return 1;
     }
+    os::free(s);
   }
   if (!isRecurse) {
     tty->print_cr("type \"%s\" not found", typeName);
--- a/src/share/vm/runtime/vm_version.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/runtime/vm_version.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -29,6 +29,9 @@
 #ifdef TARGET_ARCH_x86
 # include "vm_version_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "vm_version_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "vm_version_sparc.hpp"
 #endif
@@ -187,6 +190,7 @@
                  ARM_ONLY("arm")                 \
                  PPC32_ONLY("ppc")               \
                  PPC64_ONLY("ppc64")             \
+                 AARCH64_ONLY("aarch64")         \
                  SPARC_ONLY("sparc")
 #endif // ZERO
 
--- a/src/share/vm/services/management.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/services/management.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1876,7 +1876,7 @@
   ResourceMark rm(THREAD); // thread->name() uses ResourceArea
 
   assert(thread->name() != NULL, "All threads should have a name");
-  _names_chars[_count] = strdup(thread->name());
+  _names_chars[_count] = os::strdup(thread->name());
   _times->long_at_put(_count, os::is_thread_cpu_time_supported() ?
                         os::thread_cpu_time(thread) : -1);
   _count++;
@@ -1894,7 +1894,7 @@
 
 ThreadTimesClosure::~ThreadTimesClosure() {
   for (int i = 0; i < _count; i++) {
-    free(_names_chars[i]);
+    os::free(_names_chars[i]);
   }
   FREE_C_HEAP_ARRAY(char *, _names_chars, mtInternal);
 }
--- a/src/share/vm/shark/sharkBuilder.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/shark/sharkBuilder.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -503,7 +503,7 @@
   const char *name;
   if (value->hasName())
     // XXX this leaks, but it's only debug code
-    name = strdup(value->getName().str().c_str());
+    name = os::strdup(value->getName().str().c_str());
   else
     name = "unnamed_value";
 
--- a/src/share/vm/shark/sharkCompiler.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/shark/sharkCompiler.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2008, 2009, 2010, 2011 Red Hat, Inc.
+ * Copyright 2008, 2009, 2010, 2011, 2012 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -213,7 +213,7 @@
                        this,
                        env->comp_level(),
                        false,
-                       false);
+		       false);
 }
 
 nmethod* SharkCompiler::generate_native_wrapper(MacroAssembler* masm,
@@ -319,7 +319,7 @@
   // finish with the exception of the VM thread, so we can consider
   // ourself the owner of the execution engine lock even though we
   // can't actually acquire it at this time.
-  assert(Thread::current()->is_VM_thread(), "must be called by VM thread");
+  assert(JavaThread::current()->thread_state() == _thread_in_vm, "must run in vm mode");
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
 
   SharkEntry *entry = (SharkEntry *) code;
--- a/src/share/vm/shark/shark_globals.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/shark/shark_globals.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -40,6 +40,12 @@
   product(intx, SharkMaxInlineSize, 32,                                       \
           "Maximum bytecode size of methods to inline when using Shark")      \
                                                                               \
+  product(bool, EliminateNestedLocks, true,                                   \
+          "Eliminate nested locks of the same object when possible")          \
+                                                                              \
+  product(ccstr, SharkOptimizationLevel, "Default",                           \
+          "The optimization level passed to LLVM, possible values: None, Less, Default and Agressive") \
+                                                                              \
   /* compiler debugging */                                                    \
   develop(ccstr, SharkPrintTypeflowOf, NULL,                                  \
           "Print the typeflow of the specified method")                       \
@@ -58,6 +64,10 @@
                                                                               \
   diagnostic(bool, SharkPerformanceWarnings, false,                           \
           "Warn about things that could be made faster")                      \
+                                                                              \
+  develop(ccstr, SharkVerifyFunction, NULL,                                   \
+          "Runs LLVM verify over LLVM IR")                                    \
+
 
 SHARK_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
--- a/src/share/vm/trace/trace.dtd	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/trace/trace.dtd	Mon Apr 13 16:44:26 2020 +0100
@@ -45,9 +45,6 @@
                         jvm_type       CDATA #IMPLIED
                         builtin_type   CDATA #IMPLIED>
 <!ATTLIST struct_type   id             CDATA #REQUIRED>
-<!ATTLIST structarray   type           CDATA #REQUIRED
-                        field          CDATA #REQUIRED
-                        label          CDATA #REQUIRED>
 <!ATTLIST primary_type  symbol         CDATA #REQUIRED
                         datatype       CDATA #REQUIRED
                         contenttype    CDATA #REQUIRED
--- a/src/share/vm/utilities/bitMap.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/bitMap.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -80,7 +80,7 @@
 
   // Set a word to a specified value or to all ones; clear a word.
   void set_word  (idx_t word, bm_word_t val) { _map[word] = val; }
-  void set_word  (idx_t word)            { set_word(word, ~(uintptr_t)0); }
+  void set_word  (idx_t word)            { set_word(word, ~(idx_t)0); }
   void clear_word(idx_t word)            { _map[word] = 0; }
 
   // Utilities for ranges of bits.  Ranges are half-open [beg, end).
--- a/src/share/vm/utilities/bitMap.inline.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/bitMap.inline.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -52,16 +52,16 @@
 
 inline bool BitMap::par_set_bit(idx_t bit) {
   verify_index(bit);
-  volatile idx_t* const addr = word_addr(bit);
-  const idx_t mask = bit_mask(bit);
-  idx_t old_val = *addr;
+  volatile bm_word_t* const addr = word_addr(bit);
+  const bm_word_t mask = bit_mask(bit);
+  bm_word_t old_val = *addr;
 
   do {
-    const idx_t new_val = old_val | mask;
+    const bm_word_t new_val = old_val | mask;
     if (new_val == old_val) {
       return false;     // Someone else beat us to it.
     }
-    const idx_t cur_val = (idx_t) Atomic::cmpxchg_ptr((void*) new_val,
+    const bm_word_t cur_val = (bm_word_t) Atomic::cmpxchg_ptr((void*) new_val,
                                                       (volatile void*) addr,
                                                       (void*) old_val);
     if (cur_val == old_val) {
@@ -73,16 +73,16 @@
 
 inline bool BitMap::par_clear_bit(idx_t bit) {
   verify_index(bit);
-  volatile idx_t* const addr = word_addr(bit);
-  const idx_t mask = ~bit_mask(bit);
-  idx_t old_val = *addr;
+  volatile bm_word_t* const addr = word_addr(bit);
+  const bm_word_t mask = ~bit_mask(bit);
+  bm_word_t old_val = *addr;
 
   do {
-    const idx_t new_val = old_val & mask;
+    const bm_word_t new_val = old_val & mask;
     if (new_val == old_val) {
       return false;     // Someone else beat us to it.
     }
-    const idx_t cur_val = (idx_t) Atomic::cmpxchg_ptr((void*) new_val,
+    const bm_word_t cur_val = (bm_word_t) Atomic::cmpxchg_ptr((void*) new_val,
                                                       (volatile void*) addr,
                                                       (void*) old_val);
     if (cur_val == old_val) {
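
The bitMap changes make the compare-and-swap operate on the map's storage word type (bm_word_t) instead of the index type (idx_t), which need not have the same width. A self-contained sketch of the same retry pattern, written with std::atomic purely for illustration:

    #include <atomic>
    #include <cstdint>

    // Set one bit in a shared word; return false if another thread already set it.
    inline bool sketch_par_set_bit(std::atomic<uintptr_t>* word, unsigned bit) {
      const uintptr_t mask = uintptr_t(1) << bit;
      uintptr_t old_val = word->load();
      do {
        if (old_val & mask) return false;   // someone else beat us to it
      } while (!word->compare_exchange_weak(old_val, old_val | mask));
      return true;
    }
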
--- a/src/share/vm/utilities/copy.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/copy.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -335,6 +335,9 @@
 #ifdef TARGET_ARCH_x86
 # include "copy_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "copy_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "copy_sparc.hpp"
 #endif
--- a/src/share/vm/utilities/debug.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/debug.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -240,7 +240,7 @@
 static jint _exiting_out_of_mem = 0;
 
 void report_vm_out_of_memory(const char* file, int line, size_t size,
-                             const char* message) {
+                             VMErrorType vm_err_type, const char* message) {
   if (Debugging) return;
 
   // We try to gather additional information for the first out of memory
@@ -253,7 +253,7 @@
 
   if (first_time_here) {
     Thread* thread = ThreadLocalStorage::get_thread_slow();
-    VMError(thread, file, line, size, message).report_and_die();
+    VMError(thread, file, line, size, vm_err_type, message).report_and_die();
   }
 
   // Dump core and abort
@@ -317,6 +317,16 @@
       VMError err(message);
       err.report_java_out_of_memory();
     }
+
+    if (CrashOnOutOfMemoryError) {
+      tty->print_cr("Aborting due to java.lang.OutOfMemoryError: %s", message);
+      fatal(err_msg("OutOfMemory encountered: %s", message));
+    }
+
+    if (ExitOnOutOfMemoryError) {
+      tty->print_cr("Terminating due to java.lang.OutOfMemoryError: %s", message);
+      exit(3);
+    }
   }
 }
 
@@ -366,7 +376,7 @@
                            msg, eol, msg, eol, msg, eol, msg, eol, msg, eol,
                            msg, eol, msg, eol, msg, eol, msg, eol, msg, eol,
                            msg, eol, msg, eol, msg, eol, msg, eol, msg));
-    case  8: vm_exit_out_of_memory(num, "ChunkPool::allocate");
+    case  8: vm_exit_out_of_memory(num, OOM_MALLOC_ERROR, "ChunkPool::allocate");
     case  9: ShouldNotCallThis();
     case 10: ShouldNotReachHere();
     case 11: Unimplemented();
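
For reference, the two new blocks in report_java_out_of_memory above implement the backported -XX:+CrashOnOutOfMemoryError and -XX:+ExitOnOutOfMemoryError switches: the first turns the first java.lang.OutOfMemoryError into a fatal VM error (producing an hs_err report), the second terminates the process with exit status 3.
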
--- a/src/share/vm/utilities/debug.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/debug.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -174,9 +174,9 @@
 } while (0)
 
 // out of memory
-#define vm_exit_out_of_memory(size, msg)                                     \
+#define vm_exit_out_of_memory(size, vm_err_type, msg)                        \
 do {                                                                         \
-  report_vm_out_of_memory(__FILE__, __LINE__, size, msg);                    \
+  report_vm_out_of_memory(__FILE__, __LINE__, size, vm_err_type, msg);       \
   BREAKPOINT;                                                                \
 } while (0)
 
@@ -204,12 +204,20 @@
   BREAKPOINT;                                                                \
 } while (0);
 
+
+// types of VM error - originally in vmError.hpp
+enum VMErrorType {
+  INTERNAL_ERROR   = 0xe0000000,
+  OOM_MALLOC_ERROR = 0xe0000001,
+  OOM_MMAP_ERROR   = 0xe0000002
+};
+
 // error reporting helper functions
 void report_vm_error(const char* file, int line, const char* error_msg,
                      const char* detail_msg = NULL);
 void report_fatal(const char* file, int line, const char* message);
 void report_vm_out_of_memory(const char* file, int line, size_t size,
-                             const char* message);
+                             VMErrorType vm_err_type, const char* message);
 void report_should_not_call(const char* file, int line);
 void report_should_not_reach_here(const char* file, int line);
 void report_unimplemented(const char* file, int line);
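
With the widened interface, every vm_exit_out_of_memory call site now says whether the failed allocation was a malloc-style C-heap allocation or an mmap/virtual-memory reservation, and the error reporter words the hs_err summary accordingly. A hedged illustration of the two forms (the buffer names are made up):

    // C-heap allocation failure
    char* buf = (char*) os::malloc(size, mtInternal);
    if (buf == NULL) {
      vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "scratch buffer");
    }

    // Virtual memory reservation failure
    char* base = os::reserve_memory(size);
    if (base == NULL) {
      vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "scratch mapping");
    }
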
--- a/src/share/vm/utilities/elfFile.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/elfFile.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -261,7 +261,12 @@
       }
     }
   }
+// x86 defaults to execstack, AARCH64 defaults to noexecstack
+#ifdef AARCH64
+  return true;
+#else
   return false;
+#endif
 }
 #endif
 
--- a/src/share/vm/utilities/globalDefinitions.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -343,9 +343,17 @@
 
 // Machine dependent stuff
 
+// The maximum size of the code cache.  Can be overridden by targets.
+#define CODE_CACHE_SIZE_LIMIT (2*G)
+// Allow targets to reduce the default size of the code cache.
+#define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT
+
 #ifdef TARGET_ARCH_x86
 # include "globalDefinitions_x86.hpp"
 #endif
+#ifdef TARGET_ARCH_aarch64
+# include "globalDefinitions_aarch64.hpp"
+#endif
 #ifdef TARGET_ARCH_sparc
 # include "globalDefinitions_sparc.hpp"
 #endif
@@ -1283,4 +1291,30 @@
 
 #define ARRAY_SIZE(array) (sizeof(array)/sizeof((array)[0]))
 
+//----------------------------------------------------------------------------------------------------
+// Sum and product which can never overflow: they wrap, just like the
+// Java operations.  Note that we don't intend these to be used for
+// general-purpose arithmetic: their purpose is to emulate Java
+// operations.
+
+// The goal of this code is to avoid undefined or implementation-defined
+// behaviour.  The use of an lvalue to reference cast is explicitly
+// permitted by Lvalues and rvalues [basic.lval].  [Section 3.10 Para
+// 15 in C++03]
+#define JAVA_INTEGER_OP(OP, NAME, TYPE, UNSIGNED_TYPE)  \
+inline TYPE NAME (TYPE in1, TYPE in2) {                 \
+  UNSIGNED_TYPE ures = static_cast<UNSIGNED_TYPE>(in1); \
+  ures OP ## = static_cast<UNSIGNED_TYPE>(in2);         \
+  return reinterpret_cast<TYPE&>(ures);                 \
+}
+
+JAVA_INTEGER_OP(+, java_add, jint, juint)
+JAVA_INTEGER_OP(-, java_subtract, jint, juint)
+JAVA_INTEGER_OP(*, java_multiply, jint, juint)
+JAVA_INTEGER_OP(+, java_add, jlong, julong)
+JAVA_INTEGER_OP(-, java_subtract, jlong, julong)
+JAVA_INTEGER_OP(*, java_multiply, jlong, julong)
+
+#undef JAVA_INTEGER_OP
+
 #endif // SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP
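
The JAVA_INTEGER_OP helpers above do their arithmetic on the unsigned type, so overflow is well defined and the result wraps exactly like the corresponding Java bytecode. Two illustrative values (not taken from the patch):

    jint  a = java_add(0x7fffffff, 1);                    // wraps to 0x80000000, i.e. Integer.MIN_VALUE
    jlong b = java_multiply((jlong)1 << 62, (jlong)4);    // 2^64 wraps to 0
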
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -44,14 +44,6 @@
 #endif // SOLARIS
 
 #include <math.h>
-#ifndef FP_PZERO
-// Linux doesn't have positive/negative zero
-#define FP_PZERO FP_ZERO
-#endif
-#if (!defined fpclass) && ((!defined SPARC) || (!defined SOLARIS))
-#define fpclass fpclassify
-#endif
-
 #include <time.h>
 #include <fcntl.h>
 #include <dlfcn.h>
--- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -46,15 +46,6 @@
 # include <ieeefp.h>
 #endif
 # include <math.h>
-#ifdef LINUX
-#ifndef FP_PZERO
-  // Linux doesn't have positive/negative zero
-  #define FP_PZERO FP_ZERO
-#endif
-#ifndef fpclass
-  #define fpclass fpclassify
-#endif
-#endif
 # include <time.h>
 # include <fcntl.h>
 # include <dlfcn.h>
--- a/src/share/vm/utilities/globalDefinitions_xlc.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/globalDefinitions_xlc.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -41,14 +41,6 @@
 #include <wchar.h>
 
 #include <math.h>
-#ifndef FP_PZERO
-// Linux doesn't have positive/negative zero
-#define FP_PZERO FP_ZERO
-#endif
-#if (!defined fpclass)
-#define fpclass fpclassify
-#endif
-
 #include <time.h>
 #include <fcntl.h>
 #include <dlfcn.h>
--- a/src/share/vm/utilities/macros.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/macros.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -219,6 +219,14 @@
 #define NOT_AMD64(code) code
 #endif
 
+#ifdef AARCH64
+#define AARCH64_ONLY(code) code
+#define NOT_AARCH64(code)
+#else
+#define AARCH64_ONLY(code)
+#define NOT_AARCH64(code) code
+#endif
+
 #ifdef SPARC
 #define SPARC_ONLY(code) code
 #define NOT_SPARC(code)
@@ -234,7 +242,11 @@
 #define PPC_ONLY(code) code
 #define NOT_PPC(code)
 #else
-#undef PPC
+
+#ifdef PPC
+#error  "PPC is either 32- or 64-bit."
+#endif
+
 #define PPC_ONLY(code)
 #define NOT_PPC(code) code
 #endif
@@ -272,6 +284,14 @@
 #define NOT_ARM(code) code
 #endif
 
+#ifdef AARCH64
+#define AARCH64_ONLY(code) code
+#define NOT_AARCH64(code)
+#else
+#define AARCH64_ONLY(code)
+#define NOT_AARCH64(code) code
+#endif
+
 #ifdef JAVASE_EMBEDDED
 #define EMBEDDED_ONLY(code) code
 #define NOT_EMBEDDED(code)
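
AARCH64_ONLY and NOT_AARCH64 follow the existing *_ONLY / NOT_* convention: the argument expands only in the matching build and to nothing otherwise, which is how vm_version.cpp above selects the "aarch64" architecture string. A one-line usage sketch (hypothetical variable):

    const char* isa = AARCH64_ONLY("aarch64") NOT_AARCH64("not-aarch64");
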
--- a/src/share/vm/utilities/ostream.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/ostream.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -111,7 +111,7 @@
   }
   if (add_cr) {
     if (result != buffer) {
-      strncpy(buffer, result, buflen);
+      memcpy(buffer, result, result_len);
       result = buffer;
     }
     buffer[result_len++] = '\n';
@@ -325,15 +325,19 @@
       }
       char* oldbuf = buffer;
       buffer = NEW_RESOURCE_ARRAY(char, end);
-      strncpy(buffer, oldbuf, buffer_pos);
+      if (buffer_pos > 0) {
+        memcpy(buffer, oldbuf, buffer_pos);
+      }
       buffer_length = end;
     }
   }
   // invariant: buffer is always null-terminated
   guarantee(buffer_pos + write_len + 1 <= buffer_length, "stringStream oob");
-  buffer[buffer_pos + write_len] = 0;
-  strncpy(buffer + buffer_pos, s, write_len);
-  buffer_pos += write_len;
+  if (write_len > 0) {
+    buffer[buffer_pos + write_len] = 0;
+    memcpy(buffer + buffer_pos, s, write_len);
+    buffer_pos += write_len;
+  }
 
   // Note that the following does not depend on write_len.
   // This means that position and count get updated
@@ -1387,7 +1391,7 @@
   server.sin_port = htons(port);
 
   server.sin_addr.s_addr = inet_addr(ip);
-  if (server.sin_addr.s_addr == (uint32_t)-1) {
+  if (server.sin_addr.s_addr == (in_addr_t)-1) {
     struct hostent* host = os::get_host_by_name((char*)ip);
     if (host != NULL) {
       memcpy(&server.sin_addr, host->h_addr_list[0], host->h_length);
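
The ostream.cpp hunks replace strncpy with memcpy wherever the copy length is already known exactly; strncpy would zero-pad short copies and can leave the destination unterminated when the source fills the buffer. The resulting pattern, in sketch form:

    // Copy exactly write_len bytes, then terminate explicitly.
    memcpy(buffer + buffer_pos, s, write_len);
    buffer[buffer_pos + write_len] = '\0';
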
--- a/src/share/vm/utilities/taskqueue.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/taskqueue.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -32,6 +32,9 @@
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "orderAccess_linux_aarch64.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_linux_sparc
 # include "orderAccess_linux_sparc.inline.hpp"
 #endif
--- a/src/share/vm/utilities/vmError.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/vmError.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -102,7 +102,7 @@
                  const char* message, const char * detail_msg)
 {
   _thread = thread;
-  _id = internal_error;     // Value that's not an OS exception/signal
+  _id = INTERNAL_ERROR;     // Value that's not an OS exception/signal
   _filename = filename;
   _lineno = lineno;
   _message = message;
@@ -121,9 +121,9 @@
 
 // Constructor for OOM errors
 VMError::VMError(Thread* thread, const char* filename, int lineno, size_t size,
-                 const char* message) {
+                 VMErrorType vm_err_type, const char* message) {
     _thread = thread;
-    _id = oom_error;     // Value that's not an OS exception/signal
+    _id = vm_err_type; // Value that's not an OS exception/signal
     _filename = filename;
     _lineno = lineno;
     _message = message;
@@ -144,7 +144,7 @@
 // Constructor for non-fatal errors
 VMError::VMError(const char* message) {
     _thread = NULL;
-    _id = internal_error;     // Value that's not an OS exception/signal
+    _id = INTERNAL_ERROR;     // Value that's not an OS exception/signal
     _filename = NULL;
     _lineno = 0;
     _message = message;
@@ -196,7 +196,8 @@
 
 static void print_bug_submit_message(outputStream *out, Thread *thread) {
   if (out == NULL) return;
-  out->print_raw_cr("# If you would like to submit a bug report, please visit:");
+  out->print_raw_cr("# If you would like to submit a bug report, please include");
+  out->print_raw_cr("# instructions on how to reproduce the bug and visit:");
   out->print_raw   ("#   ");
   out->print_raw_cr(Arguments::java_vendor_url_bug());
   // If the crash is in native code, encourage user to submit a bug to the
@@ -257,6 +258,19 @@
   return buf;
 }
 
+#ifdef PRODUCT
+extern "C" void _ps() {
+  fdStream out(defaultStream::output_fd());
+  JavaThread* thread = JavaThread::active();
+  char *buf = new char[1024*1024];
+  VMError err(thread, "", 0, "", "");
+
+  err.print_stack_trace(&out, thread, buf, 1024*1024, true);
+
+  delete[] buf;
+}
+#endif // PRODUCT
+
 void VMError::print_stack_trace(outputStream* st, JavaThread* jt,
                                 char* buf, int buflen, bool verbose) {
 #ifdef ZERO
@@ -353,9 +367,12 @@
   STEP(15, "(printing type of error)")
 
      switch(_id) {
-       case oom_error:
+       case OOM_MALLOC_ERROR:
+       case OOM_MMAP_ERROR:
          if (_size) {
-           st->print("# Native memory allocation (malloc) failed to allocate ");
+           st->print("# Native memory allocation ");
+           st->print((_id == (int)OOM_MALLOC_ERROR) ? "(malloc) failed to allocate " :
+                                                 "(mmap) failed to map ");
            jio_snprintf(buf, sizeof(buf), SIZE_FORMAT, _size);
            st->print("%s", buf);
            st->print(" bytes");
@@ -388,7 +405,7 @@
            return;  // that's enough for the screen
          }
          break;
-       case internal_error:
+       case INTERNAL_ERROR:
        default:
          break;
      }
@@ -417,14 +434,7 @@
 #else
          const char *file = _filename;
 #endif
-         size_t len = strlen(file);
-         size_t buflen = sizeof(buf);
-
-         strncpy(buf, file, buflen);
-         if (len + 10 < buflen) {
-           sprintf(buf + len, ":%d", _lineno);
-         }
-         st->print(" (%s)", buf);
+         st->print(" (%s:%d)", file, _lineno);
        } else {
          st->print(" (0x%x)", _id);
        }
@@ -466,6 +476,13 @@
                    UseCompressedOops ? "compressed oops" : ""
                  );
 
+#ifdef DERIVATIVE_ID
+     st->print_cr("# Derivative: %s", DERIVATIVE_ID);
+#endif
+#ifdef DISTRIBUTION_ID
+     st->print_cr("# Distribution: %s", DISTRIBUTION_ID);
+#endif
+
   STEP(60, "(printing problematic frame)")
 
      // Print current frame if we have a context (i.e. it's a crash)
--- a/src/share/vm/utilities/vmError.hpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/vmError.hpp	Mon Apr 13 16:44:26 2020 +0100
@@ -30,14 +30,14 @@
 class Decoder;
 class VM_ReportJavaOutOfMemory;
 
+#ifdef PRODUCT
+extern "C" void _ps();
+#endif // PRODUCT
+
 class VMError : public StackObj {
   friend class VM_ReportJavaOutOfMemory;
   friend class Decoder;
 
-  enum ErrorType {
-    internal_error = 0xe0000000,
-    oom_error      = 0xe0000001
-  };
   int          _id;          // Solaris/Linux signals: 0 - SIGRTMAX
                              // Windows exceptions: 0xCxxxxxxx system errors
                              //                     0x8xxxxxxx system warnings
@@ -96,9 +96,16 @@
   // accessor
   const char* message() const    { return _message; }
   const char* detail_msg() const { return _detail_msg; }
-  bool should_report_bug(unsigned int id) { return id != oom_error; }
+  bool should_report_bug(unsigned int id) {
+    return (id != OOM_MALLOC_ERROR) && (id != OOM_MMAP_ERROR);
+  }
+
+#ifdef PRODUCT
+  friend void _ps();
+#endif // PRODUCT
 
 public:
+
   // Constructor for crashes
   VMError(Thread* thread, unsigned int sig, address pc, void* siginfo,
           void* context);
@@ -108,7 +115,7 @@
 
   // Constructor for VM OOM errors
   VMError(Thread* thread, const char* filename, int lineno, size_t size,
-          const char* message);
+          VMErrorType vm_err_type, const char* message);
   // Constructor for non-fatal errors
   VMError(const char* message);
 
--- a/src/share/vm/utilities/workgroup.cpp	Mon Apr 13 06:13:18 2020 +0100
+++ b/src/share/vm/utilities/workgroup.cpp	Mon Apr 13 16:44:26 2020 +0100
@@ -79,7 +79,7 @@
   }
   _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers(), mtInternal);
   if (gang_workers() == NULL) {
-    vm_exit_out_of_memory(0, "Cannot create GangWorker array.");
+    vm_exit_out_of_memory(0, OOM_MALLOC_ERROR, "Cannot create GangWorker array.");
     return false;
   }
   os::ThreadType worker_type;
@@ -93,7 +93,8 @@
     assert(new_worker != NULL, "Failed to allocate GangWorker");
     _gang_workers[worker] = new_worker;
     if (new_worker == NULL || !os::create_thread(new_worker, worker_type)) {
-      vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources.");
+      vm_exit_out_of_memory(0, OOM_MALLOC_ERROR,
+              "Cannot create worker GC thread. Out of system resources.");
       return false;
     }
     if (!DisableStartThread) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestByteBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestByteBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestByteBoxing.dummy -XX:CompileCommand=exclude,TestByteBoxing.foo -XX:CompileCommand=exclude,TestByteBoxing.foob TestByteBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestByteBoxing.dummy -XX:CompileCommand=exclude,TestByteBoxing.foo -XX:CompileCommand=exclude,TestByteBoxing.foob TestByteBoxing
+ *
+ */
+
+public class TestByteBoxing {
+
+  static final Byte ibc = new Byte((byte)1);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void dummy()      { }
+  static byte foo(byte i)  { return i; }
+  static Byte foob(byte i) { return Byte.valueOf(i); }
+
+
+  static byte simple(byte i) {
+    Byte ib = new Byte(i);
+    return ib;
+  }
+
+  static byte simpleb(byte i) {
+    Byte ib = Byte.valueOf(i);
+    return ib;
+  }
+
+  static byte simplec() {
+    Byte ib = ibc;
+    return ib;
+  }
+
+  static byte simplef(byte i) {
+    Byte ib = foob(i);
+    return ib;
+  }
+
+  static byte simplep(Byte ib) {
+    return ib;
+  }
+
+  static byte simple2(byte i) {
+    Byte ib1 = new Byte(i);
+    Byte ib2 = new Byte((byte)(i+1));
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte simpleb2(byte i) {
+    Byte ib1 = Byte.valueOf(i);
+    Byte ib2 = Byte.valueOf((byte)(i+1));
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte simplem2(byte i) {
+    Byte ib1 = new Byte(i);
+    Byte ib2 = Byte.valueOf((byte)(i+1));
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte simplep2(byte i, Byte ib1) {
+    Byte ib2 = Byte.valueOf((byte)(i+1));
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte simplec2(byte i) {
+    Byte ib1 = ibc;
+    Byte ib2 = Byte.valueOf((byte)(i+1));
+    return (byte)(ib1 + ib2);
+  }
+
+  //===============================================
+  static byte test(byte i) {
+    Byte ib = new Byte(i);
+    if ((i&1) == 0)
+      ib = (byte)(i+1);
+    return ib;
+  }
+
+  static byte testb(byte i) {
+    Byte ib = i;
+    if ((i&1) == 0)
+      ib = (byte)(i+1);
+    return ib;
+  }
+
+  static byte testm(byte i) {
+    Byte ib = i;
+    if ((i&1) == 0)
+      ib = new Byte((byte)(i+1));
+    return ib;
+  }
+
+  static byte testp(byte i, Byte ib) {
+    if ((i&1) == 0)
+      ib = new Byte((byte)(i+1));
+    return ib;
+  }
+
+  static byte testc(byte i) {
+    Byte ib = ibc;
+    if ((i&1) == 0)
+      ib = new Byte((byte)(i+1));
+    return ib;
+  }
+
+  static byte test2(byte i) {
+    Byte ib1 = new Byte(i);
+    Byte ib2 = new Byte((byte)(i+1));
+    if ((i&1) == 0) {
+      ib1 = new Byte((byte)(i+1));
+      ib2 = new Byte((byte)(i+2));
+    }
+    return (byte)(ib1+ib2);
+  }
+
+  static byte testb2(byte i) {
+    Byte ib1 = i;
+    Byte ib2 = (byte)(i+1);
+    if ((i&1) == 0) {
+      ib1 = (byte)(i+1);
+      ib2 = (byte)(i+2);
+    }
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte testm2(byte i) {
+    Byte ib1 = new Byte(i);
+    Byte ib2 = (byte)(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Byte((byte)(i+1));
+      ib2 = (byte)(i+2);
+    }
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte testp2(byte i, Byte ib1) {
+    Byte ib2 = (byte)(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Byte((byte)(i+1));
+      ib2 = (byte)(i+2);
+    }
+    return (byte)(ib1 + ib2);
+  }
+
+  static byte testc2(byte i) {
+    Byte ib1 = ibc;
+    Byte ib2 = (byte)(i+1);
+    if ((i&1) == 0) {
+      ib1 = (byte)(ibc+1);
+      ib2 = (byte)(i+2);
+    }
+    return (byte)(ib1 + ib2);
+  }
+
+  //===============================================
+  static byte sum(byte[] a) {
+    byte result = 1;
+    for (Byte i : a)
+        result += i;
+    return result;
+  }
+
+  static byte sumb(byte[] a) {
+    Byte result = 1;
+    for (Byte i : a)
+        result = (byte)(result + i);
+    return result;
+  }
+
+  static byte sumc(byte[] a) {
+    Byte result = ibc;
+    for (Byte i : a)
+        result = (byte)(result + i);
+    return result;
+  }
+
+  static byte sumf(byte[] a) {
+    Byte result = foob((byte)1);
+    for (Byte i : a)
+        result = (byte)(result + i);
+    return result;
+  }
+
+  static byte sump(byte[] a, Byte result) {
+    for (Byte i : a)
+        result = (byte)(result + i);
+    return result;
+  }
+
+  static byte sum2(byte[] a) {
+    byte result1 = 1;
+    byte result2 = 1;
+    for (Byte i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return (byte)(result1 + result2);
+  }
+
+  static byte sumb2(byte[] a) {
+    Byte result1 = 1;
+    Byte result2 = 1;
+    for (Byte i : a) {
+        result1 = (byte)(result1 + i);
+        result2 = (byte)(result2 + i + 1);
+    }
+    return (byte)(result1 + result2);
+  }
+
+  static byte summ2(byte[] a) {
+    Byte result1 = 1;
+    Byte result2 = new Byte((byte)1);
+    for (Byte i : a) {
+        result1 = (byte)(result1 + i);
+        result2 = (byte)(result2 + new Byte((byte)(i + 1)));
+    }
+    return (byte)(result1 + result2);
+  }
+
+  static byte sump2(byte[] a, Byte result2) {
+    Byte result1 = 1;
+    for (Byte i : a) {
+        result1 = (byte)(result1 + i);
+        result2 = (byte)(result2 + i + 1);
+    }
+    return (byte)(result1 + result2);
+  }
+
+  static byte sumc2(byte[] a) {
+    Byte result1 = 1;
+    Byte result2 = ibc;
+    for (Byte i : a) {
+        result1 = (byte)(result1 + i);
+        result2 = (byte)(result2 + i + ibc);
+    }
+    return (byte)(result1 + result2);
+  }
+
+  //===============================================
+  static byte remi_sum() {
+    Byte j = new Byte((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j = new Byte((byte)(j + 1));
+    }
+    return j;
+  }
+
+  static byte remi_sumb() {
+    Byte j = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j = (byte)(j + 1);
+    }
+    return j;
+  }
+
+  static byte remi_sumf() {
+    Byte j = foob((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j = (byte)(j + 1);
+    }
+    return j;
+  }
+
+  static byte remi_sump(Byte j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Byte((byte)(j + 1));
+    }
+    return j;
+  }
+
+  static byte remi_sumc() {
+    Byte j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = (byte)(j + ibc);
+    }
+    return j;
+  }
+
+  static byte remi_sum2() {
+    Byte j1 = new Byte((byte)1);
+    Byte j2 = new Byte((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Byte((byte)(j1 + 1));
+      j2 = new Byte((byte)(j2 + 2));
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sumb2() {
+    Byte j1 = Byte.valueOf((byte)1);
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = (byte)(j1 + 1);
+      j2 = (byte)(j2 + 2);
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_summ2() {
+    Byte j1 = new Byte((byte)1);
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Byte((byte)(j1 + 1));
+      j2 = (byte)(j2 + 2);
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sump2(Byte j1) {
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Byte((byte)(j1 + 1));
+      j2 = (byte)(j2 + 2);
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sumc2() {
+    Byte j1 = ibc;
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = (byte)(j1 + ibc);
+      j2 = (byte)(j2 + 2);
+    }
+    return (byte)(j1 + j2);
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static byte simple_deop(byte i) {
+    Byte ib = new Byte(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static byte simpleb_deop(byte i) {
+    Byte ib = Byte.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static byte simplef_deop(byte i) {
+    Byte ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static byte simplep_deop(Byte ib) {
+    dummy();
+    return ib;
+  }
+
+  static byte simplec_deop(byte i) {
+    Byte ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static byte test_deop(byte i) {
+    Byte ib = new Byte(foo(i));
+    if ((i&1) == 0)
+      ib = foo((byte)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static byte testb_deop(byte i) {
+    Byte ib = foo(i);
+    if ((i&1) == 0)
+      ib = foo((byte)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static byte testf_deop(byte i) {
+    Byte ib = foob(i);
+    if ((i&1) == 0)
+      ib = foo((byte)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static byte testp_deop(byte i, Byte ib) {
+    if ((i&1) == 0)
+      ib = foo((byte)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static byte testc_deop(byte i) {
+    Byte ib = ibc;
+    if ((i&1) == 0)
+      ib = foo((byte)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static byte sum_deop(byte[] a) {
+    byte result = 1;
+    for (Byte i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static byte sumb_deop(byte[] a) {
+    Byte result = 1;
+    for (Byte i : a)
+        result = (byte)(result + foo(i));
+    dummy();
+    return result;
+  }
+
+  static byte sumf_deop(byte[] a) {
+    Byte result = 1;
+    for (Byte i : a)
+        result = (byte)(result + foob(i));
+    dummy();
+    return result;
+  }
+
+  static byte sump_deop(byte[] a, Byte result) {
+    for (Byte i : a)
+        result = (byte)(result + foob(i));
+    dummy();
+    return result;
+  }
+
+  static byte sumc_deop(byte[] a) {
+    Byte result = ibc;
+    for (Byte i : a)
+        result = (byte)(result + foo(i));
+    dummy();
+    return result;
+  }
+
+  static byte remi_sum_deop() {
+    Byte j = new Byte(foo((byte)1));
+    for (int i = 0; i< 1000; i++) {
+      j = new Byte(foo((byte)(j + 1)));
+    }
+    dummy();
+    return j;
+  }
+
+  static byte remi_sumb_deop() {
+    Byte j = Byte.valueOf(foo((byte)1));
+    for (int i = 0; i< 1000; i++) {
+      j = foo((byte)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static byte remi_sumf_deop() {
+    Byte j = foob((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      j = foo((byte)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static byte remi_sump_deop(Byte j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo((byte)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static byte remi_sumc_deop() {
+    Byte j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo((byte)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static byte remi_sum_cond() {
+    Byte j = new Byte((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Byte((byte)(j + 1));
+      }
+    }
+    return j;
+  }
+
+  static byte remi_sumb_cond() {
+    Byte j = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (byte)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static byte remi_sumf_cond() {
+    Byte j = foob((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (byte)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static byte remi_sump_cond(Byte j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (byte)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static byte remi_sumc_cond() {
+    Byte j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (byte)(j + ibc);
+      }
+    }
+    return j;
+  }
+
+  static byte remi_sum2_cond() {
+    Byte j1 = new Byte((byte)1);
+    Byte j2 = new Byte((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Byte((byte)(j1 + 1));
+      } else {
+        j2 = new Byte((byte)(j2 + 2));
+      }
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sumb2_cond() {
+    Byte j1 = Byte.valueOf((byte)1);
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = (byte)(j1 + 1);
+      } else {
+        j2 = (byte)(j2 + 2);
+      }
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_summ2_cond() {
+    Byte j1 = new Byte((byte)1);
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Byte((byte)(j1 + 1));
+      } else {
+        j2 = (byte)(j2 + 2);
+      }
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sump2_cond(Byte j1) {
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Byte((byte)(j1 + 1));
+      } else {
+        j2 = (byte)(j2 + 2);
+      }
+    }
+    return (byte)(j1 + j2);
+  }
+
+  static byte remi_sumc2_cond() {
+    Byte j1 = ibc;
+    Byte j2 = Byte.valueOf((byte)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = (byte)(j1 + ibc);
+      } else {
+        j2 = (byte)(j2 + 2);
+      }
+    }
+    return (byte)(j1 + j2);
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
+    final int[] val = new int[] {
+      -5488, -5488, 12000, -5488, -5488,
+       1024,  1024, -5552,  1024,  1024,
+      -5488, -5488, 12000, -5488, -5488,
+        512,   512,  6256,   512,   512,
+      13024, 13024, -5584, 13024, 13024,
+        512,   512,  6256,   512,   512,
+         45,    45,    45,    45,    45,
+         66,    66,    66,    66,    66,
+         45,    45,    45,    45,    45,
+        -23,   -23,   -23,   -23,   -23,
+        -70,   -70,   -70,   -70,   -70,
+        -23,   -23,   -23,   -23,   -23,
+        -11,   -11,   -11,   -11,   -11,
+        -34,   -34,   -34,   -34,   -34
+    };
+
+    int[] res = new int[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0;
+    }
+
+
+    for (int i = 0; i < 12000; i++) {
+      res[0] += simple((byte)i);
+      res[1] += simpleb((byte)i);
+      res[2] += simplec();
+      res[3] += simplef((byte)i);
+      res[4] += simplep((byte)i);
+
+      res[5] += simple2((byte)i);
+      res[6] += simpleb2((byte)i);
+      res[7] += simplec2((byte)i);
+      res[8] += simplem2((byte)i);
+      res[9] += simplep2((byte)i, (byte)i);
+
+      res[10] += simple_deop((byte)i);
+      res[11] += simpleb_deop((byte)i);
+      res[12] += simplec_deop((byte)i);
+      res[13] += simplef_deop((byte)i);
+      res[14] += simplep_deop((byte)i);
+
+      res[15] += test((byte)i);
+      res[16] += testb((byte)i);
+      res[17] += testc((byte)i);
+      res[18] += testm((byte)i);
+      res[19] += testp((byte)i, (byte)i);
+
+      res[20] += test2((byte)i);
+      res[21] += testb2((byte)i);
+      res[22] += testc2((byte)i);
+      res[23] += testm2((byte)i);
+      res[24] += testp2((byte)i, (byte)i);
+
+      res[25] += test_deop((byte)i);
+      res[26] += testb_deop((byte)i);
+      res[27] += testc_deop((byte)i);
+      res[28] += testf_deop((byte)i);
+      res[29] += testp_deop((byte)i, (byte)i);
+    }
+
+    byte[] ia = new byte[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = (byte)i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, (byte)1);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, (byte)1);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, (byte)1);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump((byte)1);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2((byte)1);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop((byte)1);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond((byte)1);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond((byte)1);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestDoubleBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestDoubleBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestDoubleBoxing.dummy -XX:CompileCommand=exclude,TestDoubleBoxing.foo -XX:CompileCommand=exclude,TestDoubleBoxing.foob TestDoubleBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestDoubleBoxing.dummy -XX:CompileCommand=exclude,TestDoubleBoxing.foo -XX:CompileCommand=exclude,TestDoubleBoxing.foob TestDoubleBoxing
+ *
+ */
+
+public class TestDoubleBoxing {
+
+  static final Double ibc = new Double(1.);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void   dummy()        { }
+  static double foo(double i)  { return i; }
+  static Double foob(double i) { return Double.valueOf(i); }
+
+
+  static double simple(double i) {
+    Double ib = new Double(i);
+    return ib;
+  }
+
+  static double simpleb(double i) {
+    Double ib = Double.valueOf(i);
+    return ib;
+  }
+
+  static double simplec() {
+    Double ib = ibc;
+    return ib;
+  }
+
+  static double simplef(double i) {
+    Double ib = foob(i);
+    return ib;
+  }
+
+  static double simplep(Double ib) {
+    return ib;
+  }
+
+  static double simple2(double i) {
+    Double ib1 = new Double(i);
+    Double ib2 = new Double(i+1.);
+    return ib1 + ib2;
+  }
+
+  static double simpleb2(double i) {
+    Double ib1 = Double.valueOf(i);
+    Double ib2 = Double.valueOf(i+1.);
+    return ib1 + ib2;
+  }
+
+  static double simplem2(double i) {
+    Double ib1 = new Double(i);
+    Double ib2 = Double.valueOf(i+1.);
+    return ib1 + ib2;
+  }
+
+  static double simplep2(double i, Double ib1) {
+    Double ib2 = Double.valueOf(i+1.);
+    return ib1 + ib2;
+  }
+
+  static double simplec2(double i) {
+    Double ib1 = ibc;
+    Double ib2 = Double.valueOf(i+1.);
+    return ib1 + ib2;
+  }
+
+  //===============================================
+  static double test(double f, int i) {
+    Double ib = new Double(f);
+    if ((i&1) == 0)
+      ib = f+1.;
+    return ib;
+  }
+
+  static double testb(double f, int i) {
+    Double ib = f;
+    if ((i&1) == 0)
+      ib = (f+1.);
+    return ib;
+  }
+
+  static double testm(double f, int i) {
+    Double ib = f;
+    if ((i&1) == 0)
+      ib = new Double(f+1.);
+    return ib;
+  }
+
+  static double testp(double f, int i, Double ib) {
+    if ((i&1) == 0)
+      ib = new Double(f+1.);
+    return ib;
+  }
+
+  static double testc(double f, int i) {
+    Double ib = ibc;
+    if ((i&1) == 0)
+      ib = new Double(f+1.);
+    return ib;
+  }
+
+  static double test2(double f, int i) {
+    Double ib1 = new Double(f);
+    Double ib2 = new Double(f+1.);
+    if ((i&1) == 0) {
+      ib1 = new Double(f+1.);
+      ib2 = new Double(f+2.);
+    }
+    return ib1+ib2;
+  }
+
+  static double testb2(double f, int i) {
+    Double ib1 = f;
+    Double ib2 = f+1.;
+    if ((i&1) == 0) {
+      ib1 = (f+1.);
+      ib2 = (f+2.);
+    }
+    return ib1+ib2;
+  }
+
+  static double testm2(double f, int i) {
+    Double ib1 = new Double(f);
+    Double ib2 = f+1.;
+    if ((i&1) == 0) {
+      ib1 = new Double(f+1.);
+      ib2 = (f+2.);
+    }
+    return ib1+ib2;
+  }
+
+  static double testp2(double f, int i, Double ib1) {
+    Double ib2 = f+1.;
+    if ((i&1) == 0) {
+      ib1 = new Double(f+1.);
+      ib2 = (f+2.);
+    }
+    return ib1+ib2;
+  }
+
+  static double testc2(double f, int i) {
+    Double ib1 = ibc;
+    Double ib2 = f+1.;
+    if ((i&1) == 0) {
+      ib1 = (ibc+1.);
+      ib2 = (f+2.);
+    }
+    return ib1+ib2;
+  }
+
+  //===============================================
+  static double sum(double[] a) {
+    double result = 1.;
+    for (Double i : a)
+        result += i;
+    return result;
+  }
+
+  static double sumb(double[] a) {
+    Double result = 1.;
+    for (Double i : a)
+        result += i;
+    return result;
+  }
+
+  static double sumc(double[] a) {
+    Double result = ibc;
+    for (Double i : a)
+        result += i;
+    return result;
+  }
+
+  static double sumf(double[] a) {
+    Double result = foob(1.);
+    for (Double i : a)
+        result += i;
+    return result;
+  }
+
+  static double sump(double[] a, Double result) {
+    for (Double i : a)
+        result += i;
+    return result;
+  }
+
+  static double sum2(double[] a) {
+    double result1 = 1.;
+    double result2 = 1.;
+    for (Double i : a) {
+        result1 += i;
+        result2 += i + 1.;
+    }
+    return result1 + result2;
+  }
+
+  static double sumb2(double[] a) {
+    Double result1 = 1.;
+    Double result2 = 1.;
+    for (Double i : a) {
+        result1 += i;
+        result2 += i + 1.;
+    }
+    return result1 + result2;
+  }
+
+  static double summ2(double[] a) {
+    Double result1 = 1.;
+    Double result2 = new Double(1.);
+    for (Double i : a) {
+        result1 += i;
+        result2 += new Double(i + 1.);
+    }
+    return result1 + result2;
+  }
+
+  static double sump2(double[] a, Double result2) {
+    Double result1 = 1.;
+    for (Double i : a) {
+        result1 += i;
+        result2 += i + 1.;
+    }
+    return result1 + result2;
+  }
+
+  static double sumc2(double[] a) {
+    Double result1 = 1.;
+    Double result2 = ibc;
+    for (Double i : a) {
+        result1 += i;
+        result2 += i + ibc;
+    }
+    return result1 + result2;
+  }
+
+  //===============================================
+  static double remi_sum() {
+    Double j = new Double(1.);
+    for (int i = 0; i< 1000; i++) {
+      j = new Double(j + 1.);
+    }
+    return j;
+  }
+
+  static double remi_sumb() {
+    Double j = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1.;
+    }
+    return j;
+  }
+
+  static double remi_sumf() {
+    Double j = foob(1.);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1.;
+    }
+    return j;
+  }
+
+  static double remi_sump(Double j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Double(j + 1.);
+    }
+    return j;
+  }
+
+  static double remi_sumc() {
+    Double j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = j + ibc;
+    }
+    return j;
+  }
+
+  static double remi_sum2() {
+    Double j1 = new Double(1.);
+    Double j2 = new Double(1.);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Double(j1 + 1.);
+      j2 = new Double(j2 + 2.);
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sumb2() {
+    Double j1 = Double.valueOf(1.);
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + 1.;
+      j2 = j2 + 2.;
+    }
+    return j1 + j2;
+  }
+
+  static double remi_summ2() {
+    Double j1 = new Double(1.);
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Double(j1 + 1.);
+      j2 = j2 + 2.;
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sump2(Double j1) {
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Double(j1 + 1.);
+      j2 = j2 + 2.;
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sumc2() {
+    Double j1 = ibc;
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + ibc;
+      j2 = j2 + 2.;
+    }
+    return j1 + j2;
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static double simple_deop(double i) {
+    Double ib = new Double(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static double simpleb_deop(double i) {
+    Double ib = Double.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static double simplef_deop(double i) {
+    Double ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static double simplep_deop(Double ib) {
+    dummy();
+    return ib;
+  }
+
+  static double simplec_deop(double i) {
+    Double ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static double test_deop(double f, int i) {
+    Double ib = new Double(foo(f));
+    if ((i&1) == 0)
+      ib = foo(f+1.);
+    dummy();
+    return ib;
+  }
+
+  static double testb_deop(double f, int i) {
+    Double ib = foo(f);
+    if ((i&1) == 0)
+      ib = foo(f+1.);
+    dummy();
+    return ib;
+  }
+
+  static double testf_deop(double f, int i) {
+    Double ib = foob(f);
+    if ((i&1) == 0)
+      ib = foo(f+1.);
+    dummy();
+    return ib;
+  }
+
+  static double testp_deop(double f, int i, Double ib) {
+    if ((i&1) == 0)
+      ib = foo(f+1.);
+    dummy();
+    return ib;
+  }
+
+  static double testc_deop(double f, int i) {
+    Double ib = ibc;
+    if ((i&1) == 0)
+      ib = foo(f+1.);
+    dummy();
+    return ib;
+  }
+
+  static double sum_deop(double[] a) {
+    double result = 1.;
+    for (Double i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static double sumb_deop(double[] a) {
+    Double result = 1.;
+    for (Double i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static double sumf_deop(double[] a) {
+    Double result = 1.;
+    for (Double i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static double sump_deop(double[] a, Double result) {
+    for (Double i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static double sumc_deop(double[] a) {
+    Double result = ibc;
+    for (Double i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static double remi_sum_deop() {
+    Double j = new Double(foo(1.));
+    for (int i = 0; i< 1000; i++) {
+      j = new Double(foo(j + 1.));
+    }
+    dummy();
+    return j;
+  }
+
+  static double remi_sumb_deop() {
+    Double j = Double.valueOf(foo(1.));
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.);
+    }
+    dummy();
+    return j;
+  }
+
+  static double remi_sumf_deop() {
+    Double j = foob(1.);
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.);
+    }
+    dummy();
+    return j;
+  }
+
+  static double remi_sump_deop(Double j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.);
+    }
+    dummy();
+    return j;
+  }
+
+  static double remi_sumc_deop() {
+    Double j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.);
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static double remi_sum_cond() {
+    Double j = new Double(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Double(j + 1.);
+      }
+    }
+    return j;
+  }
+
+  static double remi_sumb_cond() {
+    Double j = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.;
+      }
+    }
+    return j;
+  }
+
+  static double remi_sumf_cond() {
+    Double j = foob(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.;
+      }
+    }
+    return j;
+  }
+
+  static double remi_sump_cond(Double j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.;
+      }
+    }
+    return j;
+  }
+
+  static double remi_sumc_cond() {
+    Double j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + ibc;
+      }
+    }
+    return j;
+  }
+
+  static double remi_sum2_cond() {
+    Double j1 = new Double(1.);
+    Double j2 = new Double(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Double(j1 + 1.);
+      } else {
+        j2 = new Double(j2 + 2.);
+      }
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sumb2_cond() {
+    Double j1 = Double.valueOf(1.);
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + 1.;
+      } else {
+        j2 = j2 + 2.;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static double remi_summ2_cond() {
+    Double j1 = new Double(1.);
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Double(j1 + 1.);
+      } else {
+        j2 = j2 + 2.;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sump2_cond(Double j1) {
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Double(j1 + 1.);
+      } else {
+        j2 = j2 + 2.;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static double remi_sumc2_cond() {
+    Double j1 = ibc;
+    Double j2 = Double.valueOf(1.);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + ibc;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
+    final double[] val = new double[] {
+       71994000.,  71994000.,    12000.,  71994000.,  71994000.,
+      144000000., 144000000., 72018000., 144000000., 144000000.,
+       71994000.,  71994000.,    12000.,  71994000.,  71994000.,
+       72000000.,  72000000., 36006000.,  72000000.,  72000000.,
+      144012000., 144012000., 72030000., 144012000., 144012000.,
+       72000000.,  72000000., 36006000.,  72000000.,  72000000.,
+         499501.,    499501.,   499501.,    499501.,    499501.,
+        1000002.,   1000002.,  1000002.,   1000002.,   1000002.,
+         499501.,    499501.,   499501.,    499501.,    499501.,
+           1001.,      1001.,     1001.,      1001.,      1001.,
+           3002.,      3002.,     3002.,      3002.,      3002.,
+           1001.,      1001.,     1001.,      1001.,      1001.,
+            501.,       501.,      501.,       501.,       501.,
+           1502.,      1502.,     1502.,      1502.,      1502.
+    };
+
+    double[] res = new double[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0.;
+    }
+
+
+    for (int i = 0; i < 12000; i++) {
+      res[0] += simple(i);
+      res[1] += simpleb(i);
+      res[2] += simplec();
+      res[3] += simplef(i);
+      res[4] += simplep((double)i);
+
+      res[5] += simple2((double)i);
+      res[6] += simpleb2((double)i);
+      res[7] += simplec2((double)i);
+      res[8] += simplem2((double)i);
+      res[9] += simplep2((double)i, (double)i);
+
+      res[10] += simple_deop((double)i);
+      res[11] += simpleb_deop((double)i);
+      res[12] += simplec_deop((double)i);
+      res[13] += simplef_deop((double)i);
+      res[14] += simplep_deop((double)i);
+
+      res[15] += test((double)i, i);
+      res[16] += testb((double)i, i);
+      res[17] += testc((double)i, i);
+      res[18] += testm((double)i, i);
+      res[19] += testp((double)i, i, (double)i);
+
+      res[20] += test2((double)i, i);
+      res[21] += testb2((double)i, i);
+      res[22] += testc2((double)i, i);
+      res[23] += testm2((double)i, i);
+      res[24] += testp2((double)i, i, (double)i);
+
+      res[25] += test_deop((double)i, i);
+      res[26] += testb_deop((double)i, i);
+      res[27] += testc_deop((double)i, i);
+      res[28] += testf_deop((double)i, i);
+      res[29] += testp_deop((double)i, i, (double)i);
+    }
+
+    double[] ia = new double[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, 1.);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, 1.);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, 1.);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump(1.);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2(1.);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop(1.);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond(1.);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond(1.);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestFloatBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestFloatBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestFloatBoxing.dummy -XX:CompileCommand=exclude,TestFloatBoxing.foo -XX:CompileCommand=exclude,TestFloatBoxing.foob TestFloatBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestFloatBoxing.dummy -XX:CompileCommand=exclude,TestFloatBoxing.foo -XX:CompileCommand=exclude,TestFloatBoxing.foob TestFloatBoxing
+ *
+ */
+
+public class TestFloatBoxing {
+
+  static final Float ibc = new Float(1.f);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void  dummy()       { }
+  static float foo(float i)  { return i; }
+  static Float foob(float i) { return Float.valueOf(i); }
+
+
+  static float simple(float i) {
+    Float ib = new Float(i);
+    return ib;
+  }
+
+  static float simpleb(float i) {
+    Float ib = Float.valueOf(i);
+    return ib;
+  }
+
+  static float simplec() {
+    Float ib = ibc;
+    return ib;
+  }
+
+  static float simplef(float i) {
+    Float ib = foob(i);
+    return ib;
+  }
+
+  static float simplep(Float ib) {
+    return ib;
+  }
+
+  static float simple2(float i) {
+    Float ib1 = new Float(i);
+    Float ib2 = new Float(i+1.f);
+    return ib1 + ib2;
+  }
+
+  static float simpleb2(float i) {
+    Float ib1 = Float.valueOf(i);
+    Float ib2 = Float.valueOf(i+1.f);
+    return ib1 + ib2;
+  }
+
+  static float simplem2(float i) {
+    Float ib1 = new Float(i);
+    Float ib2 = Float.valueOf(i+1.f);
+    return ib1 + ib2;
+  }
+
+  static float simplep2(float i, Float ib1) {
+    Float ib2 = Float.valueOf(i+1.f);
+    return ib1 + ib2;
+  }
+
+  static float simplec2(float i) {
+    Float ib1 = ibc;
+    Float ib2 = Float.valueOf(i+1.f);
+    return ib1 + ib2;
+  }
+
+  //===============================================
+  static float test(float f, int i) {
+    Float ib = new Float(f);
+    if ((i&1) == 0)
+      ib = f+1.f;
+    return ib;
+  }
+
+  static float testb(float f, int i) {
+    Float ib = f;
+    if ((i&1) == 0)
+      ib = (f+1.f);
+    return ib;
+  }
+
+  static float testm(float f, int i) {
+    Float ib = f;
+    if ((i&1) == 0)
+      ib = new Float(f+1.f);
+    return ib;
+  }
+
+  static float testp(float f, int i, Float ib) {
+    if ((i&1) == 0)
+      ib = new Float(f+1.f);
+    return ib;
+  }
+
+  static float testc(float f, int i) {
+    Float ib = ibc;
+    if ((i&1) == 0)
+      ib = new Float(f+1.f);
+    return ib;
+  }
+
+  static float test2(float f, int i) {
+    Float ib1 = new Float(f);
+    Float ib2 = new Float(f+1.f);
+    if ((i&1) == 0) {
+      ib1 = new Float(f+1.f);
+      ib2 = new Float(f+2.f);
+    }
+    return ib1+ib2;
+  }
+
+  static float testb2(float f, int i) {
+    Float ib1 = f;
+    Float ib2 = f+1.f;
+    if ((i&1) == 0) {
+      ib1 = (f+1.f);
+      ib2 = (f+2.f);
+    }
+    return ib1+ib2;
+  }
+
+  static float testm2(float f, int i) {
+    Float ib1 = new Float(f);
+    Float ib2 = f+1.f;
+    if ((i&1) == 0) {
+      ib1 = new Float(f+1.f);
+      ib2 = (f+2.f);
+    }
+    return ib1+ib2;
+  }
+
+  static float testp2(float f, int i, Float ib1) {
+    Float ib2 = f+1.f;
+    if ((i&1) == 0) {
+      ib1 = new Float(f+1.f);
+      ib2 = (f+2.f);
+    }
+    return ib1+ib2;
+  }
+
+  static float testc2(float f, int i) {
+    Float ib1 = ibc;
+    Float ib2 = f+1.f;
+    if ((i&1) == 0) {
+      ib1 = (ibc+1.f);
+      ib2 = (f+2.f);
+    }
+    return ib1+ib2;
+  }
+
+  //===============================================
+  static float sum(float[] a) {
+    float result = 1.f;
+    for (Float i : a)
+        result += i;
+    return result;
+  }
+
+  static float sumb(float[] a) {
+    Float result = 1.f;
+    for (Float i : a)
+        result += i;
+    return result;
+  }
+
+  static float sumc(float[] a) {
+    Float result = ibc;
+    for (Float i : a)
+        result += i;
+    return result;
+  }
+
+  static float sumf(float[] a) {
+    Float result = foob(1.f);
+    for (Float i : a)
+        result += i;
+    return result;
+  }
+
+  static float sump(float[] a, Float result) {
+    for (Float i : a)
+        result += i;
+    return result;
+  }
+
+  static float sum2(float[] a) {
+    float result1 = 1.f;
+    float result2 = 1.f;
+    for (Float i : a) {
+        result1 += i;
+        result2 += i + 1.f;
+    }
+    return result1 + result2;
+  }
+
+  static float sumb2(float[] a) {
+    Float result1 = 1.f;
+    Float result2 = 1.f;
+    for (Float i : a) {
+        result1 += i;
+        result2 += i + 1.f;
+    }
+    return result1 + result2;
+  }
+
+  static float summ2(float[] a) {
+    Float result1 = 1.f;
+    Float result2 = new Float(1.f);
+    for (Float i : a) {
+        result1 += i;
+        result2 += new Float(i + 1.f);
+    }
+    return result1 + result2;
+  }
+
+  static float sump2(float[] a, Float result2) {
+    Float result1 = 1.f;
+    for (Float i : a) {
+        result1 += i;
+        result2 += i + 1.f;
+    }
+    return result1 + result2;
+  }
+
+  static float sumc2(float[] a) {
+    Float result1 = 1.f;
+    Float result2 = ibc;
+    for (Float i : a) {
+        result1 += i;
+        result2 += i + ibc;
+    }
+    return result1 + result2;
+  }
+
+  //===============================================
+  static float remi_sum() {
+    Float j = new Float(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j = new Float(j + 1.f);
+    }
+    return j;
+  }
+
+  static float remi_sumb() {
+    Float j = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1.f;
+    }
+    return j;
+  }
+
+  static float remi_sumf() {
+    Float j = foob(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1.f;
+    }
+    return j;
+  }
+
+  static float remi_sump(Float j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Float(j + 1.f);
+    }
+    return j;
+  }
+
+  static float remi_sumc() {
+    Float j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = j + ibc;
+    }
+    return j;
+  }
+
+  static float remi_sum2() {
+    Float j1 = new Float(1.f);
+    Float j2 = new Float(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Float(j1 + 1.f);
+      j2 = new Float(j2 + 2.f);
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sumb2() {
+    Float j1 = Float.valueOf(1.f);
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + 1.f;
+      j2 = j2 + 2.f;
+    }
+    return j1 + j2;
+  }
+
+  static float remi_summ2() {
+    Float j1 = new Float(1.f);
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Float(j1 + 1.f);
+      j2 = j2 + 2.f;
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sump2(Float j1) {
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Float(j1 + 1.f);
+      j2 = j2 + 2.f;
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sumc2() {
+    Float j1 = ibc;
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + ibc;
+      j2 = j2 + 2.f;
+    }
+    return j1 + j2;
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static float simple_deop(float i) {
+    Float ib = new Float(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static float simpleb_deop(float i) {
+    Float ib = Float.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static float simplef_deop(float i) {
+    Float ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static float simplep_deop(Float ib) {
+    dummy();
+    return ib;
+  }
+
+  static float simplec_deop(float i) {
+    Float ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static float test_deop(float f, int i) {
+    Float ib = new Float(foo(f));
+    if ((i&1) == 0)
+      ib = foo(f+1.f);
+    dummy();
+    return ib;
+  }
+
+  static float testb_deop(float f, int i) {
+    Float ib = foo(f);
+    if ((i&1) == 0)
+      ib = foo(f+1.f);
+    dummy();
+    return ib;
+  }
+
+  static float testf_deop(float f, int i) {
+    Float ib = foob(f);
+    if ((i&1) == 0)
+      ib = foo(f+1.f);
+    dummy();
+    return ib;
+  }
+
+  static float testp_deop(float f, int i, Float ib) {
+    if ((i&1) == 0)
+      ib = foo(f+1.f);
+    dummy();
+    return ib;
+  }
+
+  static float testc_deop(float f, int i) {
+    Float ib = ibc;
+    if ((i&1) == 0)
+      ib = foo(f+1.f);
+    dummy();
+    return ib;
+  }
+
+  static float sum_deop(float[] a) {
+    float result = 1.f;
+    for (Float i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static float sumb_deop(float[] a) {
+    Float result = 1.f;
+    for (Float i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static float sumf_deop(float[] a) {
+    Float result = 1.f;
+    for (Float i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static float sump_deop(float[] a, Float result) {
+    for (Float i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static float sumc_deop(float[] a) {
+    Float result = ibc;
+    for (Float i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static float remi_sum_deop() {
+    Float j = new Float(foo(1.f));
+    for (int i = 0; i< 1000; i++) {
+      j = new Float(foo(j + 1.f));
+    }
+    dummy();
+    return j;
+  }
+
+  static float remi_sumb_deop() {
+    Float j = Float.valueOf(foo(1.f));
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.f);
+    }
+    dummy();
+    return j;
+  }
+
+  static float remi_sumf_deop() {
+    Float j = foob(1.f);
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.f);
+    }
+    dummy();
+    return j;
+  }
+
+  static float remi_sump_deop(Float j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.f);
+    }
+    dummy();
+    return j;
+  }
+
+  static float remi_sumc_deop() {
+    Float j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1.f);
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static float remi_sum_cond() {
+    Float j = new Float(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Float(j + 1.f);
+      }
+    }
+    return j;
+  }
+
+  static float remi_sumb_cond() {
+    Float j = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.f;
+      }
+    }
+    return j;
+  }
+
+  static float remi_sumf_cond() {
+    Float j = foob(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.f;
+      }
+    }
+    return j;
+  }
+
+  static float remi_sump_cond(Float j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1.f;
+      }
+    }
+    return j;
+  }
+
+  static float remi_sumc_cond() {
+    Float j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + ibc;
+      }
+    }
+    return j;
+  }
+
+  static float remi_sum2_cond() {
+    Float j1 = new Float(1.f);
+    Float j2 = new Float(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Float(j1 + 1.f);
+      } else {
+        j2 = new Float(j2 + 2.f);
+      }
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sumb2_cond() {
+    Float j1 = Float.valueOf(1.f);
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + 1.f;
+      } else {
+        j2 = j2 + 2.f;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static float remi_summ2_cond() {
+    Float j1 = new Float(1.f);
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Float(j1 + 1.f);
+      } else {
+        j2 = j2 + 2.f;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sump2_cond(Float j1) {
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Float(j1 + 1.f);
+      } else {
+        j2 = j2 + 2.f;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static float remi_sumc2_cond() {
+    Float j1 = ibc;
+    Float j2 = Float.valueOf(1.f);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + ibc;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
+    final float[] val = new float[] {
+       71990896.f,  71990896.f,    12000.f,  71990896.f,  71990896.f,
+      144000000.f, 144000000.f, 72014896.f, 144000000.f, 144000000.f,
+       71990896.f,  71990896.f,    12000.f,  71990896.f,  71990896.f,
+       72000000.f,  72000000.f, 36004096.f,  72000000.f,  72000000.f,
+      144012288.f, 144012288.f, 72033096.f, 144012288.f, 144012288.f,
+       72000000.f,  72000000.f, 36004096.f,  72000000.f,  72000000.f,
+         499501.f,    499501.f,   499501.f,    499501.f,    499501.f,
+        1000002.f,   1000002.f,  1000002.f,   1000002.f,   1000002.f,
+         499501.f,    499501.f,   499501.f,    499501.f,    499501.f,
+           1001.f,      1001.f,     1001.f,      1001.f,      1001.f,
+           3002.f,      3002.f,     3002.f,      3002.f,      3002.f,
+           1001.f,      1001.f,     1001.f,      1001.f,      1001.f,
+            501.f,       501.f,      501.f,       501.f,       501.f,
+           1502.f,      1502.f,     1502.f,      1502.f,      1502.f
+    };
+
+    float[] res = new float[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0.f;
+    }
+
+
+    for (int i = 0; i < 12000; i++) {
+      res[0] += simple(i);
+      res[1] += simpleb(i);
+      res[2] += simplec();
+      res[3] += simplef(i);
+      res[4] += simplep((float)i);
+
+      res[5] += simple2((float)i);
+      res[6] += simpleb2((float)i);
+      res[7] += simplec2((float)i);
+      res[8] += simplem2((float)i);
+      res[9] += simplep2((float)i, (float)i);
+
+      res[10] += simple_deop((float)i);
+      res[11] += simpleb_deop((float)i);
+      res[12] += simplec_deop((float)i);
+      res[13] += simplef_deop((float)i);
+      res[14] += simplep_deop((float)i);
+
+      res[15] += test((float)i, i);
+      res[16] += testb((float)i, i);
+      res[17] += testc((float)i, i);
+      res[18] += testm((float)i, i);
+      res[19] += testp((float)i, i, (float)i);
+
+      res[20] += test2((float)i, i);
+      res[21] += testb2((float)i, i);
+      res[22] += testc2((float)i, i);
+      res[23] += testm2((float)i, i);
+      res[24] += testp2((float)i, i, (float)i);
+
+      res[25] += test_deop((float)i, i);
+      res[26] += testb_deop((float)i, i);
+      res[27] += testc_deop((float)i, i);
+      res[28] += testf_deop((float)i, i);
+      res[29] += testp_deop((float)i, i, (float)i);
+    }
+
+    float[] ia = new float[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, 1.f);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, 1.f);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, 1.f);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump(1.f);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2(1.f);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop(1.f);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond(1.f);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond(1.f);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestIntBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestIntBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestIntBoxing.dummy -XX:CompileCommand=exclude,TestIntBoxing.foo -XX:CompileCommand=exclude,TestIntBoxing.foob TestIntBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestIntBoxing.dummy -XX:CompileCommand=exclude,TestIntBoxing.foo -XX:CompileCommand=exclude,TestIntBoxing.foob TestIntBoxing
+ *
+ */
+
+public class TestIntBoxing {
+
+  static final Integer ibc = new Integer(1);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void    dummy()     { }
+  static int     foo(int i)  { return i; }
+  static Integer foob(int i) { return Integer.valueOf(i); }
+
+
+  static int simple(int i) {
+    Integer ib = new Integer(i);
+    return ib;
+  }
+
+  static int simpleb(int i) {
+    Integer ib = Integer.valueOf(i);
+    return ib;
+  }
+
+  static int simplec() {
+    Integer ib = ibc;
+    return ib;
+  }
+
+  static int simplef(int i) {
+    Integer ib = foob(i);
+    return ib;
+  }
+
+  static int simplep(Integer ib) {
+    return ib;
+  }
+
+  static int simple2(int i) {
+    Integer ib1 = new Integer(i);
+    Integer ib2 = new Integer(i+1);
+    return ib1 + ib2;
+  }
+
+  static int simpleb2(int i) {
+    Integer ib1 = Integer.valueOf(i);
+    Integer ib2 = Integer.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static int simplem2(int i) {
+    Integer ib1 = new Integer(i);
+    Integer ib2 = Integer.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static int simplep2(int i, Integer ib1) {
+    Integer ib2 = Integer.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static int simplec2(int i) {
+    Integer ib1 = ibc;
+    Integer ib2 = Integer.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  //===============================================
+  static int test(int i) {
+    Integer ib = new Integer(i);
+    if ((i&1) == 0)
+      ib = i+1;
+    return ib;
+  }
+
+  static int testb(int i) {
+    Integer ib = i;
+    if ((i&1) == 0)
+      ib = (i+1);
+    return ib;
+  }
+
+  static int testm(int i) {
+    Integer ib = i;
+    if ((i&1) == 0)
+      ib = new Integer(i+1);
+    return ib;
+  }
+
+  static int testp(int i, Integer ib) {
+    if ((i&1) == 0)
+      ib = new Integer(i+1);
+    return ib;
+  }
+
+  static int testc(int i) {
+    Integer ib = ibc;
+    if ((i&1) == 0)
+      ib = new Integer(i+1);
+    return ib;
+  }
+
+  static int test2(int i) {
+    Integer ib1 = new Integer(i);
+    Integer ib2 = new Integer(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Integer(i+1);
+      ib2 = new Integer(i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static int testb2(int i) {
+    Integer ib1 = i;
+    Integer ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = (i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static int testm2(int i) {
+    Integer ib1 = new Integer(i);
+    Integer ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = new Integer(i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static int testp2(int i, Integer ib1) {
+    Integer ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = new Integer(i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static int testc2(int i) {
+    Integer ib1 = ibc;
+    Integer ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = (ibc+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  //===============================================
+  static int sum(int[] a) {
+    int result = 1;
+    for (Integer i : a)
+        result += i;
+    return result;
+  }
+
+  static int sumb(int[] a) {
+    Integer result = 1;
+    for (Integer i : a)
+        result += i;
+    return result;
+  }
+
+  static int sumc(int[] a) {
+    Integer result = ibc;
+    for (Integer i : a)
+        result += i;
+    return result;
+  }
+
+  static int sumf(int[] a) {
+    Integer result = foob(1);
+    for (Integer i : a)
+        result += i;
+    return result;
+  }
+
+  static int sump(int[] a, Integer result) {
+    for (Integer i : a)
+        result += i;
+    return result;
+  }
+
+  static int sum2(int[] a) {
+    int result1 = 1;
+    int result2 = 1;
+    for (Integer i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static int sumb2(int[] a) {
+    Integer result1 = 1;
+    Integer result2 = 1;
+    for (Integer i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static int summ2(int[] a) {
+    Integer result1 = 1;
+    Integer result2 = new Integer(1);
+    for (Integer i : a) {
+        result1 += i;
+        result2 += new Integer(i + 1);
+    }
+    return result1 + result2;
+  }
+
+  static int sump2(int[] a, Integer result2) {
+    Integer result1 = 1;
+    for (Integer i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static int sumc2(int[] a) {
+    Integer result1 = 1;
+    Integer result2 = ibc;
+    for (Integer i : a) {
+        result1 += i;
+        result2 += i + ibc;
+    }
+    return result1 + result2;
+  }
+
+  //===============================================
+  static int remi_sum() {
+    Integer j = new Integer(1);
+    for (int i = 0; i< 1000; i++) {
+      j = new Integer(j + 1);
+    }
+    return j;
+  }
+
+  static int remi_sumb() {
+    Integer j = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1;
+    }
+    return j;
+  }
+
+  static int remi_sumf() {
+    Integer j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1;
+    }
+    return j;
+  }
+
+  static int remi_sump(Integer j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Integer(j + 1);
+    }
+    return j;
+  }
+
+  static int remi_sumc() {
+    Integer j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = j + ibc;
+    }
+    return j;
+  }
+
+  static int remi_sum2() {
+    Integer j1 = new Integer(1);
+    Integer j2 = new Integer(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Integer(j1 + 1);
+      j2 = new Integer(j2 + 2);
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sumb2() {
+    Integer j1 = Integer.valueOf(1);
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + 1;
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static int remi_summ2() {
+    Integer j1 = new Integer(1);
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Integer(j1 + 1);
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sump2(Integer j1) {
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Integer(j1 + 1);
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sumc2() {
+    Integer j1 = ibc;
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + ibc;
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static int simple_deop(int i) {
+    Integer ib = new Integer(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static int simpleb_deop(int i) {
+    Integer ib = Integer.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static int simplef_deop(int i) {
+    Integer ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static int simplep_deop(Integer ib) {
+    dummy();
+    return ib;
+  }
+
+  static int simplec_deop(int i) {
+    Integer ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static int test_deop(int i) {
+    Integer ib = new Integer(foo(i));
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static int testb_deop(int i) {
+    Integer ib = foo(i);
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static int testf_deop(int i) {
+    Integer ib = foob(i);
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static int testp_deop(int i, Integer ib) {
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static int testc_deop(int i) {
+    Integer ib = ibc;
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static int sum_deop(int[] a) {
+    int result = 1;
+    for (Integer i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static int sumb_deop(int[] a) {
+    Integer result = 1;
+    for (Integer i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static int sumf_deop(int[] a) {
+    Integer result = 1;
+    for (Integer i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static int sump_deop(int[] a, Integer result) {
+    for (Integer i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static int sumc_deop(int[] a) {
+    Integer result = ibc;
+    for (Integer i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static int remi_sum_deop() {
+    Integer j = new Integer(foo(1));
+    for (int i = 0; i< 1000; i++) {
+      j = new Integer(foo(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static int remi_sumb_deop() {
+    Integer j = Integer.valueOf(foo(1));
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static int remi_sumf_deop() {
+    Integer j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static int remi_sump_deop(Integer j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static int remi_sumc_deop() {
+    Integer j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static int remi_sum_cond() {
+    Integer j = new Integer(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Integer(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static int remi_sumb_cond() {
+    Integer j = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static int remi_sumf_cond() {
+    Integer j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static int remi_sump_cond(Integer j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static int remi_sumc_cond() {
+    Integer j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + ibc;
+      }
+    }
+    return j;
+  }
+
+  static int remi_sum2_cond() {
+    Integer j1 = new Integer(1);
+    Integer j2 = new Integer(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Integer(j1 + 1);
+      } else {
+        j2 = new Integer(j2 + 2);
+      }
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sumb2_cond() {
+    Integer j1 = Integer.valueOf(1);
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + 1;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static int remi_summ2_cond() {
+    Integer j1 = new Integer(1);
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Integer(j1 + 1);
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sump2_cond(Integer j1) {
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Integer(j1 + 1);
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static int remi_sumc2_cond() {
+    Integer j1 = ibc;
+    Integer j2 = Integer.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + ibc;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
+    final int[] val = new int[] {
+       71994000,  71994000,    12000,  71994000,  71994000,
+      144000000, 144000000, 72018000, 144000000, 144000000,
+       71994000,  71994000,    12000,  71994000,  71994000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+      144012000, 144012000, 72030000, 144012000, 144012000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+         499501,    499501,   499501,    499501,    499501,
+        1000002,   1000002,  1000002,   1000002,   1000002,
+         499501,    499501,   499501,    499501,    499501,
+           1001,      1001,     1001,      1001,      1001,
+           3002,      3002,     3002,      3002,      3002,
+           1001,      1001,     1001,      1001,      1001,
+            501,       501,      501,       501,       501,
+           1502,      1502,     1502,      1502,      1502
+    };
+
+    int[] res = new int[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0;
+    }
+
+
+    for (int i = 0; i < 12000; i++) {
+      res[0] += simple(i);
+      res[1] += simpleb(i);
+      res[2] += simplec();
+      res[3] += simplef(i);
+      res[4] += simplep(i);
+
+      res[5] += simple2(i);
+      res[6] += simpleb2(i);
+      res[7] += simplec2(i);
+      res[8] += simplem2(i);
+      res[9] += simplep2(i, i);
+
+      res[10] += simple_deop(i);
+      res[11] += simpleb_deop(i);
+      res[12] += simplec_deop(i);
+      res[13] += simplef_deop(i);
+      res[14] += simplep_deop(i);
+
+      res[15] += test(i);
+      res[16] += testb(i);
+      res[17] += testc(i);
+      res[18] += testm(i);
+      res[19] += testp(i, i);
+
+      res[20] += test2(i);
+      res[21] += testb2(i);
+      res[22] += testc2(i);
+      res[23] += testm2(i);
+      res[24] += testp2(i, i);
+
+      res[25] += test_deop(i);
+      res[26] += testb_deop(i);
+      res[27] += testc_deop(i);
+      res[28] += testf_deop(i);
+      res[29] += testp_deop(i, i);
+    }
+
+    int[] ia = new int[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, 1);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, 1);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, 1);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump(1);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2(1);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop(1);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond(1);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond(1);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestLongBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestLongBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestLongBoxing.dummy -XX:CompileCommand=exclude,TestLongBoxing.foo -XX:CompileCommand=exclude,TestLongBoxing.foob TestLongBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ *                   -XX:CompileCommand=exclude,TestLongBoxing.dummy -XX:CompileCommand=exclude,TestLongBoxing.foo -XX:CompileCommand=exclude,TestLongBoxing.foob TestLongBoxing
+ *
+ */
+
+public class TestLongBoxing {
+
+  static final Long ibc = new Long(1);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void dummy()     { }
+  static long  foo(long i)  { return i; }
+  static Long  foob(long i) { return Long.valueOf(i); }
+
+
+  static long simple(long i) {
+    Long ib = new Long(i);
+    return ib;
+  }
+
+  static long simpleb(long i) {
+    Long ib = Long.valueOf(i);
+    return ib;
+  }
+
+  static long simplec() {
+    Long ib = ibc;
+    return ib;
+  }
+
+  static long simplef(long i) {
+    Long ib = foob(i);
+    return ib;
+  }
+
+  static long simplep(Long ib) {
+    return ib;
+  }
+
+  static long simple2(long i) {
+    Long ib1 = new Long(i);
+    Long ib2 = new Long(i+1);
+    return ib1 + ib2;
+  }
+
+  static long simpleb2(long i) {
+    Long ib1 = Long.valueOf(i);
+    Long ib2 = Long.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static long simplem2(long i) {
+    Long ib1 = new Long(i);
+    Long ib2 = Long.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static long simplep2(long i, Long ib1) {
+    Long ib2 = Long.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  static long simplec2(long i) {
+    Long ib1 = ibc;
+    Long ib2 = Long.valueOf(i+1);
+    return ib1 + ib2;
+  }
+
+  //===============================================
+  static long test(long i) {
+    Long ib = new Long(i);
+    if ((i&1) == 0)
+      ib = i+1;
+    return ib;
+  }
+
+  static long testb(long i) {
+    Long ib = i;
+    if ((i&1) == 0)
+      ib = (i+1);
+    return ib;
+  }
+
+  static long testm(long i) {
+    Long ib = i;
+    if ((i&1) == 0)
+      ib = new Long(i+1);
+    return ib;
+  }
+
+  static long testp(long i, Long ib) {
+    if ((i&1) == 0)
+      ib = new Long(i+1);
+    return ib;
+  }
+
+  static long testc(long i) {
+    Long ib = ibc;
+    if ((i&1) == 0)
+      ib = new Long(i+1);
+    return ib;
+  }
+
+  static long test2(long i) {
+    Long ib1 = new Long(i);
+    Long ib2 = new Long(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Long(i+1);
+      ib2 = new Long(i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static long testb2(long i) {
+    Long ib1 = i;
+    Long ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = (i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static long testm2(long i) {
+    Long ib1 = new Long(i);
+    Long ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = new Long(i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static long testp2(long i, Long ib1) {
+    Long ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = new Long(i+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  static long testc2(long i) {
+    Long ib1 = ibc;
+    Long ib2 = i+1;
+    if ((i&1) == 0) {
+      ib1 = (ibc+1);
+      ib2 = (i+2);
+    }
+    return ib1+ib2;
+  }
+
+  //===============================================
+  static long sum(long[] a) {
+    long result = 1;
+    for (Long i : a)
+        result += i;
+    return result;
+  }
+
+  static long sumb(long[] a) {
+    Long result = 1l;
+    for (Long i : a)
+        result += i;
+    return result;
+  }
+
+  static long sumc(long[] a) {
+    Long result = ibc;
+    for (Long i : a)
+        result += i;
+    return result;
+  }
+
+  static long sumf(long[] a) {
+    Long result = foob(1);
+    for (Long i : a)
+        result += i;
+    return result;
+  }
+
+  static long sump(long[] a, Long result) {
+    for (Long i : a)
+        result += i;
+    return result;
+  }
+
+  static long sum2(long[] a) {
+    long result1 = 1;
+    long result2 = 1;
+    for (Long i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static long sumb2(long[] a) {
+    Long result1 = 1l;
+    Long result2 = 1l;
+    for (Long i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static long summ2(long[] a) {
+    Long result1 = 1l;
+    Long result2 = new Long(1);
+    for (Long i : a) {
+        result1 += i;
+        result2 += new Long(i + 1);
+    }
+    return result1 + result2;
+  }
+
+  static long sump2(long[] a, Long result2) {
+    Long result1 = 1l;
+    for (Long i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return result1 + result2;
+  }
+
+  static long sumc2(long[] a) {
+    Long result1 = 1l;
+    Long result2 = ibc;
+    for (Long i : a) {
+        result1 += i;
+        result2 += i + ibc;
+    }
+    return result1 + result2;
+  }
+
+  //===============================================
+  static long remi_sum() {
+    Long j = new Long(1);
+    for (int i = 0; i< 1000; i++) {
+      j = new Long(j + 1);
+    }
+    return j;
+  }
+
+  static long remi_sumb() {
+    Long j = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1;
+    }
+    return j;
+  }
+
+  static long remi_sumf() {
+    Long j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      j = j + 1;
+    }
+    return j;
+  }
+
+  static long remi_sump(Long j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Long(j + 1);
+    }
+    return j;
+  }
+
+  static long remi_sumc() {
+    Long j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = j + ibc;
+    }
+    return j;
+  }
+
+  static long remi_sum2() {
+    Long j1 = new Long(1);
+    Long j2 = new Long(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Long(j1 + 1);
+      j2 = new Long(j2 + 2);
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sumb2() {
+    Long j1 = Long.valueOf(1);
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + 1;
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static long remi_summ2() {
+    Long j1 = new Long(1);
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Long(j1 + 1);
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sump2(Long j1) {
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Long(j1 + 1);
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sumc2() {
+    Long j1 = ibc;
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = j1 + ibc;
+      j2 = j2 + 2;
+    }
+    return j1 + j2;
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static long simple_deop(long i) {
+    Long ib = new Long(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static long simpleb_deop(long i) {
+    Long ib = Long.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static long simplef_deop(long i) {
+    Long ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static long simplep_deop(Long ib) {
+    dummy();
+    return ib;
+  }
+
+  static long simplec_deop(long i) {
+    Long ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static long test_deop(long i) {
+    Long ib = new Long(foo(i));
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static long testb_deop(long i) {
+    Long ib = foo(i);
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static long testf_deop(long i) {
+    Long ib = foob(i);
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static long testp_deop(long i, Long ib) {
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static long testc_deop(long i) {
+    Long ib = ibc;
+    if ((i&1) == 0)
+      ib = foo(i+1);
+    dummy();
+    return ib;
+  }
+
+  static long sum_deop(long[] a) {
+    long result = 1;
+    for (Long i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static long sumb_deop(long[] a) {
+    Long result = 1l;
+    for (Long i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static long sumf_deop(long[] a) {
+    Long result = 1l;
+    for (Long i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static long sump_deop(long[] a, Long result) {
+    for (Long i : a)
+        result += foob(i);
+    dummy();
+    return result;
+  }
+
+  static long sumc_deop(long[] a) {
+    Long result = ibc;
+    for (Long i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static long remi_sum_deop() {
+    Long j = new Long(foo(1));
+    for (int i = 0; i< 1000; i++) {
+      j = new Long(foo(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static long remi_sumb_deop() {
+    Long j = Long.valueOf(foo(1));
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static long remi_sumf_deop() {
+    Long j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static long remi_sump_deop(Long j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  static long remi_sumc_deop() {
+    Long j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo(j + 1);
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static long remi_sum_cond() {
+    Long j = new Long(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Long(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static long remi_sumb_cond() {
+    Long j = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static long remi_sumf_cond() {
+    Long j = foob(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static long remi_sump_cond(Long j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + 1;
+      }
+    }
+    return j;
+  }
+
+  static long remi_sumc_cond() {
+    Long j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = j + ibc;
+      }
+    }
+    return j;
+  }
+
+  static long remi_sum2_cond() {
+    Long j1 = new Long(1);
+    Long j2 = new Long(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Long(j1 + 1);
+      } else {
+        j2 = new Long(j2 + 2);
+      }
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sumb2_cond() {
+    Long j1 = Long.valueOf(1);
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + 1;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static long remi_summ2_cond() {
+    Long j1 = new Long(1);
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Long(j1 + 1);
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sump2_cond(Long j1) {
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Long(j1 + 1);
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+  static long remi_sumc2_cond() {
+    Long j1 = ibc;
+    Long j2 = Long.valueOf(1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = j1 + ibc;
+      } else {
+        j2 = j2 + 2;
+      }
+    }
+    return j1 + j2;
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
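+    // Expected results.  For example, the "simple" entry is the sum of i
+    // over the 12000-iteration loop below, 11999 * 12000 / 2 = 71994000,
+    // while "simplec" contributes the constant 1 on each of the 12000
+    // iterations.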
+    final long[] val = new long[] {
+       71994000,  71994000,    12000,  71994000,  71994000,
+      144000000, 144000000, 72018000, 144000000, 144000000,
+       71994000,  71994000,    12000,  71994000,  71994000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+      144012000, 144012000, 72030000, 144012000, 144012000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+         499501,    499501,   499501,    499501,    499501,
+        1000002,   1000002,  1000002,   1000002,   1000002,
+         499501,    499501,   499501,    499501,    499501,
+           1001,      1001,     1001,      1001,      1001,
+           3002,      3002,     3002,      3002,      3002,
+           1001,      1001,     1001,      1001,      1001,
+            501,       501,      501,       501,       501,
+           1502,      1502,     1502,      1502,      1502
+    };
+
+    long[] res = new long[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0;
+    }
+
+
+    for (long i = 0; i < 12000; i++) {
+      res[0] += simple(i);
+      res[1] += simpleb(i);
+      res[2] += simplec();
+      res[3] += simplef(i);
+      res[4] += simplep(i);
+
+      res[5] += simple2(i);
+      res[6] += simpleb2(i);
+      res[7] += simplec2(i);
+      res[8] += simplem2(i);
+      res[9] += simplep2(i, i);
+
+      res[10] += simple_deop(i);
+      res[11] += simpleb_deop(i);
+      res[12] += simplec_deop(i);
+      res[13] += simplef_deop(i);
+      res[14] += simplep_deop(i);
+
+      res[15] += test(i);
+      res[16] += testb(i);
+      res[17] += testc(i);
+      res[18] += testm(i);
+      res[19] += testp(i, i);
+
+      res[20] += test2(i);
+      res[21] += testb2(i);
+      res[22] += testc2(i);
+      res[23] += testm2(i);
+      res[24] += testp2(i, i);
+
+      res[25] += test_deop(i);
+      res[26] += testb_deop(i);
+      res[27] += testc_deop(i);
+      res[28] += testf_deop(i);
+      res[29] += testp_deop(i, i);
+    }
+
+    long[] ia = new long[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, (long)1);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, (long)1);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, (long)1);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump((long)1);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2((long)1);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop((long)1);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond((long)1);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond((long)1);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6934604/TestShortBoxing.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6934604
+ * @summary enable parts of EliminateAutoBox by default
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox TestShortBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestShortBoxing.dummy -XX:CompileCommand=exclude,TestShortBoxing.foo -XX:CompileCommand=exclude,TestShortBoxing.foob TestShortBoxing
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-EliminateAutoBox
+ * -XX:CompileCommand=exclude,TestShortBoxing.dummy -XX:CompileCommand=exclude,TestShortBoxing.foo -XX:CompileCommand=exclude,TestShortBoxing.foob TestShortBoxing
+ *
+ */
+
+public class TestShortBoxing {
+
+  static final Short ibc = new Short((short)1);
+
+  //===============================================
+  // Non-inlined methods to test deoptimization info
+  static void dummy()      { }
+  static short foo(short i)  { return i; }
+  static Short foob(short i) { return Short.valueOf(i); }
+
+
+  static short simple(short i) {
+    Short ib = new Short(i);
+    return ib;
+  }
+
+  static short simpleb(short i) {
+    Short ib = Short.valueOf(i);
+    return ib;
+  }
+
+  static short simplec() {
+    Short ib = ibc;
+    return ib;
+  }
+
+  static short simplef(short i) {
+    Short ib = foob(i);
+    return ib;
+  }
+
+  static short simplep(Short ib) {
+    return ib;
+  }
+
+  static short simple2(short i) {
+    Short ib1 = new Short(i);
+    Short ib2 = new Short((short)(i+1));
+    return (short)(ib1 + ib2);
+  }
+
+  static short simpleb2(short i) {
+    Short ib1 = Short.valueOf(i);
+    Short ib2 = Short.valueOf((short)(i+1));
+    return (short)(ib1 + ib2);
+  }
+
+  static short simplem2(short i) {
+    Short ib1 = new Short(i);
+    Short ib2 = Short.valueOf((short)(i+1));
+    return (short)(ib1 + ib2);
+  }
+
+  static short simplep2(short i, Short ib1) {
+    Short ib2 = Short.valueOf((short)(i+1));
+    return (short)(ib1 + ib2);
+  }
+
+  static short simplec2(short i) {
+    Short ib1 = ibc;
+    Short ib2 = Short.valueOf((short)(i+1));
+    return (short)(ib1 + ib2);
+  }
+
+  //===============================================
+  static short test(short i) {
+    Short ib = new Short(i);
+    if ((i&1) == 0)
+      ib = (short)(i+1);
+    return ib;
+  }
+
+  static short testb(short i) {
+    Short ib = i;
+    if ((i&1) == 0)
+      ib = (short)(i+1);
+    return ib;
+  }
+
+  static short testm(short i) {
+    Short ib = i;
+    if ((i&1) == 0)
+      ib = new Short((short)(i+1));
+    return ib;
+  }
+
+  static short testp(short i, Short ib) {
+    if ((i&1) == 0)
+      ib = new Short((short)(i+1));
+    return ib;
+  }
+
+  static short testc(short i) {
+    Short ib = ibc;
+    if ((i&1) == 0)
+      ib = new Short((short)(i+1));
+    return ib;
+  }
+
+  static short test2(short i) {
+    Short ib1 = new Short(i);
+    Short ib2 = new Short((short)(i+1));
+    if ((i&1) == 0) {
+      ib1 = new Short((short)(i+1));
+      ib2 = new Short((short)(i+2));
+    }
+    return (short)(ib1+ib2);
+  }
+
+  static short testb2(short i) {
+    Short ib1 = i;
+    Short ib2 = (short)(i+1);
+    if ((i&1) == 0) {
+      ib1 = (short)(i+1);
+      ib2 = (short)(i+2);
+    }
+    return (short)(ib1 + ib2);
+  }
+
+  static short testm2(short i) {
+    Short ib1 = new Short(i);
+    Short ib2 = (short)(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Short((short)(i+1));
+      ib2 = (short)(i+2);
+    }
+    return (short)(ib1 + ib2);
+  }
+
+  static short testp2(short i, Short ib1) {
+    Short ib2 = (short)(i+1);
+    if ((i&1) == 0) {
+      ib1 = new Short((short)(i+1));
+      ib2 = (short)(i+2);
+    }
+    return (short)(ib1 + ib2);
+  }
+
+  static short testc2(short i) {
+    Short ib1 = ibc;
+    Short ib2 = (short)(i+1);
+    if ((i&1) == 0) {
+      ib1 = (short)(ibc+1);
+      ib2 = (short)(i+2);
+    }
+    return (short)(ib1 + ib2);
+  }
+
+  //===============================================
+  static short sum(short[] a) {
+    short result = 1;
+    for (Short i : a)
+        result += i;
+    return result;
+  }
+
+  static short sumb(short[] a) {
+    Short result = 1;
+    for (Short i : a)
+        result = (short)(result + i);
+    return result;
+  }
+
+  static short sumc(short[] a) {
+    Short result = ibc;
+    for (Short i : a)
+        result = (short)(result + i);
+    return result;
+  }
+
+  static short sumf(short[] a) {
+    Short result = foob((short)1);
+    for (Short i : a)
+        result = (short)(result + i);
+    return result;
+  }
+
+  static short sump(short[] a, Short result) {
+    for (Short i : a)
+        result = (short)(result + i);
+    return result;
+  }
+
+  static short sum2(short[] a) {
+    short result1 = 1;
+    short result2 = 1;
+    for (Short i : a) {
+        result1 += i;
+        result2 += i + 1;
+    }
+    return (short)(result1 + result2);
+  }
+
+  static short sumb2(short[] a) {
+    Short result1 = 1;
+    Short result2 = 1;
+    for (Short i : a) {
+        result1 = (short)(result1 + i);
+        result2 = (short)(result2 + i + 1);
+    }
+    return (short)(result1 + result2);
+  }
+
+  static short summ2(short[] a) {
+    Short result1 = 1;
+    Short result2 = new Short((short)1);
+    for (Short i : a) {
+        result1 = (short)(result1 + i);
+        result2 = (short)(result2 + new Short((short)(i + 1)));
+    }
+    return (short)(result1 + result2);
+  }
+
+  static short sump2(short[] a, Short result2) {
+    Short result1 = 1;
+    for (Short i : a) {
+        result1 = (short)(result1 + i);
+        result2 = (short)(result2 + i + 1);
+    }
+    return (short)(result1 + result2);
+  }
+
+  static short sumc2(short[] a) {
+    Short result1 = 1;
+    Short result2 = ibc;
+    for (Short i : a) {
+        result1 = (short)(result1 + i);
+        result2 = (short)(result2 + i + ibc);
+    }
+    return (short)(result1 + result2);
+  }
+
+  //===============================================
+  static short remi_sum() {
+    Short j = new Short((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j = new Short((short)(j + 1));
+    }
+    return j;
+  }
+
+  static short remi_sumb() {
+    Short j = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j = (short)(j + 1);
+    }
+    return j;
+  }
+
+  static short remi_sumf() {
+    Short j = foob((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j = (short)(j + 1);
+    }
+    return j;
+  }
+
+  static short remi_sump(Short j) {
+    for (int i = 0; i< 1000; i++) {
+      j = new Short((short)(j + 1));
+    }
+    return j;
+  }
+
+  static short remi_sumc() {
+    Short j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = (short)(j + ibc);
+    }
+    return j;
+  }
+
+  static short remi_sum2() {
+    Short j1 = new Short((short)1);
+    Short j2 = new Short((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Short((short)(j1 + 1));
+      j2 = new Short((short)(j2 + 2));
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sumb2() {
+    Short j1 = Short.valueOf((short)1);
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = (short)(j1 + 1);
+      j2 = (short)(j2 + 2);
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_summ2() {
+    Short j1 = new Short((short)1);
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Short((short)(j1 + 1));
+      j2 = (short)(j2 + 2);
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sump2(Short j1) {
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = new Short((short)(j1 + 1));
+      j2 = (short)(j2 + 2);
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sumc2() {
+    Short j1 = ibc;
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j1 = (short)(j1 + ibc);
+      j2 = (short)(j2 + 2);
+    }
+    return (short)(j1 + j2);
+  }
+
+
+  //===============================================
+  // Safepoints and debug info for deoptimization
+  static short simple_deop(short i) {
+    Short ib = new Short(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static short simpleb_deop(short i) {
+    Short ib = Short.valueOf(foo(i));
+    dummy();
+    return ib;
+  }
+
+  static short simplef_deop(short i) {
+    Short ib = foob(i);
+    dummy();
+    return ib;
+  }
+
+  static short simplep_deop(Short ib) {
+    dummy();
+    return ib;
+  }
+
+  static short simplec_deop(short i) {
+    Short ib = ibc;
+    dummy();
+    return ib;
+  }
+
+  static short test_deop(short i) {
+    Short ib = new Short(foo(i));
+    if ((i&1) == 0)
+      ib = foo((short)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static short testb_deop(short i) {
+    Short ib = foo(i);
+    if ((i&1) == 0)
+      ib = foo((short)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static short testf_deop(short i) {
+    Short ib = foob(i);
+    if ((i&1) == 0)
+      ib = foo((short)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static short testp_deop(short i, Short ib) {
+    if ((i&1) == 0)
+      ib = foo((short)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static short testc_deop(short i) {
+    Short ib = ibc;
+    if ((i&1) == 0)
+      ib = foo((short)(i+1));
+    dummy();
+    return ib;
+  }
+
+  static short sum_deop(short[] a) {
+    short result = 1;
+    for (Short i : a)
+        result += foo(i);
+    dummy();
+    return result;
+  }
+
+  static short sumb_deop(short[] a) {
+    Short result = 1;
+    for (Short i : a)
+        result = (short)(result + foo(i));
+    dummy();
+    return result;
+  }
+
+  static short sumf_deop(short[] a) {
+    Short result = 1;
+    for (Short i : a)
+        result = (short)(result + foob(i));
+    dummy();
+    return result;
+  }
+
+  static short sump_deop(short[] a, Short result) {
+    for (Short i : a)
+        result = (short)(result + foob(i));
+    dummy();
+    return result;
+  }
+
+  static short sumc_deop(short[] a) {
+    Short result = ibc;
+    for (Short i : a)
+        result = (short)(result + foo(i));
+    dummy();
+    return result;
+  }
+
+  static short remi_sum_deop() {
+    Short j = new Short(foo((short)1));
+    for (int i = 0; i< 1000; i++) {
+      j = new Short(foo((short)(j + 1)));
+    }
+    dummy();
+    return j;
+  }
+
+  static short remi_sumb_deop() {
+    Short j = Short.valueOf(foo((short)1));
+    for (int i = 0; i< 1000; i++) {
+      j = foo((short)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static short remi_sumf_deop() {
+    Short j = foob((short)1);
+    for (int i = 0; i< 1000; i++) {
+      j = foo((short)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static short remi_sump_deop(Short j) {
+    for (int i = 0; i< 1000; i++) {
+      j = foo((short)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  static short remi_sumc_deop() {
+    Short j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      j = foo((short)(j + 1));
+    }
+    dummy();
+    return j;
+  }
+
+  //===============================================
+  // Conditional increment
+  static short remi_sum_cond() {
+    Short j = new Short((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = new Short((short)(j + 1));
+      }
+    }
+    return j;
+  }
+
+  static short remi_sumb_cond() {
+    Short j = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (short)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static short remi_sumf_cond() {
+    Short j = foob((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (short)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static short remi_sump_cond(Short j) {
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (short)(j + 1);
+      }
+    }
+    return j;
+  }
+
+  static short remi_sumc_cond() {
+    Short j = ibc;
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j = (short)(j + ibc);
+      }
+    }
+    return j;
+  }
+
+  static short remi_sum2_cond() {
+    Short j1 = new Short((short)1);
+    Short j2 = new Short((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Short((short)(j1 + 1));
+      } else {
+        j2 = new Short((short)(j2 + 2));
+      }
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sumb2_cond() {
+    Short j1 = Short.valueOf((short)1);
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = (short)(j1 + 1);
+      } else {
+        j2 = (short)(j2 + 2);
+      }
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_summ2_cond() {
+    Short j1 = new Short((short)1);
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Short((short)(j1 + 1));
+      } else {
+        j2 = (short)(j2 + 2);
+      }
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sump2_cond(Short j1) {
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = new Short((short)(j1 + 1));
+      } else {
+        j2 = (short)(j2 + 2);
+      }
+    }
+    return (short)(j1 + j2);
+  }
+
+  static short remi_sumc2_cond() {
+    Short j1 = ibc;
+    Short j2 = Short.valueOf((short)1);
+    for (int i = 0; i< 1000; i++) {
+      if ((i&1) == 0) {
+        j1 = (short)(j1 + ibc);
+      } else {
+        j2 = (short)(j2 + 2);
+      }
+    }
+    return (short)(j1 + j2);
+  }
+
+
+  public static void main(String[] args) {
+    final int ntests = 70;
+
+    String[] test_name = new String[] {
+        "simple",      "simpleb",      "simplec",      "simplef",      "simplep",
+        "simple2",     "simpleb2",     "simplec2",     "simplem2",     "simplep2",
+        "simple_deop", "simpleb_deop", "simplec_deop", "simplef_deop", "simplep_deop",
+        "test",        "testb",        "testc",        "testm",        "testp",
+        "test2",       "testb2",       "testc2",       "testm2",       "testp2",
+        "test_deop",   "testb_deop",   "testc_deop",   "testf_deop",   "testp_deop",
+        "sum",         "sumb",         "sumc",         "sumf",         "sump",
+        "sum2",        "sumb2",        "sumc2",        "summ2",        "sump2",
+        "sum_deop",    "sumb_deop",    "sumc_deop",    "sumf_deop",    "sump_deop",
+        "remi_sum",       "remi_sumb",       "remi_sumc",       "remi_sumf",       "remi_sump",
+        "remi_sum2",      "remi_sumb2",      "remi_sumc2",      "remi_summ2",      "remi_sump2",
+        "remi_sum_deop",  "remi_sumb_deop",  "remi_sumc_deop",  "remi_sumf_deop",  "remi_sump_deop",
+        "remi_sum_cond",  "remi_sumb_cond",  "remi_sumc_cond",  "remi_sumf_cond",  "remi_sump_cond",
+        "remi_sum2_cond", "remi_sumb2_cond", "remi_sumc2_cond", "remi_summ2_cond", "remi_sump2_cond"
+    };
+
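+    // Expected results.  The "sum" group differs from the wider-type
+    // variants because the running total is truncated to 16 bits:
+    // (short)(1 + (0 + 1 + ... + 999)) = (short)499501 = -24787.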
+    final int[] val = new int[] {
+       71994000,  71994000,    12000,  71994000,  71994000,
+      144000000, 144000000, 72018000, 144000000, 144000000,
+       71994000,  71994000,    12000,  71994000,  71994000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+      144012000, 144012000, 72030000, 144012000, 144012000,
+       72000000,  72000000, 36006000,  72000000,  72000000,
+         -24787,    -24787,   -24787,    -24787,    -24787,
+          16962,     16962,    16962,     16962,     16962,
+         -24787,    -24787,   -24787,    -24787,    -24787,
+           1001,      1001,     1001,      1001,      1001,
+           3002,      3002,     3002,      3002,      3002,
+           1001,      1001,     1001,      1001,      1001,
+            501,       501,      501,       501,       501,
+           1502,      1502,     1502,      1502,      1502
+    };
+
+    int[] res = new int[ntests];
+    for (int i = 0; i < ntests; i++) {
+      res[i] = 0;
+    }
+
+
+    for (int i = 0; i < 12000; i++) {
+      res[0] += simple((short)i);
+      res[1] += simpleb((short)i);
+      res[2] += simplec();
+      res[3] += simplef((short)i);
+      res[4] += simplep((short)i);
+
+      res[5] += simple2((short)i);
+      res[6] += simpleb2((short)i);
+      res[7] += simplec2((short)i);
+      res[8] += simplem2((short)i);
+      res[9] += simplep2((short)i, (short)i);
+
+      res[10] += simple_deop((short)i);
+      res[11] += simpleb_deop((short)i);
+      res[12] += simplec_deop((short)i);
+      res[13] += simplef_deop((short)i);
+      res[14] += simplep_deop((short)i);
+
+      res[15] += test((short)i);
+      res[16] += testb((short)i);
+      res[17] += testc((short)i);
+      res[18] += testm((short)i);
+      res[19] += testp((short)i, (short)i);
+
+      res[20] += test2((short)i);
+      res[21] += testb2((short)i);
+      res[22] += testc2((short)i);
+      res[23] += testm2((short)i);
+      res[24] += testp2((short)i, (short)i);
+
+      res[25] += test_deop((short)i);
+      res[26] += testb_deop((short)i);
+      res[27] += testc_deop((short)i);
+      res[28] += testf_deop((short)i);
+      res[29] += testp_deop((short)i, (short)i);
+    }
+
+    short[] ia = new short[1000];
+    for (int i = 0; i < 1000; i++) {
+      ia[i] = (short)i;
+    }
+
+    for (int i = 0; i < 100; i++) {
+      res[30] = sum(ia);
+      res[31] = sumb(ia);
+      res[32] = sumc(ia);
+      res[33] = sumf(ia);
+      res[34] = sump(ia, (short)1);
+
+      res[35] = sum2(ia);
+      res[36] = sumb2(ia);
+      res[37] = sumc2(ia);
+      res[38] = summ2(ia);
+      res[39] = sump2(ia, (short)1);
+
+      res[40] = sum_deop(ia);
+      res[41] = sumb_deop(ia);
+      res[42] = sumc_deop(ia);
+      res[43] = sumf_deop(ia);
+      res[44] = sump_deop(ia, (short)1);
+
+      res[45] = remi_sum();
+      res[46] = remi_sumb();
+      res[47] = remi_sumc();
+      res[48] = remi_sumf();
+      res[49] = remi_sump((short)1);
+
+      res[50] = remi_sum2();
+      res[51] = remi_sumb2();
+      res[52] = remi_sumc2();
+      res[53] = remi_summ2();
+      res[54] = remi_sump2((short)1);
+
+      res[55] = remi_sum_deop();
+      res[56] = remi_sumb_deop();
+      res[57] = remi_sumc_deop();
+      res[58] = remi_sumf_deop();
+      res[59] = remi_sump_deop((short)1);
+
+      res[60] = remi_sum_cond();
+      res[61] = remi_sumb_cond();
+      res[62] = remi_sumc_cond();
+      res[63] = remi_sumf_cond();
+      res[64] = remi_sump_cond((short)1);
+
+      res[65] = remi_sum2_cond();
+      res[66] = remi_sumb2_cond();
+      res[67] = remi_sumc2_cond();
+      res[68] = remi_summ2_cond();
+      res[69] = remi_sump2_cond((short)1);
+    }
+
+    int failed = 0;
+    for (int i = 0; i < ntests; i++) {
+      if (res[i] != val[i]) {
+        System.err.println(test_name[i] + ": " + res[i] + " != " + val[i]);
+        failed++;
+      }
+    }
+    if (failed > 0) {
+      System.err.println("Failed " + failed + " tests.");
+      throw new InternalError();
+    } else {
+      System.out.println("Passed.");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/codegen/8144028/BitTests.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2015, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8144028
+ * @summary Use AArch64 bit-test instructions in C2
+ * @modules java.base
+ * @run main/othervm -Xbatch -XX:CompileCommand=dontinline,BitTests::* -XX:-TieredCompilation BitTests
+ * @run main/othervm -Xbatch -XX:+TieredCompilation -XX:TieredStopAtLevel=1 BitTests
+ * @run main/othervm -Xbatch -XX:+TieredCompilation BitTests
+ *
+ */
+
+// Try to ensure that the bit test instructions TBZ/TBNZ, TST/TSTW
+// don't generate incorrect code.  We can't guarantee that C2 will use
+// bit test instructions for this test and it's not a bug if it
+// doesn't.  However, these test cases are ideal candidates for each
+// of the instruction forms.
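+//
+// Roughly: TBZ/TBNZ test a single register bit and branch on it, while
+// TST/TSTW are flag-setting ANDs of a register with an immediate mask
+// (64-bit and 32-bit forms respectively).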
+public class BitTests {
+
+    private final XorShift r = new XorShift();
+
+    private final long increment(long ctr) {
+        return ctr + 1;
+    }
+
+    private final int increment(int ctr) {
+        return ctr + 1;
+    }
+
+    private final long testIntSignedBranch(long counter) {
+        if ((int)r.nextLong() < 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongSignedBranch(long counter) {
+        if (r.nextLong() < 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testIntBitBranch(long counter) {
+        if (((int)r.nextLong() & (1 << 27)) != 0) {
+            counter = increment(counter);
+        }
+        if (((int)r.nextLong() & (1 << 27)) != 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongBitBranch(long counter) {
+        if ((r.nextLong() & (1l << 50)) != 0) {
+            counter = increment(counter);
+        }
+        if ((r.nextLong() & (1l << 50)) != 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongMaskBranch(long counter) {
+        if (((r.nextLong() & 0x0800000000l) != 0)) {
+            counter++;
+        }
+       return counter;
+    }
+
+    private final long testIntMaskBranch(long counter) {
+        if ((((int)r.nextLong() & 0x08) != 0)) {
+            counter++;
+        }
+        return counter;
+    }
+
+    private final long testLongMaskBranch(long counter, long mask) {
+        if (((r.nextLong() & mask) != 0)) {
+            counter++;
+        }
+       return counter;
+    }
+
+    private final long testIntMaskBranch(long counter, int mask) {
+        if ((((int)r.nextLong() & mask) != 0)) {
+            counter++;
+        }
+        return counter;
+    }
+
+    private final long step(long counter) {
+        counter = testIntSignedBranch(counter);
+        counter = testLongSignedBranch(counter);
+        counter = testIntBitBranch(counter);
+        counter = testLongBitBranch(counter);
+        counter = testIntMaskBranch(counter);
+        counter = testLongMaskBranch(counter);
+        counter = testIntMaskBranch(counter, 0x8000);
+        counter = testLongMaskBranch(counter, 0x800000000l);
+        return counter;
+    }
+
+
+    private final long finalBits = 3;
+
+    private long bits = 7;
+
+    public static void main(String[] args) {
+        BitTests t = new BitTests();
+
+        long counter = 0;
+        for (int i = 0; i < 10000000; i++) {
+            counter = t.step((int) counter);
+        }
+        if (counter != 50001495) {
+            System.err.println("FAILED: counter = " + counter + ", should be 50001495.");
+            System.exit(97);
+        }
+        System.out.println("PASSED");
+    }
+
+}
+
+// Marsaglia's xor-shift generator, used here because it is
+// reproducible across all Java implementations.  It is also very
+// fast.
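+// Because it uses nothing but long arithmetic from a fixed seed, the
+// sequence is identical on every JVM, which is what lets main() compare
+// the final counter against a hard-coded value.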
+class XorShift {
+
+    private long y;
+
+    XorShift() {
+        y = 2463534242l;
+    }
+
+    public long nextLong() {
+        y ^= (y << 13);
+        y ^= (y >>> 17);
+        return (y ^= (y << 5));
+
+    }
+}
--- a/test/compiler/codegen/IntRotateWithImmediate.java	Mon Apr 13 06:13:18 2020 +0100
+++ b/test/compiler/codegen/IntRotateWithImmediate.java	Mon Apr 13 16:44:26 2020 +0100
@@ -1,5 +1,6 @@
 /*
  * Copyright 2015 SAP AG.  All Rights Reserved.
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +25,7 @@
 /*
  * @test
  * @bug 8080190
+ * @bug 8154537
  * @key regression
  * @summary Test that the rotate distance used in the rotate instruction is properly masked with 0x1f
  * @run main/othervm -Xbatch -XX:-UseOnStackReplacement IntRotateWithImmediate
@@ -33,7 +35,7 @@
 public class IntRotateWithImmediate {
 
   // This is currently the same as Integer.rotateRight()
-  static int rotateRight(int i, int distance) {
+  static int rotateRight1(int i, int distance) {
     // On some architectures (i.e. x86_64 and ppc64) the following computation is
     // matched in the .ad file into a single MachNode which emits a single rotate
     // machine instruction. It is important that the shift amount is masked to match
@@ -43,17 +45,29 @@
     return ((i >>> distance) | (i << -distance));
   }
 
-  static int compute(int x) {
-    return rotateRight(x, 3);
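+  // The same rotation written with an explicit (32 - distance) shift instead
+  // of the negated-distance form above.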
+  static int rotateRight2(int i, int distance) {
+      return ((i >>> distance) | (i << (32-distance)));
+  }
+
+  static int compute1(int x) {
+    return rotateRight1(x, 3);
+  }
+
+  static int compute2(int x) {
+    return rotateRight2(x, 3);
   }
 
   public static void main(String args[]) {
     int val = 4096;
 
-    int firstResult = compute(val);
+    int firstResult = compute1(val);
 
     for (int i = 0; i < 100000; i++) {
-      int newResult = compute(val);
+      int newResult = compute1(val);
+      if (firstResult != newResult) {
+        throw new InternalError(firstResult + " != " + newResult);
+      }
+      newResult = compute2(val);
       if (firstResult != newResult) {
         throw new InternalError(firstResult + " != " + newResult);
       }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8165673/TestFloatJNIArgs.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
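+// With 15 float arguments the AArch64 calling convention runs out of FP
+// argument registers (v0-v7), so some of the values have to be passed on the
+// stack; addFloatsInts additionally mixes in integer-register arguments.
+// Any argument shuffled into the wrong place shows up as a wrong sum.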
+public class TestFloatJNIArgs {
+    static {
+        try {
+            System.loadLibrary("TestFloatJNIArgs");
+        } catch (UnsatisfiedLinkError e) {
+            System.out.println("could not load native lib: " + e);
+        }
+    }
+
+    public static native float add15floats(
+        float f1, float f2, float f3, float f4,
+        float f5, float f6, float f7, float f8,
+        float f9, float f10, float f11, float f12,
+        float f13, float f14, float f15);
+
+    public static native float add10floats(
+        float f1, float f2, float f3, float f4,
+        float f5, float f6, float f7, float f8,
+        float f9, float f10);
+
+    public static native float addFloatsInts(
+        float f1, float f2, float f3, float f4,
+        float f5, float f6, float f7, float f8,
+        float f9, float f10, float f11, float f12,
+        float f13, float f14, float f15, int a16, int a17);
+
+    public static native double add15doubles(
+        double d1, double d2, double d3, double d4,
+        double d5, double d6, double d7, double d8,
+        double d9, double d10, double d11, double d12,
+        double d13, double d14, double d15);
+
+    static void test() throws Exception {
+        float sum = TestFloatJNIArgs.add15floats(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                                                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
+        if (sum != 15.0f) {
+            throw new Error("Passed 15 times 1.0f to jni function which didn't add them properly: " + sum);
+        }
+
+        float sum1 = TestFloatJNIArgs.add10floats(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
+        if (sum1 != 10.0f) {
+            throw new Error("Passed 10 times 1.0f to jni function which didn't add them properly: " + sum1);
+        }
+
+        float sum2 = TestFloatJNIArgs.addFloatsInts(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                                                   1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1, 1);
+        if (sum2 != 17.0f) {
+            throw new Error("Passed 17 times 1 to jni function which didn't add them properly: " + sum2);
+        }
+
+        double dsum = TestFloatJNIArgs.add15doubles(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+        if (dsum != 15.0) {
+            throw new Error("Passed 15 times 1.0 to jni function which didn't add them properly: " + dsum);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < 200; ++i) {
+            test();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8165673/TestFloatJNIArgs.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,105 @@
+#!/bin/sh
+
+#
+#  Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+#  Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+#  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+#  This code is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License version 2 only, as
+#  published by the Free Software Foundation.
+#
+#  This code is distributed in the hope that it will be useful, but WITHOUT
+#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+#  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+#  version 2 for more details (a copy is included in the LICENSE file that
+#  accompanied this code).
+#
+#  You should have received a copy of the GNU General Public License version
+#  2 along with this work; if not, write to the Free Software Foundation,
+#  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+#  or visit www.oracle.com if you need additional information or have any
+#  questions.
+#
+
+##
+## @test
+## @bug 8165673
+## @summary regression test for passing float args to a jni function.
+## @run shell/timeout=30 TestFloatJNIArgs.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set.  Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../../test_env.sh
+
+# set platform-dependent variables
+if [ $VM_OS == "linux" -a $VM_CPU == "aarch64" ]; then
+    echo "Testing on linux-aarch64"
+    gcc_cmd=`which gcc`
+    if [ "x$gcc_cmd" == "x" ]; then
+        echo "WARNING: gcc not found. Cannot execute test." 2>&1
+        exit 0;
+    fi
+else
+    echo "Test passed; only valid for linux-aarch64"
+    exit 0;
+fi
+
+THIS_DIR=.
+
+cp ${TESTSRC}${FS}*.java ${THIS_DIR}
+${TESTJAVA}${FS}bin${FS}javac *.java
+
+$gcc_cmd -O1 -DLINUX -fPIC -shared \
+    -o ${THIS_DIR}${FS}libTestFloatJNIArgs.so \
+    -I${TESTJAVA}${FS}include \
+    -I${TESTJAVA}${FS}include${FS}linux \
+    ${TESTSRC}${FS}libTestFloatJNIArgs.c
+
+# run the java test in the interpreter (-Xint); compiled runs follow below
+cmd="${TESTJAVA}${FS}bin${FS}java -Xint \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:+TieredCompilation -Xcomp \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:-TieredCompilation -Xcomp \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+echo "Test Passed"
+exit 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8165673/libTestFloatJNIArgs.c	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015, 2016. All rights reserved.
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_add15floats
+  (JNIEnv *env, jclass cls,
+   jfloat  f1, jfloat  f2, jfloat  f3, jfloat  f4,
+   jfloat  f5, jfloat  f6, jfloat  f7, jfloat  f8,
+   jfloat  f9, jfloat f10, jfloat f11, jfloat f12,
+   jfloat f13, jfloat f14, jfloat f15) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15;
+}
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_add10floats
+  (JNIEnv *env, jclass cls,
+   jfloat  f1, jfloat  f2, jfloat  f3, jfloat  f4,
+   jfloat  f5, jfloat  f6, jfloat  f7, jfloat  f8,
+   jfloat  f9, jfloat f10) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10;
+}
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_addFloatsInts
+  (JNIEnv *env, jclass cls,
+   jfloat  f1, jfloat  f2, jfloat  f3, jfloat  f4,
+   jfloat  f5, jfloat  f6, jfloat  f7, jfloat  f8,
+   jfloat  f9, jfloat f10, jfloat f11, jfloat f12,
+   jfloat f13, jfloat f14, jfloat f15, jint a16, jint a17) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15 + a16 + a17;
+}
+
+JNIEXPORT jdouble JNICALL Java_TestFloatJNIArgs_add15doubles
+  (JNIEnv *env, jclass cls,
+   jdouble  f1, jdouble  f2, jdouble  f3, jdouble  f4,
+   jdouble  f5, jdouble  f6, jdouble  f7, jdouble  f8,
+   jdouble  f9, jdouble f10, jdouble f11, jdouble f12,
+   jdouble f13, jdouble f14, jdouble f15) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8207838/TestFloatSyncJNIArgs.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2015, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
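+// Like TestFloatJNIArgs, but the native methods are synchronized, so the
+// JNI wrapper must acquire the class monitor before the call; the float
+// arguments have to survive that extra code, and running from several
+// threads makes monitor contention likely.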
+public class TestFloatSyncJNIArgs {
+    static {
+        try {
+            System.loadLibrary("TestFloatSyncJNIArgs");
+        } catch (UnsatisfiedLinkError e) {
+            System.out.println("could not load native lib: " + e);
+        }
+    }
+
+    private static final int numberOfThreads = 8;
+
+    static volatile Error testFailed = null;
+
+    public synchronized static native float combine15floats(
+        float f1, float f2, float f3, float f4,
+        float f5, float f6, float f7, float f8,
+        float f9, float f10, float f11, float f12,
+        float f13, float f14, float f15);
+
+    public synchronized static native double combine15doubles(
+        double d1, double d2, double d3, double d4,
+        double d5, double d6, double d7, double d8,
+        double d9, double d10, double d11, double d12,
+        double d13, double d14, double d15);
+
+    static void test() throws Exception {
+        Thread[] threads = new Thread[numberOfThreads];
+
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i] = new Thread(() -> {
+                for (int j = 0; j < 10000; j++) {
+                    float f = combine15floats(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                              9, 10, 11, 12, 13, 14, 15);
+                    if (f != 81720.0f) {
+                        testFailed = new Error("jni function didn't combine 15 float args properly: " + f);
+                        throw testFailed;
+                    }
+                }
+            });
+        }
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i].start();
+        }
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i].join();
+        }
+        if (testFailed != null) {
+            throw testFailed;
+        }
+
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i] = new Thread(() -> {
+                for (int j = 0; j < 10000; j++) {
+                    double d = combine15doubles(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+                                                9, 10, 11, 12, 13, 14, 15);
+                    if (d != 81720.0) {
+                        testFailed = new Error("jni function didn't combine 15 double args properly: " + d);
+                        throw testFailed;
+                    }
+                }
+            });
+        }
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i].start();
+        }
+        for (int i = 0; i < numberOfThreads; i++) {
+            threads[i].join();
+        }
+        if (testFailed != null) {
+            throw testFailed;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < 200; ++i) {
+            test();
+        }
+    }
+}
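The expected value 81720.0 checked above is the Fletcher-style checksum that fcombine() in libTestFloatSyncJNIArgs.c (further below in this changeset) computes for the inputs 1..15; a minimal sketch of the arithmetic (class name ChecksumSketch is illustrative):

    // Minimal sketch: reproduces the Fletcher-style checksum that the native
    // fcombine() computes for the inputs 1.0f .. 15.0f.
    public class ChecksumSketch {
        public static void main(String[] args) {
            float sum = 0.0f, sumOfSums = 0.0f;
            for (int i = 1; i <= 15; i++) {
                sum += i;            // 120.0 after the loop
                sumOfSums += sum;    // 680.0 after the loop
            }
            System.out.println(sum + sumOfSums * sum);  // prints 81720.0
        }
    }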
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8207838/TestFloatSyncJNIArgs.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,105 @@
+#!/bin/sh
+
+#
+#  Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+#  Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+#  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+#  This code is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License version 2 only, as
+#  published by the Free Software Foundation.
+#
+#  This code is distributed in the hope that it will be useful, but WITHOUT
+#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+#  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+#  version 2 for more details (a copy is included in the LICENSE file that
+#  accompanied this code).
+#
+#  You should have received a copy of the GNU General Public License version
+#  2 along with this work; if not, write to the Free Software Foundation,
+#  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+#  or visit www.oracle.com if you need additional information or have any
+#  questions.
+#
+
+##
+## @test
+## @bug 8207838
+## @summary Regression test for passing float args to a synchronized JNI function.
+## @run shell/timeout=300 TestFloatSyncJNIArgs.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set.  Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../../test_env.sh
+
+# set platform-dependent variables
+if [ $VM_OS == "linux" -a $VM_CPU == "aarch64" ]; then
+    echo "Testing on linux-aarch64"
+    gcc_cmd=`which gcc`
+    if [ "x$gcc_cmd" == "x" ]; then
+        echo "WARNING: gcc not found. Cannot execute test." 2>&1
+        exit 0;
+    fi
+else
+    echo "Test passed; only valid for linux-aarch64"
+    exit 0;
+fi
+
+THIS_DIR=.
+
+cp ${TESTSRC}${FS}*.java ${THIS_DIR}
+${TESTJAVA}${FS}bin${FS}javac *.java
+
+$gcc_cmd -O1 -DLINUX -fPIC -shared \
+    -o ${THIS_DIR}${FS}libTestFloatSyncJNIArgs.so \
+    -I${TESTJAVA}${FS}include \
+    -I${TESTJAVA}${FS}include${FS}linux \
+    ${TESTSRC}${FS}libTestFloatSyncJNIArgs.c
+
+# run the java test with the interpreter only (-Xint)
+cmd="${TESTJAVA}${FS}bin${FS}java -Xint \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:+TieredCompilation \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:-TieredCompilation \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+    echo "Test Failed"
+    exit 1
+fi
+
+echo "Test Passed"
+exit 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/floatingpoint/8207838/libTestFloatSyncJNIArgs.c	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Fletcher checksum. This is a nonlinear function which detects both */
+/* missing or otherwise incorrect arguments and arguments in the wrong */
+/* order. */
+static jfloat fcombine(jfloat f[], int len) {
+  int i;
+  jfloat sum = 0, sum_of_sums = 0;
+  for (i = 0; i < len; i++) {
+    sum += f[i];
+    sum_of_sums += sum;
+  }
+  return sum + sum_of_sums * sum;
+}
+
+static jdouble combine(jdouble f[], int len) {
+  int i;
+  double sum = 0, sum_of_sums = 0;
+  for (i = 0; i < len; i++) {
+    sum += f[i];
+    sum_of_sums += sum;
+  }
+  return sum + sum_of_sums * sum;
+}
+
+JNIEXPORT jfloat JNICALL Java_TestFloatSyncJNIArgs_combine15floats
+  (JNIEnv *env, jclass cls,
+   jfloat  f1, jfloat  f2, jfloat  f3, jfloat  f4,
+   jfloat  f5, jfloat  f6, jfloat  f7, jfloat  f8,
+   jfloat  f9, jfloat f10, jfloat f11, jfloat f12,
+   jfloat f13, jfloat f14, jfloat f15) {
+
+  jfloat f[15];
+  f[0] = f1; f[1] = f2; f[2] = f3; f[3] = f4; f[4] = f5;
+  f[5] = f6; f[6] = f7; f[7] = f8; f[8] = f9; f[9] = f10;
+  f[10] = f11; f[11] = f12; f[12] = f13; f[13] = f14; f[14] = f15;
+
+  return fcombine(f, sizeof f / sizeof f[0]);
+}
+
+JNIEXPORT jdouble JNICALL Java_TestFloatSyncJNIArgs_combine15doubles
+  (JNIEnv *env, jclass cls,
+   jdouble  f1, jdouble  f2, jdouble  f3, jdouble  f4,
+   jdouble  f5, jdouble  f6, jdouble  f7, jdouble  f8,
+   jdouble  f9, jdouble f10, jdouble f11, jdouble f12,
+   jdouble f13, jdouble f14, jdouble f15) {
+
+  jdouble f[15];
+  f[0] = f1; f[1] = f2; f[2] = f3; f[3] = f4; f[4] = f5;
+  f[5] = f6; f[6] = f7; f[7] = f8; f[8] = f9; f[9] = f10;
+  f[10] = f11; f[11] = f12; f[12] = f13; f[13] = f14; f[14] = f15;
+
+  return combine(f, sizeof f / sizeof f[0]);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/ConstFPVectorization.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074869
+ * @summary C2 code generator can replace -0.0f with +0.0f on Linux
+ * @run main ConstFPVectorization 8
+ * @author volker.simonis@gmail.com
+ *
+ */
+
+public class ConstFPVectorization {
+
+    static float[] f = new float[16];
+    static double[] d = new double[16];
+
+    static void floatLoop(int count) {
+        for (int i = 0; i < count; i++) {
+            f[i] = -0.0f;
+        }
+    }
+
+    static void doubleLoop(int count) {
+        for (int i = 0; i < count; i++) {
+            d[i] = -0.0d;
+        }
+    }
+
+    public static void main(String args[]) {
+        for (int i = 0; i < 10_000; i++) {
+            floatLoop(Integer.parseInt(args[0]));
+            doubleLoop(Integer.parseInt(args[0]));
+        }
+        for (int i = 0; i < Integer.parseInt(args[0]); i++) {
+            if (Float.floatToRawIntBits(f[i]) != Float.floatToRawIntBits(-0.0f))
+                throw new Error("Float error at index " + i);
+            if (Double.doubleToRawLongBits(d[i]) != Double.doubleToRawLongBits(-0.0d))
+                throw new Error("Double error at index " + i);
+        }
+    }
+}
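ConstFPVectorization compares raw bit patterns because -0.0f == 0.0f evaluates to true under IEEE 754, so only the sign bit tells the two values apart; a minimal sketch (class name NegativeZeroSketch is illustrative):

    // Minimal sketch: -0.0f and 0.0f compare equal, but their bit patterns differ,
    // which is why the test above checks floatToRawIntBits instead of using ==.
    public class NegativeZeroSketch {
        public static void main(String[] args) {
            System.out.println(-0.0f == 0.0f);                                        // true
            System.out.println(Integer.toHexString(Float.floatToRawIntBits(-0.0f))); // 80000000
            System.out.println(Integer.toHexString(Float.floatToRawIntBits(0.0f)));  // 0
        }
    }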
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/oracle/CheckCompileCommandOption.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import com.oracle.java.testlibrary.*;
+
+/*
+ * @test CheckCompileCommandOption
+ * @bug 8055286 8056964 8059847
+ * @summary "Checks parsing of -XX:+CompileCommand=option"
+ * @library /testlibrary
+ * @run main CheckCompileCommandOption
+ */
+
+public class CheckCompileCommandOption {
+
+    // Currently, two types of trailing options can be used with
+    // -XX:CompileCommand=option
+    //
+    // (1) CompileCommand=option,Klass::method,flag
+    // (2) CompileCommand=option,Klass::method,type,flag,value
+    //
+    // Type (1) is used to enable a boolean flag for a method.
+    //
+    // Type (2) is used to support flags with a value. Values can
+    // have the following types: intx, uintx, bool, ccstr,
+    // ccstrlist, and double.
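+    //
+    // For example (illustrative, taken from the test data below):
+    //   (1) -XX:CompileCommand=option,com/oracle/Test.test,MyBoolOption1
+    //       enables the boolean flag MyBoolOption1 for com.oracle.Test::test
+    //   (2) -XX:CompileCommand=option,Test::test,intx,MyIntxOption,-1
+    //       sets the intx flag MyIntxOption to -1 for Test::test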
+
+    private static final String[][] TYPE_1_ARGUMENTS = {
+        {
+            "-XX:CompileCommand=option,com/oracle/Test.test,MyBoolOption1",
+            "-XX:CompileCommand=option,com/oracle/Test,test,MyBoolOption2",
+            "-XX:CompileCommand=option,com.oracle.Test::test,MyBoolOption3",
+            "-XX:CompileCommand=option,com/oracle/Test::test,MyBoolOption4",
+            "-version"
+        },
+        {
+            "-XX:CompileCommand=option,com/oracle/Test.test,MyBoolOption1,MyBoolOption2",
+            "-version"
+        },
+        {
+            "-XX:CompileCommand=option,com/oracle/Test,test,MyBoolOption1,MyBoolOption2",
+            "-version"
+        }
+    };
+
+    private static final String[][] TYPE_1_EXPECTED_OUTPUTS = {
+        {
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption1 = true",
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption2 = true",
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption3 = true",
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption4 = true"
+        },
+        {
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption1 = true",
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption2 = true",
+        },
+        {
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption1 = true",
+            "CompilerOracle: option com/oracle/Test.test bool MyBoolOption2 = true",
+        }
+    };
+
+    private static final String[][] TYPE_2_ARGUMENTS = {
+        {
+            "-XX:CompileCommand=option,Test::test,ccstrlist,MyListOption,_foo,_bar",
+            "-XX:CompileCommand=option,Test::test,ccstr,MyStrOption,_foo",
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,false",
+            "-XX:CompileCommand=option,Test::test,intx,MyIntxOption,-1",
+            "-XX:CompileCommand=option,Test::test,uintx,MyUintxOption,1",
+            "-XX:CompileCommand=option,Test::test,MyFlag",
+            "-XX:CompileCommand=option,Test::test,double,MyDoubleOption,1.123",
+            "-version"
+        },
+        {
+            "-XX:CompileCommand=option,Test.test,double,MyDoubleOption,1.123",
+            "-version"
+        },
+        {
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,false,intx,MyIntxOption,-1,uintx,MyUintxOption,1,MyFlag,double,MyDoubleOption,1.123",
+            "-version"
+        }
+    };
+
+    private static final String[][] TYPE_2_EXPECTED_OUTPUTS = {
+        {
+            "CompilerOracle: option Test.test const char* MyListOption = '_foo _bar'",
+            "CompilerOracle: option Test.test const char* MyStrOption = '_foo'",
+            "CompilerOracle: option Test.test bool MyBoolOption = false",
+            "CompilerOracle: option Test.test intx MyIntxOption = -1",
+            "CompilerOracle: option Test.test uintx MyUintxOption = 1",
+            "CompilerOracle: option Test.test bool MyFlag = true",
+            "CompilerOracle: option Test.test double MyDoubleOption = 1.123000"
+        },
+        {
+            "CompilerOracle: option Test.test double MyDoubleOption = 1.123000"
+        },
+        {
+            "CompilerOracle: option Test.test bool MyBoolOption = false",
+            "CompilerOracle: option Test.test intx MyIntxOption = -1",
+            "CompilerOracle: option Test.test uintx MyUintxOption = 1",
+            "CompilerOracle: option Test.test bool MyFlag = true",
+            "CompilerOracle: option Test.test double MyDoubleOption = 1.123000",
+        }
+    };
+
+    private static final String[][] TYPE_2_INVALID_ARGUMENTS = {
+        {
+            // bool flag name missing
+            "-XX:CompileCommand=option,Test::test,bool",
+            "-version"
+        },
+        {
+            // bool flag value missing
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption",
+            "-version"
+        },
+        {
+            // wrong value for bool flag
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,100",
+            "-version"
+        },
+        {
+            // intx flag name missing
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,false,intx",
+            "-version"
+        },
+        {
+            // intx flag value missing
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,false,intx,MyIntOption",
+            "-version"
+        },
+        {
+            // wrong value for intx flag
+            "-XX:CompileCommand=option,Test::test,bool,MyBoolOption,false,intx,MyIntOption,true",
+            "-version"
+        },
+        {
+            // wrong value for double flag
+            "-XX:CompileCommand=option,Test::test,double,MyDoubleOption,1",
+            "-version"
+        }
+    };
+
+    private static void verifyValidOption(String[] arguments, String[] expected_outputs) throws Exception {
+        ProcessBuilder pb;
+        OutputAnalyzer out;
+
+        pb = ProcessTools.createJavaProcessBuilder(arguments);
+        out = new OutputAnalyzer(pb.start());
+
+        for (String expected_output : expected_outputs) {
+            out.shouldContain(expected_output);
+        }
+
+        out.shouldNotContain("CompilerOracle: unrecognized line");
+        out.shouldHaveExitValue(0);
+    }
+
+    private static void verifyInvalidOption(String[] arguments) throws Exception {
+        ProcessBuilder pb;
+        OutputAnalyzer out;
+
+        pb = ProcessTools.createJavaProcessBuilder(arguments);
+        out = new OutputAnalyzer(pb.start());
+
+        out.shouldContain("CompilerOracle: unrecognized line");
+        out.shouldHaveExitValue(0);
+    }
+
+    public static void main(String[] args) throws Exception {
+
+        if (TYPE_1_ARGUMENTS.length != TYPE_1_EXPECTED_OUTPUTS.length) {
+            throw new RuntimeException("Test is set up incorrectly: length of arguments and expected outputs for type (1) options does not match.");
+        }
+
+        if (TYPE_2_ARGUMENTS.length != TYPE_2_EXPECTED_OUTPUTS.length) {
+            throw new RuntimeException("Test is set up incorrectly: length of arguments and expected outputs for type (2) options does not match.");
+        }
+
+        // Check if type (1) options are parsed correctly
+        for (int i = 0; i < TYPE_1_ARGUMENTS.length; i++) {
+            verifyValidOption(TYPE_1_ARGUMENTS[i], TYPE_1_EXPECTED_OUTPUTS[i]);
+        }
+
+        // Check if type (2) options are parsed correctly
+        for (int i = 0; i < TYPE_2_ARGUMENTS.length; i++) {
+            verifyValidOption(TYPE_2_ARGUMENTS[i], TYPE_2_EXPECTED_OUTPUTS[i]);
+        }
+
+        // Check if error is reported for invalid type (2) options
+        // (flags with type information specified)
+        for (String[] arguments: TYPE_2_INVALID_ARGUMENTS) {
+            verifyInvalidOption(arguments);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/stringopts/TestStringObjectInitialization.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+
+import java.util.Arrays;
+
+/*
+ * @test
+ * @bug 8159244
+ * @requires vm.gc == "Parallel" | vm.gc == "null"
+ * @summary Verifies that no partially initialized String object escapes from
+ *          C2's String concat optimization in a highly concurrent setting.
+ *          This test triggers the bug in about 1 out of 10 runs.
+ * @compile -XDstringConcat=inline TestStringObjectInitialization.java
+ * @run main/othervm/timeout=300 -XX:+IgnoreUnrecognizedVMOptions -XX:-UseCompressedOops -XX:-CompactStrings
+ *                               -XX:-UseG1GC -XX:+UseParallelGC TestStringObjectInitialization
+ */
+public class TestStringObjectInitialization {
+
+    String myString;
+
+    public static void main(String[] args) throws Exception {
+        TestStringObjectInitialization t = new TestStringObjectInitialization();
+        // Create some threads that concurrently update 'myString'
+        for (int i = 0; i < 100; ++i) {
+            (new Thread(new Runner(t))).start();
+        }
+        Thread last = new Thread(new Runner(t));
+        last.start();
+        last.join();
+    }
+
+    private void add(String message) {
+        // String escapes to other threads here
+        myString += message;
+    }
+
+    public void run(String s, String[] sArray) {
+        // Trigger C2's string concatenation optimization
+        add(s + Arrays.toString(sArray) + " const ");
+    }
+}
+
+class Runner implements Runnable {
+    private TestStringObjectInitialization test;
+
+    public Runner(TestStringObjectInitialization t) {
+        test = t;
+    }
+
+    public void run(){
+        String[] array = {"a", "b", "c"};
+        for (int i = 0; i < 10000; ++i) {
+            test.run("a", array);
+        }
+    }
+}
+
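The @compile line forces the classic StringBuilder-based concatenation, so the += in add() is lowered to an append chain whose final toString() result is the String that must never escape half-initialized; a rough sketch of that lowering (illustrative; the exact shape can differ by javac version):

    // Rough sketch (illustrative): the shape 'myString += message' takes with the
    // classic StringBuilder lowering. The String produced by toString() is the
    // object that must never be observed partially initialized by another thread.
    class ConcatLoweringSketch {
        String myString;
        void add(String message) {
            myString = new StringBuilder().append(myString).append(message).toString();
        }
    }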
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeUnalignedAccess.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8158260
+ * @summary Test unaligned Unsafe accesses
+ * @modules java.base/jdk.internal.misc
+ * @run main/othervm -Diters=20000 -XX:-UseOnStackReplacement -XX:-BackgroundCompilation JdkInternalMiscUnsafeUnalignedAccess
+ * @author volker.simonis@gmail.com
+ */
+
+import java.lang.reflect.Field;
+import java.nio.ByteOrder;
+import sun.misc.Unsafe;
+
+public class JdkInternalMiscUnsafeUnalignedAccess {
+    static final int ITERS = Integer.getInteger("iters", 20_000);
+    private static final boolean BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+    private static final Unsafe UNSAFE;
+    private static final int SIZE = 1024;
+    private static long memory;
+
+    static {
+        try {
+            Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+            unsafeField.setAccessible(true);
+            UNSAFE = (Unsafe) unsafeField.get(null);
+        }
+        catch (Exception e) {
+            throw new RuntimeException("Unable to get Unsafe instance.", e);
+        }
+    }
+
+    static int getInt_0() {
+        return UNSAFE.getInt(memory + 0);
+    }
+    static int getInt_1() {
+        return UNSAFE.getInt(memory + 1);
+    }
+    static int getInt_4() {
+        return UNSAFE.getInt(memory + 4);
+    }
+    static int getInt_17() {
+        return UNSAFE.getInt(memory + 17);
+    }
+
+    static long getIntAsLong_0() {
+        return UNSAFE.getInt(memory + 0);
+    }
+    static long getIntAsLong_1() {
+        return UNSAFE.getInt(memory + 1);
+    }
+    static long getIntAsLong_4() {
+        return UNSAFE.getInt(memory + 4);
+    }
+    static long getIntAsLong_17() {
+        return UNSAFE.getInt(memory + 17);
+    }
+
+    static long getLong_0() {
+        return UNSAFE.getLong(memory + 0);
+    }
+    static long getLong_1() {
+        return UNSAFE.getLong(memory + 1);
+    }
+    static long getLong_4() {
+        return UNSAFE.getLong(memory + 4);
+    }
+    static long getLong_8() {
+        return UNSAFE.getLong(memory + 8);
+    }
+    static long getLong_17() {
+        return UNSAFE.getLong(memory + 17);
+    }
+
+    static void putInt_0(int i) {
+        UNSAFE.putInt(memory + 0, i);
+    }
+    static void putInt_1(int i) {
+        UNSAFE.putInt(memory + 1, i);
+    }
+    static void putInt_4(int i) {
+        UNSAFE.putInt(memory + 4, i);
+    }
+    static void putInt_17(int i) {
+        UNSAFE.putInt(memory + 17, i);
+    }
+
+    static void putLong_0(long l) {
+        UNSAFE.putLong(memory + 0, l);
+    }
+    static void putLong_1(long l) {
+        UNSAFE.putLong(memory + 1, l);
+    }
+    static void putLong_4(long l) {
+        UNSAFE.putLong(memory + 4, l);
+    }
+    static void putLong_8(long l) {
+        UNSAFE.putLong(memory + 8, l);
+    }
+    static void putLong_17(long l) {
+        UNSAFE.putLong(memory + 17, l);
+    }
+
+    public static void main(String[] args) throws Exception {
+
+        if (!UNSAFE.unalignedAccess()) {
+            System.out.println("Platform is not supporting unaligned access - nothing to test.");
+            return;
+        }
+
+        memory = UNSAFE.allocateMemory(SIZE);
+
+        UNSAFE.putInt(memory +  0, 0x00112233);
+        UNSAFE.putInt(memory +  4, 0x44556677);
+        UNSAFE.putInt(memory +  8, 0x8899aabb);
+        UNSAFE.putInt(memory + 12, 0xccddeeff);
+        UNSAFE.putInt(memory + 16, 0x01234567);
+        UNSAFE.putInt(memory + 20, 0x89abcdef);
+        UNSAFE.putInt(memory + 24, 0x01234567);
+
+        // Unsafe.getInt()
+        int res;
+        for (int i = 0; i < ITERS; i++) {
+            res = getInt_0();
+            if (res != 0x00112233) {
+                throw new Exception(res + " != 0x00112233");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            res = getInt_1();
+            if (res != (BIG_ENDIAN ? 0x11223344 : 0x77001122)) {
+                throw new Exception(res + " != " + (BIG_ENDIAN ? 0x11223344 : 0x77001122));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            res = getInt_4();
+            if (res != 0x44556677) {
+                throw new Exception(res + " != 0x44556677");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            res = getInt_17();
+            if (res != (BIG_ENDIAN ? 0x23456789 : 0xef012345)) {
+                throw new Exception(res + " != " + (BIG_ENDIAN ? 0x23456789 : 0xef012345));
+            }
+        }
+
+        // (long)Unsafe.getInt()
+        long lres;
+        for (int i = 0; i < ITERS; i++) {
+            lres = getIntAsLong_0();
+            if (lres != (long)0x00112233) {
+                throw new Exception(lres + " != 0x00112233");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getIntAsLong_1();
+            if (lres != (BIG_ENDIAN ? (long)0x11223344 : (long)0x77001122)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? (long)0x11223344 : (long)0x77001122));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getIntAsLong_4();
+            if (lres != (long)0x44556677) {
+                throw new Exception(lres + " != 0x44556677");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getIntAsLong_17();
+            if (lres != (BIG_ENDIAN ? (long)0x23456789 : (long)0xef012345)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? (long)0x23456789 : (long)0xef012345));
+            }
+        }
+
+        // Unsafe.getLong()
+        for (int i = 0; i < ITERS; i++) {
+            lres = getLong_0();
+            if (lres != (BIG_ENDIAN ? 0x0011223344556677L : 0x4455667700112233L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x0011223344556677L : 0x4455667700112233L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getLong_1();
+            if (lres != (BIG_ENDIAN ? 0x1122334455667788L : 0xbb44556677001122L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x1122334455667788L : 0xbb44556677001122L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getLong_4();
+            if (lres != (BIG_ENDIAN ? 0x445566778899aabbL : 0x8899aabb44556677L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x445566778899aabbL : 0x8899aabb44556677L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getLong_8();
+            if (lres != (BIG_ENDIAN ? 0x8899aabbccddeeffL : 0xccddeeff8899aabbL)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x8899aabbccddeeffL : 0xccddeeff8899aabbL));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            lres = getLong_17();
+            if (lres != (BIG_ENDIAN ? 0x23456789abcdef01L : 0x6789abcdef012345L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x23456789abcdef01L : 0x6789abcdef012345L));
+            }
+        }
+
+        // Unsafe.putInt()
+        for (int i = 0; i < ITERS; i++) {
+            putInt_0(0x00112233);
+            res = getInt_0();
+            if (res != 0x00112233) {
+                throw new Exception(res + " != 0x00112233");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putInt_1(BIG_ENDIAN ? 0x11223344 : 0x77001122);
+            res = getInt_1();
+            if (res != (BIG_ENDIAN ? 0x11223344 : 0x77001122)) {
+                throw new Exception(res + " != " + (BIG_ENDIAN ? 0x11223344 : 0x77001122));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putInt_4(0x44556677);
+            res = getInt_4();
+            if (res != 0x44556677) {
+                throw new Exception(res + " != 0x44556677");
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putInt_17(BIG_ENDIAN ? 0x23456789 : 0xef012345);
+            res = getInt_17();
+            if (res != (BIG_ENDIAN ? 0x23456789 : 0xef012345)) {
+                throw new Exception(res + " != " + (BIG_ENDIAN ? 0x23456789 : 0xef012345));
+            }
+        }
+
+
+        // Unsafe.putLong()
+        for (int i = 0; i < ITERS; i++) {
+            putLong_0(BIG_ENDIAN ? 0x0011223344556677L : 0x4455667700112233L);
+            lres = getLong_0();
+            if (lres != (BIG_ENDIAN ? 0x0011223344556677L : 0x4455667700112233L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x0011223344556677L : 0x4455667700112233L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putLong_1(BIG_ENDIAN ? 0x1122334455667788L : 0xbb44556677001122L);
+            lres = getLong_1();
+            if (lres != (BIG_ENDIAN ? 0x1122334455667788L : 0xbb44556677001122L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x1122334455667788L : 0xbb44556677001122L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putLong_4(BIG_ENDIAN ? 0x445566778899aabbL : 0x8899aabb44556677L);
+            lres = getLong_4();
+            if (lres != (BIG_ENDIAN ? 0x445566778899aabbL : 0x8899aabb44556677L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x445566778899aabbL : 0x8899aabb44556677L));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putLong_8(BIG_ENDIAN ? 0x8899aabbccddeeffL : 0xccddeeff8899aabbL);
+            lres = getLong_8();
+            if (lres != (BIG_ENDIAN ? 0x8899aabbccddeeffL : 0xccddeeff8899aabbL)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x8899aabbccddeeffL : 0xccddeeff8899aabbL));
+            }
+        }
+
+        for (int i = 0; i < ITERS; i++) {
+            putLong_17(BIG_ENDIAN ? 0x23456789abcdef01L : 0x6789abcdef012345L);
+            lres = getLong_17();
+            if (lres != (BIG_ENDIAN ? 0x23456789abcdef01L : 0x6789abcdef012345L)) {
+                throw new Exception(lres + " != " + (BIG_ENDIAN ? 0x23456789abcdef01L : 0x6789abcdef012345L));
+            }
+        }
+    }
+
+}
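The little-endian expectations above (for example 0x77001122 for the read at offset 1) follow from the byte layout established by the initial putInt() calls; a minimal sketch using ByteBuffer (assumes a little-endian host such as x86 or AArch64; class name is illustrative):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    // Minimal sketch: derives the little-endian expectation for the unaligned
    // int read at offset 1 (getInt_1) from the bytes written by putInt(0/4, ...).
    public class UnalignedExpectationSketch {
        public static void main(String[] args) {
            ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
            buf.putInt(0, 0x00112233);   // bytes 0..3 in memory: 33 22 11 00
            buf.putInt(4, 0x44556677);   // bytes 4..7 in memory: 77 66 55 44
            // Four bytes starting at offset 1 are 22 11 00 77 -> 0x77001122.
            System.out.println(Integer.toHexString(buf.getInt(1)));  // 77001122
        }
    }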
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7020373/GenOOMCrashClass.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2011, Red Hat Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ * 
+ * This code is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+import java.applet.Applet;
+import java.io.IOException;
+
+import com.sun.org.apache.bcel.internal.Constants;
+import com.sun.org.apache.bcel.internal.generic.AALOAD;
+import com.sun.org.apache.bcel.internal.generic.ACONST_NULL;
+import com.sun.org.apache.bcel.internal.generic.ALOAD;
+import com.sun.org.apache.bcel.internal.generic.ArrayType;
+import com.sun.org.apache.bcel.internal.generic.ClassGen;
+import com.sun.org.apache.bcel.internal.generic.ConstantPoolGen;
+import com.sun.org.apache.bcel.internal.generic.GOTO;
+import com.sun.org.apache.bcel.internal.generic.ICONST;
+import com.sun.org.apache.bcel.internal.generic.IFEQ;
+import com.sun.org.apache.bcel.internal.generic.ILOAD;
+import com.sun.org.apache.bcel.internal.generic.INVOKESTATIC;
+import com.sun.org.apache.bcel.internal.generic.ISTORE;
+import com.sun.org.apache.bcel.internal.generic.InstructionHandle;
+import com.sun.org.apache.bcel.internal.generic.InstructionList;
+import com.sun.org.apache.bcel.internal.generic.JSR;
+import com.sun.org.apache.bcel.internal.generic.MethodGen;
+import com.sun.org.apache.bcel.internal.generic.RETURN;
+import com.sun.org.apache.bcel.internal.generic.Type;
+
+
+public class GenOOMCrashClass {
+
+    public static String genOOMCrashClass(int maxmeth, int nums) throws Exception {
+        String theClassFile = "OOMCrashClass"+nums+"_"+maxmeth;
+        ClassGen cg = new ClassGen(theClassFile, "java.applet.Applet",
+                "<generated>", Constants.ACC_PUBLIC | Constants.ACC_SUPER, null);
+        ConstantPoolGen cp = cg.getConstantPool(); // cg creates constant pool
+
+        //      int br0 = cp.addClass("marc.schoenefeld.2008");
+
+        int br2 = cp.addMethodref("java.lang.Integer", "parseInt",
+                "(Ljava/lang/String;)I");
+
+        Type[] argtype = new Type[] {
+            new ArrayType(Type.STRING, 1)
+        };
+
+        for (int j = 0; j < maxmeth; j++) {
+
+            InstructionList il = new InstructionList();
+
+            String methodName = maxmeth == 1 ? "main" : "main" + j;
+            MethodGen mg = new MethodGen(Constants.ACC_STATIC
+                    | Constants.ACC_PUBLIC,// access flags
+                    Type.VOID, // return type
+                    argtype, new String[] { "argv" }, // arg
+                    // names
+                    methodName, theClassFile, // method, class
+                    il, cp);
+
+            il.append(new ALOAD(0));
+            il.append(new ICONST(0));
+            il.append(new AALOAD()); // load something unpredictable, no folding
+                                     // please
+
+            il.append(new INVOKESTATIC(br2));
+            il.append(new ISTORE(1));
+
+            GOTO gototail = new GOTO(null);
+
+            il.append(gototail);
+
+            InstructionHandle ret = il.append(new RETURN());
+            InstructionHandle ih = null;
+            for (int i = 0; i < nums; i++) {
+                ih = il.append(new ILOAD(1));
+                IFEQ ifeq = new IFEQ(null);
+                il.append(ifeq);
+
+                JSR jsr = new JSR(null);
+                GOTO next = new GOTO(null);
+
+                InstructionHandle h_jsr = il.append(jsr);
+                InstructionHandle h_goto = il.append(next);
+                InstructionHandle h_ret = il.append(new RETURN());
+
+                InstructionHandle danach = il.append(new ACONST_NULL());
+                jsr.setTarget(h_ret);
+                next.setTarget(danach);
+
+                il.append(new GOTO(ih));
+                ifeq.setTarget(ret);
+                ret = ih;
+            }
+
+            gototail.setTarget(ih);
+
+            mg.setMaxStack(Integer.MAX_VALUE); // Needed stack size
+
+            mg.setMaxLocals();
+            cg.addMethod(mg.getMethod());
+        }
+        /* Add public <init> method, i.e. empty constructor */
+        cg.addEmptyConstructor(Constants.ACC_PUBLIC);
+
+        /* Get JavaClass object and dump it to file. */
+        try {
+            System.out.println("dumping:"+theClassFile);
+            cg.getJavaClass().dump(theClassFile + ".class");
+        } catch (java.io.IOException e) {
+            System.err.println(e);
+        }
+        return theClassFile;
+    }
+
+    public static void main(String[] a) throws Exception {
+        int maxmeth_default = 250;
+        int nums_default = 20;
+        int maxmeth;
+        int nums;
+        try {
+            maxmeth = Integer.parseInt(a[0]);
+        }
+        catch (Exception e) {
+            maxmeth = maxmeth_default;
+        }
+        try {
+            nums = Integer.parseInt(a[1]);
+        }
+        catch (Exception e) {
+            nums = nums_default;
+        }       
+        String classname = genOOMCrashClass(maxmeth,nums);
+        System.out.println("Generated");
+        // System.out.println(a[0]);
+        // System.out.println("Loading");
+
+        // long t = System.currentTimeMillis();
+        // Class g2 = Class.forName(classname);
+        // long u = System.currentTimeMillis();
+        // System.out.println(g2 + ":" + (u - t));
+    }
+
+}
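Test7020373.sh (next hunk) drives this generator as 'java GenOOMCrashClass 1 4000', producing OOMCrashClass4000_1.class; the same call can be made programmatically, a minimal sketch (class name GenUsageSketch is illustrative):

    // Minimal sketch: invokes the generator the same way Test7020373.sh does,
    // writing OOMCrashClass4000_1.class to the current directory.
    public class GenUsageSketch {
        public static void main(String[] args) throws Exception {
            String name = GenOOMCrashClass.genOOMCrashClass(1, 4000);
            System.out.println("generated " + name + ".class");
        }
    }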
--- a/test/runtime/7020373/Test7020373.sh	Mon Apr 13 06:13:18 2020 +0100
+++ b/test/runtime/7020373/Test7020373.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -20,6 +20,10 @@
 
 ${COMPILEJAVA}${FS}bin${FS}jar xvf ${TESTSRC}${FS}testcase.jar
 
+# second step: run the generator to create the test class
+${TESTJAVA}${FS}bin${FS}java GenOOMCrashClass 1 4000
+
+# third step: run the reproducer
 ${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} OOMCrashClass4000_1 > test.out 2>&1
 
 cat test.out
Binary file test/runtime/7020373/testcase.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/InitialThreadOverflow/DoOverflow.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class DoOverflow {
+
+    static int count;
+
+    public void overflow() {
+        count+=1;
+        overflow();
+    }
+
+    public static void printIt() {
+        System.out.println("Going to overflow stack");
+        try {
+            new DoOverflow().overflow();
+        } catch(java.lang.StackOverflowError e) {
+            System.out.println("Overflow OK " + count);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/InitialThreadOverflow/invoke.cxx	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <assert.h>
+#include <jni.h>
+
+#include <pthread.h>
+
+JavaVM* jvm;
+
+void *
+floobydust (void *p) {
+  JNIEnv *env;
+
+  jvm->AttachCurrentThread((void**)&env, NULL);
+
+  jclass class_id = env->FindClass ("DoOverflow");
+  assert (class_id);
+
+  jmethodID method_id = env->GetStaticMethodID(class_id, "printIt", "()V");
+  assert (method_id);
+
+  env->CallStaticVoidMethod(class_id, method_id, NULL);
+
+  jvm->DetachCurrentThread();
+
+  return NULL;  // a pthread start routine must return a value
+}
+
+int
+main (int argc, const char** argv) {
+  JavaVMOption options[1];
+  options[0].optionString = (char*) "-Xss320k";
+
+  JavaVMInitArgs vm_args;
+  vm_args.version = JNI_VERSION_1_2;
+  vm_args.ignoreUnrecognized = JNI_TRUE;
+  vm_args.options = options;
+  vm_args.nOptions = 1;
+
+  JNIEnv* env;
+  jint result = JNI_CreateJavaVM(&jvm, (void**)&env, &vm_args);
+  assert(result >= 0);
+
+  pthread_t thr;
+  pthread_create(&thr, NULL, floobydust, NULL);
+  pthread_join(thr, NULL);
+
+  floobydust(NULL);
+
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/InitialThreadOverflow/testme.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+
+# @test testme.sh
+# @bug 8009062
+# @summary Poor performance of JNI AttachCurrentThread after fix for 7017193
+# @compile DoOverflow.java
+# @run shell testme.sh
+
+set -x
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set.  Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../test_env.sh
+
+if [ "${VM_OS}" != "linux" ]
+then
+  echo "Test only valid for Linux"
+  exit 0
+fi
+
+gcc_cmd=`which gcc`
+if [ "x$gcc_cmd" == "x" ]; then
+    echo "WARNING: gcc not found. Cannot execute test." 2>&1
+    exit 0;
+fi
+
+CFLAGS="-m${VM_BITS}"
+
+LD_LIBRARY_PATH=.:${COMPILEJAVA}/jre/lib/${VM_CPU}/${VM_TYPE}:/usr/lib:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH
+
+cp ${TESTSRC}${FS}invoke.cxx .
+
+# Copy the result of our @compile action:
+cp ${TESTCLASSES}${FS}DoOverflow.class .
+
+echo "Compilation flag: ${COMP_FLAG}"
+# Note pthread may not be found thus invoke creation will fail to be created.
+# Check to ensure you have a /usr/lib/libpthread.so if you don't please look
+# for /usr/lib/`uname -m`-linux-gnu version ensure to add that path to below compilation.
+
+$gcc_cmd -DLINUX ${CFLAGS} -o invoke \
+    -I${COMPILEJAVA}/include -I${COMPILEJAVA}/include/linux \
+    -L${COMPILEJAVA}/jre/lib/${VM_CPU}/${VM_TYPE} \
+    -ljvm -lpthread invoke.cxx
+
+./invoke
+exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/RedefineFinalizer/RedefineFinalizer.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6904403
+ * @summary Don't assert if we redefine finalize method
+ * @library /testlibrary
+ * @build RedefineClassHelper
+ * @run main RedefineClassHelper
+ * @run main/othervm -javaagent:redefineagent.jar RedefineFinalizer
+ */
+
+/*
+ * Regression test for hitting:
+ *
+ * assert(f == k->has_finalizer()) failed: inconsistent has_finalizer
+ *
+ * when redefining finalizer method
+ */
+public class RedefineFinalizer {
+
+    public static String newB =
+                "class RedefineFinalizer$B {" +
+                "   protected void finalize() { " +
+                "       System.out.println(\"Finalizer called\");" +
+                "   }" +
+                "}";
+
+    public static void main(String[] args) throws Exception {
+        RedefineClassHelper.redefineClass(B.class, newB);
+
+        A a = new A();
+    }
+
+    static class A extends B {
+    }
+
+    static class B {
+        protected void finalize() {
+            // should be empty
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/RedefineTests/RedefineRunningMethodsWithResolutionErrors.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8076110
+ * @summary Redefine running methods that have cached resolution errors
+ * @library /testlibrary
+ * @modules java.instrument
+ *          java.base/jdk.internal.org.objectweb.asm
+ * @build RedefineClassHelper
+ * @run main RedefineClassHelper
+ * @run main/othervm -javaagent:redefineagent.jar -XX:TraceRedefineClasses=0x600 RedefineRunningMethodsWithResolutionErrors
+ */
+
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.Label;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import jdk.internal.org.objectweb.asm.Opcodes;
+
+import java.lang.reflect.InvocationTargetException;
+
+public class RedefineRunningMethodsWithResolutionErrors extends ClassLoader implements Opcodes {
+
+    @Override
+    protected Class<?> findClass(String name) throws ClassNotFoundException {
+        if (name.equals("C")) {
+            byte[] b = loadC(false);
+            return defineClass(name, b, 0, b.length);
+        } else {
+            return super.findClass(name);
+        }
+    }
+
+    private static byte[] loadC(boolean redefine) {
+        ClassWriter cw = new ClassWriter(0);
+
+        cw.visit(52, ACC_SUPER | ACC_PUBLIC, "C", null, "java/lang/Object", null);
+        {
+            MethodVisitor mv;
+
+            mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "m", "()V", null, null);
+            mv.visitCode();
+
+            // First time we run we will:
+            // 1) Cache resolution errors
+            // 2) Redefine the class / method
+            // 3) Try to read the resolution errors that were cached
+            //
+            // The redefined method will never run, throw error to be sure
+            if (redefine) {
+                createThrowRuntimeExceptionCode(mv, "The redefined method was called");
+            } else {
+                createMethodBody(mv);
+            }
+            mv.visitMaxs(3, 0);
+            mv.visitEnd();
+        }
+        cw.visitEnd();
+        return cw.toByteArray();
+    }
+
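+    // The bytecode assembled below corresponds roughly to the following
+    // (illustrative pseudo-Java; NonExistentClass intentionally does not exist):
+    //
+    //   static void m() {
+    //       try { NonExistentClass.nonExistentMethod(); }   // caches a resolution error
+    //       catch (NoClassDefFoundError expected) { /* ok */ }
+    //       redefine();                                      // redefine C while m() is running
+    //       try { NonExistentClass.nonExistentMethod(); }    // must hit the cached error again
+    //       catch (NoClassDefFoundError expected) { /* ok */ }
+    //   }   // if either call succeeds, a RuntimeException is thrown instead
+    //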
+    private static void createMethodBody(MethodVisitor mv) {
+        Label classExists = new Label();
+
+        // Cache resolution errors
+        createLoadNonExistentClassCode(mv, classExists);
+
+        // Redefine our own class and method
+        mv.visitMethodInsn(INVOKESTATIC, "RedefineRunningMethodsWithResolutionErrors", "redefine", "()V");
+
+        // Provoke the same error again to make sure the resolution error cache works
+        createLoadNonExistentClassCode(mv, classExists);
+
+        // Test passed
+        mv.visitInsn(RETURN);
+
+        mv.visitFrame(F_SAME, 0, new Object[0], 0, new Object[0]);
+        mv.visitLabel(classExists);
+
+        createThrowRuntimeExceptionCode(mv, "Loaded class that shouldn't exist (\"NonExistentClass\")");
+    }
+
+    private static void createLoadNonExistentClassCode(MethodVisitor mv, Label classExists) {
+        Label tryLoadBegin = new Label();
+        Label tryLoadEnd = new Label();
+        Label catchLoadBlock = new Label();
+        mv.visitTryCatchBlock(tryLoadBegin, tryLoadEnd, catchLoadBlock, "java/lang/NoClassDefFoundError");
+
+        // Try to load a class that does not exist to provoke resolution errors
+        mv.visitLabel(tryLoadBegin);
+        mv.visitMethodInsn(INVOKESTATIC, "NonExistentClass", "nonExistentMethod", "()V");
+        mv.visitLabel(tryLoadEnd);
+
+        // No NoClassDefFoundError means NonExistentClass existed, which shouldn't happen
+        mv.visitJumpInsn(GOTO, classExists);
+
+        mv.visitFrame(F_SAME1, 0, new Object[0], 1, new Object[] { "java/lang/NoClassDefFoundError" });
+        mv.visitLabel(catchLoadBlock);
+
+        // Ignore the expected NoClassDefFoundError
+        mv.visitInsn(POP);
+    }
+
+    private static void createThrowRuntimeExceptionCode(MethodVisitor mv, String msg) {
+        mv.visitTypeInsn(NEW, "java/lang/RuntimeException");
+        mv.visitInsn(DUP);
+        mv.visitLdcInsn(msg);
+        mv.visitMethodInsn(INVOKESPECIAL, "java/lang/RuntimeException", "<init>", "(Ljava/lang/String;)V");
+        mv.visitInsn(ATHROW);
+    }
+
+    private static Class<?> c;
+
+    public static void redefine() throws Exception {
+        RedefineClassHelper.redefineClass(c, loadC(true));
+    }
+
+    public static void main(String[] args) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
+        c = Class.forName("C", true, new RedefineRunningMethodsWithResolutionErrors());
+        c.getMethod("m").invoke(null);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/SharedArchiveFile/SharedArchiveFile.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8014138
+ * @summary Testing new -XX:SharedArchiveFile=<file-name> option
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.*;
+
+public class SharedArchiveFile {
+  public static void main(String[] args) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+        "-XX:+UnlockDiagnosticVMOptions", "-XX:SharedArchiveFile=./sample.jsa", "-Xshare:dump");
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+    try {
+      output.shouldContain("Loading classes to share");
+      output.shouldHaveExitValue(0);
+
+      pb = ProcessTools.createJavaProcessBuilder(
+        "-XX:+UnlockDiagnosticVMOptions", "-XX:SharedArchiveFile=./sample.jsa", "-Xshare:on", "-version");
+      output = new OutputAnalyzer(pb.start());
+      output.shouldContain("sharing");
+      output.shouldHaveExitValue(0);
+
+    } catch (RuntimeException e) {
+      output.shouldContain("Unable to use shared archive");
+      output.shouldHaveExitValue(1);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/StackGap/T.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class T {
+
+  public static void test(int n) {
+    if (n == 0) return;
+    System.out.println (n);
+    test (n - 1);
+
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/StackGap/exestack-gap.c	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+JNIEnv* create_vm(JavaVM **jvm, char *extra_option)
+{
+    JNIEnv* env;
+    JavaVMInitArgs args;
+    JavaVMOption options[4];
+    args.version = JNI_VERSION_1_6;
+    args.nOptions = 3 + (extra_option != NULL);
+    options[0].optionString = "-Xss2048k";
+    char classpath[4096];
+    snprintf(classpath, sizeof classpath,
+             "-Djava.class.path=%s", getenv("CLASSPATH"));
+    options[1].optionString = classpath;
+    options[2].optionString = "-XX:+UnlockExperimentalVMOptions";
+    if (extra_option) {
+      options[3].optionString = extra_option;
+    }
+    args.options = &options[0];
+    args.ignoreUnrecognized = 0;
+    int rv;
+    rv = JNI_CreateJavaVM(jvm, (void**)&env, &args);
+    if (rv < 0) return NULL;
+    return env;
+}
+
+void run(char *extra_arg) {
+  JavaVM *jvm;
+  jclass T_class;
+  jmethodID test_method;
+  JNIEnv *env = create_vm(&jvm, extra_arg);
+  if (env == NULL)
+    exit(1);
+  T_class = (*env)->FindClass(env, "T");
+  if ((*env)->ExceptionCheck(env) == JNI_TRUE) {
+    (*env)->ExceptionDescribe(env);
+    exit(1);
+  }
+  test_method = (*env)->GetStaticMethodID(env, T_class, "test", "(I)V");
+  if ((*env)->ExceptionCheck(env) == JNI_TRUE) {
+    (*env)->ExceptionDescribe(env);
+    exit(1);
+  }
+  (*env)->CallStaticVoidMethod(env, T_class, test_method, 1000);
+}
+
+
+int main(int argc, char **argv)
+{
+  if (argc > 1) {
+    run(argv[1]);
+  } else {
+    run(NULL);
+  }
+
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/StackGap/testme.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,48 @@
+# Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#!/bin/sh
+
+#
+# @test testme.sh
+# @summary Linux kernel stack guard should not cause segfaults on x86-32
+# @compile T.java
+# @run shell testme.sh
+#
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set.  Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../test_env.sh
+
+if [ "${VM_OS}" != "linux" ]
+then
+  echo "Test only valid for Linux"
+  exit 0
+fi
+
+LD_LIBRARY_PATH=.:${TESTJAVA}/jre/lib/${VM_CPU}/${VM_TYPE}:/usr/lib:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH
+
+${TESTNATIVEPATH}/stack-gap || exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/os/AvailableProcessors.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+import java.io.File;
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import java.util.ArrayList;
+
+/*
+ * @test
+ * @bug 6515172
+ * @summary Check that availableProcessors reports the correct value when running in a cpuset on linux
+ * @requires os.family == "linux"
+ * @library /testlibrary
+ * @build com.oracle.java.testlibrary.*
+ * @run driver AvailableProcessors
+ */
+public class AvailableProcessors {
+
+    static final String SUCCESS_STRING = "Found expected processors: ";
+
+    public static void main(String[] args) throws Throwable {
+        if (args.length > 0)
+            checkProcessors(Integer.parseInt(args[0]));
+        else {
+            // run ourselves under different cpu configurations
+            // using the taskset command
+            String taskset;
+            final String taskset1 = "/bin/taskset";
+            final String taskset2 = "/usr/bin/taskset";
+            if (new File(taskset1).exists())
+                taskset = taskset1;
+            else if (new File(taskset2).exists())
+                taskset = taskset2;
+            else {
+                System.out.println("Skipping test: could not find taskset command");
+                return;
+            }
+
+            int available = Runtime.getRuntime().availableProcessors();
+
+            if (available == 1) {
+                System.out.println("Skipping test: only one processor available");
+                return;
+            }
+
+            // Get the java command we want to execute
+            // Enable logging for easier failure diagnosis
+            ProcessBuilder master =
+                    ProcessTools.createJavaProcessBuilder("-XX:+UnlockDiagnosticVMOptions",
+                                                          "-XX:+PrintActiveCpus",
+                                                          "AvailableProcessors");
+
+            int[] expected = new int[] { 1, available/2, available-1, available };
+
+            for (int i : expected) {
+                System.out.println("Testing for " + i + " processors ...");
+                int max = i - 1;
+                ArrayList<String> cmdline = new ArrayList<>(master.command());
+                // prepend taskset command
+                cmdline.add(0, "0-" + max);
+                cmdline.add(0, "-c");
+                cmdline.add(0, taskset);
+                // append expected processor count
+                cmdline.add(String.valueOf(i));
+                ProcessBuilder pb = new ProcessBuilder(cmdline);
+                System.out.println("Final command line: " +
+                                   ProcessTools.getCommandLine(pb));
+                OutputAnalyzer output = ProcessTools.executeProcess(pb);
+                output.shouldContain(SUCCESS_STRING);
+            }
+        }
+    }
+
+    static void checkProcessors(int expected) {
+        int available = Runtime.getRuntime().availableProcessors();
+        if (available != expected)
+            throw new Error("Expected " + expected + " processors, but found "
+                            + available);
+        else
+            System.out.println(SUCCESS_STRING + available);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMap.jasm	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,152 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 45 (astore_2, line 123) is incorrect. The stack maps for bytecode indexes 45
+ * and 49 (astore, line 133) do not match because 45 does not supply enough
+ * locals to satisfy 49.
+ *
+ * The astore_2 bytecode at bytecode index 45 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detect the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMap
+    version 51:0
+{
+
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    throws java/lang/Throwable
+    stack 0 locals 1
+{
+        return;
+}
+
+public static Method foo:"()V"
+    stack 3 locals 5
+{
+        iconst_0;
+        ifne    L5;
+        nop;
+        try t7;
+    L5:    stack_frame_type full;
+        aconst_null;
+        dup;
+        astore_0;
+        astore_1;
+        try t0;
+        aconst_null;
+        astore_0;
+        endtry t0;
+        goto    L19;
+        catch t0 java/io/IOException;
+        stack_frame_type full;
+        locals_map class java/lang/Object, null;
+        stack_map class java/io/IOException;
+        astore_2;
+        aconst_null;
+        dup;
+        astore_1;
+        astore_0;
+        try t1;
+    L19:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        aconst_null;
+        astore_2;
+        endtry t1;
+        aload_1;
+        ifnonnull    L37;
+        nop;
+        goto    L37;
+        catch t1 #0;
+        catch t2 #0;
+        try t2;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore_3;
+        endtry t2;
+        aload_1;
+        ifnonnull    L35;
+        nop;
+    L35:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, bogus, class java/lang/Throwable;
+        aload_3;
+        athrow;
+        try t3, t4;
+    L37:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        aload_1;
+        ifnonnull    L42;
+        nop;
+        endtry t3, t4;
+    L42:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        goto    L54;
+        catch t3 java/lang/Exception;
+        try t5;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Exception;
+        astore_2;   // astore_2, at bci 45, that changes the type state.
+        endtry t5;
+        goto    L54;
+        catch t4 #0;
+        catch t5 #0;
+        catch t6 #0;
+        try t6;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore    4;
+        endtry t6;
+        aload    4;
+        athrow;
+    L54:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        goto    L57;
+    L57:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        nop;
+        endtry t7;
+        return;
+        catch t7 #0;
+        stack_frame_type full;
+        stack_map class java/lang/Throwable;
+        nop;
+        athrow;
+}
+
+} // end Class BadMap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMapDstore.jasm	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,79 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 9 (dstore_2, line 60) is incorrect. The stack maps for bytecode indexes 9
+ * and 18 (astore_2, line 70) do not match because 9 does not supply enough
+ * locals to satisfy 18.
+ *
+ * The dstore_2 bytecode at bytecode index 9 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detect the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMapDstore
+    version 51:0
+{
+
+Field blah:I;
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    stack 4 locals 4
+{
+        new    class BadMapDstore;
+        dup;
+        invokespecial    Method "<init>":"()V";
+        astore_1;
+        dconst_1;
+        try t0;
+        dstore_2;
+        aload_1;
+        iconst_5;
+        putfield    Field blah:"I";
+        endtry t0;
+        goto    L22;
+        catch t0 java/lang/Throwable;
+        stack_frame_type full;
+        locals_map class "[Ljava/lang/String;", class BadMapDstore, double;
+        stack_map class java/lang/Throwable;
+        astore_2;
+        aload_1;
+        dconst_0;
+        dstore_2;
+        pop;
+    L22:    stack_frame_type same;
+        return;
+}
+
+} // end Class BadMapDstore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMapIstore.jasm	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,79 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 9 (istore_2, line 60) is incorrect. The stack maps for bytecode indexes 9
+ * and 18 (astore_2, line 70) do not match because 9 does not supply enough
+ * locals to satisfy 18.
+ *
+ * The istore_2 bytecode at bytecode index 9 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detect the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMapIstore
+    version 51:0
+{
+
+Field blah:I;
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    stack 2 locals 3
+{
+        new    class BadMapIstore;
+        dup;
+        invokespecial    Method "<init>":"()V";
+        astore_1;
+        iconst_2;
+        try t0;
+        istore_2;
+        aload_1;
+        iconst_5;
+        putfield    Field blah:"I";
+        endtry t0;
+        goto    L22;
+        catch t0 java/lang/Throwable;
+        stack_frame_type full;
+        locals_map class "[Ljava/lang/String;", class BadMapIstore, int;
+        stack_map class java/lang/Throwable;
+        astore_2;
+        aload_1;
+        iconst_4;
+        istore_2;
+        pop;
+    L22:    stack_frame_type same;
+        return;
+}
+
+} // end Class BadMapIstore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/StackMapCheck.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,63 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * @test
+ * @bug 7127066
+ * @summary Class verifier accepts an invalid class file
+ * @compile BadMap.jasm
+ * @compile BadMapDstore.jasm
+ * @compile BadMapIstore.jasm
+ * @run main/othervm -Xverify:all StackMapCheck
+ */
+
+public class StackMapCheck {
+    public static void main(String args[]) throws Throwable {
+
+        System.out.println("Regression test for bug 7127066");
+        try {
+            Class newClass = Class.forName("BadMap");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMap did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMap passed, VerifyError was thrown");
+        }
+
+        try {
+            Class newClass = Class.forName("BadMapDstore");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMapDstore did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMapDstore passed, VerifyError was thrown");
+        }
+
+        try {
+            Class newClass = Class.forName("BadMapIstore");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMapIstore did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMapIstore passed, VerifyError was thrown");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/serviceability/jvmti/TestRedefineWithUnresolvedClass.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Redefine a class with an UnresolvedClass reference in the constant pool.
+ * @bug 8035150
+ * @library /testlibrary
+ * @build UnresolvedClassAgent com.oracle.java.testlibrary.ProcessTools com.oracle.java.testlibrary.OutputAnalyzer
+ * @run main TestRedefineWithUnresolvedClass
+ */
+
+import java.io.File;
+import java.util.Arrays;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+public class TestRedefineWithUnresolvedClass {
+
+    final static String slash = File.separator;
+    final static String testClasses = System.getProperty("test.classes") + slash;
+
+    public static void main(String... args) throws Throwable {
+        // delete this class to cause a NoClassDefFoundError
+        File unresolved = new File(testClasses, "MyUnresolvedClass.class");
+        if (unresolved.exists() && !unresolved.delete()) {
+            throw new Exception("Could not delete: " + unresolved);
+        }
+
+        // build the javaagent
+        buildJar("UnresolvedClassAgent");
+
+        // launch a VM with the javaagent
+        launchTest();
+    }
+
+    private static void buildJar(String jarName) throws Throwable {
+        String testSrc = System.getProperty("test.src", "?") + slash;
+
+        String jarPath = String.format("%s%s.jar", testClasses, jarName);
+        String manifestPath = String.format("%s%s.mf", testSrc, jarName);
+        String className = String.format("%s.class", jarName);
+
+        String[] args = new String[] {"-cfm", jarPath, manifestPath, "-C", testClasses, className};
+
+        System.out.println("Running jar " + Arrays.toString(args));
+        sun.tools.jar.Main jarTool = new sun.tools.jar.Main(System.out, System.err, "jar");
+        if (!jarTool.run(args)) {
+            throw new Exception("jar failed: args=" + Arrays.toString(args));
+        }
+    }
+
+    private static void launchTest() throws Throwable {
+        String[] args = {
+            "-javaagent:" + testClasses + "UnresolvedClassAgent.jar",
+            "-Dtest.classes=" + testClasses,
+            "UnresolvedClassAgent" };
+        OutputAnalyzer output = ProcessTools.executeTestJvm(args);
+        output.shouldHaveExitValue(0);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/serviceability/jvmti/UnresolvedClassAgent.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.lang.instrument.ClassDefinition;
+import java.lang.instrument.Instrumentation;
+
+/*
+ * This class is present during compilation, but will be deleted before execution.
+ */
+class MyUnresolvedClass {
+    static void bar() {
+    }
+}
+
+class MyRedefinedClass {
+    static void foo() {
+        MyUnresolvedClass.bar();
+    }
+}
+
+public class UnresolvedClassAgent {
+    public static void main(String... args) {
+    }
+
+    public static void premain(String args, Instrumentation inst) throws Exception {
+        try {
+            MyRedefinedClass.foo();
+        } catch(NoClassDefFoundError err) {
+            System.out.println("NoClassDefFoundError (expected)");
+        }
+
+        File f = new File(System.getProperty("test.classes"), "MyRedefinedClass.class");
+        byte[] buf = new byte[(int)f.length()];
+        try (DataInputStream dis = new DataInputStream(new FileInputStream(f))) {
+            dis.readFully(buf);
+        }
+        ClassDefinition cd = new ClassDefinition(MyRedefinedClass.class, buf);
+        inst.redefineClasses(new ClassDefinition[] {cd});
+
+        try {
+            MyRedefinedClass.foo();
+        } catch(NoClassDefFoundError err) {
+            System.out.println("NoClassDefFoundError (expected again)");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/serviceability/jvmti/UnresolvedClassAgent.mf	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Premain-Class: UnresolvedClassAgent
+Can-Redefine-Classes: true
--- a/test/test_env.sh	Mon Apr 13 06:13:18 2020 +0100
+++ b/test/test_env.sh	Mon Apr 13 16:44:26 2020 +0100
@@ -186,6 +186,11 @@
 then
   VM_CPU="ia64"
 fi
+grep "aarch64" vm_version.out > ${NULL}
+if [ $? = 0 ]
+then
+  VM_CPU="aarch64"
+fi
 export VM_TYPE VM_BITS VM_OS VM_CPU
 echo "VM_TYPE=${VM_TYPE}"
 echo "VM_BITS=${VM_BITS}"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/RedefineClassHelper.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.io.PrintWriter;
+import java.lang.instrument.*;
+import com.oracle.java.testlibrary.*;
+
+/*
+ * Helper class to write tests that redefine classes.
+ * When main method is run, it will create a redefineagent.jar that can be used
+ * with the -javaagent option to support redefining classes in jtreg tests.
+ *
+ * See sample test in test/testlibrary_tests/RedefineClassTest.java
+ */
+public class RedefineClassHelper {
+
+    public static Instrumentation instrumentation;
+    public static void premain(String agentArgs, Instrumentation inst) {
+        instrumentation = inst;
+    }
+
+    /**
+     * Redefine a class
+     *
+     * @param clazz Class to redefine
+     * @param javacode String with the new java code for the class to be redefined
+     */
+    public static void redefineClass(Class clazz, String javacode) throws Exception {
+        byte[] bytecode = InMemoryJavaCompiler.compile(clazz.getName(), javacode);
+        redefineClass(clazz, bytecode);
+    }
+
+    /**
+     * Redefine a class
+     *
+     * @param clazz Class to redefine
+     * @param bytecode byte[] with the new class
+     */
+    public static void redefineClass(Class clazz, byte[] bytecode) throws Exception {
+        instrumentation.redefineClasses(new ClassDefinition(clazz, bytecode));
+    }
+
+    /**
+     * Main method to be invoked before test to create the redefineagent.jar
+     */
+    public static void main(String[] args) throws Exception {
+        ClassFileInstaller.main("RedefineClassHelper");
+
+        PrintWriter pw = new PrintWriter("MANIFEST.MF");
+        pw.println("Premain-Class: RedefineClassHelper");
+        pw.println("Can-Redefine-Classes: true");
+        pw.close();
+
+        sun.tools.jar.Main jarTool = new sun.tools.jar.Main(System.out, System.err, "jar");
+        if (!jarTool.run(new String[] { "-cmf", "MANIFEST.MF", "redefineagent.jar", "RedefineClassHelper.class" })) {
+            throw new Exception("jar operation failed");
+        }
+    }
+}
--- a/test/testlibrary/com/oracle/java/testlibrary/ProcessTools.java	Mon Apr 13 06:13:18 2020 +0100
+++ b/test/testlibrary/com/oracle/java/testlibrary/ProcessTools.java	Mon Apr 13 16:44:26 2020 +0100
@@ -151,11 +151,88 @@
 
     // Reporting
     StringBuilder cmdLine = new StringBuilder();
-    for (String cmd : args)
-        cmdLine.append(cmd).append(' ');
+    for (String cmd : args) {
+      cmdLine.append(cmd).append(' ');
+    }
     System.out.println("Command line: [" + cmdLine.toString() + "]");
 
     return new ProcessBuilder(args.toArray(new String[args.size()]));
   }
 
+  /**
+   * Executes a test jvm process, waits for it to finish and returns the process output.
+   * The default jvm options from jtreg, test.vm.opts and test.java.opts, are added.
+   * The java from the test.jdk is used to execute the command.
+   *
+   * The command line will be like:
+   * {test.jdk}/bin/java {test.vm.opts} {test.java.opts} cmds
+   *
+   * @param cmds User specified arguments.
+   * @return The output from the process.
+   */
+  public static OutputAnalyzer executeTestJvm(String... cmds) throws Throwable {
+    ProcessBuilder pb = createJavaProcessBuilder(Utils.addTestJavaOpts(cmds));
+    return executeProcess(pb);
+  }
+
+  /**
+   * Executes a process, waits for it to finish and returns the process output.
+   * @param pb The ProcessBuilder to execute.
+   * @return The output from the process.
+   */
+  public static OutputAnalyzer executeProcess(ProcessBuilder pb) throws Throwable {
+    OutputAnalyzer output = null;
+    try {
+      output = new OutputAnalyzer(pb.start());
+      return output;
+    } catch (Throwable t) {
+      System.out.println("executeProcess() failed: " + t);
+      throw t;
+    } finally {
+      System.out.println(getProcessLog(pb, output));
+    }
+  }
+
+  /**
+   * Executes a process, waits for it to finish and returns the process output.
+   * @param cmds The command line to execute.
+   * @return The output from the process.
+   */
+  public static OutputAnalyzer executeProcess(String... cmds) throws Throwable {
+    return executeProcess(new ProcessBuilder(cmds));
+  }
+
+  /**
+   * Used to log command line, stdout, stderr and exit code from an executed process.
+   * @param pb The executed process.
+   * @param output The output from the process.
+   */
+  public static String getProcessLog(ProcessBuilder pb, OutputAnalyzer output) {
+    String stderr = output == null ? "null" : output.getStderr();
+    String stdout = output == null ? "null" : output.getStdout();
+    String exitValue = output == null ? "null": Integer.toString(output.getExitValue());
+    StringBuilder logMsg = new StringBuilder();
+    final String nl = System.getProperty("line.separator");
+    logMsg.append("--- ProcessLog ---" + nl);
+    logMsg.append("cmd: " + getCommandLine(pb) + nl);
+    logMsg.append("exitvalue: " + exitValue + nl);
+    logMsg.append("stderr: " + stderr + nl);
+    logMsg.append("stdout: " + stdout + nl);
+    return logMsg.toString();
+  }
+
+  /**
+   * @return The full command line for the ProcessBuilder.
+   */
+  public static String getCommandLine(ProcessBuilder pb) {
+    if (pb == null) {
+      return "null";
+    }
+    StringBuilder cmd = new StringBuilder();
+    for (String s : pb.command()) {
+      cmd.append(s).append(" ");
+    }
+    return cmd.toString().trim();
+  }
+
 }
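
The ProcessTools helpers added above (executeTestJvm, executeProcess, getProcessLog, getCommandLine) are meant to be called from jtreg tests. Below is a minimal usage sketch; the test class name is hypothetical, and only the testlibrary calls that appear in this changeset are assumed.

import com.oracle.java.testlibrary.OutputAnalyzer;
import com.oracle.java.testlibrary.ProcessTools;

public class ExecuteTestJvmSketch {
    public static void main(String[] args) throws Throwable {
        // Runs {test.jdk}/bin/java with test.vm.opts and test.java.opts prepended,
        // waits for it to exit, and logs cmd/exit value/stderr/stdout via getProcessLog().
        OutputAnalyzer output = ProcessTools.executeTestJvm("-version");
        output.shouldHaveExitValue(0);

        // Arbitrary commands can be run through executeProcess(String...).
        OutputAnalyzer echo = ProcessTools.executeProcess("echo", "hello");
        echo.shouldContain("hello");
    }
}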
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/com/oracle/java/testlibrary/Utils.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.oracle.java.testlibrary;
+
+import static com.oracle.java.testlibrary.Asserts.assertTrue;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.ServerSocket;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+/**
+ * Common library for various test helper functions.
+ */
+public final class Utils {
+
+    /**
+     * Returns the sequence used by the operating system to separate lines.
+     */
+    public static final String NEW_LINE = System.getProperty("line.separator");
+
+    /**
+     * Returns the value of the 'test.vm.opts' system property.
+     */
+    public static final String VM_OPTIONS = System.getProperty("test.vm.opts", "").trim();
+
+    /**
+     * Returns the value of the 'test.java.opts' system property.
+     */
+    public static final String JAVA_OPTIONS = System.getProperty("test.java.opts", "").trim();
+
+    /**
+     * Returns the value of the 'test.timeout.factor' system property
+     * converted to {@code double}.
+     */
+    public static final double TIMEOUT_FACTOR;
+    static {
+        String toFactor = System.getProperty("test.timeout.factor", "1.0");
+        TIMEOUT_FACTOR = Double.parseDouble(toFactor);
+    }
+
+    private Utils() {
+        // Private constructor to prevent class instantiation
+    }
+
+    /**
+     * Returns the list of VM options.
+     *
+     * @return List of VM options
+     */
+    public static List<String> getVmOptions() {
+        return Arrays.asList(safeSplitString(VM_OPTIONS));
+    }
+
+    /**
+     * Returns the list of VM options with -J prefix.
+     *
+     * @return The list of VM options with -J prefix
+     */
+    public static List<String> getForwardVmOptions() {
+        String[] opts = safeSplitString(VM_OPTIONS);
+        for (int i = 0; i < opts.length; i++) {
+            opts[i] = "-J" + opts[i];
+        }
+        return Arrays.asList(opts);
+    }
+
+    /**
+     * Returns the default JTReg arguments for a jvm running a test.
+     * This is the combination of JTReg arguments test.vm.opts and test.java.opts.
+     * @return An array of options, or an empty array if there are no options.
+     */
+    public static String[] getTestJavaOpts() {
+        List<String> opts = new ArrayList<String>();
+        Collections.addAll(opts, safeSplitString(VM_OPTIONS));
+        Collections.addAll(opts, safeSplitString(JAVA_OPTIONS));
+        return opts.toArray(new String[0]);
+    }
+
+    /**
+     * Combines given arguments with default JTReg arguments for a jvm running a test.
+     * This is the combination of JTReg arguments test.vm.opts and test.java.opts.
+     * @return The combination of JTReg test java options and user args.
+     */
+    public static String[] addTestJavaOpts(String... userArgs) {
+        List<String> opts = new ArrayList<String>();
+        Collections.addAll(opts, getTestJavaOpts());
+        Collections.addAll(opts, userArgs);
+        return opts.toArray(new String[0]);
+    }
+
+    /**
+     * Splits a string by white space.
+     * Works like String.split(), but returns an empty array
+     * if the string is null or empty.
+     */
+    private static String[] safeSplitString(String s) {
+        if (s == null || s.trim().isEmpty()) {
+            return new String[] {};
+        }
+        return s.trim().split("\\s+");
+    }
+
+    /**
+     * @return The full command line for the ProcessBuilder.
+     */
+    public static String getCommandLine(ProcessBuilder pb) {
+        StringBuilder cmd = new StringBuilder();
+        for (String s : pb.command()) {
+            cmd.append(s).append(" ");
+        }
+        return cmd.toString();
+    }
+
+    /**
+     * Returns the free port on the local host.
+     * The function will spin until a valid port number is found.
+     *
+     * @return The port number
+     * @throws InterruptedException if any thread has interrupted the current thread
+     * @throws IOException if an I/O error occurs when opening the socket
+     */
+    public static int getFreePort() throws InterruptedException, IOException {
+        int port = -1;
+
+        while (port <= 0) {
+            Thread.sleep(100);
+
+            ServerSocket serverSocket = null;
+            try {
+                serverSocket = new ServerSocket(0);
+                port = serverSocket.getLocalPort();
+            } finally {
+                serverSocket.close();
+            }
+        }
+
+        return port;
+    }
+
+    /**
+     * Returns the name of the local host.
+     *
+     * @return The host name
+     * @throws UnknownHostException if IP address of a host could not be determined
+     */
+    public static String getHostname() throws UnknownHostException {
+        InetAddress inetAddress = InetAddress.getLocalHost();
+        String hostName = inetAddress.getHostName();
+
+        assertTrue((hostName != null && !hostName.isEmpty()),
+                "Cannot get hostname");
+
+        return hostName;
+    }
+
+    /**
+     * Uses "jcmd -l" to search for a jvm pid. This function will wait
+     * forever (until jtreg timeout) for the pid to be found.
+     * @param key Regular expression to search for
+     * @return The found pid.
+     */
+    public static int waitForJvmPid(String key) throws Throwable {
+        final long iterationSleepMillis = 250;
+        System.out.println("waitForJvmPid: Waiting for key '" + key + "'");
+        System.out.flush();
+        while (true) {
+            int pid = tryFindJvmPid(key);
+            if (pid >= 0) {
+                return pid;
+            }
+            Thread.sleep(iterationSleepMillis);
+        }
+    }
+
+    /**
+     * Searches for a jvm pid in the output from "jcmd -l".
+     *
+     * Example output from jcmd is:
+     * 12498 sun.tools.jcmd.JCmd -l
+     * 12254 /tmp/jdk8/tl/jdk/JTwork/classes/com/sun/tools/attach/Application.jar
+     *
+     * @param key A regular expression to search for.
+     * @return The found pid, or -1 if not found.
+     * @throws Exception If multiple matching jvms are found.
+     */
+    public static int tryFindJvmPid(String key) throws Throwable {
+        OutputAnalyzer output = null;
+        try {
+            JDKToolLauncher jcmdLauncher = JDKToolLauncher.create("jcmd");
+            jcmdLauncher.addToolArg("-l");
+            output = ProcessTools.executeProcess(jcmdLauncher.getCommand());
+            output.shouldHaveExitValue(0);
+
+            // Search for a line starting with numbers (pid), followed by the key.
+            Pattern pattern = Pattern.compile("([0-9]+)\\s.*(" + key + ").*\\r?\\n");
+            Matcher matcher = pattern.matcher(output.getStdout());
+
+            int pid = -1;
+            if (matcher.find()) {
+                pid = Integer.parseInt(matcher.group(1));
+                System.out.println("findJvmPid.pid: " + pid);
+                if (matcher.find()) {
+                    throw new Exception("Found multiple JVM pids for key: " + key);
+                }
+            }
+            return pid;
+        } catch (Throwable t) {
+            System.out.println(String.format("Utils.findJvmPid(%s) failed: %s", key, t));
+            throw t;
+        }
+    }
+
+    /**
+     * Returns file content as a list of strings
+     *
+     * @param file File to operate on
+     * @return List of strings
+     * @throws IOException
+     */
+    public static List<String> fileAsList(File file) throws IOException {
+        assertTrue(file.exists() && file.isFile(),
+                file.getAbsolutePath() + " does not exist or not a file");
+        List<String> output = new ArrayList<>();
+        try (BufferedReader reader = new BufferedReader(new FileReader(file.getAbsolutePath()))) {
+            while (reader.ready()) {
+                output.add(reader.readLine().replace(NEW_LINE, ""));
+            }
+        }
+        return output;
+    }
+
+}
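
As a companion to the Utils class added above, here is a minimal, hypothetical sketch of how a test might combine it with ProcessTools and OutputAnalyzer; the class name and the "MyServerMainClass" key are illustrative only, and only the library methods introduced in this changeset are assumed.

import com.oracle.java.testlibrary.OutputAnalyzer;
import com.oracle.java.testlibrary.ProcessTools;
import com.oracle.java.testlibrary.Utils;

public class UtilsUsageSketch {
    public static void main(String[] args) throws Throwable {
        // Forward the jtreg-supplied test.vm.opts/test.java.opts to a child JVM.
        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
                Utils.addTestJavaOpts("-version"));
        OutputAnalyzer output = ProcessTools.executeProcess(pb);
        output.shouldHaveExitValue(0);

        // Pick an unused local port for a server the test is about to start.
        int port = Utils.getFreePort();

        // Block until "jcmd -l" reports a JVM whose command line matches the key.
        int pid = Utils.waitForJvmPid("MyServerMainClass");
        System.out.println("port=" + port + " pid=" + pid);
    }
}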
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/RedefineClassTest.java	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @library /testlibrary
+ * @summary Proof of concept test for RedefineClassHelper
+ * @build RedefineClassHelper
+ * @run main RedefineClassHelper
+ * @run main/othervm -javaagent:redefineagent.jar RedefineClassTest
+ */
+
+import static com.oracle.java.testlibrary.Asserts.*;
+import com.oracle.java.testlibrary.*;
+
+/*
+ * Proof of concept test for the test utility class RedefineClassHelper
+ */
+public class RedefineClassTest {
+
+    public static String newClass = "class RedefineClassTest$A { public int Method() { return 2; } }";
+    public static void main(String[] args) throws Exception {
+        A a = new A();
+        assertTrue(a.Method() == 1);
+        RedefineClassHelper.redefineClass(A.class, newClass);
+        assertTrue(a.Method() == 2);
+    }
+
+    static class A {
+        public int Method() {
+            return 1;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mkbc.c	Mon Apr 13 16:44:26 2020 +0100
@@ -0,0 +1,607 @@
+/*
+ * Copyright 2009 Edward Nevill
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#define DEFAULT_PREFIX	"do_"
+
+static char *prefix = (char *)DEFAULT_PREFIX;
+
+#define ISALPHA(c) (isalpha(c) || (c) == '_')
+#define ISALNUM(c) (isalnum(c) || (c) == '_')
+
+FILE *source_f, *bci_f;
+
+typedef struct Bytecode {
+	char	*name;
+	int	len;
+} Bytecode;
+
+typedef struct StringList {
+	struct StringList *next;
+	char 		*line;
+} StringList;
+
+typedef struct OpcodeList {
+	struct OpcodeList *next;
+	long	 	opcode;
+} OpcodeList;
+
+typedef struct OpcodeSequence {
+	struct OpcodeSequence *next;
+	OpcodeList	*opcode_list;
+} OpcodeSequence;
+
+typedef struct BytecodeImpl {
+	struct BytecodeImpl *next;
+	OpcodeSequence	*opcode_seq;
+	StringList	*macro_impl;
+	StringList	*direct_impl;
+	int		len;
+	char		*name;
+	char		*do_name;
+} BytecodeImpl;
+
+Bytecode bytecodes[256];
+
+BytecodeImpl *the_impl = 0;
+BytecodeImpl **the_impl_ptr = &the_impl;
+
+#define BUFLEN 1024
+
+static int lineno = 1;
+
+void fatal(const char *s)
+{
+	fputs(s, stderr);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+void outmem(void)
+{
+	fprintf(stderr, "Out of memory\n");
+	exit(1);
+}
+
+void synerr(void)
+{
+	fprintf(stderr, "Syntax error at line %d\n", lineno);
+	exit(1);
+}
+
+int readchar()
+{
+	int c;
+
+	c = getc(source_f);
+	if (c == '\n') lineno++;
+	return c;
+}
+
+int readwhitespace(int c, char *buf, int len)
+{
+	int i = 0;
+
+	while ((isspace)(c)) {
+		if (buf && i < len-1) buf[i++] = c;
+		c = (readchar)();
+	}
+	if (buf && i < len) buf[i] = 0;
+	return c;
+}
+
+int skipwhitespace(int c)
+{
+	while ((isspace)(c)) {
+		c = (readchar)();
+	}
+	return c;
+}
+
+int readeol(int c, char *buf, int len)
+{
+	int i = 0;
+
+	while (c != '\n' && c != EOF) {
+		if (buf && i < len-1) buf[i++] = c;
+		c = (readchar)();
+	}
+	if (buf && i < len) buf[i] = 0;
+	if (c == '\n') c = (readchar)();
+	return c;
+}
+
+int skipeol(int c)
+{
+	while (c != '\n' && c != EOF) c = (readchar)();
+	if (c == '\n') c = (readchar)();
+	return c;
+}
+
+int readsymbol(int c, char *buf, int len)
+{
+	int i = 0;
+
+	while (ISALNUM(c)) {
+		if (buf && i < len-1) buf[i++] = c;
+		c = (readchar)();
+	}
+	if (buf && i < len) buf[i] = 0;
+	return c;
+}
+
+int bcdef(int c, char *buf, int len)
+{
+	BytecodeImpl *def;
+	OpcodeSequence *seq;
+	OpcodeSequence **seqp;
+	OpcodeList *opc;
+	OpcodeList **opcp;
+	StringList *macro, **macrop;
+	StringList *direct, **directp;
+	char *name;
+	char *line;
+	int i;
+	int length, overall_len;
+
+	def = (BytecodeImpl *)malloc(sizeof(BytecodeImpl));
+	if (!def) outmem();
+	def->next = 0;
+	def->opcode_seq = 0;
+	def->macro_impl = 0;
+	def->direct_impl = 0;
+	def->len = -1;
+	*the_impl_ptr = def;
+	the_impl_ptr = &(def->next);
+	seqp = &(def->opcode_seq);
+	overall_len = 0;
+	do {
+		seq = (OpcodeSequence *)malloc(sizeof(OpcodeSequence));
+		if (!seq) outmem();
+		seq->next = 0;
+		seq->opcode_list = 0;
+		*seqp = seq;
+		seqp = &(seq->next);
+		opcp = &(seq->opcode_list);
+		length = -2;
+		do {
+			c = (readchar)();
+			c = skipwhitespace(c);
+			if (!ISALPHA(c)) synerr();
+			c = readsymbol(c, buf, len);
+			c = skipwhitespace(c);
+			opc = (OpcodeList *)malloc(sizeof(OpcodeList));
+			if (!opc) outmem();
+			opc->next = 0;
+			opc->opcode = -1;
+			*opcp = opc;
+			opcp = &(opc->next);
+			name = strdup(buf);
+			if (!name) outmem();
+			for (i = 0; i < 256; i++) {
+				if (strcmp(name, bytecodes[i].name) == 0) {
+					opc->opcode = i;
+					break;
+				}
+			}
+			if (i == 256) {
+				fprintf(stderr, "No such opcode '%s'\n", name);
+				exit(1);
+			}
+			if (length == -2) length = bytecodes[i].len;
+		} while (c == ',');
+		overall_len += length;
+		if (c != ')') synerr();
+		c = (readchar)();
+		c = skipwhitespace(c);
+	} while (c == '(');
+//	strcpy(buf, "do_");
+	*buf = 0;
+	if (ISALPHA(c)) {
+		c = readsymbol(c, buf, len);
+		c = skipwhitespace(c);
+	} else {
+		seq = def->opcode_seq;
+//		strcat(buf, "bytecode");
+		while (seq) {
+			opc = seq->opcode_list;
+			if (*buf) strcat(buf, "_");
+			strcat(buf, bytecodes[opc->opcode].name);
+//			sprintf(buf+strlen(buf), "_%ld", opc->opcode);
+			seq = seq->next;
+		}
+	}
+	name = strdup(buf);
+	if (!name) outmem();
+	def->name = name;
+	def->do_name = name;
+	def->len = overall_len;
+	if (c != '{') synerr();
+	c = (readchar)();
+	while (c != '\n' && isspace(c)) c = (readchar)();
+	if (c != '\n') synerr();
+	c = (readchar)();
+	c = readwhitespace(c, buf, len);
+	macrop = &(def->macro_impl);
+	while (c != '}' && c != EOF) {
+		c = readeol(c, buf + strlen(buf), len - strlen(buf));
+		line = strdup(buf);
+		if (!line) outmem();
+		macro = (StringList *)malloc(sizeof(StringList));
+		if (!macro) outmem();
+		*macrop = macro;
+		macrop = &(macro->next);
+		macro->next = 0;
+		macro->line = line;
+		c = readwhitespace(c, buf, len);
+	}
+	if (c != '}') synerr();
+	c = (readchar)();
+	c = skipwhitespace(c);
+	if (ISALPHA(c)) {
+		c = readsymbol(c, buf, len);
+		c = skipwhitespace(c);
+		name = strdup(buf);
+		if (!name) outmem();
+		def->do_name = name;
+	}
+	if (c == '[') {
+		c = (readchar)();
+		while (c != '\n' && isspace(c)) c = (readchar)();
+		if (c != '\n') synerr();
+		c = (readchar)();
+		c = readwhitespace(c, buf, len);
+		directp = &(def->direct_impl);
+		while (c != ']' && c != EOF) {
+			c = readeol(c, buf + strlen(buf), len - strlen(buf));
+			line = strdup(buf);
+			if (!line) outmem();
+			direct = (StringList *)malloc(sizeof(StringList));
+			if (!direct) outmem();
+			*directp = direct;
+			directp = &(direct->next);
+			direct->next = 0;
+			direct->line = line;
+			c = readwhitespace(c, buf, len);
+		}
+		if (c != ']') synerr();
+		c = (readchar)();
+	}
+	return c;
+}
+
+void mkbc(void)
+{
+	char buf[BUFLEN];
+	char *endptr;
+	int c;
+	char *name;
+	long opcode, len;
+
+	c = (readchar)();
+	c = skipwhitespace(c);
+	while (c != EOF) {
+		if (c == '@' || c == '#') {
+			c = skipeol(c);
+		} else if (ISALPHA(c)) {
+			c = readsymbol(c, buf, BUFLEN);
+			c = skipwhitespace(c);
+			if (c == '=') {
+				name = strdup(buf);
+				if (!name) outmem();
+				c = (readchar)();
+				c = skipwhitespace(c);
+				if (!(isdigit)(c)) synerr();
+				c = readsymbol(c, buf, BUFLEN);
+				opcode = strtol(buf, &endptr, 0);
+				if (*endptr != 0) synerr();
+				c = skipwhitespace(c);
+				if (c != ',') synerr();
+				c = (readchar)();
+				c = skipwhitespace(c);
+				if (!(isdigit)(c)) synerr();
+				c = readsymbol(c, buf, BUFLEN);
+				len = strtol(buf, &endptr, 0);
+				if (*endptr != 0) synerr();
+				bytecodes[opcode].name = name;
+				bytecodes[opcode].len = len;
+			}
+		} else if (c == '(') {
+			c = bcdef(c, buf, BUFLEN);
+		} else synerr();
+		c = skipwhitespace(c);
+	}
+}
+
+typedef struct TableEntry {
+	BytecodeImpl *impl;
+	char *impl_name;
+	char *def_name;
+	struct TableEntry *subtable;
+} TableEntry;
+
+TableEntry *the_table;
+
+int is_duplicate(TableEntry *a, TableEntry *b)
+{
+	int i;
+	char buf[256];
+
+	for (i = 0; i < 256; i++) {
+		if (a[i].subtable || b[i].subtable) {
+			if (!(a[i].subtable) || !(b[i].subtable)) return 0;
+			if (!is_duplicate(a[i].subtable, b[i].subtable)) return 0;
+		} else if (a[i].impl_name && b[i].impl_name) {
+			if (strcmp(a[i].impl_name, b[i].impl_name) != 0)
+				return 0;
+		} else if (a[i].def_name && b[i].def_name) {
+			if (strcmp(a[i].def_name, b[i].def_name) != 0)
+				return 0;
+		} else return 0;
+	}
+	return 1;
+}
+
+void remove_duplicates(TableEntry *table, int start, int *table_indices, int depth)
+{
+	TableEntry *start_entry = table[start].subtable;
+	int i, j;
+
+	if (!start_entry) fatal("Subtable is NULL in remove_duplicates!!!");
+	for (i = start+1; i < 256; i++) {
+		if (table[i].subtable) {
+			if (is_duplicate(start_entry, table[i].subtable)) {
+				fputs("dispatch", bci_f);
+				for (j = 0; j < depth; j++) {
+					fputc('_', bci_f);
+					fputs(bytecodes[table_indices[j]].name, bci_f);
+				}
+				fputc('_', bci_f);
+				fputs(bytecodes[i].name, bci_f);
+				fputs(":\n", bci_f);
+				free(table[i].subtable);
+				table[i].subtable = 0;
+			}
+		}
+	}
+}
+
+void writeouttable(TableEntry *table, int *table_indices, int depth)
+{
+	int i, j;
+	int len;
+
+	for (i = 0; i < 256; i++) {
+		if (table[i].subtable) {
+			len = 0;
+			fputs("\t.word\tdispatch", bci_f);
+			table_indices[depth] = i;
+			for (j = 0; j <= depth; j++) {
+				fputc('_', bci_f);
+				fputs(bytecodes[table_indices[j]].name, bci_f);
+				len += bytecodes[table_indices[j]].len;
+			}
+			fprintf(bci_f, "+%d\n", len);
+		} else {
+			if (table[i].impl_name)
+				fprintf(bci_f, "\t.word\t%s%s \t@ %d 0x%02x\n", prefix, table[i].impl_name, i, i);
+			else
+				fprintf(bci_f, "\t.word\t%s%s \t@ %d 0x%02x\n", prefix, table[i].def_name, i, i);
+		}
+	}
+	if (depth == 0) {
+		fputs("\t.endm\n", bci_f);
+		fputs("\t.macro\tSUB_DISPATCH_TABLES\n", bci_f);
+	}
+	for (i = 0; i < 256; i++) {
+		if (table[i].subtable) {
+			fputs("dispatch", bci_f);
+			table_indices[depth] = i;
+			for (j = 0; j <= depth; j++) {
+				fputc('_', bci_f);
+				fputs(bytecodes[table_indices[j]].name, bci_f);
+			}
+			fprintf(bci_f, ":\t@ %d 0x%02x\n", i, i);
+			remove_duplicates(table, i, table_indices, depth);
+			writeouttable(table[i].subtable, table_indices, depth+1);
+		}
+	}
+}
+
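+/*
+ * Walk (allocating on demand) the chain of subtables selected by
+ * table_indices[0..depth-1] and record impl->do_name as the
+ * implementation and new default for the final entry.
+ */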
+void do_tableentry(BytecodeImpl *impl, TableEntry **tablep, int *table_indices, int depth)
+{
+	TableEntry *table;
+	char *def = (char *)"undefined";
+	int i, j;
+
+	if (depth == 0) fatal("Depth = 0 for tableentry\n");
+	for (i = 0; i < depth; i++) {
+		table = *tablep;
+		if (!table) {
+			table = (TableEntry *)malloc(sizeof(TableEntry) * 256);
+			if (!table) outmem();
+			*tablep = table;
+			def = strdup(def);
+			if (!def) outmem();
+			for (j = 0; j < 256; j++) {
+				table[j].impl_name = 0;
+				table[j].def_name = def;
+				table[j].subtable = 0;
+			}
+		}
+		table = &table[table_indices[i]];
+		tablep = &(table->subtable);
+		if (table->impl_name) def = table->def_name;
+	}
+	if (!table->impl_name)
+		table->impl_name = impl->do_name;
+	table->def_name = impl->do_name;
+}
+
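+/* Recursively expand every opcode combination in the sequence and add
+   a dispatch table entry for each complete sequence. */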
+void dumpseq(BytecodeImpl *impl, OpcodeSequence *seq, int *table_indices, int depth)
+{
+	OpcodeList *opc;
+
+	opc = seq->opcode_list;
+	while (opc) {
+		table_indices[depth++] = opc->opcode;
+		if (seq->next != NULL) {
+			dumpseq(impl, seq->next, table_indices, depth);
+		} else {
+			do_tableentry(impl, &the_table, table_indices, depth);
+		}
+		depth--;
+		opc = opc->next;
+	}
+}
+
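+/*
+ * Build the top-level dispatch table from every implementation and
+ * write it out as the MAIN_DISPATCH_TABLE macro; writeouttable() opens
+ * SUB_DISPATCH_TABLES, which the final .endm closes.
+ */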
+void dumptable(void)
+{
+	BytecodeImpl *impl = the_impl;
+	int table_indices[256];
+	int j;
+	char buf[256];
+	char *def;
+
+	the_table = (TableEntry *)malloc(sizeof(TableEntry) * 256);
+	if (!the_table) outmem();
+	for (j = 0; j < 256; j++) {
+		sprintf(buf, "%s", bytecodes[j].name);
+		def = strdup(buf);
+		if (!def) outmem();
+		the_table[j].impl_name = 0;
+		the_table[j].def_name = def;
+		the_table[j].subtable = 0;
+	}
+	while (impl) {
+		dumpseq(impl, impl->opcode_seq, table_indices, 0);
+		impl = impl->next;
+	}
+	fputs("\t.macro\tMAIN_DISPATCH_TABLE\n", bci_f);
+	writeouttable(the_table, table_indices, 0);
+	fputs("\t.endm\n", bci_f);
+}
+
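+/*
+ * Emit one assembler macro per bytecode implementation, followed by
+ * either its direct implementation or a default Opcode entry that
+ * simply invokes the macro.
+ */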
+void dumpimpl(void)
+{
+	BytecodeImpl *impl = the_impl;
+	OpcodeList *opc;
+	StringList *code;
+	StringList *sl;
+	char buf[BUFLEN];
+	char macro[BUFLEN];
+
+	while (impl) {
+		buf[0] = 0;
+		fprintf(bci_f, "@-----------------------------------------------------------------------------\n");
+		fprintf(bci_f, "\t.macro\t%s\tjpc_off=0, seq_len=%d\n", impl->name, impl->len);
+		sl = impl->macro_impl;
+		while (sl) {
+			fputs(sl->line, bci_f);
+			fputc('\n', bci_f);
+			sl = sl->next;
+		}
+		fprintf(bci_f, "\t.endm\n\n");
+		sl = impl->direct_impl;
+		if (sl) {
+			do {
+				fputs(sl->line, bci_f);
+				fputc('\n', bci_f);
+				sl = sl->next;
+			} while (sl);
+		} else {
+			fprintf(bci_f, "\tOpcode\t%s\n", impl->do_name);
+//			fprintf(bci_f, "%s:\n", impl->do_name);
+			fprintf(bci_f, "\t%s\n", impl->name);
+//			fprintf(bci_f, "\tDISPATCH\t%d\n", impl->len);
+		}
+		impl = impl->next;
+	}
+}
+
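+/* Write the opc_* definitions for every named bytecode, then the
+   implementations and the dispatch tables. */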
+void dumpbc(void)
+{
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		if (strcmp(bytecodes[i].name, "undefined") != 0)
+			fprintf(bci_f, "#define opc_%s\t\t0x%02x\n", bytecodes[i].name, i);
+	}
+	fputc('\n', bci_f);
+	dumpimpl();
+	dumptable();
+}
+
+void usage(void)
+{
+	fatal("Usage: mkbc <bytecode definition file> <asm output file>");
+}
+
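+/*
+ * mkbc [-P<prefix>] <bytecode definition file|-> <asm output file|->
+ * '-' selects stdin/stdout; -P sets the prefix prepended to the
+ * implementation labels emitted in the dispatch tables.
+ */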
+int main(int argc, char **argv)
+{
+	int i;
+	char *source, *bci;
+	char *s;
+
+	source = bci = 0;
+	while ((s = *++argv) != NULL) {
+		if (s[0] == '-' && s[1] != 0) {
+			if (s[1] == 'P') {
+				prefix = s+2;
+			} else {
+				fprintf(stderr, "Unrecognized option %s\n", s);
+				usage();
+			}
+		} else {
+			if (!source) source = s;
+			else if (!bci) bci = s;
+			else {
+				fprintf(stderr, "Too many arguments\n");
+				usage();
+			}
+		}
+	}
+	if (!bci) {
+		fprintf(stderr, "Too few arguments\n");
+		usage();
+	}
+	if (strcmp(source, "-") == 0) {
+		source_f = stdin;
+	} else {
+		source_f = fopen(source, "r");
+		if (!source_f) fatal("Error opening source file");
+	}
+	if (strcmp(bci, "-") == 0) {
+		bci_f = stdout;
+	} else {
+		bci_f = fopen(bci, "w");
+		if (!bci_f) fatal("Error opening bci file for write");
+	}
+	for (i = 0; i < 256; i++) {
+		bytecodes[i].name = (char *)"undefined";
+		bytecodes[i].len = -1;
+	}
+	mkbc();
+	dumpbc();
+	if (ferror(source_f)) fatal("Error reading source");
+	if (ferror(bci_f)) fatal("Error writing bci");
+	if (source_f != stdin) fclose(source_f);
+	if (bci_f != stdout) fclose(bci_f);
+
+	return 0;
+}