changeset 2426:49d67a090fe2

Merge
author never
date Mon, 02 May 2011 10:51:36 -0700
parents b21ecca7ccc4 (current diff) 3aea9e9feb07 (diff)
children 2e038ad0c1d0
files make/linux/makefiles/cscope.make make/solaris/makefiles/cscope.make
diffstat 107 files changed, 2871 insertions(+), 1154 deletions(-)
--- a/.hgignore	Mon May 02 00:55:09 2011 -0700
+++ b/.hgignore	Mon May 02 10:51:36 2011 -0700
@@ -5,3 +5,4 @@
 ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/
 ^src/share/tools/IdealGraphVisualizer/build/
 ^src/share/tools/IdealGraphVisualizer/dist/
+^.hgtip
--- a/.hgtags	Mon May 02 00:55:09 2011 -0700
+++ b/.hgtags	Mon May 02 10:51:36 2011 -0700
@@ -162,3 +162,7 @@
 bd586e392d93b7ed7a1636dcc8da2b6a4203a102 hs21-b06
 2dbcb4a4d8dace5fe78ceb563b134f1fb296cd8f jdk7-b137
 2dbcb4a4d8dace5fe78ceb563b134f1fb296cd8f hs21-b07
+0930dc920c185afbf40fed9a655290b8e5b16783 jdk7-b138
+0930dc920c185afbf40fed9a655290b8e5b16783 hs21-b08
+611e19a16519d6fb5deea9ab565336e6e6ee475d jdk7-b139
+611e19a16519d6fb5deea9ab565336e6e6ee475d hs21-b09
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/cscope.make	Mon May 02 10:51:36 2011 -0700
@@ -0,0 +1,141 @@
+#
+# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# The cscope.out file is generated in the current directory.  The old cscope.out
+# file is *not* removed because cscope is smart enough to only build what has
+# changed.  cscope can be confused if files are renamed or removed, so it may be
+# necessary to remove cscope.out (gmake cscope.clean) if a lot of reorganization
+# has occurred.
+
+include $(GAMMADIR)/make/scm.make
+
+RM	= rm -f
+HG	= hg
+CS_TOP	= $(GAMMADIR)
+
+CSDIRS	= $(CS_TOP)/src $(CS_TOP)/make
+CSINCS	= $(CSDIRS:%=-I%)
+
+CSCOPE		= cscope
+CSCOPE_OUT	= cscope.out
+CSCOPE_FLAGS	= -b
+
+# Allow .java files to be added from the environment (CSCLASSES=yes).
+ifdef	CSCLASSES
+ADDCLASSES=	-o -name '*.java'
+endif
+
+# Adding CClassHeaders also pushes the file count of a full workspace up about
+# 200 files (these files also don't exist in a new workspace, and thus will
+# cause the recreation of the database as they get created, which might seem
+# a little confusing).  Thus allow these files to be added from the environment
+# (CSHEADERS=yes).
+ifndef	CSHEADERS
+RMCCHEADERS=	-o -name CClassHeaders
+endif
+
+# Ignore build products.
+CS_PRUNE_GENERATED	= -o -name '${OSNAME}_*_core' -o \
+			     -name '${OSNAME}_*_compiler?'
+
+# O/S-specific files for all systems are included by default.  Set CS_OS to a
+# space-separated list of identifiers to include only those systems.
+ifdef	CS_OS
+CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',\
+		    $(filter-out ${CS_OS},linux macos solaris windows))
+endif
+
+# CPU-specific files for all processors are included by default.  Set CS_CPU to
+# a space-separated list of identifiers to include only those CPUs.
+ifdef	CS_CPU
+CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',\
+		    $(filter-out ${CS_CPU},arm ppc sparc x86 zero))
+endif
+
+# What files should we include?  A simple rule might be just those files under
+# SCCS control, however this would miss files we create like the opcodes and
+# CClassHeaders.  The following attempts to find everything that is *useful*.
+# (.del files are created by sccsrm, demo directories contain many .java files
+# that probably aren't useful for development, and the pkgarchive may contain
+# duplicates of files within the source hierarchy).
+
+# Directories to exclude.
+CS_PRUNE_STD	= $(SCM_DIRS) \
+		  -o -name '.del-*' \
+		  -o -name '*demo' \
+		  -o -name pkgarchive
+
+# Placeholder for user-defined excludes.
+CS_PRUNE_EX	=
+
+CS_PRUNE	= $(CS_PRUNE_STD) \
+		  $(CS_PRUNE_OS) \
+		  $(CS_PRUNE_CPU) \
+		  $(CS_PRUNE_GENERATED) \
+		  $(CS_PRUNE_EX) \
+		  $(RMCCHEADERS)
+
+# File names to include.
+CSFILENAMES	= -name '*.[ch]pp' \
+		  -o -name '*.[Ccshlxy]' \
+		  $(CS_ADD_GENERATED) \
+		  -o -name '*.d' \
+		  -o -name '*.il' \
+		  -o -name '*.cc' \
+		  -o -name '*[Mm]akefile*' \
+		  -o -name '*.gmk' \
+		  -o -name '*.make' \
+		  -o -name '*.ad' \
+		  $(ADDCLASSES)
+
+.PHONY:		cscope cscope.clean cscope.scratch TAGS.clean FORCE
+.PRECIOUS:	cscope.out
+
+cscope $(CSCOPE_OUT): cscope.files FORCE
+	$(CSCOPE) -f $(CSCOPE_OUT) $(CSCOPE_FLAGS)
+
+cscope.clean:
+	$(QUIETLY) $(RM) $(CSCOPE_OUT) cscope.files
+
+cscope.scratch:  cscope.clean cscope
+
+# The raw list is reordered so cscope displays the most relevant files first.
+cscope.files:
+	$(QUIETLY)						\
+	raw=cscope.$$$$;					\
+	find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o	\
+	    -type f \( $(CSFILENAMES) \) -print > $$raw;	\
+	{							\
+	echo "$(CSINCS)";					\
+	egrep -v "\.java|/make/" $$raw;				\
+	fgrep ".java" $$raw;					\
+	fgrep "/make/" $$raw;					\
+	} > $@;							\
+	rm -f $$raw
+
+TAGS:  cscope.files FORCE
+	egrep -v '^-|^$$' $< | etags --members -
+
+TAGS.clean:
+	$(RM) TAGS
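The shared make/cscope.make above replaces the per-platform copies removed below and is pulled in by the platform Makefiles (see the make/linux/Makefile and make/solaris/Makefile hunks that follow). Its entry points are the targets it defines: gmake cscope builds or refreshes cscope.out, gmake cscope.clean removes cscope.out and cscope.files, and gmake cscope.scratch rebuilds the database from scratch. The CS_OS, CS_CPU, CSCLASSES and CSHEADERS variables described in the comments can be set in the environment or on the gmake command line to narrow or widen the indexed file set.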
--- a/make/hotspot_version	Mon May 02 00:55:09 2011 -0700
+++ b/make/hotspot_version	Mon May 02 10:51:36 2011 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=21
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=08
+HS_BUILD_NUMBER=11
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/make/linux/Makefile	Mon May 02 00:55:09 2011 -0700
+++ b/make/linux/Makefile	Mon May 02 10:51:36 2011 -0700
@@ -359,7 +359,7 @@
 
 clean:  clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_docs
 
-include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
+include $(GAMMADIR)/make/cscope.make
 
 #-------------------------------------------------------------------------------
 
--- a/make/linux/makefiles/cscope.make	Mon May 02 00:55:09 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-#
-# Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#  
-#
-
-#
-# The cscope.out file is made in the current directory and spans the entire
-# source tree.
-#
-# Things to note:
-#	1. We use relative names for cscope.
-#	2. We *don't* remove the old cscope.out file, because cscope is smart
-#	   enough to only build what has changed.  It can be confused, however,
-#	   if files are renamed or removed, so it may be necessary to manually
-#	   remove cscope.out if a lot of reorganization has occurred.
-#
-
-include $(GAMMADIR)/make/scm.make
-
-NAWK	= awk
-RM	= rm -f
-HG	= hg
-CS_TOP	= ../..
-
-CSDIRS	= $(CS_TOP)/src $(CS_TOP)/build
-CSINCS	= $(CSDIRS:%=-I%)
-
-CSCOPE		= cscope
-CSCOPE_FLAGS	= -b
-
-# Allow .java files to be added from the environment (CSCLASSES=yes).
-ifdef	CSCLASSES
-ADDCLASSES=	-o -name '*.java'
-endif
-
-# Adding CClassHeaders also pushes the file count of a full workspace up about
-# 200 files (these files also don't exist in a new workspace, and thus will
-# cause the recreation of the database as they get created, which might seem
-# a little confusing).  Thus allow these files to be added from the environment
-# (CSHEADERS=yes).
-ifndef	CSHEADERS
-RMCCHEADERS=	-o -name CClassHeaders
-endif
-
-# Use CS_GENERATED=x to include auto-generated files in the build directories.
-ifdef	CS_GENERATED
-CS_ADD_GENERATED	= -o -name '*.incl'
-else
-CS_PRUNE_GENERATED	= -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
-endif
-
-# OS-specific files for other systems are excluded by default.  Use CS_OS=yes
-# to include platform-specific files for other platforms.
-ifndef	CS_OS
-CS_OS		= linux macos solaris win32
-CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
-endif
-
-# Processor-specific files for other processors are excluded by default.  Use
-# CS_CPU=x to include platform-specific files for other platforms.
-ifndef	CS_CPU
-CS_CPU		= i486 sparc amd64 ia64
-CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
-endif
-
-# What files should we include?  A simple rule might be just those files under
-# SCCS control, however this would miss files we create like the opcodes and
-# CClassHeaders.  The following attempts to find everything that is *useful*.
-# (.del files are created by sccsrm, demo directories contain many .java files
-# that probably aren't useful for development, and the pkgarchive may contain
-# duplicates of files within the source hierarchy).
-
-# Directories to exclude.
-CS_PRUNE_STD	= $(SCM_DIRS) \
-		  -o -name '.del-*' \
-		  -o -name '*demo' \
-		  -o -name pkgarchive
-
-CS_PRUNE	= $(CS_PRUNE_STD) \
-		  $(CS_PRUNE_OS) \
-		  $(CS_PRUNE_CPU) \
-		  $(CS_PRUNE_GENERATED) \
-		  $(RMCCHEADERS)
-
-# File names to include.
-CSFILENAMES	= -name '*.[ch]pp' \
-		  -o -name '*.[Ccshlxy]' \
-		  $(CS_ADD_GENERATED) \
-		  -o -name '*.il' \
-		  -o -name '*.cc' \
-		  -o -name '*[Mm]akefile*' \
-		  -o -name '*.gmk' \
-		  -o -name '*.make' \
-		  -o -name '*.ad' \
-		  $(ADDCLASSES)
-
-.PRECIOUS:	cscope.out
-
-cscope cscope.out: cscope.files FORCE
-	$(CSCOPE) $(CSCOPE_FLAGS)
-
-# The .raw file is reordered here in an attempt to make cscope display the most
-# relevant files first.
-cscope.files: .cscope.files.raw
-	echo "$(CSINCS)" > $@
-	-egrep -v "\.java|\/make\/"	$< >> $@
-	-fgrep ".java"			$< >> $@
-	-fgrep "/make/"		$< >> $@
-
-.cscope.files.raw:  .nametable.files
-	-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-	    -type f \( $(CSFILENAMES) \) -print > $@
-
-cscope.clean:  nametable.clean
-	-$(RM) cscope.out cscope.files .cscope.files.raw
-
-TAGS:  cscope.files FORCE
-	egrep -v '^-|^$$' $< | etags --members -
-
-TAGS.clean:  nametable.clean
-	-$(RM) TAGS
-
-# .nametable.files and .nametable.files.tmp are used to determine if any files
-# were added to/deleted from/renamed in the workspace.  If not, then there's
-# normally no need to rebuild the cscope database. To force a rebuild of
-# the cscope database: gmake nametable.clean.
-.nametable.files:  .nametable.files.tmp
-	( cmp -s $@ $< ) || ( cp $< $@ )
-	-$(RM) $<
-
-# `hg status' is slightly faster than `hg fstatus'. Both are
-# quite a bit slower on an NFS mounted file system, so this is
-# really geared towards repos on local file systems.
-.nametable.files.tmp:
-	-$(HG) fstatus -acmn > $@
-nametable.clean:
-	-$(RM) .nametable.files .nametable.files.tmp
-
-FORCE:
-
-.PHONY:		cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/make/solaris/Makefile	Mon May 02 00:55:09 2011 -0700
+++ b/make/solaris/Makefile	Mon May 02 10:51:36 2011 -0700
@@ -296,7 +296,7 @@
 
 clean:  clean_compiler2 clean_compiler1 clean_core clean_docs clean_kernel
 
-include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
+include $(GAMMADIR)/make/cscope.make
 
 #-------------------------------------------------------------------------------
 
--- a/make/solaris/makefiles/cscope.make	Mon May 02 00:55:09 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-#
-# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#  
-#
-
-#
-# The cscope.out file is made in the current directory and spans the entire
-# source tree.
-#
-# Things to note:
-#	1. We use relative names for cscope.
-#	2. We *don't* remove the old cscope.out file, because cscope is smart
-#	   enough to only build what has changed.  It can be confused, however,
-#	   if files are renamed or removed, so it may be necessary to manually
-#	   remove cscope.out if a lot of reorganization has occurred.
-#
-
-include $(GAMMADIR)/make/scm.make
-
-NAWK	= /usr/xpg4/bin/awk
-RM	= rm -f
-HG	= hg
-CS_TOP	= ../..
-
-CSDIRS	= $(CS_TOP)/src $(CS_TOP)/make
-CSINCS	= $(CSDIRS:%=-I%)
-
-CSCOPE		= cscope
-CSCOPE_FLAGS	= -b
-
-# Allow .java files to be added from the environment (CSCLASSES=yes).
-ifdef	CSCLASSES
-ADDCLASSES=	-o -name '*.java'
-endif
-
-# Adding CClassHeaders also pushes the file count of a full workspace up about
-# 200 files (these files also don't exist in a new workspace, and thus will
-# cause the recreation of the database as they get created, which might seem
-# a little confusing).  Thus allow these files to be added from the environment
-# (CSHEADERS=yes).
-ifndef	CSHEADERS
-RMCCHEADERS=	-o -name CClassHeaders
-endif
-
-# Use CS_GENERATED=x to include auto-generated files in the make directories.
-ifdef	CS_GENERATED
-CS_ADD_GENERATED	= -o -name '*.incl'
-else
-CS_PRUNE_GENERATED	= -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
-endif
-
-# OS-specific files for other systems are excluded by default.  Use CS_OS=yes
-# to include platform-specific files for other platforms.
-ifndef	CS_OS
-CS_OS		= linux macos solaris win32
-CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
-endif
-
-# Processor-specific files for other processors are excluded by default.  Use
-# CS_CPU=x to include platform-specific files for other platforms.
-ifndef	CS_CPU
-CS_CPU		= i486 sparc amd64 ia64
-CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
-endif
-
-# What files should we include?  A simple rule might be just those files under
-# SCCS control, however this would miss files we create like the opcodes and
-# CClassHeaders.  The following attempts to find everything that is *useful*.
-# (.del files are created by sccsrm, demo directories contain many .java files
-# that probably aren't useful for development, and the pkgarchive may contain
-# duplicates of files within the source hierarchy).
-
-# Directories to exclude.
-CS_PRUNE_STD	= $(SCM_DIRS) \
-		  -o -name '.del-*' \
-		  -o -name '*demo' \
-		  -o -name pkgarchive
-
-CS_PRUNE	= $(CS_PRUNE_STD) \
-		  $(CS_PRUNE_OS) \
-		  $(CS_PRUNE_CPU) \
-		  $(CS_PRUNE_GENERATED) \
-		  $(RMCCHEADERS)
-
-# File names to include.
-CSFILENAMES	= -name '*.[ch]pp' \
-		  -o -name '*.[Ccshlxy]' \
-		  $(CS_ADD_GENERATED) \
-		  -o -name '*.d' \
-		  -o -name '*.il' \
-		  -o -name '*.cc' \
-		  -o -name '*[Mm]akefile*' \
-		  -o -name '*.gmk' \
-		  -o -name '*.make' \
-		  -o -name '*.ad' \
-		  $(ADDCLASSES)
-
-.PRECIOUS:	cscope.out
-
-cscope cscope.out: cscope.files FORCE
-	$(CSCOPE) $(CSCOPE_FLAGS)
-
-# The .raw file is reordered here in an attempt to make cscope display the most
-# relevant files first.
-cscope.files: .cscope.files.raw
-	echo "$(CSINCS)" > $@
-	-egrep -v "\.java|\/make\/"	$< >> $@
-	-fgrep ".java"			$< >> $@
-	-fgrep "/make/"		$< >> $@
-
-.cscope.files.raw:  .nametable.files
-	-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-	    -type f \( $(CSFILENAMES) \) -print > $@
-
-cscope.clean:  nametable.clean
-	-$(RM) cscope.out cscope.files .cscope.files.raw
-
-TAGS:  cscope.files FORCE
-	egrep -v '^-|^$$' $< | etags --members -
-
-TAGS.clean:  nametable.clean
-	-$(RM) TAGS
-
-# .nametable.files and .nametable.files.tmp are used to determine if any files
-# were added to/deleted from/renamed in the workspace.  If not, then there's
-# normally no need to rebuild the cscope database. To force a rebuild of
-# the cscope database: gmake nametable.clean.
-.nametable.files:  .nametable.files.tmp
-	( cmp -s $@ $< ) || ( cp $< $@ )
-	-$(RM) $<
-
-# `hg status' is slightly faster than `hg fstatus'. Both are
-# quite a bit slower on an NFS mounted file system, so this is
-# really geared towards repos on local file systems.
-.nametable.files.tmp:
-	-$(HG) fstatus -acmn > $@
-
-nametable.clean:
-	-$(RM) .nametable.files .nametable.files.tmp
-
-FORCE:
-
-.PHONY:		cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -4257,34 +4257,14 @@
 ///////////////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
-static uint num_stores = 0;
-static uint num_null_pre_stores = 0;
-
-static void count_null_pre_vals(void* pre_val) {
-  num_stores++;
-  if (pre_val == NULL) num_null_pre_stores++;
-  if ((num_stores % 1000000) == 0) {
-    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
-                  num_stores, num_null_pre_stores,
-                  100.0*(float)num_null_pre_stores/(float)num_stores);
-  }
-}
-
-static address satb_log_enqueue_with_frame = 0;
-static u_char* satb_log_enqueue_with_frame_end = 0;
-
-static address satb_log_enqueue_frameless = 0;
-static u_char* satb_log_enqueue_frameless_end = 0;
+static address satb_log_enqueue_with_frame = NULL;
+static u_char* satb_log_enqueue_with_frame_end = NULL;
+
+static address satb_log_enqueue_frameless = NULL;
+static u_char* satb_log_enqueue_frameless_end = NULL;
 
 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
 
-// The calls to this don't work.  We'd need to do a fair amount of work to
-// make it work.
-static void check_index(int ind) {
-  assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
-         "Invariants.");
-}
-
 static void generate_satb_log_enqueue(bool with_frame) {
   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
   CodeBuffer buf(bb);
@@ -4388,13 +4368,27 @@
   }
 }
 
-void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
-  assert(offset == 0 || index == noreg, "choose one");
-
-  if (G1DisablePreBarrier) return;
-  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+                                          Register index,
+                                          int offset,
+                                          Register pre_val,
+                                          Register tmp,
+                                          bool preserve_o_regs) {
   Label filtered;
-  // satb_log_barrier_work0(tmp, filtered);
+
+  if (obj == noreg) {
+    // We are not loading the previous value so make
+    // sure that we don't trash the value in pre_val
+    // with the code below.
+    assert_different_registers(pre_val, tmp);
+  } else {
+    // We will be loading the previous value
+    // in this code so...
+    assert(offset == 0 || index == noreg, "choose one");
+    assert(pre_val == noreg, "check this code");
+  }
+
+  // Is marking active?
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
     ld(G2,
        in_bytes(JavaThread::satb_mark_queue_offset() +
@@ -4413,61 +4407,46 @@
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
   delayed() -> nop();
 
-  // satb_log_barrier_work1(tmp, offset);
-  if (index == noreg) {
-    if (Assembler::is_simm13(offset)) {
-      load_heap_oop(obj, offset, tmp);
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    // Load the previous value...
+    if (index == noreg) {
+      if (Assembler::is_simm13(offset)) {
+        load_heap_oop(obj, offset, tmp);
+      } else {
+        set(offset, tmp);
+        load_heap_oop(obj, tmp, tmp);
+      }
     } else {
-      set(offset, tmp);
-      load_heap_oop(obj, tmp, tmp);
+      load_heap_oop(obj, index, tmp);
     }
-  } else {
-    load_heap_oop(obj, index, tmp);
+    // Previous value has been loaded into tmp
+    pre_val = tmp;
   }
 
-  // satb_log_barrier_work2(obj, tmp, offset);
-
-  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
-
-  const Register pre_val = tmp;
-
-  if (G1SATBBarrierPrintNullPreVals) {
-    save_frame(0);
-    mov(pre_val, O0);
-    // Save G-regs that target may use.
-    mov(G1, L1);
-    mov(G2, L2);
-    mov(G3, L3);
-    mov(G4, L4);
-    mov(G5, L5);
-    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
-    delayed()->nop();
-    // Restore G-regs that target may have used.
-    mov(L1, G1);
-    mov(L2, G2);
-    mov(L3, G3);
-    mov(L4, G4);
-    mov(L5, G5);
-    restore(G0, G0, G0);
-  }
-
+  assert(pre_val != noreg, "must have a real register");
+
+  // Is the previous value null?
   // Check on whether to annul.
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
   delayed() -> nop();
 
   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
-  // case, pre_val will be a scratch G-reg, but there's some cases in which
-  // it's an O-reg.  In the first case, do a normal call.  In the latter,
-  // do a save here and call the frameless version.
+  // case, pre_val will be a scratch G-reg, but there are some cases in
+  // which it's an O-reg.  In the first case, do a normal call.  In the
+  // latter, do a save here and call the frameless version.
 
   guarantee(pre_val->is_global() || pre_val->is_out(),
             "Or we need to think harder.");
+
   if (pre_val->is_global() && !preserve_o_regs) {
-    generate_satb_log_enqueue_if_necessary(true); // with frame.
+    generate_satb_log_enqueue_if_necessary(true); // with frame
+
     call(satb_log_enqueue_with_frame);
     delayed()->mov(pre_val, O0);
   } else {
-    generate_satb_log_enqueue_if_necessary(false); // with frameless.
+    generate_satb_log_enqueue_if_necessary(false); // frameless
+
     save_frame(0);
     call(satb_log_enqueue_frameless);
     delayed()->mov(pre_val->after_save(), O0);
@@ -4614,7 +4593,6 @@
   MacroAssembler* post_filter_masm = this;
 
   if (new_val == G0) return;
-  if (G1DisablePostBarrier) return;
 
   G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
   assert(bs->kind() == BarrierSet::G1SATBCT ||
@@ -4626,6 +4604,7 @@
 #else
     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
 #endif
+
     if (G1PrintCTFilterStats) {
       guarantee(tmp->is_global(), "Or stats won't work...");
       // This is a sleazy hack: I'm temporarily hijacking G2, which I
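Taken together, the sparc changes above give g1_write_barrier_pre two modes of use: when obj is a real register the barrier itself loads the previous value of the field described by obj/index/offset into tmp (and pre_val must be noreg), whereas when obj is noreg the caller has already loaded the previous value and passes it in pre_val. The second mode is what the new Reference.get entry points later in this changeset rely on, since there the referent has already been loaded before the barrier is emitted.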
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Mon May 02 10:51:36 2011 -0700
@@ -2210,15 +2210,11 @@
   void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
 
 #ifndef SERIALGC
-  // Array store and offset
-  void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
-
+  // General G1 pre-barrier generator.
+  void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
+
+  // General G1 post-barrier generator
   void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-
-  // May do filtering, depending on the boolean arguments.
-  void g1_card_table_write(jbyte* byte_map_base,
-                           Register tmp, Register obj, Register new_val,
-                           bool region_filter, bool null_filter);
 #endif // SERIALGC
 
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -408,13 +408,20 @@
 #ifndef SERIALGC
 
 void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
   __ bind(_entry);
 
   assert(pre_val()->is_register(), "Precondition.");
-
   Register pre_val_reg = pre_val()->as_register();
 
-  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
   if (__ is_in_wdisp16_range(_continuation)) {
     __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
                       pre_val_reg, _continuation);
@@ -431,6 +438,96 @@
 
 }
 
+void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that offset == referent_offset.
+  //
+  // So we might have to emit:
+  //   if (src == null) goto continuation.
+  //
+  // and we definitely have to emit:
+  //   if (klass(src).reference_type == REF_NONE) goto continuation
+  //   if (!marking_active) goto continuation
+  //   if (pre_val == null) goto continuation
+  //   call pre_barrier(pre_val)
+  //   goto continuation
+  //
+  __ bind(_entry);
+
+  assert(src()->is_register(), "sanity");
+  Register src_reg = src()->as_register();
+
+  if (gen_src_check()) {
+    // The original src operand was not a constant.
+    // Generate src == null?
+    if (__ is_in_wdisp16_range(_continuation)) {
+      __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                        src_reg, _continuation);
+    } else {
+      __ cmp(src_reg, G0);
+      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+    }
+    __ delayed()->nop();
+  }
+
+  // Generate src->_klass->_reference_type() == REF_NONE?
+  assert(tmp()->is_register(), "sanity");
+  Register tmp_reg = tmp()->as_register();
+
+  __ load_klass(src_reg, tmp_reg);
+
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  __ ld(ref_type_adr, tmp_reg);
+
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      tmp_reg, _continuation);
+  } else {
+    __ cmp(tmp_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  // Is marking active?
+  assert(thread()->is_register(), "precondition");
+  Register thread_reg = thread()->as_pointer_register();
+
+  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    __ ld(in_progress, tmp_reg);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    __ ldsb(in_progress, tmp_reg);
+  }
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      tmp_reg, _continuation);
+  } else {
+    __ cmp(tmp_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  // val == null?
+  assert(val()->is_register(), "Precondition.");
+  Register val_reg = val()->as_register();
+
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      val_reg, _continuation);
+  } else {
+    __ cmp(val_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id));
+  __ delayed()->mov(val_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -387,7 +387,8 @@
 
   if (obj_store) {
     // Needs GC write barriers.
-    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
   __ move(value.result(), array_addr, null_check_info);
   if (obj_store) {
@@ -687,7 +688,8 @@
   __ add(obj.result(), offset.result(), addr);
 
   if (type == objectType) {  // Write-barrier needed for Object fields.
-    pre_barrier(addr, false, NULL);
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
 
   if (type == objectType)
@@ -1187,7 +1189,8 @@
       }
 
       if (is_obj) {
-        pre_barrier(LIR_OprFact::address(addr), false, NULL);
+        pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                    true /* do_load */, false /* patch */, NULL);
         // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
       }
       __ move(data, addr);
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -551,6 +551,26 @@
   return NULL;
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * pass that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (C++ interpreter)
 // This sets up a somewhat different looking stack for calling the native method
--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry(void);
   void lock_method(void);
   void save_native_result(void);
   void restore_native_result(void);
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -407,6 +407,8 @@
     case Interpreter::java_lang_math_abs     :                                                                             break;
     case Interpreter::java_lang_math_log     :                                                                             break;
     case Interpreter::java_lang_math_log10   :                                                                             break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -763,6 +763,87 @@
   return NULL;
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+     Label slow_path;
+
+    // In the G1 code we don't check if we need to reach a safepoint. We
+    // continue and the thread will safepoint at the next bytecode dispatch.
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
+    __ tst(Otos_i);  // check if local 0 == NULL and go to the slow path
+    __ brx(Assembler::zero, false, Assembler::pn, slow_path);
+    __ delayed()->nop();
+
+
+    // Load the value of the referent field.
+    if (Assembler::is_simm13(referent_offset)) {
+      __ load_heap_oop(Otos_i, referent_offset, Otos_i);
+    } else {
+      __ set(referent_offset, G3_scratch);
+      __ load_heap_oop(Otos_i, G3_scratch, Otos_i);
+    }
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer. Note with
+    // these parameters the pre-barrier does not generate
+    // the load of the previous value
+
+    __ g1_write_barrier_pre(noreg /* obj */, noreg /* index */, 0 /* offset */,
+                            Otos_i /* pre_val */,
+                            G3_scratch /* tmp */,
+                            true /* preserve_o_regs */);
+
+    // _areturn
+    __ retl();                      // return from leaf routine
+    __ delayed()->mov(O5_savedSP, SP);
+
+    // Generate regular method entry
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon May 02 10:51:36 2011 -0700
@@ -57,7 +57,11 @@
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
       {
-        __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
+        // Load and record the previous value.
+        __ g1_write_barrier_pre(base, index, offset,
+                                noreg /* pre_val */,
+                                tmp, true /*preserve_o_regs*/);
+
         if (index == noreg ) {
           assert(Assembler::is_simm13(offset), "fix this code");
           __ store_heap_oop(val, base, offset);
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon May 02 10:51:36 2011 -0700
@@ -6902,26 +6902,39 @@
 #ifndef SERIALGC
 
 void MacroAssembler::g1_write_barrier_pre(Register obj,
-#ifndef _LP64
+                                          Register pre_val,
                                           Register thread,
-#endif
                                           Register tmp,
-                                          Register tmp2,
-                                          bool tosca_live) {
-  LP64_ONLY(Register thread = r15_thread;)
+                                          bool tosca_live,
+                                          bool expand_call) {
+
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly, skipping the _last_sp check that
+  // InterpreterMacroAssembler::call_VM_leaf_base would otherwise generate.
+
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != rax, "check this code");
+  }
+
   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_active()));
-
   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_index()));
   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_buf()));
 
 
-  Label done;
-  Label runtime;
-
-  // if (!marking_in_progress) goto done;
+  // Is marking active?
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
     cmpl(in_progress, 0);
   } else {
@@ -6930,65 +6943,92 @@
   }
   jcc(Assembler::equal, done);
 
-  // if (x.f == NULL) goto done;
-#ifdef _LP64
-  load_heap_oop(tmp2, Address(obj, 0));
-#else
-  movptr(tmp2, Address(obj, 0));
-#endif
-  cmpptr(tmp2, (int32_t) NULL_WORD);
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cmpptr(pre_val, (int32_t) NULL_WORD);
   jcc(Assembler::equal, done);
 
   // Can we store original value in the thread's buffer?
-
-#ifdef _LP64
-  movslq(tmp, index);
-  cmpq(tmp, 0);
-#else
-  cmpl(index, 0);
-#endif
-  jcc(Assembler::equal, runtime);
-#ifdef _LP64
-  subq(tmp, wordSize);
-  movl(index, tmp);
-  addq(tmp, buffer);
-#else
-  subl(index, wordSize);
-  movl(tmp, buffer);
-  addl(tmp, index);
-#endif
-  movptr(Address(tmp, 0), tmp2);
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  movptr(tmp, index);                   // tmp := *index_adr
+  cmpptr(tmp, 0);                       // tmp == 0?
+  jcc(Assembler::equal, runtime);       // If yes, goto runtime
+
+  subptr(tmp, wordSize);                // tmp := tmp - wordSize
+  movptr(index, tmp);                   // *index_adr := tmp
+  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  movptr(Address(tmp, 0), pre_val);
   jmp(done);
+
   bind(runtime);
   // save the live input values
   if(tosca_live) push(rax);
-  push(obj);
-#ifdef _LP64
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
-#else
-  push(thread);
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
-  pop(thread);
-#endif
-  pop(obj);
+
+  if (obj != noreg && obj != rax)
+    push(obj);
+
+  if (pre_val != rax)
+    push(pre_val);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that *(ebp + frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack,
+  // expand_call should be passed true.
+
+  NOT_LP64( push(thread); )
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    pass_arg1(this, thread);
+    pass_arg0(this, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( pop(thread); )
+
+  // restore the live input values
+  if (pre_val != rax)
+    pop(pre_val);
+
+  if (obj != noreg && obj != rax)
+    pop(obj);
+
   if(tosca_live) pop(rax);
+
   bind(done);
-
 }
 
 void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                            Register new_val,
-#ifndef _LP64
                                            Register thread,
-#endif
                                            Register tmp,
                                            Register tmp2) {
-
-  LP64_ONLY(Register thread = r15_thread;)
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        PtrQueue::byte_offset_of_index()));
   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        PtrQueue::byte_offset_of_buf()));
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
   Label done;
@@ -7067,7 +7107,6 @@
   pop(store_addr);
 
   bind(done);
-
 }
 
 #endif // SERIALGC
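The x86 hunks above collapse the old LP64/ILP32-specific buffer handling into one pointer-sized fast path. As a rough, self-contained C++ model of the logic the rewritten g1_write_barrier_pre emits (illustrative names only, not HotSpot source; the assumed shape follows the assembly above):

#include <cstddef>

// Per-thread SATB queue state, mirroring the three Address fields used above.
struct SATBQueueModel {
  bool   active;   // PtrQueue "active" flag: is concurrent marking in progress?
  size_t index;    // byte offset of the next free slot, counting down to 0
  void** buffer;   // the per-thread SATB log buffer
};

// 'field' is null when the caller has already loaded the previous value into
// 'pre_val' (the obj == noreg case); otherwise the barrier loads it itself.
inline void g1_pre_barrier_model(SATBQueueModel& q, void* const* field,
                                 void* pre_val, void (*runtime_enqueue)(void*)) {
  if (!q.active) return;                    // marking not active: filtered out
  if (field != nullptr) pre_val = *field;   // load the previous value if needed
  if (pre_val == nullptr) return;           // null previous value: nothing to log
  if (q.index == 0) {                       // buffer full: call into the runtime
    runtime_enqueue(pre_val);               // SharedRuntime::g1_wb_pre in the VM
    return;
  }
  q.index -= sizeof(void*);                 // bump the index down one word
  q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
}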
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1453,6 +1453,7 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
   friend class Runtime1;      // as_Address()
+
  protected:
 
   Address as_Address(AddressLiteral adr);
@@ -1674,21 +1675,22 @@
   void store_check(Register obj);                // store check for obj - register is destroyed afterwards
   void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
 
+#ifndef SERIALGC
+
   void g1_write_barrier_pre(Register obj,
-#ifndef _LP64
+                            Register pre_val,
                             Register thread,
-#endif
                             Register tmp,
-                            Register tmp2,
-                            bool     tosca_live);
+                            bool tosca_live,
+                            bool expand_call);
+
   void g1_write_barrier_post(Register store_addr,
                              Register new_val,
-#ifndef _LP64
                              Register thread,
-#endif
                              Register tmp,
                              Register tmp2);
 
+#endif // SERIALGC
 
   // split store_check(Register obj) to enhance instruction interleaving
   void store_check_part_1(Register obj);
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon May 02 10:51:36 2011 -0700
@@ -466,15 +466,19 @@
 #ifndef SERIALGC
 
 void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
-
-  // At this point we know that marking is in progress
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
 
   __ bind(_entry);
   assert(pre_val()->is_register(), "Precondition.");
 
   Register pre_val_reg = pre_val()->as_register();
 
-  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
 
   __ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
   __ jcc(Assembler::equal, _continuation);
@@ -484,6 +488,68 @@
 
 }
 
+void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that offset == referent_offset.
+  //
+  // So we might have to emit:
+  //   if (src == null) goto continuation.
+  //
+  // and we definitely have to emit:
+  //   if (klass(src).reference_type == REF_NONE) goto continuation
+  //   if (!marking_active) goto continuation
+  //   if (pre_val == null) goto continuation
+  //   call pre_barrier(pre_val)
+  //   goto continuation
+  //
+  __ bind(_entry);
+
+  assert(src()->is_register(), "sanity");
+  Register src_reg = src()->as_register();
+
+  if (gen_src_check()) {
+    // The original src operand was not a constant.
+    // Generate src == null?
+    __ cmpptr(src_reg, (int32_t) NULL_WORD);
+    __ jcc(Assembler::equal, _continuation);
+  }
+
+  // Generate src->_klass->_reference_type == REF_NONE?
+  assert(tmp()->is_register(), "sanity");
+  Register tmp_reg = tmp()->as_register();
+
+  __ load_klass(tmp_reg, src_reg);
+
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  __ cmpl(ref_type_adr, REF_NONE);
+  __ jcc(Assembler::equal, _continuation);
+
+  // Is marking active?
+  assert(thread()->is_register(), "precondition");
+  Register thread_reg = thread()->as_pointer_register();
+
+  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    __ cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    __ cmpb(in_progress, 0);
+  }
+  __ jcc(Assembler::equal, _continuation);
+
+  // val == null?
+  assert(val()->is_register(), "Precondition.");
+  Register val_reg = val()->as_register();
+
+  __ cmpptr(val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+
+  ce->store_parameter(val()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ jmp(_continuation);
+}
+
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon May 02 10:51:36 2011 -0700
@@ -326,7 +326,8 @@
 
   if (obj_store) {
     // Needs GC write barriers.
-    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
     __ move(value.result(), array_addr, null_check_info);
     // Seems to be a precise
     post_barrier(LIR_OprFact::address(array_addr), value.result());
@@ -794,7 +795,8 @@
 
   if (type == objectType) {  // Write-barrier needed for Object fields.
     // Do the pre-write barrier, if any.
-    pre_barrier(addr, false, NULL);
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
 
   LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
@@ -1339,7 +1341,8 @@
     bool is_obj = (type == T_ARRAY || type == T_OBJECT);
     if (is_obj) {
       // Do the pre-write barrier, if any.
-      pre_barrier(LIR_OprFact::address(addr), false, NULL);
+      pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
       __ move(data, addr);
       assert(src->is_register(), "must be register");
       // Seems to be a precise address
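The C1 changes in this file (and in the sparc LIRGenerator earlier) switch every existing pre_barrier call to the extended signature: the address plus LIR_OprFact::illegalOpr for pre_val and do_load = true, which keeps the old behaviour where G1PreBarrierStub loads the previous value itself (its emit_code above now only calls mem2reg when do_load() is true). The other combination, an explicit pre_val with do_load = false, is what the new Reference.get/Unsafe paths use when the previous value is already in a register.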
--- a/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry(void);
   void lock_method(void);
   void generate_stack_overflow_check(void);
 
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon May 02 10:51:36 2011 -0700
@@ -936,6 +936,26 @@
 
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * pass that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // C++ Interpreter stub for calling a native method.
 // This sets up a somewhat different looking stack for calling the native method
@@ -2210,6 +2230,8 @@
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry();
   void lock_method(void);
   void generate_stack_overflow_check(void);
 
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon May 02 10:51:36 2011 -0700
@@ -776,6 +776,98 @@
 
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code below can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+
+  // rbx,: methodOop
+  // rcx: receiver (preserve for slow entry into asm interpreter)
+
+  // rsi: senderSP must be preserved for slow path, set SP to it on fast path
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+    Label slow_path;
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ movptr(rax, Address(rsp, wordSize));
+    __ testptr(rax, rax);
+    __ jcc(Assembler::zero, slow_path);
+
+    // rax: local 0 (must be preserved across the G1 barrier call)
+    //
+    // rbx: method (at this point it's scratch)
+    // rcx: receiver (at this point it's scratch)
+    // rdx: scratch
+    // rdi: scratch
+    //
+    // rsi: sender sp
+
+    // Preserve the sender sp in case the pre-barrier
+    // calls the runtime
+    __ push(rsi);
+
+    // Load the value of the referent field.
+    const Address field_address(rax, referent_offset);
+    __ movptr(rax, field_address);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    __ get_thread(rcx);
+    __ g1_write_barrier_pre(noreg /* obj */,
+                            rax /* pre_val */,
+                            rcx /* thread */,
+                            rbx /* tmp */,
+                            true /* tosca_live */,
+                            true /* expand_call */);
+
+    // _areturn
+    __ pop(rsi);                // get sender sp
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
@@ -1444,6 +1536,8 @@
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
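The fast path generated above is just: null-check the receiver, load the referent with a plain field load, run the G1 SATB pre-barrier on the loaded value, then return; everything else falls back to the normal entry. A minimal stand-alone C++ sketch of that control flow (the Reference struct and both helpers are illustrative stand-ins, not HotSpot code):

#include <cstdio>

struct Reference { void* referent; };                // illustrative stand-in

// Stand-in for the SATB pre-barrier: log the value so concurrent marking
// treats the referent as live (the real barrier only logs while marking).
static void satb_log(void* pre_val) { std::printf("SATB log %p\n", pre_val); }

// Stand-in for the regular (slow) method entry, which also raises the NPE.
static void* slow_path(Reference* self) { return self ? self->referent : NULL; }

static void* reference_get_entry(Reference* self) {
  if (self == NULL) return slow_path(self);          // null receiver: slow path
  void* referent = self->referent;                   // _getfield referent
  satb_log(referent);                                // G1 pre-barrier on the value read
  return referent;                                   // _areturn
}

int main() {
  Reference r = { &r };
  std::printf("%p\n", reference_get_entry(&r));
  return 0;
}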
 
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon May 02 10:51:36 2011 -0700
@@ -757,6 +757,95 @@
   return entry_point;
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+  //
+  // rbx: methodOop
+
+  // r13: senderSP must be preserved for slow path, set SP to it on fast path
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+    Label slow_path;
+    // rbx: method
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ movptr(rax, Address(rsp, wordSize));
+
+    __ testptr(rax, rax);
+    __ jcc(Assembler::zero, slow_path);
+
+    // rax: local 0
+    // rbx: method (but can be used as scratch now)
+    // rdx: scratch
+    // rdi: scratch
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+
+    // Load the value of the referent field.
+    const Address field_address(rax, referent_offset);
+    __ load_heap_oop(rax, field_address);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    __ g1_write_barrier_pre(noreg /* obj */,
+                            rax /* pre_val */,
+                            r15_thread /* thread */,
+                            rbx /* tmp */,
+                            true /* tosca_live */,
+                            true /* expand_call */);
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, r13);           // set sp to sender sp
+    __ jmp(rdi);
+    __ ret(0);
+
+    // generate a vanilla interpreter entry as the slow path
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
+
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the
 // native method than the typical interpreter frame setup.
@@ -1463,6 +1552,8 @@
   case Interpreter::java_lang_math_log     : // fall thru
   case Interpreter::java_lang_math_log10   : // fall thru
   case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
+  case Interpreter::java_lang_ref_reference_get
+                                           : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
   default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon May 02 10:51:36 2011 -0700
@@ -140,7 +140,12 @@
         }
         __ get_thread(rcx);
         __ save_bcp();
-        __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
+        __ g1_write_barrier_pre(rdx /* obj */,
+                                rbx /* pre_val */,
+                                rcx /* thread */,
+                                rsi /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
 
         // Do the actual store
         // noreg means NULL
@@ -149,7 +154,11 @@
           // No post barrier for NULL
         } else {
           __ movl(Address(rdx, 0), val);
-          __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
+          __ g1_write_barrier_post(rdx /* store_adr */,
+                                   val /* new_val */,
+                                   rcx /* thread */,
+                                   rbx /* tmp */,
+                                   rsi /* tmp2 */);
         }
         __ restore_bcp();
 
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon May 02 10:51:36 2011 -0700
@@ -147,12 +147,21 @@
         } else {
           __ leaq(rdx, obj);
         }
-        __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
+        __ g1_write_barrier_pre(rdx /* obj */,
+                                rbx /* pre_val */,
+                                r15_thread /* thread */,
+                                r8  /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
         if (val == noreg) {
           __ store_heap_oop_null(Address(rdx, 0));
         } else {
           __ store_heap_oop(Address(rdx, 0), val);
-          __ g1_write_barrier_post(rdx, val, r8, rbx);
+          __ g1_write_barrier_post(rdx /* store_adr */,
+                                   val /* new_val */,
+                                   r15_thread /* thread */,
+                                   r8 /* tmp */,
+                                   rbx /* tmp2 */);
         }
 
       }
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1302,6 +1302,26 @@
   return generate_entry((address) CppInterpreter::accessor_entry);
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * pass that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 address InterpreterGenerator::generate_native_entry(bool synchronized) {
   assert(synchronized == false, "should be");
 
@@ -1357,6 +1377,10 @@
     entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);
     break;
 
+  case Interpreter::java_lang_ref_reference_get:
+    entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry();
+    break;
+
   default:
     ShouldNotReachHere();
   }
--- a/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -37,6 +37,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry();
   address generate_accessor_entry();
+  address generate_Reference_get_entry();
   address generate_method_handle_entry();
 
 #endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP
--- a/src/os/linux/vm/globals_linux.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/os/linux/vm/globals_linux.hpp	Mon May 02 10:51:36 2011 -0700
@@ -29,13 +29,19 @@
 // Defines Linux specific flags. They are not available on other platforms.
 //
 #define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
-  product(bool, UseOprofile, false,                                 \
-        "enable support for Oprofile profiler")                     \
-                                                                    \
-  product(bool, UseLinuxPosixThreadCPUClocks, true,                 \
-          "enable fast Linux Posix clocks where available")
-// NB: The default value of UseLinuxPosixThreadCPUClocks may be
-// overridden in Arguments::parse_each_vm_init_arg.
+  product(bool, UseOprofile, false,                                     \
+        "enable support for Oprofile profiler")                         \
+                                                                        \
+  product(bool, UseLinuxPosixThreadCPUClocks, true,                     \
+          "enable fast Linux Posix clocks where available")             \
+/*  NB: The default value of UseLinuxPosixThreadCPUClocks may be        \
+    overridden in Arguments::parse_each_vm_init_arg.  */                \
+                                                                        \
+  product(bool, UseHugeTLBFS, false,                                    \
+          "Use MAP_HUGETLB for large pages")                            \
+                                                                        \
+  product(bool, UseSHM, false,                                          \
+          "Use SYSV shared memory for large pages")
 
 //
 // Defines Linux-specific default values. The flags are available on all
--- a/src/os/linux/vm/os_linux.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2465,16 +2465,40 @@
   return res != (uintptr_t) MAP_FAILED;
 }
 
+// Define MAP_HUGETLB here so we can build HotSpot on old systems.
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000
+#endif
+
+// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
+#ifndef MADV_HUGEPAGE
+#define MADV_HUGEPAGE 14
+#endif
+
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
+  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+    int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
+    uintptr_t res =
+      (uintptr_t) ::mmap(addr, size, prot,
+                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
+                         -1, 0);
+    return res != (uintptr_t) MAP_FAILED;
+  }
+
   return commit_memory(addr, size, exec);
 }
 
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
+void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
+    // be supported or the memory may already be backed by huge pages.
+    ::madvise(addr, bytes, MADV_HUGEPAGE);
+  }
+}
 
 void os::free_memory(char *addr, size_t bytes) {
-  ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
-         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
+  ::madvise(addr, bytes, MADV_DONTNEED);
 }
 
 void os::numa_make_global(char *addr, size_t bytes) {
@@ -2812,6 +2836,43 @@
   return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
 }
 
+bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
+  bool result = false;
+  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
+                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
+                  -1, 0);
+
+  if (p != (void *) -1) {
+    // We don't know if this really is a huge page or not.
+    FILE *fp = fopen("/proc/self/maps", "r");
+    if (fp) {
+      while (!feof(fp)) {
+        char chars[257];
+        long x = 0;
+        if (fgets(chars, sizeof(chars), fp)) {
+          if (sscanf(chars, "%lx-%*lx", &x) == 1
+              && x == (long)p) {
+            if (strstr (chars, "hugepage")) {
+              result = true;
+              break;
+            }
+          }
+        }
+      }
+      fclose(fp);
+    }
+    munmap (p, page_size);
+    if (result)
+      return true;
+  }
+
+  if (warn) {
+    warning("HugeTLBFS is not supported by the operating system.");
+  }
+
+  return result;
+}
+
 /*
 * Set the coredump_filter bits to include largepages in core dump (bit 6)
 *
@@ -2854,7 +2915,16 @@
 static size_t _large_page_size = 0;
 
 bool os::large_page_init() {
-  if (!UseLargePages) return false;
+  if (!UseLargePages) {
+    UseHugeTLBFS = false;
+    UseSHM = false;
+    return false;
+  }
+
+  if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
+    // Our user has not expressed a preference, so we'll try both.
+    UseHugeTLBFS = UseSHM = true;
+  }
 
   if (LargePageSizeInBytes) {
     _large_page_size = LargePageSizeInBytes;
@@ -2899,6 +2969,9 @@
     }
   }
 
+  // Print a warning on failure if UseHugeTLBFS was specified on the command line.
+  bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
+
   const size_t default_page_size = (size_t)Linux::page_size();
   if (_large_page_size > default_page_size) {
     _page_sizes[0] = _large_page_size;
@@ -2906,6 +2979,14 @@
     _page_sizes[2] = 0;
   }
 
+  UseHugeTLBFS = UseHugeTLBFS &&
+                 Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
+
+  if (UseHugeTLBFS)
+    UseSHM = false;
+
+  UseLargePages = UseHugeTLBFS || UseSHM;
+
   set_coredump_filter();
 
   // Large page support is available on 2.6 or newer kernel, some vendors
@@ -2922,7 +3003,7 @@
 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
   // "exec" is passed in but not used.  Creating the shared image for
   // the code cache doesn't have an SHM_X executable permission to check.
-  assert(UseLargePages, "only for large pages");
+  assert(UseLargePages && UseSHM, "only for SHM large pages");
 
   key_t key = IPC_PRIVATE;
   char *addr;
@@ -2989,16 +3070,15 @@
   return _large_page_size;
 }
 
-// Linux does not support anonymous mmap with large page memory. The only way
-// to reserve large page memory without file backing is through SysV shared
-// memory API. The entire memory region is committed and pinned upfront.
-// Hopefully this will change in the future...
+// HugeTLBFS allows application to commit large page memory on demand;
+// with SysV SHM the entire memory region must be allocated as shared
+// memory.
 bool os::can_commit_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }
 
 bool os::can_execute_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }
 
 // Reserve memory at an arbitrary address, only if that area is
@@ -4090,6 +4170,23 @@
         UseNUMA = false;
       }
     }
+    // With SHM large pages we cannot uncommit a page, so there's no way
+    // we can make the adaptive lgrp chunk resizing work. If the user specified
+    // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
+    // disable adaptive resizing.
+    if (UseNUMA && UseLargePages && UseSHM) {
+      if (!FLAG_IS_DEFAULT(UseNUMA)) {
+        if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
+          UseLargePages = false;
+        } else {
+          warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
+          UseAdaptiveSizePolicy = false;
+          UseAdaptiveNUMAChunkSizing = false;
+        }
+      } else {
+        UseNUMA = false;
+      }
+    }
     if (!UseNUMA && ForceNUMA) {
       UseNUMA = true;
     }
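The new commit path rests on two stock Linux interfaces used in the hunks above: an anonymous mmap() with MAP_HUGETLB for explicit HugeTLBFS pages, and madvise(MADV_HUGEPAGE) as the unchecked best-effort hint in os::realign_memory(). A self-contained sketch of both calls outside HotSpot, assuming a 2 MB huge page size and (for the MAP_HUGETLB branch) a configured huge-page pool:

#include <stdio.h>
#include <stddef.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000      // same fallback definition as in os_linux.cpp
#endif
#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE 14
#endif

int main() {
  size_t len = 2 * 1024 * 1024;  // assume a 2 MB huge page size
  // Explicit HugeTLBFS mapping; fails unless a huge-page pool is configured.
  void* p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
  if (p == MAP_FAILED) {
    // Fall back to a normal mapping and merely hint for huge pages,
    // mirroring the unchecked madvise() in os::realign_memory().
    p = mmap(NULL, len, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    madvise(p, len, MADV_HUGEPAGE);
  }
  ((char*)p)[0] = 1;             // touch the page so it is actually backed
  munmap(p, len);
  return 0;
}

With the flags added in globals_linux.hpp the backend can also be chosen explicitly on the java command line, e.g. -XX:+UseLargePages -XX:+UseHugeTLBFS or -XX:+UseLargePages -XX:+UseSHM; the exact behaviour still depends on kernel configuration such as vm.nr_hugepages.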
--- a/src/os/linux/vm/os_linux.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Mon May 02 10:51:36 2011 -0700
@@ -86,6 +86,9 @@
 
   static void rebuild_cpu_to_node_map();
   static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
+
+  static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
+
  public:
   static void init_thread_fpu_state();
   static int  get_fpu_control_word();
--- a/src/os/solaris/vm/os_solaris.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2826,7 +2826,9 @@
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
   assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
   assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned.");
-  Solaris::set_mpss_range(addr, bytes, alignment_hint);
+  if (UseLargePages && UseMPSS) {
+    Solaris::set_mpss_range(addr, bytes, alignment_hint);
+  }
 }
 
 // Tell the OS to make the range local to the first-touching LWP
@@ -5044,6 +5046,20 @@
         UseNUMA = false;
       }
     }
+    // ISM is not compatible with the NUMA allocator - it always allocates
+    // pages round-robin across the lgroups.
+    if (UseNUMA && UseLargePages && UseISM) {
+      if (!FLAG_IS_DEFAULT(UseNUMA)) {
+        if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseISM)) {
+          UseLargePages = false;
+        } else {
+          warning("UseNUMA is not compatible with ISM large pages, disabling NUMA allocator");
+          UseNUMA = false;
+        }
+      } else {
+        UseNUMA = false;
+      }
+    }
     if (!UseNUMA && ForceNUMA) {
       UseNUMA = true;
     }
--- a/src/os/windows/vm/os_windows.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Mon May 02 10:51:36 2011 -0700
@@ -921,6 +921,8 @@
   HINSTANCE dbghelp;
   EXCEPTION_POINTERS ep;
   MINIDUMP_EXCEPTION_INFORMATION mei;
+  MINIDUMP_EXCEPTION_INFORMATION* pmei;
+
   HANDLE hProcess = GetCurrentProcess();
   DWORD processId = GetCurrentProcessId();
   HANDLE dumpFile;
@@ -971,17 +973,22 @@
     VMError::report_coredump_status("Failed to create file for dumping", false);
     return;
   }
-
-  ep.ContextRecord = (PCONTEXT) contextRecord;
-  ep.ExceptionRecord = (PEXCEPTION_RECORD) exceptionRecord;
-
-  mei.ThreadId = GetCurrentThreadId();
-  mei.ExceptionPointers = &ep;
+  if (exceptionRecord != NULL && contextRecord != NULL) {
+    ep.ContextRecord = (PCONTEXT) contextRecord;
+    ep.ExceptionRecord = (PEXCEPTION_RECORD) exceptionRecord;
+
+    mei.ThreadId = GetCurrentThreadId();
+    mei.ExceptionPointers = &ep;
+    pmei = &mei;
+  } else {
+    pmei = NULL;
+  }
+
 
   // Older versions of dbghelp.dll (the one shipped with Win2003 for example) may not support all
   // the dump types we really want. If first call fails, lets fall back to just use MiniDumpWithFullMemory then.
-  if (_MiniDumpWriteDump(hProcess, processId, dumpFile, dumpType, &mei, NULL, NULL) == false &&
-      _MiniDumpWriteDump(hProcess, processId, dumpFile, (MINIDUMP_TYPE)MiniDumpWithFullMemory, &mei, NULL, NULL) == false) {
+  if (_MiniDumpWriteDump(hProcess, processId, dumpFile, dumpType, pmei, NULL, NULL) == false &&
+      _MiniDumpWriteDump(hProcess, processId, dumpFile, (MINIDUMP_TYPE)MiniDumpWithFullMemory, pmei, NULL, NULL) == false) {
     VMError::report_coredump_status("Call to MiniDumpWriteDump() failed", false);
   } else {
     VMError::report_coredump_status(buffer, true);
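The point of the change above is that MiniDumpWriteDump accepts a NULL MINIDUMP_EXCEPTION_INFORMATION pointer, which is the right thing to pass when no exception context is available. A hedged stand-alone sketch of the same pattern (a direct dbghelp call rather than HotSpot's dynamically resolved _MiniDumpWriteDump; the file name and dump type are arbitrary):

#include <windows.h>
#include <dbghelp.h>
#pragma comment(lib, "dbghelp.lib")

static BOOL write_minidump(EXCEPTION_POINTERS* ep) {
  HANDLE file = CreateFileA("core.mdmp", GENERIC_WRITE, 0, NULL,
                            CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
  if (file == INVALID_HANDLE_VALUE) return FALSE;

  MINIDUMP_EXCEPTION_INFORMATION mei;
  MINIDUMP_EXCEPTION_INFORMATION* pmei = NULL;
  if (ep != NULL) {                        // only fill in mei when we really
    mei.ThreadId = GetCurrentThreadId();   // have exception pointers
    mei.ExceptionPointers = ep;
    mei.ClientPointers = FALSE;
    pmei = &mei;
  }
  BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(),
                              file, MiniDumpNormal, pmei, NULL, NULL);
  CloseHandle(file);
  return ok;
}

int main() { return write_minidump(NULL) ? 0 : 1; }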
--- a/src/share/vm/c1/c1_CodeStubs.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_CodeStubs.hpp	Mon May 02 10:51:36 2011 -0700
@@ -519,42 +519,126 @@
 // Code stubs for Garbage-First barriers.
 class G1PreBarrierStub: public CodeStub {
  private:
+  bool _do_load;
   LIR_Opr _addr;
   LIR_Opr _pre_val;
   LIR_PatchCode _patch_code;
   CodeEmitInfo* _info;
 
  public:
-  // pre_val (a temporary register) must be a register;
+  // Version that _does_ generate a load of the previous value from addr.
   // addr (the address of the field to be read) must be a LIR_Address
+  // pre_val (a temporary register) must be a register;
   G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
-    _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
+    _addr(addr), _pre_val(pre_val), _do_load(true),
+    _patch_code(patch_code), _info(info)
   {
     assert(_pre_val->is_register(), "should be temporary register");
     assert(_addr->is_address(), "should be the address of the field");
   }
 
+  // Version that _does not_ generate load of the previous value; the
+  // previous value is assumed to have already been loaded into pre_val.
+  G1PreBarrierStub(LIR_Opr pre_val) :
+    _addr(LIR_OprFact::illegalOpr), _pre_val(pre_val), _do_load(false),
+    _patch_code(lir_patch_none), _info(NULL)
+  {
+    assert(_pre_val->is_register(), "should be a register");
+  }
+
   LIR_Opr addr() const { return _addr; }
   LIR_Opr pre_val() const { return _pre_val; }
   LIR_PatchCode patch_code() const { return _patch_code; }
   CodeEmitInfo* info() const { return _info; }
+  bool do_load() const { return _do_load; }
 
   virtual void emit_code(LIR_Assembler* e);
   virtual void visit(LIR_OpVisitState* visitor) {
-    // don't pass in the code emit info since it's processed in the fast
-    // path
-    if (_info != NULL)
-      visitor->do_slow_case(_info);
-    else
+    if (_do_load) {
+      // don't pass in the code emit info since it's processed in the fast
+      // path
+      if (_info != NULL)
+        visitor->do_slow_case(_info);
+      else
+        visitor->do_slow_case();
+
+      visitor->do_input(_addr);
+      visitor->do_temp(_pre_val);
+    } else {
       visitor->do_slow_case();
-    visitor->do_input(_addr);
-    visitor->do_temp(_pre_val);
+      visitor->do_input(_pre_val);
+    }
   }
 #ifndef PRODUCT
   virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
 #endif // PRODUCT
 };
 
+// This G1 barrier code stub is used in Unsafe.getObject.
+// It generates a sequence of guards around the SATB
+// barrier code that are used to detect when we have
+// the referent field of a Reference object.
+// The first check is assumed to have been generated
+// in the code generated for Unsafe.getObject().
+
+class G1UnsafeGetObjSATBBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _val;
+  LIR_Opr _src;
+
+  LIR_Opr _tmp;
+  LIR_Opr _thread;
+
+  bool _gen_src_check;
+
+ public:
+  // A G1 barrier that is wrapped in generated guards that determine whether
+  // val (which is the result of Unsafe.getObject()) should be recorded in an
+  // SATB log buffer. We could be reading the referent field of a Reference object
+  // using Unsafe.getObject() and we need to record the referent.
+  //
+  // * val is the operand returned by the unsafe.getObject routine.
+  // * src is the base object
+  // * tmp is a temp used to load the klass of src, and then its reference type
+  // * thread is the thread object.
+
+  G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src,
+                                LIR_Opr tmp, LIR_Opr thread,
+                                bool gen_src_check) :
+    _val(val), _src(src),
+    _tmp(tmp), _thread(thread),
+    _gen_src_check(gen_src_check)
+  {
+    assert(_val->is_register(), "should have already been loaded");
+    assert(_src->is_register(), "should have already been loaded");
+
+    assert(_tmp->is_register(), "should be a temporary register");
+  }
+
+  LIR_Opr val() const { return _val; }
+  LIR_Opr src() const { return _src; }
+
+  LIR_Opr tmp() const { return _tmp; }
+  LIR_Opr thread() const { return _thread; }
+
+  bool gen_src_check() const { return _gen_src_check; }
+
+  virtual void emit_code(LIR_Assembler* e);
+
+  virtual void visit(LIR_OpVisitState* visitor) {
+    visitor->do_slow_case();
+    visitor->do_input(_val);
+    visitor->do_input(_src);
+    visitor->do_input(_thread);
+
+    visitor->do_temp(_tmp);
+  }
+
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); }
+#endif // PRODUCT
+};
+
 class G1PostBarrierStub: public CodeStub {
  private:
   LIR_Opr _addr;
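Both G1PreBarrierStub flavours above expand to the same SATB pre-barrier; the only difference is whether the previous value is loaded from addr inside the stub (_do_load) or arrives already loaded in pre_val (the Reference.get / Unsafe.getObject case). A sketch of that shared shape in plain C++ (the type, flag and queue below are illustrative stand-ins, not HotSpot structures):

#include <cstdio>
#include <vector>

typedef void* oop_t;                        // illustrative stand-in, not a HotSpot type
static bool marking_active = true;          // the SATB "active" flag the stub tests first
static std::vector<oop_t> satb_queue;       // stand-in for the per-thread SATB buffer

// Shape shared by both stub flavours: the _do_load form loads the previous
// value of the field itself, the other form receives it in pre_val.
static void g1_pre_barrier(oop_t* addr, oop_t pre_val, bool do_load) {
  if (!marking_active) return;              // nothing to log outside a marking cycle
  if (do_load) pre_val = *addr;             // previous value of the field at addr
  if (pre_val != NULL) satb_queue.push_back(pre_val);
}

int main() {
  int dummy;
  oop_t field = &dummy;
  g1_pre_barrier(&field, NULL, true);       // field-store form (_do_load == true)
  g1_pre_barrier(NULL, &dummy, false);      // Reference.get / Unsafe.getObject form
  std::printf("logged %d values\n", (int) satb_queue.size());
  return 0;
}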
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2913,6 +2913,46 @@
       block()->set_end(end);
       break;
     }
+
+  case vmIntrinsics::_Reference_get:
+    {
+      if (UseG1GC) {
+        // With java.lang.ref.Reference.get() we must go through the
+        // intrinsic - when G1 is enabled - even when get() is the root
+        // method of the compile so that, if necessary, the value in
+        // the referent field of the reference object gets recorded by
+        // the pre-barrier code.
+        // Specifically, if G1 is enabled, the value in the referent
+        // field is recorded by the G1 SATB pre barrier. This will
+        // result in the referent being marked live and the reference
+        // object removed from the list of discovered references during
+        // reference processing.
+
+        // Set up a stream so that appending instructions works properly.
+        ciBytecodeStream s(scope->method());
+        s.reset_to_bci(0);
+        scope_data()->set_stream(&s);
+        s.next();
+
+        // set up the initial block state
+        _block = start_block;
+        _state = start_block->state()->copy_for_parsing();
+        _last  = start_block;
+        load_local(objectType, 0);
+
+        // Emit the intrinsic node.
+        bool result = try_inline_intrinsics(scope->method());
+        if (!result) BAILOUT("failed to inline intrinsic");
+        method_return(apop());
+
+        // connect the begin and end blocks and we're all done.
+        BlockEnd* end = last()->as_BlockEnd();
+        block()->set_end(end);
+        break;
+      }
+      // Otherwise, fall thru
+    }
+
   default:
     scope_data()->add_to_work_list(start_block);
     iterate_all_blocks();
@@ -3150,6 +3190,15 @@
       append_unsafe_CAS(callee);
       return true;
 
+    case vmIntrinsics::_Reference_get:
+      // It is only when G1 is enabled that we absolutely
+      // need to use the intrinsic version of Reference.get()
+      // so that the value in the referent field, if necessary,
+      // can be registered by the pre-barrier code.
+      if (!UseG1GC) return false;
+      preserves_state = true;
+      break;
+
     default                       : return false; // do not inline
   }
   // create intrinsic node
--- a/src/share/vm/c1/c1_Instruction.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Mon May 02 10:51:36 2011 -0700
@@ -596,7 +596,7 @@
 // of the inserted block, without recomputing the values of the other blocks
 // in the CFG. Therefore the value of "depth_first_number" in BlockBegin becomes meaningless.
 BlockBegin* BlockBegin::insert_block_between(BlockBegin* sux) {
-  BlockBegin* new_sux = new BlockBegin(-99);
+  BlockBegin* new_sux = new BlockBegin(end()->state()->bci());
 
   // mark this block (special treatment when block order is computed)
   new_sux->set(critical_edge_split_flag);
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1209,6 +1209,38 @@
   set_no_result(x);
 }
 
+// Example: ref.get()
+// Combination of LoadField and G1 pre-write barrier
+void LIRGenerator::do_Reference_get(Intrinsic* x) {
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  assert(x->number_of_arguments() == 1, "wrong type");
+
+  LIRItem reference(x->argument_at(0), this);
+  reference.load_item();
+
+  // need to perform the null check on the reference object
+  CodeEmitInfo* info = NULL;
+  if (x->needs_null_check()) {
+    info = state_for(x);
+  }
+
+  LIR_Address* referent_field_adr =
+    new LIR_Address(reference.result(), referent_offset, T_OBJECT);
+
+  LIR_Opr result = rlock_result(x);
+
+  __ load(referent_field_adr, result, info);
+
+  // Register the value in the referent field with the pre-barrier
+  pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */,
+              result /* pre_val */,
+              false  /* do_load */,
+              false  /* patch */,
+              NULL   /* info */);
+}
 
 // Example: object.getClass ()
 void LIRGenerator::do_getClass(Intrinsic* x) {
@@ -1351,13 +1383,14 @@
 
 // Various barriers
 
-void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+void LIRGenerator::pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                               bool do_load, bool patch, CodeEmitInfo* info) {
   // Do the pre-write barrier, if any.
   switch (_bs->kind()) {
 #ifndef SERIALGC
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-      G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
+      G1SATBCardTableModRef_pre_barrier(addr_opr, pre_val, do_load, patch, info);
       break;
 #endif // SERIALGC
     case BarrierSet::CardTableModRef:
@@ -1398,9 +1431,8 @@
 ////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
-void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
-  if (G1DisablePreBarrier) return;
-
+void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                                                     bool do_load, bool patch, CodeEmitInfo* info) {
   // First we test whether marking is in progress.
   BasicType flag_type;
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
@@ -1419,26 +1451,40 @@
   // Read the marking-in-progress flag.
   LIR_Opr flag_val = new_register(T_INT);
   __ load(mark_active_flag_addr, flag_val);
-
-  LIR_PatchCode pre_val_patch_code =
-    patch ? lir_patch_normal : lir_patch_none;
-
-  LIR_Opr pre_val = new_register(T_OBJECT);
-
   __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
-  if (!addr_opr->is_address()) {
-    assert(addr_opr->is_register(), "must be");
-    addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, T_OBJECT));
+
+  LIR_PatchCode pre_val_patch_code = lir_patch_none;
+
+  CodeStub* slow;
+
+  if (do_load) {
+    assert(pre_val == LIR_OprFact::illegalOpr, "sanity");
+    assert(addr_opr != LIR_OprFact::illegalOpr, "sanity");
+
+    if (patch)
+      pre_val_patch_code = lir_patch_normal;
+
+    pre_val = new_register(T_OBJECT);
+
+    if (!addr_opr->is_address()) {
+      assert(addr_opr->is_register(), "must be");
+      addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, T_OBJECT));
+    }
+    slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, info);
+  } else {
+    assert(addr_opr == LIR_OprFact::illegalOpr, "sanity");
+    assert(pre_val->is_register(), "must be");
+    assert(pre_val->type() == T_OBJECT, "must be an object");
+    assert(info == NULL, "sanity");
+
+    slow = new G1PreBarrierStub(pre_val);
   }
-  CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
-                                        info);
+
   __ branch(lir_cond_notEqual, T_INT, slow);
   __ branch_destination(slow->continuation());
 }
 
 void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
-  if (G1DisablePostBarrier) return;
-
   // If the "new_val" is a constant NULL, no barrier is necessary.
   if (new_val->is_constant() &&
       new_val->as_constant_ptr()->as_jobject() == NULL) return;
@@ -1662,6 +1708,8 @@
   if (is_oop) {
     // Do the pre-write barrier, if any.
     pre_barrier(LIR_OprFact::address(address),
+                LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */,
                 needs_patching,
                 (info ? new CodeEmitInfo(info) : NULL));
   }
@@ -2091,9 +2139,144 @@
   off.load_item();
   src.load_item();
 
-  LIR_Opr reg = reg = rlock_result(x, x->basic_type());
+  LIR_Opr reg = rlock_result(x, x->basic_type());
 
   get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile());
+
+#ifndef SERIALGC
+  // We might be reading the value of the referent field of a
+  // Reference object in order to attach it back to the live
+  // object graph. If G1 is enabled then we need to record
+  // the value that is being returned in an SATB log buffer.
+  //
+  // We need to generate code similar to the following...
+  //
+  // if (offset == java_lang_ref_Reference::referent_offset) {
+  //   if (src != NULL) {
+  //     if (klass(src)->reference_type() != REF_NONE) {
+  //       pre_barrier(..., reg, ...);
+  //     }
+  //   }
+  // }
+  //
+  // The first non-constant check of either the offset or
+  // the src operand will be done here; the remainder
+  // will take place in the generated code stub.
+
+  if (UseG1GC && type == T_OBJECT) {
+    bool gen_code_stub = true;          // Assume we need to generate the slow code stub.
+    bool gen_offset_check = true;       // Assume the code stub has to generate the offset guard.
+    bool gen_source_check = true;       // Assume the code stub has to check the src object for null.
+
+    if (off.is_constant()) {
+      jlong off_con = (off.type()->is_int() ?
+                        (jlong) off.get_jint_constant() :
+                        off.get_jlong_constant());
+
+
+      if (off_con != (jlong) java_lang_ref_Reference::referent_offset) {
+        // The constant offset is something other than referent_offset.
+        // We can skip generating/checking the remaining guards and
+        // skip generation of the code stub.
+        gen_code_stub = false;
+      } else {
+        // The constant offset is the same as referent_offset -
+        // we do not need to generate a runtime offset check.
+        gen_offset_check = false;
+      }
+    }
+
+    // We don't need to generate the stub if the source object is an array
+    if (gen_code_stub && src.type()->is_array()) {
+      gen_code_stub = false;
+    }
+
+    if (gen_code_stub) {
+      // We still need to continue with the checks.
+      if (src.is_constant()) {
+        ciObject* src_con = src.get_jobject_constant();
+
+        if (src_con->is_null_object()) {
+          // The constant src object is null - We can skip
+          // generating the code stub.
+          gen_code_stub = false;
+        } else {
+          // Non-null constant source object. We still have to generate
+          // the slow stub - but we don't need to generate the runtime
+          // null object check.
+          gen_source_check = false;
+        }
+      }
+    }
+
+    if (gen_code_stub) {
+      // Temporaries.
+      LIR_Opr src_klass = new_register(T_OBJECT);
+
+      // Get the thread pointer for the pre-barrier
+      LIR_Opr thread = getThreadPointer();
+
+      CodeStub* stub;
+
+      // We can generate at most one runtime check here. Let's start with
+      // the offset check.
+      if (gen_offset_check) {
+        // if (offset == referent_offset) -> slow code stub
+        // If offset is an int then we can do the comparison with the
+        // referent_offset constant; otherwise we need to move
+        // referent_offset into a temporary register and generate
+        // a reg-reg compare.
+
+        LIR_Opr referent_off;
+
+        if (off.type()->is_int()) {
+          referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset);
+        } else {
+          assert(off.type()->is_long(), "what else?");
+          referent_off = new_register(T_LONG);
+          __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off);
+        }
+
+        __ cmp(lir_cond_equal, off.result(), referent_off);
+
+        // Optionally generate "src == null" check.
+        stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    gen_source_check);
+
+        __ branch(lir_cond_equal, as_BasicType(off.type()), stub);
+      } else {
+        if (gen_source_check) {
+          // offset is a const and equals referent offset
+          // if (source != null) -> slow code stub
+          __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL));
+
+          // Since we are generating the "if src == null" guard here,
+          // there is no need to generate the "src == null" check again.
+          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    false);
+
+          __ branch(lir_cond_notEqual, T_OBJECT, stub);
+        } else {
+          // We have statically determined that offset == referent_offset
+          // && src != null so we unconditionally branch to code stub
+          // to perform the guards and record reg in the SATB log buffer.
+
+          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    false);
+
+          __ branch(lir_cond_always, T_ILLEGAL, stub);
+        }
+      }
+
+      // Continuation point
+      __ branch_destination(stub->continuation());
+    }
+  }
+#endif // SERIALGC
+
   if (x->is_volatile() && os::is_MP()) __ membar_acquire();
 }
 
@@ -2759,6 +2942,10 @@
     do_AttemptUpdate(x);
     break;
 
+  case vmIntrinsics::_Reference_get:
+    do_Reference_get(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
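The guard pruning added to the unsafe object-get path earlier in this file's hunk is essentially static analysis of the call site: values known at compile time eliminate the corresponding runtime check, or the whole code stub. A self-contained sketch of that decision logic (the struct and function names are assumptions for illustration, not C1 APIs):

#include <cstdio>

struct UnsafeGetSite {                    // what the compiler knows about the call site
  bool offset_is_constant; long constant_offset;
  bool src_is_constant;    bool src_is_null_constant;
  bool src_is_array;
};

struct GuardPlan { bool emit_stub, check_offset, check_src_null; };

static GuardPlan plan_guards(const UnsafeGetSite& s, long referent_offset) {
  GuardPlan p = { true, true, true };
  if (s.offset_is_constant) {
    if (s.constant_offset != referent_offset) p.emit_stub = false;  // can never be the referent
    else p.check_offset = false;                                    // always the referent offset
  }
  if (s.src_is_array) p.emit_stub = false;                          // arrays are never References
  if (p.emit_stub && s.src_is_constant) {
    if (s.src_is_null_constant) p.emit_stub = false;                // NULL source: nothing to log
    else p.check_src_null = false;                                  // known non-null source
  }
  return p;
}

int main() {
  UnsafeGetSite site = { true, 16, false, false, false };           // constant offset, assumed == referent_offset (16)
  GuardPlan p = plan_guards(site, 16);
  std::printf("stub=%d offset_check=%d src_null_check=%d\n",
              p.emit_stub, p.check_offset, p.check_src_null);
  return 0;
}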
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Mon May 02 10:51:36 2011 -0700
@@ -246,6 +246,7 @@
   void do_AttemptUpdate(Intrinsic* x);
   void do_NIOCheckIndex(Intrinsic* x);
   void do_FPIntrinsics(Intrinsic* x);
+  void do_Reference_get(Intrinsic* x);
 
   void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
 
@@ -260,13 +261,14 @@
 
   // generic interface
 
-  void pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
+  void pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val, bool do_load, bool patch, CodeEmitInfo* info);
   void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
   // specific implementations
   // pre barriers
 
-  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
+  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                                         bool do_load, bool patch, CodeEmitInfo* info);
 
   // post barriers
 
--- a/src/share/vm/c1/c1_Runtime1.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1026,9 +1026,21 @@
           // first replace the tail, then the call
 #ifdef ARM
           if(stub_id == Runtime1::load_klass_patching_id && !VM_Version::supports_movw()) {
+            nmethod* nm = CodeCache::find_nmethod(instr_pc);
+            oop* oop_addr = NULL;
+            assert(nm != NULL, "invalid nmethod_pc");
+            RelocIterator oops(nm, copy_buff, copy_buff + 1);
+            while (oops.next()) {
+              if (oops.type() == relocInfo::oop_type) {
+                oop_Relocation* r = oops.oop_reloc();
+                oop_addr = r->oop_addr();
+                break;
+              }
+            }
+            assert(oop_addr != NULL, "oop relocation must exist");
             copy_buff -= *byte_count;
             NativeMovConstReg* n_copy2 = nativeMovConstReg_at(copy_buff);
-            n_copy2->set_data((intx) (load_klass()), instr_pc);
+            n_copy2->set_pc_relative_offset((address)oop_addr, instr_pc);
           }
 #endif
 
--- a/src/share/vm/classfile/classFileParser.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2196,11 +2196,12 @@
                                               TRAPS) {
   typeArrayHandle nullHandle;
   int length = methods()->length();
-  // If JVMTI original method ordering is enabled we have to
+  // If JVMTI original method ordering or sharing is enabled we have to
   // remember the original class file ordering.
   // We temporarily use the vtable_index field in the methodOop to store the
   // class file index, so we can read in after calling qsort.
-  if (JvmtiExport::can_maintain_original_method_order()) {
+  // Put the method ordering in the shared archive.
+  if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
     for (int index = 0; index < length; index++) {
       methodOop m = methodOop(methods->obj_at(index));
       assert(!m->valid_vtable_index(), "vtable index should not be set");
@@ -2214,8 +2215,9 @@
                               methods_parameter_annotations(),
                               methods_default_annotations());
 
-  // If JVMTI original method ordering is enabled construct int array remembering the original ordering
-  if (JvmtiExport::can_maintain_original_method_order()) {
+  // If JVMTI original method ordering or sharing is enabled construct int
+  // array remembering the original ordering
+  if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
     typeArrayOop new_ordering = oopFactory::new_permanent_intArray(length, CHECK_(nullHandle));
     typeArrayHandle method_ordering(THREAD, new_ordering);
     for (int index = 0; index < length; index++) {
--- a/src/share/vm/classfile/javaClasses.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1357,7 +1357,7 @@
 };
 
 
-void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) {
+void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS) {
   if (!StackTraceInThrowable) return;
   ResourceMark rm(THREAD);
 
@@ -1374,6 +1374,16 @@
   JavaThread* thread = (JavaThread*)THREAD;
   BacktraceBuilder bt(CHECK);
 
+  // If there is no Java frame just return the method that was being called
+  // with bci 0
+  if (!thread->has_last_Java_frame()) {
+    if (max_depth >= 1 && method() != NULL) {
+      bt.push(method(), 0, CHECK);
+      set_backtrace(throwable(), bt.backtrace());
+    }
+    return;
+  }
+
   // Instead of using vframe directly, this version of fill_in_stack_trace
   // basically handles everything by hand. This significantly improved the
   // speed of this method call up to 28.5% on Solaris sparc. 27.1% on Windows.
@@ -1477,7 +1487,7 @@
   set_backtrace(throwable(), bt.backtrace());
 }
 
-void java_lang_Throwable::fill_in_stack_trace(Handle throwable) {
+void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method) {
   // No-op if stack trace is disabled
   if (!StackTraceInThrowable) {
     return;
@@ -1491,7 +1501,7 @@
   PRESERVE_EXCEPTION_MARK;
 
   JavaThread* thread = JavaThread::active();
-  fill_in_stack_trace(throwable, thread);
+  fill_in_stack_trace(throwable, method, thread);
   // ignore exceptions thrown during stack trace filling
   CLEAR_PENDING_EXCEPTION;
 }
--- a/src/share/vm/classfile/javaClasses.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Mon May 02 10:51:36 2011 -0700
@@ -440,8 +440,8 @@
   static void fill_in_stack_trace_of_preallocated_backtrace(Handle throwable);
 
   // Fill in current stack trace, can cause GC
-  static void fill_in_stack_trace(Handle throwable, TRAPS);
-  static void fill_in_stack_trace(Handle throwable);
+  static void fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS);
+  static void fill_in_stack_trace(Handle throwable, methodHandle method = methodHandle());
   // Programmatic access to stack trace
   static oop  get_stack_trace_element(oop throwable, int index, TRAPS);
   static int  get_stack_trace_depth(oop throwable, TRAPS);
--- a/src/share/vm/classfile/systemDictionary.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1255,6 +1255,16 @@
         methodHandle m(THREAD, methodOop(methods->obj_at(index2)));
         m()->link_method(m, CHECK_(nh));
       }
+      if (JvmtiExport::has_redefined_a_class()) {
+        // Reinitialize vtable because RedefineClasses may have changed some
+        // entries in this vtable for super classes so the CDS vtable might
+        // point to old or obsolete entries.  RedefineClasses doesn't fix up
+        // vtables in the shared system dictionary, only the main one.
+        // RedefineClasses also redefines the itable, so fix that up as well.
+        ResourceMark rm(THREAD);
+        ik->vtable()->initialize_vtable(false, CHECK_(nh));
+        ik->itable()->initialize_itable(false, CHECK_(nh));
+      }
     }
 
     if (TraceClassLoading) {
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Mon May 02 10:51:36 2011 -0700
@@ -678,6 +678,10 @@
   do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
    do_name(     checkIndex_name,                                 "checkIndex")                                          \
                                                                                                                         \
+  /* java/lang/ref/Reference */                                                                                         \
+  do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
+                                                                                                                        \
+                                                                                                                        \
   do_class(sun_misc_AtomicLongCSImpl,     "sun/misc/AtomicLongCSImpl")                                                  \
   do_intrinsic(_get_AtomicLong,           sun_misc_AtomicLongCSImpl, get_name, void_long_signature,              F_R)   \
   /*   (symbols get_name and void_long_signature defined above) */                                                      \
--- a/src/share/vm/compiler/compileBroker.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/compiler/compileBroker.cpp	Mon May 02 10:51:36 2011 -0700
@@ -976,6 +976,15 @@
     return;
   }
 
+  // If the requesting thread is holding the pending list lock
+  // then we just return. We can't risk blocking while holding
+  // the pending list lock or a 3-way deadlock may occur
+  // between the reference handler thread, a GC (instigated
+  // by a compiler thread), and compiled method registration.
+  if (instanceRefKlass::owns_pending_list_lock(JavaThread::current())) {
+    return;
+  }
+
   // Outputs from the following MutexLocker block:
   CompileTask* task     = NULL;
   bool         blocking = false;
@@ -1304,17 +1313,8 @@
 // Should the current thread be blocked until this compilation request
 // has been fulfilled?
 bool CompileBroker::is_compile_blocking(methodHandle method, int osr_bci) {
-  if (!BackgroundCompilation) {
-    Symbol* class_name = method->method_holder()->klass_part()->name();
-    if (class_name->starts_with("java/lang/ref/Reference", 23)) {
-      // The reference handler thread can dead lock with the GC if compilation is blocking,
-      // so we avoid blocking compiles for anything in the java.lang.ref.Reference class,
-      // including inner classes such as ReferenceHandler.
-      return false;
-    }
-    return true;
-  }
-  return false;
+  assert(!instanceRefKlass::owns_pending_list_lock(JavaThread::current()), "possible deadlock");
+  return !BackgroundCompilation;
 }
 
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1963,10 +1963,21 @@
 // Iteration support, mostly delegated from a CMS generation
 
 void CompactibleFreeListSpace::save_marks() {
-  // mark the "end" of the used space at the time of this call;
+  assert(Thread::current()->is_VM_thread(),
+         "Global variable should only be set when single-threaded");
+  // Mark the "end" of the used space at the time of this call;
   // note, however, that promoted objects from this point
   // on are tracked in the _promoInfo below.
   set_saved_mark_word(unallocated_block());
+#ifdef ASSERT
+  // Check the sanity of save_marks() etc.
+  MemRegion ur    = used_region();
+  MemRegion urasm = used_region_at_save_marks();
+  assert(ur.contains(urasm),
+         err_msg(" Error at save_marks(): [" PTR_FORMAT "," PTR_FORMAT ")"
+                 " should contain [" PTR_FORMAT "," PTR_FORMAT ")",
+                 ur.start(), ur.end(), urasm.start(), urasm.end()));
+#endif
   // inform allocator that promotions should be tracked.
   assert(_promoInfo.noPromotions(), "_promoInfo inconsistency");
   _promoInfo.startTrackingPromotions();
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon May 02 10:51:36 2011 -0700
@@ -3189,10 +3189,9 @@
 }
 
 void CMSCollector::setup_cms_unloading_and_verification_state() {
-  const  bool should_verify =    VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
+  const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =    SharedHeap::SO_Symbols | SharedHeap::SO_Strings
-                             |   SharedHeap::SO_CodeCache;
+  const  int  rso           =   SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
 
   if (should_unload_classes()) {   // Should unload classes this cycle
     remove_root_scanning_option(rso);  // Shrink the root set appropriately
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Mon May 02 10:51:36 2011 -0700
@@ -172,7 +172,7 @@
 
   // hash a given key (index of card_ptr) with the specified size
   static unsigned int hash(size_t key, size_t size) {
-    return (unsigned int) key % size;
+    return (unsigned int) (key % size);
   }
 
   // hash a given key (index of card_ptr)
@@ -180,11 +180,11 @@
     return hash(key, _n_card_counts);
   }
 
-  unsigned ptr_2_card_num(jbyte* card_ptr) {
-    return (unsigned) (card_ptr - _ct_bot);
+  unsigned int ptr_2_card_num(jbyte* card_ptr) {
+    return (unsigned int) (card_ptr - _ct_bot);
   }
 
-  jbyte* card_num_2_ptr(unsigned card_num) {
+  jbyte* card_num_2_ptr(unsigned int card_num) {
     return (jbyte*) (_ct_bot + card_num);
   }
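The hash() change above is purely about operator precedence: the cast binds tighter than %, so the old form narrowed the card index to 32 bits before the modulo. A tiny stand-alone demonstration (the sample values are arbitrary; the difference shows up on a 64-bit build where size_t is wider than 32 bits):

#include <cstdio>
#include <cstddef>

int main() {
  size_t key  = 0x100000003ULL;   // a key wider than 32 bits
  size_t size = 1000;             // table size
  // Old form: key is narrowed to 32 bits (here: 3) before the modulo.
  unsigned int old_hash = (unsigned int) key % size;
  // New form: reduce modulo size first, then narrow; the result always fits.
  unsigned int new_hash = (unsigned int) (key % size);
  std::printf("old=%u new=%u\n", old_hash, new_hash);   // old=3 new=299 on a 64-bit build
  return 0;
}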
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1161,6 +1161,7 @@
     TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC",
                 PrintGC, true, gclog_or_tty);
 
+    TraceCollectorStats tcs(g1mm()->full_collection_counters());
     TraceMemoryManagerStats tms(true /* fullGC */);
 
     double start = os::elapsedTime();
@@ -1339,6 +1340,7 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+  g1mm()->update_counters();
 
   return true;
 }
@@ -1971,6 +1973,10 @@
 
   init_mutator_alloc_region();
 
+  // Create the monitoring and management support here so that the
+  // values in the heap have been properly initialized.
+  _g1mm = new G1MonitoringSupport(this, &_g1_storage);
+
   return JNI_OK;
 }
 
@@ -2113,6 +2119,28 @@
      (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
 }
 
+#ifndef PRODUCT
+void G1CollectedHeap::allocate_dummy_regions() {
+  // Let's fill up most of the region
+  size_t word_size = HeapRegion::GrainWords - 1024;
+  // And as a result the region we'll allocate will be humongous.
+  guarantee(isHumongous(word_size), "sanity");
+
+  for (uintx i = 0; i < G1DummyRegionsPerGC; ++i) {
+    // Let's use the existing mechanism for the allocation
+    HeapWord* dummy_obj = humongous_obj_allocate(word_size);
+    if (dummy_obj != NULL) {
+      MemRegion mr(dummy_obj, word_size);
+      CollectedHeap::fill_with_object(mr);
+    } else {
+      // If we can't allocate once, we probably cannot allocate
+      // again. Let's get out of the loop.
+      break;
+    }
+  }
+}
+#endif // !PRODUCT
+
 void G1CollectedHeap::increment_full_collections_completed(bool concurrent) {
   MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
 
@@ -2777,17 +2805,26 @@
                              bool silent,
                              bool use_prev_marking) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
-    if (!silent) { gclog_or_tty->print("roots "); }
+    if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
     VerifyRootsClosure rootsCl(use_prev_marking);
     CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
-    process_strong_roots(true,  // activate StrongRootsScope
-                         false,
-                         SharedHeap::SO_AllClasses,
+    // We apply the relevant closures to all the oops in the
+    // system dictionary, the string table and the code cache.
+    const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+    process_strong_roots(true,      // activate StrongRootsScope
+                         true,      // we set "collecting perm gen" to true,
+                                    // so we don't reset the dirty cards in the perm gen.
+                         SharedHeap::ScanningOption(so),  // roots scanning options
                          &rootsCl,
                          &blobsCl,
                          &rootsCl);
+    // Since we used "collecting_perm_gen" == true above, we will not have
+    // checked the refs from perm into the G1-collected heap. We check those
+    // references explicitly below. Whether the relevant cards are dirty
+    // is checked further below in the rem set verification.
+    if (!silent) { gclog_or_tty->print("Permgen roots "); }
+    perm_gen()->oop_iterate(&rootsCl);
     bool failures = rootsCl.failures();
-    rem_set()->invalidate(perm_gen()->used_region(), false);
     if (!silent) { gclog_or_tty->print("HeapRegionSets "); }
     verify_region_sets();
     if (!silent) { gclog_or_tty->print("HeapRegions "); }
@@ -3164,6 +3201,7 @@
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
 
+    TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
     TraceMemoryManagerStats tms(false /* fullGC */);
 
     // If the secondary_free_list is not empty, append it to the
@@ -3338,6 +3376,8 @@
         doConcurrentMark();
       }
 
+      allocate_dummy_regions();
+
 #if YOUNG_LIST_VERBOSE
       gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:");
       _young_list->print();
@@ -3401,6 +3441,8 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+  g1mm()->update_counters();
+
   if (G1SummarizeRSetStats &&
       (G1SummarizeRSetStatsPeriod > 0) &&
       (total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
@@ -5314,6 +5356,7 @@
     if (new_alloc_region != NULL) {
       g1_policy()->update_region_num(true /* next_is_young */);
       set_region_short_lived_locked(new_alloc_region);
+      g1mm()->update_eden_counters();
       return new_alloc_region;
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon May 02 10:51:36 2011 -0700
@@ -28,7 +28,9 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1AllocRegion.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/g1MonitoringSupport.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/shared/hSpaceCounters.hpp"
 #include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
 #include "memory/memRegion.hpp"
@@ -57,6 +59,7 @@
 class ConcurrentMark;
 class ConcurrentMarkThread;
 class ConcurrentG1Refine;
+class GenerationCounters;
 
 typedef OverflowTaskQueue<StarTask>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
@@ -236,6 +239,9 @@
   // current collection.
   HeapRegion* _gc_alloc_region_list;
 
+  // Helper for monitoring and management support.
+  G1MonitoringSupport* _g1mm;
+
   // Determines PLAB size for a particular allocation purpose.
   static size_t desired_plab_sz(GCAllocPurpose purpose);
 
@@ -298,6 +304,14 @@
   // started is maintained in _total_full_collections in CollectedHeap.
   volatile unsigned int _full_collections_completed;
 
+  // This is a non-product method that is helpful for testing. It is
+  // called at the end of a GC and artificially expands the heap by
+  // allocating a number of dead regions. This way we can induce very
+  // frequent marking cycles and stress the cleanup / concurrent
+  // cleanup code more (as all the regions that will be allocated by
+  // this method will be found dead by the marking cycle).
+  void allocate_dummy_regions() PRODUCT_RETURN;
+
   // These are macros so that, if the assert fires, we get the correct
   // line number, file, etc.
 
@@ -542,6 +556,9 @@
   HeapWord* expand_and_allocate(size_t word_size);
 
 public:
+
+  G1MonitoringSupport* g1mm() { return _g1mm; }
+
   // Expand the garbage-first heap by at least the given size (in bytes!).
   // Returns true if the heap was expanded by the requested amount;
   // false otherwise.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Mon May 02 10:51:36 2011 -0700
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1MonitoringSupport.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+
+G1MonitoringSupport::G1MonitoringSupport(G1CollectedHeap* g1h,
+                                         VirtualSpace* g1_storage_addr) :
+  _g1h(g1h),
+  _incremental_collection_counters(NULL),
+  _full_collection_counters(NULL),
+  _non_young_collection_counters(NULL),
+  _old_space_counters(NULL),
+  _young_collection_counters(NULL),
+  _eden_counters(NULL),
+  _from_counters(NULL),
+  _to_counters(NULL),
+  _g1_storage_addr(g1_storage_addr)
+{
+  // Counters for GC collections
+  //
+  //  name "collector.0".  In a generational collector this would be the
+  // young generation collection.
+  _incremental_collection_counters =
+    new CollectorCounters("G1 incremental collections", 0);
+  //   name "collector.1".  In a generational collector this would be the
+  // old generation collection.
+  _full_collection_counters =
+    new CollectorCounters("G1 stop-the-world full collections", 1);
+
+  // Timer sampling, for all counters that support it, only updates the
+  // "used" value.  See the take_sample() method.  G1 requires both used and
+  // capacity to be updated, so sampling is not currently used.  It might
+  // be sufficient to update all counters in take_sample() even though
+  // take_sample() only returns "used".  When sampling was used, some
+  // anomalous values were emitted, which may have been the consequence
+  // of not updating all values simultaneously (i.e., see the calculation done
+  // in eden_space_used(): is it possible that the values used to
+  // calculate either eden_used or survivor_used are being updated by
+  // the collector while the sample is being taken?).
+  const bool sampled = false;
+
+  // "Generation" and "Space" counters.
+  //
+  //  name "generation.1" This is logically the old generation in
+  // generational GC terms.  The "1, 1" parameters are for
+  // the n-th generation (=1) with 1 space.
+  // Counters are created from minCapacity, maxCapacity, and capacity
+  _non_young_collection_counters =
+    new GenerationCounters("whole heap", 1, 1, _g1_storage_addr);
+
+  //  name  "generation.1.space.0"
+  // Counters are created from maxCapacity, capacity, initCapacity,
+  // and used.
+  _old_space_counters = new HSpaceCounters("space", 0,
+    _g1h->max_capacity(), _g1h->capacity(), _non_young_collection_counters);
+
+  //   Young collection set
+  //  name "generation.0".  This is logically the young generation.
+  //  The "0, 3" are paremeters for the n-th genertaion (=0) with 3 spaces.
+  // See  _non_young_collection_counters for additional counters
+  _young_collection_counters = new GenerationCounters("young", 0, 3, NULL);
+
+  // Replace "max_heap_byte_size() with maximum young gen size for
+  // g1Collectedheap
+  //  name "generation.0.space.0"
+  // See _old_space_counters for additional counters
+  _eden_counters = new HSpaceCounters("eden", 0,
+    _g1h->max_capacity(), eden_space_committed(),
+    _young_collection_counters);
+
+  //  name "generation.0.space.1"
+  // See _old_space_counters for additional counters
+  // Set the arguments to indicate that this survivor space is not used.
+  _from_counters = new HSpaceCounters("s0", 1, (long) 0, (long) 0,
+    _young_collection_counters);
+
+  //  name "generation.0.space.2"
+  // See _old_space_counters for additional counters
+  _to_counters = new HSpaceCounters("s1", 2,
+    _g1h->max_capacity(),
+    survivor_space_committed(),
+    _young_collection_counters);
+}
+
+size_t G1MonitoringSupport::overall_committed() {
+  return g1h()->capacity();
+}
+
+size_t G1MonitoringSupport::overall_used() {
+  return g1h()->used_unlocked();
+}
+
+size_t G1MonitoringSupport::eden_space_committed() {
+  return MAX2(eden_space_used(), (size_t) HeapRegion::GrainBytes);
+}
+
+size_t G1MonitoringSupport::eden_space_used() {
+  size_t young_list_length = g1h()->young_list()->length();
+  size_t eden_used = young_list_length * HeapRegion::GrainBytes;
+  size_t survivor_used = survivor_space_used();
+  eden_used = subtract_up_to_zero(eden_used, survivor_used);
+  return eden_used;
+}
+
+size_t G1MonitoringSupport::survivor_space_committed() {
+  return MAX2(survivor_space_used(),
+              (size_t) HeapRegion::GrainBytes);
+}
+
+size_t G1MonitoringSupport::survivor_space_used() {
+  size_t survivor_num = g1h()->g1_policy()->recorded_survivor_regions();
+  size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
+  return survivor_used;
+}
+
+size_t G1MonitoringSupport::old_space_committed() {
+  size_t committed = overall_committed();
+  size_t eden_committed = eden_space_committed();
+  size_t survivor_committed = survivor_space_committed();
+  committed = subtract_up_to_zero(committed, eden_committed);
+  committed = subtract_up_to_zero(committed, survivor_committed);
+  committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
+  return committed;
+}
+
+// See the comment near the top of g1MonitoringSupport.hpp for
+// an explanation of these calculations for "used" and "capacity".
+size_t G1MonitoringSupport::old_space_used() {
+  size_t used = overall_used();
+  size_t eden_used = eden_space_used();
+  size_t survivor_used = survivor_space_used();
+  used = subtract_up_to_zero(used, eden_used);
+  used = subtract_up_to_zero(used, survivor_used);
+  return used;
+}
+
+void G1MonitoringSupport::update_counters() {
+  if (UsePerfData) {
+    eden_counters()->update_capacity(eden_space_committed());
+    eden_counters()->update_used(eden_space_used());
+    to_counters()->update_capacity(survivor_space_committed());
+    to_counters()->update_used(survivor_space_used());
+    old_space_counters()->update_capacity(old_space_committed());
+    old_space_counters()->update_used(old_space_used());
+    non_young_collection_counters()->update_all();
+  }
+}
+
+void G1MonitoringSupport::update_eden_counters() {
+  if (UsePerfData) {
+    eden_counters()->update_capacity(eden_space_committed());
+    eden_counters()->update_used(eden_space_used());
+  }
+}
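For orientation, here is a minimal sketch of how this monitoring object is driven by the heap, based only on the call sites visible in the g1CollectedHeap.cpp hunk earlier in this changeset. The construction line is an assumption (the initialize() change is not shown in this section), and the _g1_storage field name is illustrative:

  // Assumed construction during heap initialization (not shown in this section).
  _g1mm = new G1MonitoringSupport(this, &_g1_storage);

  // After an evacuation pause (see the g1CollectedHeap.cpp hunk above):
  g1mm()->update_counters();        // refresh eden, survivor and old space counters

  // When a new young (eden) region is handed out:
  g1mm()->update_eden_counters();   // refresh only the eden capacity/used values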
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Mon May 02 10:51:36 2011 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
+
+#include "gc_implementation/shared/hSpaceCounters.hpp"
+
+class G1CollectedHeap;
+class G1SpaceMonitoringSupport;
+
+// Class for monitoring logical spaces in G1.
+// G1 defines a set of regions as a young
+// collection (analogous to a young generation).
+// The young collection is a logical generation
+// with no fixed chunk (see space.hpp) reflecting
+// the address space for the generation.  In addition
+// to the young collection there is its complement,
+// the non-young collection, which is simply the regions
+// not in the young collection.  The non-young collection
+// is treated here as a logical old generation only
+// because the monitoring tools expect a generational
+// heap.  The monitoring tools expect that a Space
+// (see space.hpp) exists that describes the
+// address space of the young and non-young
+// collections, and such a view is provided here.
+//
+// This class provides interfaces to access
+// the "capacity" and "used" values of the
+// young collection, along with constant values
+// for the minimum and maximum capacities of
+// the logical spaces.  The same is provided
+// for the non-young collection.
+//
+// Also provided are counters for G1 concurrent collections
+// and stop-the-world full heap collections.
+//
+// Below is a description of how "used" and "capacity"
+// (or committed) are calculated for the logical spaces.
+//
+// 1) The used space calculation for a pool is not necessarily
+// independent of the others. We can easily get from G1 the overall
+// used space in the entire heap, the number of regions in the young
+// generation (includes both eden and survivors), and the number of
+// survivor regions. So, from that we calculate:
+//
+//  survivor_used = survivor_num * region_size
+//  eden_used     = young_region_num * region_size - survivor_used
+//  old_gen_used  = overall_used - eden_used - survivor_used
+//
+// Note that survivor_used and eden_used are upper bounds. To get the
+// actual value we would have to iterate over the regions and add up
+// ->used(). But that'd be expensive. So, we'll accept some lack of
+// accuracy for those two. But, we have to be careful when calculating
+// old_gen_used, in case we subtract more than the actual amount from
+// overall_used and our result goes negative.
+//
+// 2) Calculating the used space is straightforward, as described
+// above. However, how do we calculate the committed space, given that
+// we allocate space for the eden, survivor, and old gen out of the
+// same pool of regions? One way to do this is to use the used value
+// as also the committed value for the eden and survivor spaces and
+// then calculate the old gen committed space as follows:
+//
+//  old_gen_committed = overall_committed - eden_committed - survivor_committed
+//
+// Maybe a better way to do that would be to calculate used for eden
+// and survivor as a sum of ->used() over their regions and then
+// calculate committed as region_num * region_size (i.e., what we use
+// to calculate the used space now). This is something to consider
+// in the future.
+//
+// 3) Another decision that is again not straightforward is what is
+// the max size that each memory pool can grow to. One way to do this
+// would be to use the committed size for the max for the eden and
+// survivors and calculate the old gen max as follows (basically, it's
+// a similar pattern to what we use for the committed space, as
+// described above):
+//
+//  old_gen_max = overall_max - eden_max - survivor_max
+//
+// Unfortunately, the above makes the max of each pool fluctuate over
+// time and, even though this is allowed according to the spec, it
+// broke several assumptions in the M&M framework (there were cases
+// where used would reach a value greater than max). So, for max we
+// use -1, which means "undefined" according to the spec.
+//
+// 4) Now, there is a very subtle issue with all the above. The
+// framework will call get_memory_usage() on the three pools
+// asynchronously. As a result, each call might get a different value
+// for, say, survivor_num which will yield inconsistent values for
+// eden_used, survivor_used, and old_gen_used (as survivor_num is used
+// in the calculation of all three). This would normally be
+// ok. However, it's possible that this might cause the sum of
+// eden_used, survivor_used, and old_gen_used to go over the max heap
+// size and this seems to sometimes cause JConsole (and maybe other
+// clients) to get confused. There isn't really an easy / clean
+// solution to this problem, due to the asynchronous nature of the
+// framework.
+
+class G1MonitoringSupport : public CHeapObj {
+  G1CollectedHeap* _g1h;
+  VirtualSpace* _g1_storage_addr;
+
+  // jstat performance counters
+  //  incremental collections both fully and partially young
+  CollectorCounters*   _incremental_collection_counters;
+  //  full stop-the-world collections
+  CollectorCounters*   _full_collection_counters;
+  //  young collection set counters.  The _eden_counters,
+  // _from_counters, and _to_counters are associated with
+  // this "generational" counter.
+  GenerationCounters*  _young_collection_counters;
+  //  non-young collection set counters. The _old_space_counters
+  // below are associated with this "generational" counter.
+  GenerationCounters*  _non_young_collection_counters;
+  // Counters for the capacity and used for
+  //   the whole heap
+  HSpaceCounters*      _old_space_counters;
+  //   the young collection
+  HSpaceCounters*      _eden_counters;
+  //   the survivor collection (only one, _to_counters, is actively used)
+  HSpaceCounters*      _from_counters;
+  HSpaceCounters*      _to_counters;
+
+  // It returns x - y if x > y, 0 otherwise.
+  // As described in the comment above, some of the inputs to the
+  // calculations we have to do are obtained concurrently and hence
+  // may be inconsistent with each other. So, this provides a
+  // defensive way of performing the subtraction and avoids the value
+  // going negative (which would mean a very large result, given that
+  // the parameters are size_t).
+  static size_t subtract_up_to_zero(size_t x, size_t y) {
+    if (x > y) {
+      return x - y;
+    } else {
+      return 0;
+    }
+  }
+
+ public:
+  G1MonitoringSupport(G1CollectedHeap* g1h, VirtualSpace* g1_storage_addr);
+
+  G1CollectedHeap* g1h() { return _g1h; }
+  VirtualSpace* g1_storage_addr() { return _g1_storage_addr; }
+
+  // Performance Counter accessors
+  void update_counters();
+  void update_eden_counters();
+
+  CollectorCounters* incremental_collection_counters() {
+    return _incremental_collection_counters;
+  }
+  CollectorCounters* full_collection_counters() {
+    return _full_collection_counters;
+  }
+  GenerationCounters* non_young_collection_counters() {
+    return _non_young_collection_counters;
+  }
+  HSpaceCounters*      old_space_counters() { return _old_space_counters; }
+  HSpaceCounters*      eden_counters() { return _eden_counters; }
+  HSpaceCounters*      from_counters() { return _from_counters; }
+  HSpaceCounters*      to_counters() { return _to_counters; }
+
+  // Monitoring support used by
+  //   MemoryService
+  //   jstat counters
+  size_t overall_committed();
+  size_t overall_used();
+
+  size_t eden_space_committed();
+  size_t eden_space_used();
+
+  size_t survivor_space_committed();
+  size_t survivor_space_used();
+
+  size_t old_space_committed();
+  size_t old_space_used();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
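As a quick sanity check of the used-space formulas in the header comment above, here is a worked example with made-up numbers (1 MB regions, 10 young regions of which 2 are survivors, 50 MB used overall; none of these figures come from the patch):

  survivor_used = 2 * 1 MB            = 2 MB
  eden_used     = 10 * 1 MB - 2 MB    = 8 MB
  old_gen_used  = 50 MB - 8 MB - 2 MB = 40 MB

Each subtraction goes through subtract_up_to_zero(), so if a concurrently read input is momentarily too large the result clamps at zero rather than wrapping around to a huge size_t value.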
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,9 @@
 
 
 void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
-  assert(pre_val->is_oop_or_null(true), "Error");
+  // Nulls should have been already filtered.
+  assert(pre_val->is_oop(true), "Error");
+
   if (!JavaThread::satb_mark_queue_set().is_active()) return;
   Thread* thr = Thread::current();
   if (thr->is_Java_thread()) {
@@ -59,20 +61,6 @@
   }
 }
 
-// When we know the current java thread:
-template <class T> void
-G1SATBCardTableModRefBS::write_ref_field_pre_static(T* field,
-                                                    oop new_val,
-                                                    JavaThread* jt) {
-  if (!JavaThread::satb_mark_queue_set().is_active()) return;
-  T heap_oop = oopDesc::load_heap_oop(field);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop pre_val = oopDesc::decode_heap_oop_not_null(heap_oop);
-    assert(pre_val->is_oop(true /* ignore mark word */), "Error");
-    jt->satb_mark_queue().enqueue(pre_val);
-  }
-}
-
 template <class T> void
 G1SATBCardTableModRefBS::write_ref_array_pre_work(T* dst, int count) {
   if (!JavaThread::satb_mark_queue_set().is_active()) return;
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Mon May 02 10:51:36 2011 -0700
@@ -37,12 +37,11 @@
 // snapshot-at-the-beginning marking.
 
 class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
-private:
+public:
   // Add "pre_val" to a set of objects that may have been disconnected from the
   // pre-marking object graph.
   static void enqueue(oop pre_val);
 
-public:
   G1SATBCardTableModRefBS(MemRegion whole_heap,
                           int max_covered_regions);
 
@@ -61,10 +60,6 @@
     }
   }
 
-  // When we know the current java thread:
-  template <class T> static void write_ref_field_pre_static(T* field, oop newVal,
-                                                            JavaThread* jt);
-
   // We export this to make it available in cases where the static
   // type of the barrier set is known.  Note that it is non-virtual.
   template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon May 02 10:51:36 2011 -0700
@@ -89,13 +89,9 @@
           "The number of discovered reference objects to process before "   \
           "draining concurrent marking work queues.")                       \
                                                                             \
-  experimental(bool, G1UseConcMarkReferenceProcessing, false,               \
+  experimental(bool, G1UseConcMarkReferenceProcessing, true,                \
           "If true, enable reference discovery during concurrent "          \
-          "marking and reference processing at the end of remark "          \
-          "(unsafe).")                                                      \
-                                                                            \
-  develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
-          "If true, count frac of ptr writes with null pre-vals.")          \
+          "marking and reference processing at the end of remark.")         \
                                                                             \
   product(intx, G1SATBBufferSize, 1*K,                                      \
           "Number of entries in an SATB log buffer.")                       \
@@ -150,12 +146,6 @@
   develop(bool, G1PrintParCleanupStats, false,                              \
           "When true, print extra stats about parallel cleanup.")           \
                                                                             \
-  develop(bool, G1DisablePreBarrier, false,                                 \
-          "Disable generation of pre-barrier (i.e., marking barrier)   ")   \
-                                                                            \
-  develop(bool, G1DisablePostBarrier, false,                                \
-          "Disable generation of post-barrier (i.e., RS barrier)   ")       \
-                                                                            \
   product(intx, G1UpdateBufferSize, 256,                                    \
           "Size of an update buffer")                                       \
                                                                             \
@@ -310,6 +300,11 @@
   develop(uintx, G1StressConcRegionFreeingDelayMillis, 0,                   \
           "Artificial delay during concurrent region freeing")              \
                                                                             \
+  develop(uintx, G1DummyRegionsPerGC, 0,                                    \
+          "The number of dummy regions G1 will allocate at the end of "     \
+          "each evacuation pause in order to artificially fill up the "     \
+          "heap and stress the marking implementation.")                    \
+                                                                            \
   develop(bool, ReduceInitialCardMarksForG1, false,                         \
           "When ReduceInitialCardMarks is true, this flag setting "         \
           " controls whether G1 allows the RICM optimization")              \
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,45 +33,43 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 
-void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
-                                                        DirtyCardToOopClosure* dcto_cl,
-                                                        MemRegionClosure* cl,
-                                                        bool clear,
-                                                        int n_threads) {
-  if (n_threads > 0) {
-    assert((n_threads == 1 && ParallelGCThreads == 0) ||
-           n_threads <= (int)ParallelGCThreads,
-           "# worker threads != # requested!");
-    // Make sure the LNC array is valid for the space.
-    jbyte**   lowest_non_clean;
-    uintptr_t lowest_non_clean_base_chunk_index;
-    size_t    lowest_non_clean_chunk_size;
-    get_LNC_array_for_space(sp, lowest_non_clean,
-                            lowest_non_clean_base_chunk_index,
-                            lowest_non_clean_chunk_size);
+void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
+                                                             DirtyCardToOopClosure* dcto_cl,
+                                                             ClearNoncleanCardWrapper* cl,
+                                                             int n_threads) {
+  assert(n_threads > 0, "Error: expected n_threads > 0");
+  assert((n_threads == 1 && ParallelGCThreads == 0) ||
+         n_threads <= (int)ParallelGCThreads,
+         "# worker threads != # requested!");
+  // Make sure the LNC array is valid for the space.
+  jbyte**   lowest_non_clean;
+  uintptr_t lowest_non_clean_base_chunk_index;
+  size_t    lowest_non_clean_chunk_size;
+  get_LNC_array_for_space(sp, lowest_non_clean,
+                          lowest_non_clean_base_chunk_index,
+                          lowest_non_clean_chunk_size);
 
-    int n_strides = n_threads * StridesPerThread;
-    SequentialSubTasksDone* pst = sp->par_seq_tasks();
-    pst->set_n_threads(n_threads);
-    pst->set_n_tasks(n_strides);
+  int n_strides = n_threads * StridesPerThread;
+  SequentialSubTasksDone* pst = sp->par_seq_tasks();
+  pst->set_n_threads(n_threads);
+  pst->set_n_tasks(n_strides);
 
-    int stride = 0;
-    while (!pst->is_task_claimed(/* reference */ stride)) {
-      process_stride(sp, mr, stride, n_strides, dcto_cl, cl, clear,
-                     lowest_non_clean,
-                     lowest_non_clean_base_chunk_index,
-                     lowest_non_clean_chunk_size);
-    }
-    if (pst->all_tasks_completed()) {
-      // Clear lowest_non_clean array for next time.
-      intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
-      uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
-      for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
-        intptr_t ind = ch - lowest_non_clean_base_chunk_index;
-        assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
-               "Bounds error");
-        lowest_non_clean[ind] = NULL;
-      }
+  int stride = 0;
+  while (!pst->is_task_claimed(/* reference */ stride)) {
+    process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
+                   lowest_non_clean,
+                   lowest_non_clean_base_chunk_index,
+                   lowest_non_clean_chunk_size);
+  }
+  if (pst->all_tasks_completed()) {
+    // Clear lowest_non_clean array for next time.
+    intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
+    uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
+    for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
+      intptr_t ind = ch - lowest_non_clean_base_chunk_index;
+      assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
+             "Bounds error");
+      lowest_non_clean[ind] = NULL;
     }
   }
 }
@@ -82,8 +80,7 @@
                MemRegion used,
                jint stride, int n_strides,
                DirtyCardToOopClosure* dcto_cl,
-               MemRegionClosure* cl,
-               bool clear,
+               ClearNoncleanCardWrapper* cl,
                jbyte** lowest_non_clean,
                uintptr_t lowest_non_clean_base_chunk_index,
                size_t    lowest_non_clean_chunk_size) {
@@ -129,7 +126,11 @@
                              lowest_non_clean_base_chunk_index,
                              lowest_non_clean_chunk_size);
 
-    non_clean_card_iterate_work(chunk_mr, cl, clear);
+    // We do not call the non_clean_card_iterate_serial() version because
+    // we want to clear the cards, and the ClearNoncleanCardWrapper closure
+    // itself does the work of finding contiguous dirty ranges of cards to
+    // process (and clear).
+    cl->do_MemRegion(chunk_mr);
 
     // Find the next chunk of the stride.
     chunk_card_start += CardsPerStrideChunk * n_strides;
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -176,10 +176,6 @@
   object_mark_sweep()->compact(ZapUnusedHeapArea);
 }
 
-void PSOldGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::old_space_id);
-}
-
 size_t PSOldGen::contiguous_available() const {
   return object_space()->free_in_bytes() + virtual_space()->uncommitted_size();
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -143,9 +143,6 @@
   void adjust_pointers();
   void compact();
 
-  // Parallel old
-  virtual void move_and_update(ParCompactionManager* cm);
-
   // Size info
   size_t capacity_in_bytes() const        { return object_space()->capacity_in_bytes(); }
   size_t used_in_bytes() const            { return object_space()->used_in_bytes(); }
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2104,11 +2104,7 @@
     // klasses are used in the update of an object?
     compact_perm(vmthread_cm);
 
-    if (UseParallelOldGCCompacting) {
-      compact();
-    } else {
-      compact_serial(vmthread_cm);
-    }
+    compact();
 
     // Reset the mark bitmap, summary data, and do other bookkeeping.  Must be
     // done before resizing.
@@ -2582,18 +2578,16 @@
     // each thread?
     if (total_dense_prefix_regions > 0) {
       uint tasks_for_dense_prefix = 1;
-      if (UseParallelDensePrefixUpdate) {
-        if (total_dense_prefix_regions <=
-            (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
-          // Don't over partition.  This assumes that
-          // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
-          // so there are not many regions to process.
-          tasks_for_dense_prefix = parallel_gc_threads;
-        } else {
-          // Over partition
-          tasks_for_dense_prefix = parallel_gc_threads *
-            PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
-        }
+      if (total_dense_prefix_regions <=
+          (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
+        // Don't over partition.  This assumes that
+        // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
+        // so there are not many regions to process.
+        tasks_for_dense_prefix = parallel_gc_threads;
+      } else {
+        // Over partition
+        tasks_for_dense_prefix = parallel_gc_threads *
+          PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
       }
       size_t regions_per_thread = total_dense_prefix_regions /
         tasks_for_dense_prefix;
@@ -2733,21 +2727,6 @@
 }
 #endif  // #ifdef ASSERT
 
-void PSParallelCompact::compact_serial(ParCompactionManager* cm) {
-  EventMark m("5 compact serial");
-  TraceTime tm("compact serial", print_phases(), true, gclog_or_tty);
-
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
-
-  PSYoungGen* young_gen = heap->young_gen();
-  PSOldGen* old_gen = heap->old_gen();
-
-  old_gen->start_array()->reset();
-  old_gen->move_and_update(cm);
-  young_gen->move_and_update(cm);
-}
-
 void
 PSParallelCompact::follow_weak_klass_links() {
   // All klasses on the revisit stack are marked at this point.
@@ -3530,11 +3509,8 @@
            "Object liveness is wrong.");
     return ParMarkBitMap::incomplete;
   }
-  assert(UseParallelOldGCDensePrefix ||
-         (HeapMaximumCompactionInterval > 1) ||
-         (MarkSweepAlwaysCompactCount > 1) ||
-         (forwarding_ptr == new_pointer),
-    "Calculation of new location is incorrect");
+  assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 ||
+         forwarding_ptr == new_pointer, "new location is incorrect");
   return ParMarkBitMap::incomplete;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1027,9 +1027,6 @@
                                        ParallelTaskTerminator* terminator_ptr,
                                        uint parallel_gc_threads);
 
-  // For debugging only - compacts the old gen serially
-  static void compact_serial(ParCompactionManager* cm);
-
   // If objects are left in eden after a collection, try to move the boundary
   // and absorb them into the old gen.  Returns true if eden was emptied.
   static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,12 +121,6 @@
   }
 }
 
-
-
-void PSPermGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::perm_space_id);
-}
-
 void PSPermGen::precompact() {
   // Reset start array first.
   _start_array.reset();
--- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,9 +51,6 @@
   // MarkSweep code
   virtual void precompact();
 
-  // Parallel old
-  virtual void move_and_update(ParCompactionManager* cm);
-
   virtual const char* name() const { return "PSPermGen"; }
 };
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -792,12 +792,6 @@
   to_mark_sweep()->compact(false);
 }
 
-void PSYoungGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::eden_space_id);
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::from_space_id);
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::to_space_id);
-}
-
 void PSYoungGen::print() const { print_on(tty); }
 void PSYoungGen::print_on(outputStream* st) const {
   st->print(" %-15s", "PSYoungGen");
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -127,9 +127,6 @@
   void adjust_pointers();
   void compact();
 
-  // Parallel Old
-  void move_and_update(ParCompactionManager* cm);
-
   // Called during/after gc
   void swap_spaces();
 
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Mon May 02 10:51:36 2011 -0700
@@ -76,7 +76,7 @@
     _beforeSweep = 0;
     _coalBirths = 0;
     _coalDeaths = 0;
-    _splitBirths = split_birth? 1 : 0;
+    _splitBirths = (split_birth ? 1 : 0);
     _splitDeaths = 0;
     _returnedBytes = 0;
   }
--- a/src/share/vm/gc_implementation/shared/generationCounters.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/shared/generationCounters.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,15 +51,18 @@
 
     cname = PerfDataManager::counter_name(_name_space, "minCapacity");
     PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->committed_size(), CHECK);
 
     cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
     PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->reserved_size(), CHECK);
 
     cname = PerfDataManager::counter_name(_name_space, "capacity");
     _current_size = PerfDataManager::create_variable(SUN_GC, cname,
-                                      PerfData::U_Bytes,
+                                     PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->committed_size(), CHECK);
   }
 }
--- a/src/share/vm/gc_implementation/shared/generationCounters.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/gc_implementation/shared/generationCounters.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,11 @@
   }
 
   virtual void update_all() {
-    _current_size->set_value(_virtual_space->committed_size());
+    _current_size->set_value(_virtual_space == NULL ? 0 :
+                             _virtual_space->committed_size());
   }
 
   const char* name_space() const        { return _name_space; }
+
 };
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GENERATIONCOUNTERS_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/hSpaceCounters.cpp	Mon May 02 10:51:36 2011 -0700
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/hSpaceCounters.hpp"
+#include "memory/generation.hpp"
+#include "memory/resourceArea.hpp"
+
+HSpaceCounters::HSpaceCounters(const char* name,
+                               int ordinal,
+                               size_t max_size,
+                               size_t initial_capacity,
+                               GenerationCounters* gc) {
+
+  if (UsePerfData) {
+    EXCEPTION_MARK;
+    ResourceMark rm;
+
+    const char* cns =
+      PerfDataManager::name_space(gc->name_space(), "space", ordinal);
+
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    strcpy(_name_space, cns);
+
+    const char* cname = PerfDataManager::counter_name(_name_space, "name");
+    PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
+    PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     (jlong)max_size, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "capacity");
+    _capacity = PerfDataManager::create_variable(SUN_GC, cname,
+                                                 PerfData::U_Bytes,
+                                                 initial_capacity, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "used");
+    _used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes,
+                                             (jlong) 0, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "initCapacity");
+    PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     initial_capacity, CHECK);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/hSpaceCounters.hpp	Mon May 02 10:51:36 2011 -0700
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
+
+#ifndef SERIALGC
+#include "gc_implementation/shared/generationCounters.hpp"
+#include "memory/generation.hpp"
+#include "runtime/perfData.hpp"
+#endif
+
+// An HSpaceCounters is a holder class for performance counters
+// that track the collections (logical spaces) in a heap.
+
+class HeapSpaceUsedHelper;
+class G1SpaceMonitoringSupport;
+
+class HSpaceCounters: public CHeapObj {
+  friend class VMStructs;
+
+ private:
+  PerfVariable*        _capacity;
+  PerfVariable*        _used;
+
+  // Constant PerfData types don't need to retain a reference.
+  // However, it's a good idea to document them here.
+
+  char*             _name_space;
+
+ public:
+
+  HSpaceCounters(const char* name, int ordinal, size_t max_size,
+                 size_t initial_capacity, GenerationCounters* gc);
+
+  ~HSpaceCounters() {
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+  }
+
+  inline void update_capacity(size_t v) {
+    _capacity->set_value(v);
+  }
+
+  inline void update_used(size_t v) {
+    _used->set_value(v);
+  }
+
+  debug_only(
+    // for security reasons, we do not allow arbitrary reads from
+    // the counters as they may live in shared memory.
+    jlong used() {
+      return _used->get_value();
+    }
+    jlong capacity() {
+      return _capacity->get_value();
+    }
+  )
+
+  inline void update_all(size_t capacity, size_t used) {
+    update_capacity(capacity);
+    update_used(used);
+  }
+
+  const char* name_space() const        { return _name_space; }
+};
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
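A minimal usage sketch, using only the constructor and update methods declared above; the variable names and byte values are illustrative, not from the patch:

  // Parent "generation" counter: name space generation.0 with 3 spaces and no
  // backing VirtualSpace (mirrors how g1MonitoringSupport.cpp sets it up).
  GenerationCounters* young = new GenerationCounters("young", 0, 3, NULL);

  // Child space counter: name space generation.0.space.0.
  HSpaceCounters* eden = new HSpaceCounters("eden", 0 /* ordinal */,
                                            max_bytes, initial_capacity_bytes,
                                            young);

  // Later, refresh both jstat values in one call.
  eden->update_all(committed_bytes, used_bytes);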
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Mon May 02 10:51:36 2011 -0700
@@ -104,6 +104,7 @@
     java_lang_math_sqrt,                                        // implementation of java.lang.Math.sqrt  (x)
     java_lang_math_log,                                         // implementation of java.lang.Math.log   (x)
     java_lang_math_log10,                                       // implementation of java.lang.Math.log10 (x)
+    java_lang_ref_reference_get,                                // implementation of java.lang.ref.Reference.get()
     number_of_method_entries,
     invalid = -1
   };
@@ -140,7 +141,7 @@
   // Method activation
   static MethodKind method_kind(methodHandle m);
   static address    entry_for_kind(MethodKind k)                { assert(0 <= k && k < number_of_method_entries, "illegal kind"); return _entry_table[k]; }
-  static address    entry_for_method(methodHandle m)            { return _entry_table[method_kind(m)]; }
+  static address    entry_for_method(methodHandle m)            { return entry_for_kind(method_kind(m)); }
 
   static void       print_method_kind(MethodKind kind)          PRODUCT_RETURN;
 
--- a/src/share/vm/interpreter/cppInterpreter.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/interpreter/cppInterpreter.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,7 @@
     method_entry(java_lang_math_sqrt  );
     method_entry(java_lang_math_log   );
     method_entry(java_lang_math_log10 );
+    method_entry(java_lang_ref_reference_get);
     Interpreter::_native_entry_begin = Interpreter::code()->code_end();
     method_entry(native);
     method_entry(native_synchronized);
--- a/src/share/vm/interpreter/interpreter.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/interpreter/interpreter.cpp	Mon May 02 10:51:36 2011 -0700
@@ -208,12 +208,6 @@
     return empty;
   }
 
-  // Accessor method?
-  if (m->is_accessor()) {
-    assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
-    return accessor;
-  }
-
   // Special intrinsic method?
   // Note: This test must come _after_ the test for native methods,
   //       otherwise we will run into problems with JDK 1.2, see also
@@ -227,6 +221,15 @@
     case vmIntrinsics::_dsqrt : return java_lang_math_sqrt ;
     case vmIntrinsics::_dlog  : return java_lang_math_log  ;
     case vmIntrinsics::_dlog10: return java_lang_math_log10;
+
+    case vmIntrinsics::_Reference_get:
+                                return java_lang_ref_reference_get;
+  }
+
+  // Accessor method?
+  if (m->is_accessor()) {
+    assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
+    return accessor;
   }
 
   // Note: for now: zero locals for all non-empty methods
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Mon May 02 10:51:36 2011 -0700
@@ -372,6 +372,7 @@
   method_entry(java_lang_math_sqrt )
   method_entry(java_lang_math_log  )
   method_entry(java_lang_math_log10)
+  method_entry(java_lang_ref_reference_get)
 
   // all native method kinds (must be one contiguous block)
   Interpreter::_native_entry_begin = Interpreter::code()->code_end();
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -456,35 +456,35 @@
 }
 
 
-void CardTableModRefBS::non_clean_card_iterate(Space* sp,
-                                               MemRegion mr,
-                                               DirtyCardToOopClosure* dcto_cl,
-                                               MemRegionClosure* cl,
-                                               bool clear) {
+void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
+                                                                 MemRegion mr,
+                                                                 DirtyCardToOopClosure* dcto_cl,
+                                                                 ClearNoncleanCardWrapper* cl) {
   if (!mr.is_empty()) {
     int n_threads = SharedHeap::heap()->n_par_threads();
     if (n_threads > 0) {
 #ifndef SERIALGC
-      par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, clear, n_threads);
+      non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
 #else  // SERIALGC
       fatal("Parallel gc not supported here.");
 #endif // SERIALGC
     } else {
-      non_clean_card_iterate_work(mr, cl, clear);
+      // We do not call the non_clean_card_iterate_serial() version below because
+      // we want to clear the cards (which non_clean_card_iterate_serial() does not
+      // do for us), and the ClearNoncleanCardWrapper closure itself does the work
+      // of finding contiguous dirty ranges of cards to process (and clear).
+      cl->do_MemRegion(mr);
     }
   }
 }
 
-// NOTE: For this to work correctly, it is important that
-// we look for non-clean cards below (so as to catch those
-// marked precleaned), rather than look explicitly for dirty
-// cards (and miss those marked precleaned). In that sense,
-// the name precleaned is currently somewhat of a misnomer.
-void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr,
-                                                    MemRegionClosure* cl,
-                                                    bool clear) {
-  // Figure out whether we have to worry about parallelism.
-  bool is_par = (SharedHeap::heap()->n_par_threads() > 1);
+// The iterator itself is not MT-aware, but
+// MT-aware callers and closures can use this to
+// accomplish dirty card iteration in parallel. The
+// iterator itself does not clear the dirty cards, or
+// change their values in any manner.
+void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
+                                                      MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (mri.word_size() > 0) {
@@ -506,22 +506,6 @@
           MemRegion cur_cards(addr_for(cur_entry),
                               non_clean_cards * card_size_in_words);
           MemRegion dirty_region = cur_cards.intersection(mri);
-          if (clear) {
-            for (size_t i = 0; i < non_clean_cards; i++) {
-              // Clean the dirty cards (but leave the other non-clean
-              // alone.)  If parallel, do the cleaning atomically.
-              jbyte cur_entry_val = cur_entry[i];
-              if (card_is_dirty_wrt_gen_iter(cur_entry_val)) {
-                if (is_par) {
-                  jbyte res = Atomic::cmpxchg(clean_card, &cur_entry[i], cur_entry_val);
-                  assert(res != clean_card,
-                         "Dirty card mysteriously cleaned");
-                } else {
-                  cur_entry[i] = clean_card;
-                }
-              }
-            }
-          }
           cl->do_MemRegion(dirty_region);
         }
         cur_entry = next_entry;
@@ -530,22 +514,6 @@
   }
 }
 
-void CardTableModRefBS::mod_oop_in_space_iterate(Space* sp,
-                                                 OopClosure* cl,
-                                                 bool clear,
-                                                 bool before_save_marks) {
-  // Note that dcto_cl is resource-allocated, so there is no
-  // corresponding "delete".
-  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision());
-  MemRegion used_mr;
-  if (before_save_marks) {
-    used_mr = sp->used_region_at_save_marks();
-  } else {
-    used_mr = sp->used_region();
-  }
-  non_clean_card_iterate(sp, used_mr, dcto_cl, dcto_cl, clear);
-}
-
 void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
   assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
   assert((HeapWord*)align_size_up  ((uintptr_t)mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
@@ -593,9 +561,8 @@
   memset(first, dirty_card, last-first);
 }
 
-// NOTES:
-// (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate()
-//     iterates over dirty cards ranges in increasing address order.
+// Unlike several other card table methods, dirty_card_iterate()
+// iterates over dirty cards ranges in increasing address order.
 void CardTableModRefBS::dirty_card_iterate(MemRegion mr,
                                            MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
@@ -698,7 +665,7 @@
 
 void CardTableModRefBS::verify_clean_region(MemRegion mr) {
   GuaranteeNotModClosure blk(this);
-  non_clean_card_iterate_work(mr, &blk, false);
+  non_clean_card_iterate_serial(mr, &blk);
 }
 
 // To verify a MemRegion is entirely dirty this closure is passed to
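The serial iterator above reduces to a simple pattern: scan the card table for maximal runs of non-clean entries and hand each contiguous run to the closure, leaving the card values untouched. The stand-alone sketch below illustrates that shape, with a plain byte vector and an index-range callback standing in for the card table and the MemRegionClosure; it is a simplified illustration under those assumptions, not the HotSpot code.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Simplified stand-in for the card table: one byte per card, 0 == clean.
const int8_t CLEAN = 0;

// Walk the card array and report each maximal run of non-clean cards to the
// callback as a [first_card, last_card) index range.  The cards themselves
// are left untouched, mirroring non_clean_card_iterate_serial().
void non_clean_iterate_serial(const std::vector<int8_t>& cards,
                              const std::function<void(size_t, size_t)>& cl) {
  size_t i = 0;
  while (i < cards.size()) {
    if (cards[i] == CLEAN) { ++i; continue; }
    size_t start = i;
    while (i < cards.size() && cards[i] != CLEAN) ++i;
    cl(start, i);                       // one contiguous non-clean run
  }
}

int main() {
  std::vector<int8_t> cards = {0, 1, 1, 0, 0, 1, 0, 1, 1, 1};
  non_clean_iterate_serial(cards, [](size_t from, size_t to) {
    std::printf("non-clean cards [%zu, %zu)\n", from, to);
  });
  return 0;
}

Because the iterator never modifies the cards, a wrapping closure (such as ClearNoncleanCardWrapper below) can decide for itself whether and how the cards get cleaned.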
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,6 +44,7 @@
 class Generation;
 class OopsInGenClosure;
 class DirtyCardToOopClosure;
+class ClearNoncleanCardWrapper;
 
 class CardTableModRefBS: public ModRefBarrierSet {
   // Some classes get to look at some private stuff.
@@ -165,25 +166,28 @@
 
   // Iterate over the portion of the card-table which covers the given
   // region mr in the given space and apply cl to any dirty sub-regions
-  // of mr. cl and dcto_cl must either be the same closure or cl must
-  // wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl
-  // may be modified. Note that this function will operate in a parallel
-  // mode if worker threads are available.
-  void non_clean_card_iterate(Space* sp, MemRegion mr,
-                              DirtyCardToOopClosure* dcto_cl,
-                              MemRegionClosure* cl,
-                              bool clear);
+  // of mr. Dirty cards are _not_ cleared by the iterator method itself,
+  // but closures may arrange to do so on their own should they so wish.
+  void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl);
 
-  // Utility function used to implement the other versions below.
-  void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl,
-                                   bool clear);
+  // A variant of the above that will operate in a parallel mode if
+  // worker threads are available, and clear the dirty cards as it
+  // processes them.
+  // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
+  // which may itself be modified by the method.
+  void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
+                                                DirtyCardToOopClosure* dcto_cl,
+                                                ClearNoncleanCardWrapper* cl);
 
-  void par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
-                                       DirtyCardToOopClosure* dcto_cl,
-                                       MemRegionClosure* cl,
-                                       bool clear,
-                                       int n_threads);
+ private:
+  // Work method used to implement non_clean_card_iterate_possibly_parallel()
+  // above in the parallel case.
+  void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
+                                            DirtyCardToOopClosure* dcto_cl,
+                                            ClearNoncleanCardWrapper* cl,
+                                            int n_threads);
 
+ protected:
   // Dirty the bytes corresponding to "mr" (not all of which must be
   // covered.)
   void dirty_MemRegion(MemRegion mr);
@@ -240,8 +244,7 @@
                       MemRegion used,
                       jint stride, int n_strides,
                       DirtyCardToOopClosure* dcto_cl,
-                      MemRegionClosure* cl,
-                      bool clear,
+                      ClearNoncleanCardWrapper* cl,
                       jbyte** lowest_non_clean,
                       uintptr_t lowest_non_clean_base_chunk_index,
                       size_t lowest_non_clean_chunk_size);
@@ -402,9 +405,6 @@
   virtual void invalidate(MemRegion mr, bool whole_heap = false);
   void clear(MemRegion mr);
   void dirty(MemRegion mr);
-  void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
-                                bool clear = false,
-                                bool before_save_marks = false);
 
   // *** Card-table-RemSet-specific things.
 
@@ -415,18 +415,15 @@
   // *decreasing* address order.  (This order aids with imprecise card
   // marking, where a dirty card may cause scanning, and summarization
   // marking, of objects that extend onto subsequent cards.)
-  // If "clear" is true, the card is (conceptually) marked unmodified before
-  // applying the closure.
-  void mod_card_iterate(MemRegionClosure* cl, bool clear = false) {
-    non_clean_card_iterate_work(_whole_heap, cl, clear);
+  void mod_card_iterate(MemRegionClosure* cl) {
+    non_clean_card_iterate_serial(_whole_heap, cl);
   }
 
   // Like the "mod_cards_iterate" above, except only invokes the closure
   // for cards within the MemRegion "mr" (which is required to be
   // card-aligned and sized.)
-  void mod_card_iterate(MemRegion mr, MemRegionClosure* cl,
-                        bool clear = false) {
-    non_clean_card_iterate_work(mr, cl, clear);
+  void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
+    non_clean_card_iterate_serial(mr, cl);
   }
 
   static uintx ct_max_alignment_constraint();
@@ -503,4 +500,5 @@
   void set_CTRS(CardTableRS* rs) { _rs = rs; }
 };
 
+
 #endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP
--- a/src/share/vm/memory/cardTableRS.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/cardTableRS.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -105,107 +105,111 @@
   g->younger_refs_iterate(blk);
 }
 
-class ClearNoncleanCardWrapper: public MemRegionClosure {
-  MemRegionClosure* _dirty_card_closure;
-  CardTableRS* _ct;
-  bool _is_par;
-private:
-  // Clears the given card, return true if the corresponding card should be
-  // processed.
-  bool clear_card(jbyte* entry) {
-    if (_is_par) {
-      while (true) {
-        // In the parallel case, we may have to do this several times.
-        jbyte entry_val = *entry;
-        assert(entry_val != CardTableRS::clean_card_val(),
-               "We shouldn't be looking at clean cards, and this should "
-               "be the only place they get cleaned.");
-        if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
-            || _ct->is_prev_youngergen_card_val(entry_val)) {
-          jbyte res =
-            Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
-          if (res == entry_val) {
-            break;
-          } else {
-            assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
-                   "The CAS above should only fail if another thread did "
-                   "a GC write barrier.");
-          }
-        } else if (entry_val ==
-                   CardTableRS::cur_youngergen_and_prev_nonclean_card) {
-          // Parallelism shouldn't matter in this case.  Only the thread
-          // assigned to scan the card should change this value.
-          *entry = _ct->cur_youngergen_card_val();
-          break;
-        } else {
-          assert(entry_val == _ct->cur_youngergen_card_val(),
-                 "Should be the only possibility.");
-          // In this case, the card was clean before, and become
-          // cur_youngergen only because of processing of a promoted object.
-          // We don't have to look at the card.
-          return false;
-        }
+inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
+  if (_is_par) {
+    return clear_card_parallel(entry);
+  } else {
+    return clear_card_serial(entry);
+  }
+}
+
+inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) {
+  while (true) {
+    // In the parallel case, we may have to do this several times.
+    jbyte entry_val = *entry;
+    assert(entry_val != CardTableRS::clean_card_val(),
+           "We shouldn't be looking at clean cards, and this should "
+           "be the only place they get cleaned.");
+    if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
+        || _ct->is_prev_youngergen_card_val(entry_val)) {
+      jbyte res =
+        Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
+      if (res == entry_val) {
+        break;
+      } else {
+        assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
+               "The CAS above should only fail if another thread did "
+               "a GC write barrier.");
       }
-      return true;
+    } else if (entry_val ==
+               CardTableRS::cur_youngergen_and_prev_nonclean_card) {
+      // Parallelism shouldn't matter in this case.  Only the thread
+      // assigned to scan the card should change this value.
+      *entry = _ct->cur_youngergen_card_val();
+      break;
     } else {
-      jbyte entry_val = *entry;
-      assert(entry_val != CardTableRS::clean_card_val(),
-             "We shouldn't be looking at clean cards, and this should "
-             "be the only place they get cleaned.");
-      assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
-             "This should be possible in the sequential case.");
-      *entry = CardTableRS::clean_card_val();
-      return true;
+      assert(entry_val == _ct->cur_youngergen_card_val(),
+             "Should be the only possibility.");
+      // In this case, the card was clean before, and became
+      // cur_youngergen only because of processing of a promoted object.
+      // We don't have to look at the card.
+      return false;
     }
   }
+  return true;
+}
 
-public:
-  ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure,
-                           CardTableRS* ct) :
+
+inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) {
+  jbyte entry_val = *entry;
+  assert(entry_val != CardTableRS::clean_card_val(),
+         "We shouldn't be looking at clean cards, and this should "
+         "be the only place they get cleaned.");
+  assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
+         "This should be possible in the sequential case.");
+  *entry = CardTableRS::clean_card_val();
+  return true;
+}
+
+ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
+  MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
     _dirty_card_closure(dirty_card_closure), _ct(ct) {
     _is_par = (SharedHeap::heap()->n_par_threads() > 0);
+}
+
+void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
+  assert(mr.word_size() > 0, "Error");
+  assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
+  // mr.end() may not necessarily be card aligned.
+  jbyte* cur_entry = _ct->byte_for(mr.last());
+  const jbyte* limit = _ct->byte_for(mr.start());
+  HeapWord* end_of_non_clean = mr.end();
+  HeapWord* start_of_non_clean = end_of_non_clean;
+  while (cur_entry >= limit) {
+    HeapWord* cur_hw = _ct->addr_for(cur_entry);
+    if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) {
+      // Continue the dirty range by opening the
+      // dirty window one card to the left.
+      start_of_non_clean = cur_hw;
+    } else {
+      // We hit a "clean" card; process any non-empty
+      // "dirty" range accumulated so far.
+      if (start_of_non_clean < end_of_non_clean) {
+        const MemRegion mrd(start_of_non_clean, end_of_non_clean);
+        _dirty_card_closure->do_MemRegion(mrd);
+      }
+      // Reset the dirty window, while continuing to look
+      // for the next dirty card that will start a
+      // new dirty window.
+      end_of_non_clean = cur_hw;
+      start_of_non_clean = cur_hw;
+    }
+    // Note that "cur_entry" leads "start_of_non_clean" in
+    // its leftward excursion after this point
+    // in the loop and, when we hit the left end of "mr",
+    // will point off of the left end of the card-table
+    // for "mr".
+    cur_entry--;
   }
-  void do_MemRegion(MemRegion mr) {
-    // We start at the high end of "mr", walking backwards
-    // while accumulating a contiguous dirty range of cards in
-    // [start_of_non_clean, end_of_non_clean) which we then
-    // process en masse.
-    HeapWord* end_of_non_clean = mr.end();
-    HeapWord* start_of_non_clean = end_of_non_clean;
-    jbyte*       entry = _ct->byte_for(mr.last());
-    const jbyte* first_entry = _ct->byte_for(mr.start());
-    while (entry >= first_entry) {
-      HeapWord* cur = _ct->addr_for(entry);
-      if (!clear_card(entry)) {
-        // We hit a clean card; process any non-empty
-        // dirty range accumulated so far.
-        if (start_of_non_clean < end_of_non_clean) {
-          MemRegion mr2(start_of_non_clean, end_of_non_clean);
-          _dirty_card_closure->do_MemRegion(mr2);
-        }
-        // Reset the dirty window while continuing to
-        // look for the next dirty window to process.
-        end_of_non_clean = cur;
-        start_of_non_clean = end_of_non_clean;
-      }
-      // Open the left end of the window one card to the left.
-      start_of_non_clean = cur;
-      // Note that "entry" leads "start_of_non_clean" in
-      // its leftward excursion after this point
-      // in the loop and, when we hit the left end of "mr",
-      // will point off of the left end of the card-table
-      // for "mr".
-      entry--;
-    }
-    // If the first card of "mr" was dirty, we will have
-    // been left with a dirty window, co-initial with "mr",
-    // which we now process.
-    if (start_of_non_clean < end_of_non_clean) {
-      MemRegion mr2(start_of_non_clean, end_of_non_clean);
-      _dirty_card_closure->do_MemRegion(mr2);
-    }
+  // If the first card of "mr" was dirty, we will have
+  // been left with a dirty window, co-initial with "mr",
+  // which we now process.
+  if (start_of_non_clean < end_of_non_clean) {
+    const MemRegion mrd(start_of_non_clean, end_of_non_clean);
+    _dirty_card_closure->do_MemRegion(mrd);
   }
-};
+}
+
 // clean (by dirty->clean before) ==> cur_younger_gen
 // dirty                          ==> cur_youngergen_and_prev_nonclean_card
 // precleaned                     ==> cur_youngergen_and_prev_nonclean_card
@@ -246,8 +250,35 @@
                                                    cl->gen_boundary());
   ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
 
-  _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
-                                dcto_cl, &clear_cl, false);
+  const MemRegion urasm = sp->used_region_at_save_marks();
+#ifdef ASSERT
+  // Convert the assertion check to a warning if we are running
+  // CMS+ParNew until the related bug is fixed.
+  MemRegion ur    = sp->used_region();
+  assert(ur.contains(urasm) || (UseConcMarkSweepGC && UseParNewGC),
+         err_msg("Did you forget to call save_marks()? "
+                 "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
+                 "[" PTR_FORMAT ", " PTR_FORMAT ")",
+                 urasm.start(), urasm.end(), ur.start(), ur.end()));
+  // In the case of CMS+ParNew, issue a warning
+  if (!ur.contains(urasm)) {
+    assert(UseConcMarkSweepGC && UseParNewGC, "Tautology: see assert above");
+    warning("CMS+ParNew: Did you forget to call save_marks()? "
+            "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
+            "[" PTR_FORMAT ", " PTR_FORMAT ")",
+             urasm.start(), urasm.end(), ur.start(), ur.end());
+    MemRegion ur2 = sp->used_region();
+    MemRegion urasm2 = sp->used_region_at_save_marks();
+    if (!ur.equals(ur2)) {
+      warning("CMS+ParNew: Flickering used_region()!!");
+    }
+    if (!urasm.equals(urasm2)) {
+      warning("CMS+ParNew: Flickering used_region_at_save_marks()!!");
+    }
+  }
+#endif
+  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm,
+                                                   dcto_cl, &clear_cl);
 }
 
 void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
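ClearNoncleanCardWrapper combines two ideas: a CAS-based claim so that, under a parallel collector, exactly one thread clears (and therefore processes) each card, and a right-to-left walk that accumulates contiguous dirty cards into one window before invoking the wrapped dirty-card closure. The sketch below reproduces both under simplifying assumptions (two card values only, std::atomic in place of Atomic::cmpxchg, index ranges in place of MemRegions); it illustrates the shape of the algorithm, not the HotSpot implementation.

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Simplified stand-ins: 0 == clean, 1 == dirty.  The real HotSpot card values
// (cur_youngergen, prev_youngergen, ...) are collapsed into "dirty" here.
enum : int8_t { CLEAN = 0, DIRTY = 1 };

// Parallel-safe claim of a single card: only the thread whose CAS succeeds
// returns true, mirroring ClearNoncleanCardWrapper::clear_card_parallel().
bool claim_and_clear(std::atomic<int8_t>& card) {
  int8_t expected = card.load();
  while (expected != CLEAN) {
    if (card.compare_exchange_strong(expected, CLEAN)) return true;
  }
  return false;  // another thread cleared it first
}

// Walk a card range right-to-left, growing a [start, end) window while cards
// stay dirty and flushing it to the closure whenever a clean card is hit,
// plus once more at the end, the same shape as do_MemRegion() above.
void clear_and_iterate(std::vector<std::atomic<int8_t>>& cards,
                       const std::function<void(size_t, size_t)>& cl) {
  size_t end = cards.size();
  size_t start = end;
  for (size_t i = cards.size(); i-- > 0; ) {
    if (cards[i].load() != CLEAN && claim_and_clear(cards[i])) {
      start = i;                        // extend the dirty window leftwards
    } else {
      if (start < end) cl(start, end);  // flush the accumulated window
      start = end = i;                  // reset the window
    }
  }
  if (start < end) cl(start, end);      // window co-initial with the range
}

int main() {
  std::vector<std::atomic<int8_t>> cards(8);
  for (size_t i : {1u, 2u, 3u, 6u}) cards[i] = DIRTY;
  clear_and_iterate(cards, [](size_t from, size_t to) {
    std::printf("processed dirty window [%zu, %zu)\n", from, to);
  });
  return 0;
}

Walking right to left matches the decreasing-address order used elsewhere in the card-table code, which helps with imprecise card marking where an object on one card may extend onto the following cards.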
--- a/src/share/vm/memory/cardTableRS.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/cardTableRS.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,4 +166,21 @@
 
 };
 
+class ClearNoncleanCardWrapper: public MemRegionClosure {
+  MemRegionClosure* _dirty_card_closure;
+  CardTableRS* _ct;
+  bool _is_par;
+private:
+  // Clears the given card, returning true if the corresponding card should be
+  // processed.
+  inline bool clear_card(jbyte* entry);
+  // Work methods called by clear_card()
+  inline bool clear_card_serial(jbyte* entry);
+  inline bool clear_card_parallel(jbyte* entry);
+
+public:
+  ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
+  void do_MemRegion(MemRegion mr);
+};
+
 #endif // SHARE_VM_MEMORY_CARDTABLERS_HPP
--- a/src/share/vm/memory/dump.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/dump.cpp	Mon May 02 10:51:36 2011 -0700
@@ -623,24 +623,48 @@
   }
 };
 
-// Itable indices are calculated based on methods array order
-// (see klassItable::compute_itable_index()).  Must reinitialize
+// Vtable and Itable indices are calculated based on methods array
+// order (see klassItable::compute_itable_index()).  Must reinitialize
 // after ALL methods of ALL classes have been reordered.
 // We assume that since checkconstraints is false, this method
 // cannot throw an exception.  An exception here would be
 // problematic since this is the VMThread, not a JavaThread.
 
-class ReinitializeItables: public ObjectClosure {
+class ReinitializeTables: public ObjectClosure {
 private:
   Thread* _thread;
 
 public:
-  ReinitializeItables(Thread* thread) : _thread(thread) {}
+  ReinitializeTables(Thread* thread) : _thread(thread) {}
+
+  // Initialize the super vtable first; check if already initialized to avoid
+  // quadratic behavior.  The vtable is cleared in remove_unshareable_info.
+  void reinitialize_vtables(klassOop k) {
+    if (k->blueprint()->oop_is_instanceKlass()) {
+      instanceKlass* ik = instanceKlass::cast(k);
+      if (ik->vtable()->is_initialized()) return;
+      if (ik->super() != NULL) {
+        reinitialize_vtables(ik->super());
+      }
+      ik->vtable()->initialize_vtable(false, _thread);
+    }
+  }
 
   void do_object(oop obj) {
     if (obj->blueprint()->oop_is_instanceKlass()) {
       instanceKlass* ik = instanceKlass::cast((klassOop)obj);
+      ResourceMark rm(_thread);
       ik->itable()->initialize_itable(false, _thread);
+      reinitialize_vtables((klassOop)obj);
+#ifdef ASSERT
+      ik->vtable()->verify(tty, true);
+#endif // ASSERT
+    } else if (obj->blueprint()->oop_is_arrayKlass()) {
+      // The vtable for array klasses is that of their super class,
+      // i.e. java.lang.Object.
+      arrayKlass* ak = arrayKlass::cast((klassOop)obj);
+      if (ak->vtable()->is_initialized()) return;
+      ak->vtable()->initialize_vtable(false, _thread);
     }
   }
 };
@@ -1205,9 +1229,9 @@
     gen->ro_space()->object_iterate(&sort);
     gen->rw_space()->object_iterate(&sort);
 
-    ReinitializeItables reinit_itables(THREAD);
-    gen->ro_space()->object_iterate(&reinit_itables);
-    gen->rw_space()->object_iterate(&reinit_itables);
+    ReinitializeTables reinit_tables(THREAD);
+    gen->ro_space()->object_iterate(&reinit_tables);
+    gen->rw_space()->object_iterate(&reinit_tables);
     tty->print_cr("done. ");
     tty->cr();
 
--- a/src/share/vm/memory/genCollectedHeap.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Mon May 02 10:51:36 2011 -0700
@@ -427,13 +427,13 @@
   // explicitly mark reachable objects in younger generations, to avoid
   // excess storage retention.)  If "collecting_perm_gen" is false, then
   // roots that may only contain references to permGen objects are not
-  // scanned. The "so" argument determines which of the roots
+  // scanned; instead, the older_gens closure is applied to all outgoing
+  // references in the perm gen.  The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
   // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
   // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Symbols_and_Strings" applies the closure to all entries in
-  // SymbolsTable and StringTable.
+  // "SO_Strings" applies the closure to all entries in the StringTable.
   void gen_process_strong_roots(int level,
                                 bool younger_gens_as_roots,
                                 // The remaining arguments are in an order
--- a/src/share/vm/memory/modRefBarrierSet.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/modRefBarrierSet.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,15 +88,6 @@
     assert(false, "can't call");
   }
 
-  // Invoke "cl->do_oop" on (the address of) every possibly-modifed
-  // reference field in objects in "sp".  If "clear" is "true", the oops
-  // are no longer considered possibly modified after application of the
-  // closure.  If' "before_save_marks" is true, oops in objects allocated
-  // after the last call to "save_marks" on "sp" will not be considered.
-  virtual void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
-                                        bool clear = false,
-                                        bool before_save_marks = false) = 0;
-
   // Causes all refs in "mr" to be assumed to be modified.  If "whole_heap"
   // is true, the caller asserts that the entire heap is being invalidated,
   // which may admit an optimized implementation for some barriers.
--- a/src/share/vm/memory/sharedHeap.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/sharedHeap.cpp	Mon May 02 10:51:36 2011 -0700
@@ -46,7 +46,6 @@
   SH_PS_Management_oops_do,
   SH_PS_SystemDictionary_oops_do,
   SH_PS_jvmti_oops_do,
-  SH_PS_SymbolTable_oops_do,
   SH_PS_StringTable_oops_do,
   SH_PS_CodeCache_oops_do,
   // Leave this one last.
@@ -161,13 +160,9 @@
   if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
     if (so & SO_AllClasses) {
       SystemDictionary::oops_do(roots);
-    } else
-      if (so & SO_SystemClasses) {
-        SystemDictionary::always_strong_oops_do(roots);
-      }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_SymbolTable_oops_do)) {
+    } else if (so & SO_SystemClasses) {
+      SystemDictionary::always_strong_oops_do(roots);
+    }
   }
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_StringTable_oops_do)) {
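Each root group above is handed out through _process_strong_tasks->is_task_claimed(), so that with parallel GC workers every task is executed by exactly one thread. A plausible stand-in for that claim protocol is an atomic test-and-set per task, as sketched below; this is an assumption about the general idiom, not the SubTasksDone implementation.

#include <atomic>
#include <cstddef>
#include <cstdio>
#include <thread>
#include <vector>

// Hypothetical stand-in for SubTasksDone: one flag per enumerated task,
// claimed at most once across all worker threads.
struct TaskClaims {
  std::vector<std::atomic<bool>> claimed;
  explicit TaskClaims(size_t n) : claimed(n) {}
  // Returns true if some other thread already claimed task t;
  // otherwise claims it and returns false (the caller then does the work).
  bool is_task_claimed(size_t t) { return claimed[t].exchange(true); }
};

int main() {
  enum { Task_SystemDictionary, Task_StringTable, Task_CodeCache, NumTasks };
  TaskClaims tasks(NumTasks);
  auto worker = [&](int id) {
    for (int t = 0; t < NumTasks; t++) {
      if (!tasks.is_task_claimed(t)) {
        std::printf("worker %d does task %d\n", id, t);  // one worker per task
      }
    }
  };
  std::thread w1(worker, 1), w2(worker, 2);
  w1.join(); w2.join();
  return 0;
}

is_task_claimed() both tests and claims, which is why the callers are written as "if (!is_task_claimed(T)) { do T }".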
--- a/src/share/vm/memory/sharedHeap.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/memory/sharedHeap.hpp	Mon May 02 10:51:36 2011 -0700
@@ -192,9 +192,8 @@
     SO_None                = 0x0,
     SO_AllClasses          = 0x1,
     SO_SystemClasses       = 0x2,
-    SO_Symbols             = 0x4,
-    SO_Strings             = 0x8,
-    SO_CodeCache           = 0x10
+    SO_Strings             = 0x4,
+    SO_CodeCache           = 0x8
   };
 
   FlexibleWorkGang* workers() const { return _workers; }
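The ScanningOption values form a bitmask, which is why dropping SO_Symbols also renumbers SO_Strings and SO_CodeCache so that every remaining option stays a distinct power of two. A small sketch of how such a mask is consumed (the printf calls are placeholders for the real root scans, and the function is a simplified stand-in, not process_strong_roots() itself):

#include <cstdio>

// Hypothetical restatement of the ScanningOption bitmask after the change.
enum ScanningOption {
  SO_None          = 0x0,
  SO_AllClasses    = 0x1,
  SO_SystemClasses = 0x2,
  SO_Strings       = 0x4,
  SO_CodeCache     = 0x8
};

void scan_roots(int so) {
  if (so & SO_AllClasses) {
    std::printf("scan all SystemDictionary entries\n");
  } else if (so & SO_SystemClasses) {
    std::printf("scan only the always-strong (system) classes\n");
  }
  if (so & SO_Strings)   std::printf("scan the StringTable\n");
  if (so & SO_CodeCache) std::printf("scan the CodeCache\n");
}

int main() {
  scan_roots(SO_AllClasses | SO_Strings);
  return 0;
}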
@@ -208,14 +207,13 @@
 
   // Invoke the "do_oop" method the closure "roots" on all root locations.
   // If "collecting_perm_gen" is false, then roots that may only contain
-  // references to permGen objects are not scanned.  If true, the
-  // "perm_gen" closure is applied to all older-to-younger refs in the
+  // references to permGen objects are not scanned; instead, in that case,
+  // the "perm_blk" closure is applied to all outgoing refs in the
   // permanent generation.  The "so" argument determines which of roots
   // the closure is applied to:
   // "SO_None" does none;
   // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
   // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Symbols" applies the closure to all entries in SymbolsTable;
   // "SO_Strings" applies the closure to all entries in StringTable;
   // "SO_CodeCache" applies the closure to all elements of the CodeCache.
   void process_strong_roots(bool activate_scope,
--- a/src/share/vm/oops/instanceKlass.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Mon May 02 10:51:36 2011 -0700
@@ -401,6 +401,8 @@
   ReferenceType reference_type() const     { return _reference_type; }
   void set_reference_type(ReferenceType t) { _reference_type = t; }
 
+  static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); }
+
   // find local field, returns true if found
   bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const;
   // find field in direct superinterfaces, returns the interface in which the field is defined
--- a/src/share/vm/oops/instanceKlassKlass.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/oops/instanceKlassKlass.cpp	Mon May 02 10:51:36 2011 -0700
@@ -690,7 +690,8 @@
     guarantee(method_ordering->is_perm(),              "should be in permspace");
     guarantee(method_ordering->is_typeArray(),         "should be type array");
     int length = method_ordering->length();
-    if (JvmtiExport::can_maintain_original_method_order()) {
+    if (JvmtiExport::can_maintain_original_method_order() ||
+        (UseSharedSpaces && length != 0)) {
       guarantee(length == methods->length(),           "invalid method ordering length");
       jlong sum = 0;
       for (j = 0; j < length; j++) {
--- a/src/share/vm/oops/klass.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/oops/klass.cpp	Mon May 02 10:51:36 2011 -0700
@@ -453,6 +453,14 @@
       ik->unlink_class();
     }
   }
+  // Clear the Java vtable if the oop has one.
+  // The vtable isn't shareable because it's in the wrong order wrt the methods
+  // once the method names get moved and resorted.
+  klassVtable* vt = vtable();
+  if (vt != NULL) {
+    assert(oop_is_instance() || oop_is_array(), "nothing else has vtable");
+    vt->clear_vtable();
+  }
   set_subklass(NULL);
   set_next_sibling(NULL);
 }
--- a/src/share/vm/oops/klassVtable.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/oops/klassVtable.cpp	Mon May 02 10:51:36 2011 -0700
@@ -645,6 +645,15 @@
   }
 }
 
+// CDS/RedefineClasses support - clear vtables so they can be reinitialized
+void klassVtable::clear_vtable() {
+  for (int i = 0; i < _length; i++) table()[i].clear();
+}
+
+bool klassVtable::is_initialized() {
+  return _length == 0 || table()[0].method() != NULL;
+}
+
 
 // Garbage collection
 void klassVtable::oop_follow_contents() {
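clear_vtable() and is_initialized() rely on a simple convention: a vtable cleared for dumping has a null method in every slot, so inspecting slot 0 (or noticing an empty table) is enough to tell whether it has already been reinitialized. A sketch of that convention with hypothetical entry types, not the klassVtable API:

#include <cassert>
#include <vector>

// Hypothetical stand-ins for vtableEntry / Method*.
struct Method {};
struct VtableEntry {
  Method* method = nullptr;
  void clear() { method = nullptr; }
};

struct Vtable {
  std::vector<VtableEntry> entries;

  // Cleared-for-dump state: every slot is null, as in klassVtable::clear_vtable().
  void clear() { for (VtableEntry& e : entries) e.clear(); }

  // An empty table counts as initialized; otherwise slot 0 decides,
  // mirroring klassVtable::is_initialized().
  bool is_initialized() const {
    return entries.empty() || entries[0].method != nullptr;
  }
};

int main() {
  Method m;
  Vtable vt;
  vt.entries.resize(3);
  for (VtableEntry& e : vt.entries) e.method = &m;
  assert(vt.is_initialized());
  vt.clear();
  assert(!vt.is_initialized());
  return 0;
}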
--- a/src/share/vm/oops/klassVtable.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/oops/klassVtable.hpp	Mon May 02 10:51:36 2011 -0700
@@ -75,7 +75,15 @@
 
   void initialize_vtable(bool checkconstraints, TRAPS);   // initialize vtable of a new klass
 
-  // conputes vtable length (in words) and the number of miranda methods
+  // CDS/RedefineClasses support - clear vtables so they can be reinitialized
+  // at dump time.  Clearing gives us an easy way to tell if the vtable has
+  // already been reinitialized at dump time (see dump.cpp).  Vtables can
+  // be initialized at run time by RedefineClasses, so dumping them in the
+  // right order is necessary.
+  void clear_vtable();
+  bool is_initialized();
+
+  // computes vtable length (in words) and the number of miranda methods
   static void compute_vtable_size_and_num_mirandas(int &vtable_length, int &num_miranda_methods,
                                                    klassOop super, objArrayOop methods,
                                                    AccessFlags class_flags, Handle classloader,
--- a/src/share/vm/opto/compile.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/compile.cpp	Mon May 02 10:51:36 2011 -0700
@@ -629,7 +629,7 @@
     initial_gvn()->transform_no_reclaim(top());
 
     // Set up tf(), start(), and find a CallGenerator.
-    CallGenerator* cg;
+    CallGenerator* cg = NULL;
     if (is_osr_compilation()) {
       const TypeTuple *domain = StartOSRNode::osr_domain();
       const TypeTuple *range = TypeTuple::make_range(method()->signature());
@@ -644,9 +644,24 @@
       StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
       initial_gvn()->set_type_bottom(s);
       init_start(s);
-      float past_uses = method()->interpreter_invocation_count();
-      float expected_uses = past_uses;
-      cg = CallGenerator::for_inline(method(), expected_uses);
+      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
+        // With java.lang.ref.Reference.get() we must go through the
+        // intrinsic when G1 is enabled - even when get() is the root
+        // method of the compile - so that, if necessary, the value in
+        // the referent field of the reference object gets recorded by
+        // the pre-barrier code.
+        // Specifically, if G1 is enabled, the value in the referent
+        // field is recorded by the G1 SATB pre barrier. This will
+        // result in the referent being marked live and the reference
+        // object removed from the list of discovered references during
+        // reference processing.
+        cg = find_intrinsic(method(), false);
+      }
+      if (cg == NULL) {
+        float past_uses = method()->interpreter_invocation_count();
+        float expected_uses = past_uses;
+        cg = CallGenerator::for_inline(method(), expected_uses);
+      }
     }
     if (failing())  return;
     if (cg == NULL) {
@@ -2041,6 +2056,52 @@
   // Note that OffsetBot and OffsetTop are very negative.
 }
 
+// Eliminate trivially redundant StoreCMs and accumulate their
+// precedence edges.
+static void eliminate_redundant_card_marks(Node* n) {
+  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
+  if (n->in(MemNode::Address)->outcnt() > 1) {
+    // There are multiple users of the same address so it might be
+    // possible to eliminate some of the StoreCMs
+    Node* mem = n->in(MemNode::Memory);
+    Node* adr = n->in(MemNode::Address);
+    Node* val = n->in(MemNode::ValueIn);
+    Node* prev = n;
+    bool done = false;
+    // Walk the chain of StoreCMs eliminating ones that match.  As
+    // long as it's a chain of single users then the optimization is
+    // safe.  Eliminating partially redundant StoreCMs would require
+    // cloning copies down the other paths.
+    while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
+      if (adr == mem->in(MemNode::Address) &&
+          val == mem->in(MemNode::ValueIn)) {
+        // redundant StoreCM
+        if (mem->req() > MemNode::OopStore) {
+          // Hasn't been processed by this code yet.
+          n->add_prec(mem->in(MemNode::OopStore));
+        } else {
+          // Already converted to precedence edge
+          for (uint i = mem->req(); i < mem->len(); i++) {
+            // Accumulate any precedence edges
+            if (mem->in(i) != NULL) {
+              n->add_prec(mem->in(i));
+            }
+          }
+          // Everything above this point has been processed.
+          done = true;
+        }
+        // Eliminate the previous StoreCM
+        prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
+        assert(mem->outcnt() == 0, "should be dead");
+        mem->disconnect_inputs(NULL);
+      } else {
+        prev = mem;
+      }
+      mem = prev->in(MemNode::Memory);
+    }
+  }
+}
+
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
@@ -2167,9 +2228,19 @@
     frc.inc_float_count();
     goto handle_mem;
 
+  case Op_StoreCM:
+    {
+      // Convert OopStore dependence into precedence edge
+      Node* prec = n->in(MemNode::OopStore);
+      n->del_req(MemNode::OopStore);
+      n->add_prec(prec);
+      eliminate_redundant_card_marks(n);
+    }
+
+    // fall through
+
   case Op_StoreB:
   case Op_StoreC:
-  case Op_StoreCM:
   case Op_StorePConditional:
   case Op_StoreI:
   case Op_StoreL:
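eliminate_redundant_card_marks() walks the memory-input chain below a StoreCM, removes older card marks that store the same value to the same card (as long as each link in the chain has a single user), and folds their precedence edges onto the surviving node. The sketch below reproduces that chain walk with a hypothetical Store struct in place of ideal-graph nodes; the outcnt()/req() bookkeeping is collapsed into plain fields, so this is an illustration of the idea, not the compiler code.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a card-mark store node: a memory-chain link
// carrying (address, value) plus accumulated precedence dependencies.
struct Store {
  int                 addr;
  int                 val;
  Store*              mem;    // next older store on the memory chain
  int                 users;  // number of uses of this node
  std::vector<Store*> prec;   // precedence edges (oop stores)
};

// Walk the chain below n, eliminating older stores that mark the same card
// with the same value, and accumulate their precedence edges onto n,
// the same shape as eliminate_redundant_card_marks().
void eliminate_redundant(Store* n) {
  Store* prev = n;
  Store* mem = n->mem;
  while (mem != nullptr && mem->users == 1) {
    if (mem->addr == n->addr && mem->val == n->val) {
      for (Store* p : mem->prec) n->prec.push_back(p);  // keep the deps
      prev->mem = mem->mem;                             // unlink the duplicate
    } else {
      prev = mem;                                       // just keep walking
    }
    mem = prev->mem;
  }
}

int main() {
  Store dep1{0, 0, nullptr, 1, {}}, dep2{0, 0, nullptr, 1, {}};
  Store s1{100, 1, nullptr, 1, {&dep1}};      // oldest card mark
  Store s2{100, 1, &s1,     1, {&dep2}};      // redundant duplicate of s1
  Store s3{100, 1, &s2,     2, {}};           // newest card mark
  eliminate_redundant(&s3);
  std::printf("chain below s3: %s, accumulated deps: %zu\n",
              s3.mem == nullptr ? "empty" : "non-empty", s3.prec.size());
  return 0;
}

Converting the required OopStore input into a precedence edge in the Op_StoreCM case above is what allows the surviving StoreCM to accumulate several such edges.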
--- a/src/share/vm/opto/graphKit.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1453,19 +1453,22 @@
 }
 
 
-void GraphKit::pre_barrier(Node* ctl,
+void GraphKit::pre_barrier(bool do_load,
+                           Node* ctl,
                            Node* obj,
                            Node* adr,
                            uint  adr_idx,
                            Node* val,
                            const TypeOopPtr* val_type,
+                           Node* pre_val,
                            BasicType bt) {
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-      g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1528,7 +1531,11 @@
   uint adr_idx = C->get_alias_index(adr_type);
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
 
-  pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt);
+  pre_barrier(true /* do_load */,
+              control(), obj, adr, adr_idx, val, val_type,
+              NULL /* pre_val */,
+              bt);
+
   Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
@@ -3479,12 +3486,31 @@
 }
 
 // G1 pre/post barriers
-void GraphKit::g1_write_barrier_pre(Node* obj,
+void GraphKit::g1_write_barrier_pre(bool do_load,
+                                    Node* obj,
                                     Node* adr,
                                     uint alias_idx,
                                     Node* val,
                                     const TypeOopPtr* val_type,
+                                    Node* pre_val,
                                     BasicType bt) {
+
+  // Some sanity checks
+  // Note: val is unused in this routine.
+
+  if (do_load) {
+    // We need to generate the load of the previous value
+    assert(obj != NULL, "must have a base");
+    assert(adr != NULL, "where are we loading from?");
+    assert(pre_val == NULL, "loaded already?");
+    assert(val_type != NULL, "need a type");
+  } else {
+    // In this case both val_type and alias_idx are unused.
+    assert(pre_val != NULL, "must be loaded already");
+    assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
+  }
+  assert(bt == T_OBJECT, "or we shouldn't be here");
+
   IdealKit ideal(this, true);
 
   Node* tls = __ thread(); // ThreadLocalStorage
@@ -3506,32 +3532,28 @@
                                           PtrQueue::byte_offset_of_index());
   const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
                                           PtrQueue::byte_offset_of_buf());
+
   // Now the actual pointers into the thread
-
-  // set_control( ctl);
-
   Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
   Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
   Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some of the values
-
   Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
 
   // if (!marking)
   __ if_then(marking, BoolTest::ne, zero); {
     Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
 
-    const Type* t1 = adr->bottom_type();
-    const Type* t2 = val->bottom_type();
-
-    Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
-    // if (orig != NULL)
-    __ if_then(orig, BoolTest::ne, null()); {
-      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
+    if (do_load) {
       // load original value
       // alias_idx correct??
+      pre_val = __ load(no_ctrl, adr, val_type, bt, alias_idx);
+    }
+
+    // if (pre_val != NULL)
+    __ if_then(pre_val, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
 
       // is the queue for this thread full?
       __ if_then(index, BoolTest::ne, zero, likely); {
@@ -3545,10 +3567,9 @@
         next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
 #endif
 
-        // Now get the buffer location we will log the original value into and store it
+        // Now get the buffer location we will log the previous value into and store it
         Node *log_addr = __ AddP(no_base, buffer, next_indexX);
-        __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
-
+        __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
         // update the index
         __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
 
@@ -3556,9 +3577,9 @@
 
         // logging buffer is full, call the runtime
         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
-        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls);
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
       } __ end_if();  // (!index)
-    } __ end_if();  // (orig != NULL)
+    } __ end_if();  // (pre_val != NULL)
   } __ end_if();  // (!marking)
 
   // Final sync IdealKit and GraphKit.
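What the IdealKit code above emits is the control flow of the G1 SATB pre-barrier; the do_load/pre_val parameters only decide whether the previous value is loaded inside the barrier or supplied by the caller (as the Reference.get() and Unsafe.getObject() paths now do). The runtime behaviour being generated looks roughly like the sketch below, in which SATBQueue and g1_wb_pre_runtime are hypothetical stand-ins for the thread-local SATB queue and the g1_wb_pre slow path, so treat it as a shape, not the generated code itself.

#include <cstddef>
#include <cstdio>

struct oop_t {};       // opaque stand-in for a Java object
using oop = oop_t*;

// Hypothetical stand-in for the per-thread SATB mark queue.
struct SATBQueue {
  bool   active;   // is concurrent marking running?
  oop*   buffer;   // log buffer, filled from the top down
  size_t index;    // next free slot; 0 means "buffer is full"
};

// Hypothetical slow path: hand the value (and the full buffer) to the runtime.
void g1_wb_pre_runtime(oop pre_val, SATBQueue& q) {
  std::printf("slow path: flush buffer, log pre_val %p\n", (void*)pre_val);
  q.index = 64;  // pretend a fresh buffer was installed
}

// Shape of the emitted pre-barrier.  If do_load is true the previous value is
// read from *field inside the barrier; otherwise the caller supplies pre_val.
void pre_barrier(bool do_load, oop* field, oop pre_val, SATBQueue& q) {
  if (!q.active) return;                 // if (!marking): nothing to do
  if (do_load) pre_val = *field;         // load the previous value
  if (pre_val == nullptr) return;        // null previous values are not logged
  if (q.index != 0) {                    // fast path: room left in the buffer
    q.index -= 1;
    q.buffer[q.index] = pre_val;
  } else {
    g1_wb_pre_runtime(pre_val, q);       // buffer full: call the runtime
  }
}

int main() {
  oop buf[64] = {};
  SATBQueue q{true, buf, 64};
  oop_t obj;
  oop field = &obj;
  pre_barrier(true  /* do_load */, &field, nullptr, q);  // store-style barrier
  pre_barrier(false /* do_load */, nullptr, &obj, q);    // Reference.get()-style
  std::printf("queue index now %zu\n", q.index);
  return 0;
}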
--- a/src/share/vm/opto/graphKit.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Mon May 02 10:51:36 2011 -0700
@@ -544,8 +544,10 @@
                              BasicType bt);
 
   // For the few case where the barriers need special help
-  void pre_barrier(Node* ctl, Node* obj, Node* adr, uint adr_idx,
-                   Node* val, const TypeOopPtr* val_type, BasicType bt);
+  void pre_barrier(bool do_load, Node* ctl,
+                   Node* obj, Node* adr, uint adr_idx, Node* val, const TypeOopPtr* val_type,
+                   Node* pre_val,
+                   BasicType bt);
 
   void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
                     Node* val, BasicType bt, bool use_precise);
@@ -671,11 +673,13 @@
                           Node* adr,  uint adr_idx, Node* val, bool use_precise);
 
   // G1 pre/post barriers
-  void g1_write_barrier_pre(Node* obj,
+  void g1_write_barrier_pre(bool do_load,
+                            Node* obj,
                             Node* adr,
                             uint alias_idx,
                             Node* val,
                             const TypeOopPtr* val_type,
+                            Node* pre_val,
                             BasicType bt);
 
   void g1_write_barrier_post(Node* store,
--- a/src/share/vm/opto/lcm.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/lcm.cpp	Mon May 02 10:51:36 2011 -0700
@@ -688,20 +688,22 @@
       }
       ready_cnt[n->_idx] = local; // Count em up
 
-      // A few node types require changing a required edge to a precedence edge
-      // before allocation.
+#ifdef ASSERT
       if( UseConcMarkSweepGC || UseG1GC ) {
         if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
-          // Note: Required edges with an index greater than oper_input_base
-          // are not supported by the allocator.
-          // Note2: Can only depend on unmatched edge being last,
-          // can not depend on its absolute position.
-          Node *oop_store = n->in(n->req() - 1);
-          n->del_req(n->req() - 1);
-          n->add_prec(oop_store);
-          assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+          // Check the precedence edges
+          for (uint prec = n->req(); prec < n->len(); prec++) {
+            Node* oop_store = n->in(prec);
+            if (oop_store != NULL) {
+              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+            }
+          }
         }
       }
+#endif
+
+      // A few node types require changing a required edge to a precedence edge
+      // before allocation.
       if( n->is_Mach() && n->req() > TypeFunc::Parms &&
           (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
            n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
--- a/src/share/vm/opto/library_call.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/library_call.cpp	Mon May 02 10:51:36 2011 -0700
@@ -166,6 +166,10 @@
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
   int classify_unsafe_addr(Node* &base, Node* &offset);
   Node* make_unsafe_address(Node* base, Node* offset);
+  // Helper for inline_unsafe_access.
+  // Generates the guards that check whether the result of
+  // Unsafe.getObject should be recorded in an SATB log buffer.
+  void insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
   bool inline_unsafe_allocate();
@@ -240,6 +244,8 @@
   bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
   bool inline_bitCount(vmIntrinsics::ID id);
   bool inline_reverseBytes(vmIntrinsics::ID id);
+
+  bool inline_reference_get();
 };
 
 
@@ -336,6 +342,14 @@
     if (!UsePopCountInstruction)  return NULL;
     break;
 
+  case vmIntrinsics::_Reference_get:
+    // It is only when G1 is enabled that we absolutely
+    // need to use the intrinsic version of Reference.get()
+    // so that the value in the referent field, if necessary,
+    // can be registered by the pre-barrier code.
+    if (!UseG1GC) return NULL;
+    break;
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -387,6 +401,7 @@
     tty->print_cr("Intrinsic %s", str);
   }
 #endif
+
   if (kit.try_to_inline()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
       CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
@@ -402,11 +417,19 @@
   }
 
   if (PrintIntrinsics) {
-    tty->print("Did not inline intrinsic %s%s at bci:%d in",
+    if (jvms->has_method()) {
+      // Not a root compile.
+      tty->print("Did not inline intrinsic %s%s at bci:%d in",
+                 vmIntrinsics::name_at(intrinsic_id()),
+                 (is_virtual() ? " (virtual)" : ""), kit.bci());
+      kit.caller()->print_short_name(tty);
+      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+    } else {
+      // Root compile
+      tty->print("Did not generate intrinsic %s%s at bci:%d in",
                vmIntrinsics::name_at(intrinsic_id()),
                (is_virtual() ? " (virtual)" : ""), kit.bci());
-    kit.caller()->print_short_name(tty);
-    tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+    }
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
   return NULL;
@@ -418,6 +441,14 @@
   const bool is_native_ptr  = true;
   const bool is_static      = true;
 
+  if (!jvms()->has_method()) {
+    // Root JVMState has a null method.
+    assert(map()->memory()->Opcode() == Op_Parm, "");
+    // Insert the memory aliasing node
+    set_all_memory(reset_memory());
+  }
+  assert(merged_memory(), "");
+
   switch (intrinsic_id()) {
   case vmIntrinsics::_hashCode:
     return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
@@ -658,6 +689,9 @@
   case vmIntrinsics::_getCallerClass:
     return inline_native_Reflection_getCallerClass();
 
+  case vmIntrinsics::_Reference_get:
+    return inline_reference_get();
+
   default:
     // If you get here, it may be that someone has added a new intrinsic
     // to the list in vmSymbols.hpp without implementing it here.
@@ -2076,6 +2110,106 @@
 
 const static BasicType T_ADDRESS_HOLDER = T_LONG;
 
+// Helper that guards and inserts a G1 pre-barrier.
+void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val) {
+  assert(UseG1GC, "should not call this otherwise");
+
+  // We could be accessing the referent field of a reference object. If so, when G1
+  // is enabled, we need to log the value in the referent field in an SATB buffer.
+  // This routine performs some compile time filters and generates suitable
+  // runtime filters that guard the pre-barrier code.
+
+  // Some compile time checks.
+
+  // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
+  const TypeX* otype = offset->find_intptr_t_type();
+  if (otype != NULL && otype->is_con() &&
+      otype->get_con() != java_lang_ref_Reference::referent_offset) {
+    // Constant offset but not the reference_offset so just return
+    return;
+  }
+
+  // We only need to generate the runtime guards for instances.
+  const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
+  if (btype != NULL) {
+    if (btype->isa_aryptr()) {
+      // Array type so nothing to do
+      return;
+    }
+
+    const TypeInstPtr* itype = btype->isa_instptr();
+    if (itype != NULL) {
+      // Can the klass of base_oop be statically determined
+      // to be _not_ a sub-class of Reference?
+      ciKlass* klass = itype->klass();
+      if (klass->is_subtype_of(env()->Reference_klass()) &&
+          !env()->Reference_klass()->is_subtype_of(klass)) {
+        return;
+      }
+    }
+  }
+
+  // The compile time filters did not reject base_oop/offset so
+  // we need to generate the following runtime filters
+  //
+  // if (offset == java_lang_ref_Reference::referent_offset) {
+  //   if (base != null) {
+  //     if (klass(base)->reference_type() != REF_NONE) {
+  //       pre_barrier(_, pre_val, ...);
+  //     }
+  //   }
+  // }
+
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+
+  IdealKit ideal(this);
+#define __ ideal.
+
+  const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() +
+                                        sizeof(oopDesc);
+
+  Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
+
+  __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
+    __ if_then(base_oop, BoolTest::ne, null(), likely); {
+
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+
+      Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
+      Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
+
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+
+      Node* one = __ ConI(1);
+
+      __ if_then(is_instof, BoolTest::eq, one, unlikely); {
+
+        // Update graphKit from IdealKit.
+        sync_kit(ideal);
+
+        // Use the pre-barrier to record the value in the referent field
+        pre_barrier(false /* do_load */,
+                    __ ctrl(),
+                    NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
+                    pre_val /* pre_val */,
+                    T_OBJECT);
+
+        // Update IdealKit from graphKit.
+        __ sync_kit(this);
+
+      } __ end_if(); // _ref_type != ref_none
+    } __ end_if(); // base  != NULL
+  } __ end_if(); // offset == referent_offset
+
+  // Final sync IdealKit and GraphKit.
+  final_sync(ideal);
+#undef __
+}
+
+
 // Interpret Unsafe.fieldOffset cookies correctly:
 extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
 
@@ -2152,9 +2286,11 @@
   // Build address expression.  See the code in inline_unsafe_prefetch.
   Node *adr;
   Node *heap_base_oop = top();
+  Node* offset = top();
+
   if (!is_native_ptr) {
     // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
-    Node* offset = pop_pair();
+    offset = pop_pair();
     // The base is either a Java object or a value produced by Unsafe.staticFieldBase
     Node* base   = pop();
     // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
@@ -2195,6 +2331,13 @@
   // or Compile::must_alias will throw a diagnostic assert.)
   bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
 
+  // If we are reading the value of the referent field of a Reference
+  // object (either by using Unsafe directly or through reflection)
+  // then, if G1 is enabled, we need to record the referent in an
+  // SATB log buffer using the pre-barrier mechanism.
+  bool need_read_barrier = UseG1GC && !is_native_ptr && !is_store &&
+                           offset != top() && heap_base_oop != top();
+
   if (!is_store && type == T_OBJECT) {
     // Attempt to infer a sharper value type from the offset and base type.
     ciKlass* sharpened_klass = NULL;
@@ -2278,8 +2421,13 @@
     case T_SHORT:
     case T_INT:
     case T_FLOAT:
+      push(p);
+      break;
     case T_OBJECT:
-      push( p );
+      if (need_read_barrier) {
+        insert_g1_pre_barrier(heap_base_oop, offset, p);
+      }
+      push(p);
       break;
     case T_ADDRESS:
       // Cast to an int type.
@@ -2534,7 +2682,10 @@
   case T_OBJECT:
      // reference stores need a store barrier.
     // (They don't if CAS fails, but it isn't worth checking.)
-    pre_barrier(control(), base, adr, alias_idx, newval, value_type->make_oopptr(), T_OBJECT);
+    pre_barrier(true /* do_load*/,
+                control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
+                NULL /* pre_val*/,
+                T_OBJECT);
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
@@ -5231,3 +5382,44 @@
                     copyfunc_addr, copyfunc_name, adr_type,
                     src_start, dest_start, copy_length XTOP);
 }
+
+//----------------------------inline_reference_get----------------------------
+
+bool LibraryCallKit::inline_reference_get() {
+  const int nargs = 1; // self
+
+  guarantee(java_lang_ref_Reference::referent_offset > 0,
+            "should have already been set");
+
+  int referent_offset = java_lang_ref_Reference::referent_offset;
+
+  // Restore the stack and pop off the argument
+  _sp += nargs;
+  Node *reference_obj = pop();
+
+  // Null check on self without removing any arguments.
+  _sp += nargs;
+  reference_obj = do_null_check(reference_obj, T_OBJECT);
+  _sp -= nargs;
+
+  if (stopped()) return true;
+
+  Node *adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
+
+  ciInstanceKlass* klass = env()->Object_klass();
+  const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
+
+  Node* no_ctrl = NULL;
+  Node *result = make_load(no_ctrl, adr, object_type, T_OBJECT);
+
+  // Use the pre-barrier to record the value in the referent field
+  pre_barrier(false /* do_load */,
+              control(),
+              NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
+              result /* pre_val */,
+              T_OBJECT);
+
+  push(result);
+  return true;
+}
+
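The guard emitted by insert_g1_pre_barrier() amounts to three nested runtime tests around the pre-barrier; the compile-time filtering (constant non-referent offsets, array bases, types statically known not to be Reference subclasses) only removes cases where the tests could never pass. Ignoring that filtering, the emitted check behaves like the hypothetical sketch below, where Klass, Object, satb_enqueue and kReferentOffset are simplified stand-ins for the real runtime notions:

#include <cstdio>

// Hypothetical stand-ins for the runtime types involved.
struct Klass  { bool is_reference; };
struct Object { Klass* klass; };

const long kReferentOffset = 16;  // assumed java_lang_ref_Reference::referent_offset

// Hypothetical pre-barrier hook; in the compiled code this is the SATB
// pre-barrier with do_load == false and the loaded value as pre_val.
void satb_enqueue(Object* pre_val) {
  std::printf("logging referent %p in the SATB buffer\n", (void*)pre_val);
}

// Runtime filter shape: only a non-null value read from the referent field of
// a non-null java.lang.ref.Reference instance needs the barrier.
void maybe_log_referent(Object* base, long offset, Object* loaded_value) {
  if (offset == kReferentOffset) {
    if (base != nullptr) {
      if (base->klass->is_reference) {   // klass(base)->reference_type() != REF_NONE
        if (loaded_value != nullptr) {
          satb_enqueue(loaded_value);
        }
      }
    }
  }
}

int main() {
  Klass ref_klass{true}, plain_klass{false};
  Object referent{&plain_klass};
  Object weak_ref{&ref_klass};
  maybe_log_referent(&weak_ref, kReferentOffset, &referent);  // logs the referent
  maybe_log_referent(&weak_ref, 24, &referent);               // wrong offset: no log
  return 0;
}

The JNI and Unsafe entry points later in this changeset apply the same decision at runtime before calling G1SATBCardTableModRefBS::enqueue().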
--- a/src/share/vm/opto/memnode.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/memnode.cpp	Mon May 02 10:51:36 2011 -0700
@@ -2155,9 +2155,12 @@
   Node* mem     = in(MemNode::Memory);
   Node* address = in(MemNode::Address);
 
-  // Back-to-back stores to same address?  Fold em up.
-  // Generally unsafe if I have intervening uses...
-  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) {
+  // Back-to-back stores to same address?  Fold em up.  Generally
+  // unsafe if I have intervening uses...  Also disallowed for StoreCM
+  // since they must follow each StoreP operation.  Redundant StoreCMs
+  // are eliminated just before matching in final_graph_reshape.
+  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address) &&
+      mem->Opcode() != Op_StoreCM) {
     // Looking at a dead closed cycle of memory?
     assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
 
--- a/src/share/vm/opto/output.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/opto/output.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1354,15 +1354,20 @@
         // Check that oop-store precedes the card-mark
         else if( mach->ideal_Opcode() == Op_StoreCM ) {
           uint storeCM_idx = j;
-          Node *oop_store = mach->in(mach->_cnt);  // First precedence edge
-          assert( oop_store != NULL, "storeCM expects a precedence edge");
-          uint i4;
-          for( i4 = 0; i4 < last_inst; ++i4 ) {
-            if( b->_nodes[i4] == oop_store ) break;
+          int count = 0;
+          for (uint prec = mach->req(); prec < mach->len(); prec++) {
+            Node *oop_store = mach->in(prec);  // Precedence edge
+            if (oop_store == NULL) continue;
+            count++;
+            uint i4;
+            for( i4 = 0; i4 < last_inst; ++i4 ) {
+              if( b->_nodes[i4] == oop_store ) break;
+            }
+            // Note: This test can provide a false failure if other precedence
+            // edges have been added to the storeCMNode.
+            assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
           }
-          // Note: This test can provide a false failure if other precedence
-          // edges have been added to the storeCMNode.
-          assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+          assert(count > 0, "storeCM expects at least one precedence edge");
         }
 #endif
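The reworked assertion above checks every precedence edge of a StoreCM rather than just the first one: each oop store feeding the card mark must either live in another block or be scheduled before it, and at least one such edge must exist. A stand-alone restatement of that check, with node ids and a plain vector standing in for the block's node array (a debug-only sketch, not the output.cpp code):

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins: a scheduled block is an ordered list of node ids,
// and a card mark carries the ids of its oop-store precedence edges.
struct CardMark {
  int              id;
  std::vector<int> oop_stores;  // precedence edges
};

// Position of a node id in the block, or block.size() if it is not local.
static size_t index_in_block(const std::vector<int>& block, int id) {
  for (size_t i = 0; i < block.size(); i++) {
    if (block[i] == id) return i;
  }
  return block.size();
}

// Debug-only check mirroring the assertion: every oop store that feeds the
// card mark through a precedence edge must be emitted before it, or live in
// another block entirely, and there must be at least one such edge.
void verify_card_mark_order(const std::vector<int>& block, const CardMark& cm) {
  size_t cm_idx = index_in_block(block, cm.id);
  assert(!cm.oop_stores.empty() && "storeCM expects at least one precedence edge");
  for (int store : cm.oop_stores) {
    size_t s_idx = index_in_block(block, store);
    assert((s_idx == block.size() || s_idx < cm_idx) &&
           "CM card-mark executes before oop-store");
  }
}

int main() {
  std::vector<int> block = {1, 2, 3, 4};   // node ids in schedule order
  CardMark cm{4, {2, 3}};                   // card mark after both oop stores
  verify_card_mark_order(block, cm);
  std::printf("card-mark ordering verified\n");
  return 0;
}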
 
--- a/src/share/vm/prims/jni.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/prims/jni.cpp	Mon May 02 10:51:36 2011 -0700
@@ -29,6 +29,9 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "interpreter/linkResolver.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif // SERIALGC
 #include "memory/allocation.inline.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/oopFactory.hpp"
@@ -1724,6 +1727,26 @@
     o = JvmtiExport::jni_GetField_probe(thread, obj, o, k, fieldID, false);
   }
   jobject ret = JNIHandles::make_local(env, o->obj_field(offset));
+#ifndef SERIALGC
+  // If G1 is enabled and we are accessing the value of the referent
+  // field in a reference object then we need to register a non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL &&
+        offset == java_lang_ref_Reference::referent_offset &&
+        instanceKlass::cast(k)->reference_type() != REF_NONE) {
+      assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+      needs_barrier = true;
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
   DTRACE_PROBE1(hotspot_jni, GetObjectField__return, ret);
   return ret;
 JNI_END
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Mon May 02 10:51:36 2011 -0700
@@ -525,7 +525,7 @@
     ObjectLocker ol(loader, THREAD);
 
     // need the path as java.lang.String
-    Handle path = java_lang_String::create_from_str(segment, THREAD);
+    Handle path = java_lang_String::create_from_platform_dependent_str(segment, THREAD);
     if (HAS_PENDING_EXCEPTION) {
       CLEAR_PENDING_EXCEPTION;
       return JVMTI_ERROR_INTERNAL;
--- a/src/share/vm/prims/unsafe.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Mon May 02 10:51:36 2011 -0700
@@ -24,6 +24,9 @@
 
 #include "precompiled.hpp"
 #include "classfile/vmSymbols.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif // SERIALGC
 #include "memory/allocation.inline.hpp"
 #include "prims/jni.h"
 #include "prims/jvm.h"
@@ -193,7 +196,32 @@
   UnsafeWrapper("Unsafe_GetObject");
   if (obj == NULL)  THROW_0(vmSymbols::java_lang_NullPointerException());
   GET_OOP_FIELD(obj, offset, v)
-  return JNIHandles::make_local(env, v);
+  jobject ret = JNIHandles::make_local(env, v);
+#ifndef SERIALGC
+  // We could be accessing the referent field in a reference
+  // object. If G1 is enabled then we need to register a non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL) {
+      if (offset == java_lang_ref_Reference::referent_offset) {
+        oop o = JNIHandles::resolve_non_null(obj);
+        klassOop k = o->klass();
+        if (instanceKlass::cast(k)->reference_type() != REF_NONE) {
+          assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+          needs_barrier = true;
+        }
+      }
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
+  return ret;
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObject140(JNIEnv *env, jobject unsafe, jobject obj, jint offset, jobject x_h))
@@ -226,7 +254,32 @@
 UNSAFE_ENTRY(jobject, Unsafe_GetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
   UnsafeWrapper("Unsafe_GetObject");
   GET_OOP_FIELD(obj, offset, v)
-  return JNIHandles::make_local(env, v);
+  jobject ret = JNIHandles::make_local(env, v);
+#ifndef SERIALGC
+  // We could be accessing the referent field in a reference
+  // object. If G1 is enabled then we need to register a non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL) {
+      if (offset == java_lang_ref_Reference::referent_offset && obj != NULL) {
+        oop o = JNIHandles::resolve(obj);
+        klassOop k = o->klass();
+        if (instanceKlass::cast(k)->reference_type() != REF_NONE) {
+          assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+          needs_barrier = true;
+        }
+      }
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
+  return ret;
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
--- a/src/share/vm/runtime/arguments.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Mon May 02 10:51:36 2011 -0700
@@ -244,6 +244,12 @@
   { "MaxLiveObjectEvacuationRatio",
                            JDK_Version::jdk_update(6,24), JDK_Version::jdk(8) },
   { "ForceSharedSpaces",   JDK_Version::jdk_update(6,25), JDK_Version::jdk(8) },
+  { "UseParallelOldGCCompacting",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
+  { "UseParallelDensePrefixUpdate",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
+  { "UseParallelOldGCDensePrefix",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
   { "AllowTransitionalJSR292",       JDK_Version::jdk(7), JDK_Version::jdk(8) },
   { NULL, JDK_Version(0), JDK_Version(0) }
 };
@@ -801,26 +807,22 @@
 
   JDK_Version since = JDK_Version();
 
-  if (parse_argument(arg, origin)) {
-    // do nothing
-  } else if (is_newly_obsolete(arg, &since)) {
-    enum { bufsize = 256 };
-    char buffer[bufsize];
-    since.to_string(buffer, bufsize);
-    jio_fprintf(defaultStream::error_stream(),
-      "Warning: The flag %s has been EOL'd as of %s and will"
-      " be ignored\n", arg, buffer);
-  } else {
-    if (!ignore_unrecognized) {
-      jio_fprintf(defaultStream::error_stream(),
-                  "Unrecognized VM option '%s'\n", arg);
-      // allow for commandline "commenting out" options like -XX:#+Verbose
-      if (strlen(arg) == 0 || arg[0] != '#') {
-        return false;
-      }
-    }
+  if (parse_argument(arg, origin) || ignore_unrecognized) {
+    return true;
   }
-  return true;
+
+  const char * const argname = *arg == '+' || *arg == '-' ? arg + 1 : arg;
+  if (is_newly_obsolete(arg, &since)) {
+    char version[256];
+    since.to_string(version, sizeof(version));
+    warning("ignoring option %s; support was removed in %s", argname, version);
+    return true;
+  }
+
+  jio_fprintf(defaultStream::error_stream(),
+              "Unrecognized VM option '%s'\n", argname);
+  // allow for commandline "commenting out" options like -XX:#+Verbose
+  return arg[0] == '#';
 }
 
 bool Arguments::process_settings_file(const char* file_name, bool should_exist, jboolean ignore_unrecognized) {
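
After this rewrite, process_argument returns early for anything that parses (or whenever unrecognized options are being ignored), accepts a newly obsolete flag with a warning, and only then reports an error, still honouring the -XX:# "commented out" convention. A standalone sketch of that control flow with a stubbed parser and a tiny obsolete-flag table (the table entries and toy_* helpers are illustrative, not the VM's):

#include <cstdio>
#include <cstring>

// Toy obsolete-flag table: flag name plus the release that dropped it.
struct ToyObsoleteFlag {
  const char* name;
  const char* removed_in;
};

static const ToyObsoleteFlag toy_obsolete_flags[] = {
  { "UseParallelOldGCCompacting",  "6u27" },   // illustrative entries only
  { "UseParallelOldGCDensePrefix", "6u27" },
  { NULL, NULL }
};

// Stand-in for parse_argument(): accept exactly one known flag here.
static bool toy_parse_argument(const char* arg) {
  return strcmp(arg, "+UseG1GC") == 0;
}

static const ToyObsoleteFlag* toy_is_obsolete(const char* name) {
  for (const ToyObsoleteFlag* f = toy_obsolete_flags; f->name != NULL; f++) {
    if (strcmp(f->name, name) == 0) return f;
  }
  return NULL;
}

// Mirrors the ordering in the hunk above: parse, maybe ignore, warn about
// obsolete flags, otherwise report an error unless the option is
// "commented out" with a leading '#'.
static bool toy_process_argument(const char* arg, bool ignore_unrecognized) {
  if (toy_parse_argument(arg) || ignore_unrecognized) {
    return true;
  }
  const char* argname = (*arg == '+' || *arg == '-') ? arg + 1 : arg;
  if (const ToyObsoleteFlag* f = toy_is_obsolete(argname)) {
    fprintf(stderr, "warning: ignoring option %s; support was removed in %s\n",
            argname, f->removed_in);
    return true;
  }
  fprintf(stderr, "Unrecognized VM option '%s'\n", argname);
  return arg[0] == '#';   // -XX:#+Foo is tolerated as a comment
}

int main() {
  bool ok = toy_process_argument("+UseG1GC", false)                       // parses
         && toy_process_argument("+UseParallelOldGCCompacting", false)    // obsolete: warn, accept
         && !toy_process_argument("+NoSuchFlag", false)                   // hard error
         && toy_process_argument("#+NoSuchFlag", false);                  // commented out
  return ok ? 0 : 1;
}

One behavioural consequence of the new ordering is that obsolete-flag warnings are skipped entirely when unrecognized options are being ignored, since that check now comes first.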
--- a/src/share/vm/runtime/globals.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/runtime/globals.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1358,13 +1358,6 @@
   product(bool, UseParallelOldGC, false,                                    \
           "Use the Parallel Old garbage collector")                         \
                                                                             \
-  product(bool, UseParallelOldGCCompacting, true,                           \
-          "In the Parallel Old garbage collector use parallel compaction")  \
-                                                                            \
-  product(bool, UseParallelDensePrefixUpdate, true,                         \
-          "In the Parallel Old garbage collector use parallel dense"        \
-          " prefix update")                                                 \
-                                                                            \
   product(uintx, HeapMaximumCompactionInterval, 20,                         \
           "How often should we maximally compact the heap (not allowing "   \
           "any dead space)")                                                \
@@ -1384,9 +1377,6 @@
           "The standard deviation used by the par compact dead wood"        \
           "limiter (a number between 0-100).")                              \
                                                                             \
-  product(bool, UseParallelOldGCDensePrefix, true,                          \
-          "Use a dense prefix with the Parallel Old garbage collector")     \
-                                                                            \
   product(uintx, ParallelGCThreads, 0,                                      \
           "Number of parallel threads parallel gc will use")                \
                                                                             \
--- a/src/share/vm/runtime/javaCalls.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/runtime/javaCalls.cpp	Mon May 02 10:51:36 2011 -0700
@@ -389,7 +389,7 @@
   // to Java
   if (!os::stack_shadow_pages_available(THREAD, method)) {
     // Throw stack overflow exception with preinitialized exception.
-    Exceptions::throw_stack_overflow_exception(THREAD, __FILE__, __LINE__);
+    Exceptions::throw_stack_overflow_exception(THREAD, __FILE__, __LINE__, method);
     return;
   } else {
     // Touch pages checked if the OS needs them to be touched to be mapped.
--- a/src/share/vm/runtime/vmThread.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/runtime/vmThread.cpp	Mon May 02 10:51:36 2011 -0700
@@ -291,7 +291,9 @@
     // Among other things, this ensures that Eden top is correct.
     Universe::heap()->prepare_for_verify();
     os::check_heap();
-    Universe::verify(true, true); // Silent verification to not polute normal output
+    // Silent verification so as not to pollute normal output,
+    // unless we really asked for it.
+    Universe::verify(true, !(PrintGCDetails || Verbose));
   }
 
   CompileBroker::set_should_block();
--- a/src/share/vm/services/g1MemoryPool.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/services/g1MemoryPool.cpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,10 +34,10 @@
                                      size_t init_size,
                                      bool support_usage_threshold) :
   _g1h(g1h), CollectedMemoryPool(name,
-                                 MemoryPool::Heap,
-                                 init_size,
-                                 undefined_max(),
-                                 support_usage_threshold) {
+                                   MemoryPool::Heap,
+                                   init_size,
+                                   undefined_max(),
+                                   support_usage_threshold) {
   assert(UseG1GC, "sanity");
 }
 
@@ -48,44 +48,27 @@
 
 // See the comment at the top of g1MemoryPool.hpp
 size_t G1MemoryPoolSuper::eden_space_used(G1CollectedHeap* g1h) {
-  size_t young_list_length = g1h->young_list()->length();
-  size_t eden_used = young_list_length * HeapRegion::GrainBytes;
-  size_t survivor_used = survivor_space_used(g1h);
-  eden_used = subtract_up_to_zero(eden_used, survivor_used);
-  return eden_used;
+  return g1h->g1mm()->eden_space_used();
 }
 
 // See the comment at the top of g1MemoryPool.hpp
 size_t G1MemoryPoolSuper::survivor_space_committed(G1CollectedHeap* g1h) {
-  return MAX2(survivor_space_used(g1h), (size_t) HeapRegion::GrainBytes);
+  return g1h->g1mm()->survivor_space_committed();
 }
 
 // See the comment at the top of g1MemoryPool.hpp
 size_t G1MemoryPoolSuper::survivor_space_used(G1CollectedHeap* g1h) {
-  size_t survivor_num = g1h->g1_policy()->recorded_survivor_regions();
-  size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
-  return survivor_used;
+  return g1h->g1mm()->survivor_space_used();
 }
 
 // See the comment at the top of g1MemoryPool.hpp
 size_t G1MemoryPoolSuper::old_space_committed(G1CollectedHeap* g1h) {
-  size_t committed = overall_committed(g1h);
-  size_t eden_committed = eden_space_committed(g1h);
-  size_t survivor_committed = survivor_space_committed(g1h);
-  committed = subtract_up_to_zero(committed, eden_committed);
-  committed = subtract_up_to_zero(committed, survivor_committed);
-  committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
-  return committed;
+  return g1h->g1mm()->old_space_committed();
 }
 
 // See the comment at the top of g1MemoryPool.hpp
 size_t G1MemoryPoolSuper::old_space_used(G1CollectedHeap* g1h) {
-  size_t used = overall_used(g1h);
-  size_t eden_used = eden_space_used(g1h);
-  size_t survivor_used = survivor_space_used(g1h);
-  used = subtract_up_to_zero(used, eden_used);
-  used = subtract_up_to_zero(used, survivor_used);
-  return used;
+  return g1h->g1mm()->old_space_used();
 }
 
 G1EdenPool::G1EdenPool(G1CollectedHeap* g1h) :
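
The bodies removed above derived the pool figures from region counts, clamping with subtract_up_to_zero() because the inputs are sampled at slightly different times and a straight subtraction could wrap below zero; that arithmetic is now centralized behind g1h->g1mm() (G1MonitoringSupport). A small standalone sketch of the retired derivation with the region counts passed in explicitly (a toy, not the monitoring code):

#include <cassert>
#include <cstddef>

// Toy version of the sizing arithmetic removed from G1MemoryPoolSuper above;
// all inputs are plain numbers instead of live heap queries.
static const size_t toy_region_size = 1024 * 1024;   // stand-in for HeapRegion::GrainBytes

// Defensive subtraction: never let a difference of concurrently sampled
// values wrap around below zero.
static size_t subtract_up_to_zero(size_t x, size_t y) {
  return (x > y) ? x - y : 0;
}

static size_t toy_survivor_used(size_t survivor_regions) {
  return survivor_regions * toy_region_size;
}

static size_t toy_eden_used(size_t young_regions, size_t survivor_regions) {
  // The young list counts eden and survivor regions together.
  return subtract_up_to_zero(young_regions * toy_region_size,
                             toy_survivor_used(survivor_regions));
}

static size_t toy_old_used(size_t overall_used, size_t young_regions, size_t survivor_regions) {
  size_t used = subtract_up_to_zero(overall_used,
                                    toy_eden_used(young_regions, survivor_regions));
  return subtract_up_to_zero(used, toy_survivor_used(survivor_regions));
}

int main() {
  // 10 young regions, 2 of them survivors, 64 regions' worth of data used overall.
  size_t overall = 64 * toy_region_size;
  assert(toy_survivor_used(2) == 2 * toy_region_size);
  assert(toy_eden_used(10, 2) == 8 * toy_region_size);
  assert(toy_old_used(overall, 10, 2) == 54 * toy_region_size);
  return 0;
}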
--- a/src/share/vm/services/g1MemoryPool.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/services/g1MemoryPool.hpp	Mon May 02 10:51:36 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,68 +46,9 @@
 // get, as this does affect the performance and behavior of G1. Which
 // is why we introduce the three memory pools implemented here.
 //
-// The above approach inroduces a couple of challenging issues in the
-// implementation of the three memory pools:
-//
-// 1) The used space calculation for a pool is not necessarily
-// independent of the others. We can easily get from G1 the overall
-// used space in the entire heap, the number of regions in the young
-// generation (includes both eden and survivors), and the number of
-// survivor regions. So, from that we calculate:
-//
-//  survivor_used = survivor_num * region_size
-//  eden_used     = young_region_num * region_size - survivor_used
-//  old_gen_used  = overall_used - eden_used - survivor_used
-//
-// Note that survivor_used and eden_used are upper bounds. To get the
-// actual value we would have to iterate over the regions and add up
-// ->used(). But that'd be expensive. So, we'll accept some lack of
-// accuracy for those two. But, we have to be careful when calculating
-// old_gen_used, in case we subtract from overall_used more then the
-// actual number and our result goes negative.
-//
-// 2) Calculating the used space is straightforward, as described
-// above. However, how do we calculate the committed space, given that
-// we allocate space for the eden, survivor, and old gen out of the
-// same pool of regions? One way to do this is to use the used value
-// as also the committed value for the eden and survivor spaces and
-// then calculate the old gen committed space as follows:
-//
-//  old_gen_committed = overall_committed - eden_committed - survivor_committed
+// See comments in g1MonitoringSupport.hpp for additional details
+// on this model.
 //
-// Maybe a better way to do that would be to calculate used for eden
-// and survivor as a sum of ->used() over their regions and then
-// calculate committed as region_num * region_size (i.e., what we use
-// to calculate the used space now). This is something to consider
-// in the future.
-//
-// 3) Another decision that is again not straightforward is what is
-// the max size that each memory pool can grow to. One way to do this
-// would be to use the committed size for the max for the eden and
-// survivors and calculate the old gen max as follows (basically, it's
-// a similar pattern to what we use for the committed space, as
-// described above):
-//
-//  old_gen_max = overall_max - eden_max - survivor_max
-//
-// Unfortunately, the above makes the max of each pool fluctuate over
-// time and, even though this is allowed according to the spec, it
-// broke several assumptions in the M&M framework (there were cases
-// where used would reach a value greater than max). So, for max we
-// use -1, which means "undefined" according to the spec.
-//
-// 4) Now, there is a very subtle issue with all the above. The
-// framework will call get_memory_usage() on the three pools
-// asynchronously. As a result, each call might get a different value
-// for, say, survivor_num which will yield inconsistent values for
-// eden_used, survivor_used, and old_gen_used (as survivor_num is used
-// in the calculation of all three). This would normally be
-// ok. However, it's possible that this might cause the sum of
-// eden_used, survivor_used, and old_gen_used to go over the max heap
-// size and this seems to sometimes cause JConsole (and maybe other
-// clients) to get confused. There's not a really an easy / clean
-// solution to this problem, due to the asynchrounous nature of the
-// framework.
 
 
 // This class is shared by the three G1 memory pool classes
@@ -116,22 +57,6 @@
 // (see comment above), we put the calculations in this class so that
 // we can easily share them among the subclasses.
 class G1MemoryPoolSuper : public CollectedMemoryPool {
-private:
-  // It returns x - y if x > y, 0 otherwise.
-  // As described in the comment above, some of the inputs to the
-  // calculations we have to do are obtained concurrently and hence
-  // may be inconsistent with each other. So, this provides a
-  // defensive way of performing the subtraction and avoids the value
-  // going negative (which would mean a very large result, given that
-  // the parameter are size_t).
-  static size_t subtract_up_to_zero(size_t x, size_t y) {
-    if (x > y) {
-      return x - y;
-    } else {
-      return 0;
-    }
-  }
-
 protected:
   G1CollectedHeap* _g1h;
 
@@ -148,13 +73,6 @@
     return (size_t) -1;
   }
 
-  static size_t overall_committed(G1CollectedHeap* g1h) {
-    return g1h->capacity();
-  }
-  static size_t overall_used(G1CollectedHeap* g1h) {
-    return g1h->used_unlocked();
-  }
-
   static size_t eden_space_committed(G1CollectedHeap* g1h);
   static size_t eden_space_used(G1CollectedHeap* g1h);
 
--- a/src/share/vm/utilities/exceptions.cpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/utilities/exceptions.cpp	Mon May 02 10:51:36 2011 -0700
@@ -207,7 +207,7 @@
 }
 
 
-void Exceptions::throw_stack_overflow_exception(Thread* THREAD, const char* file, int line) {
+void Exceptions::throw_stack_overflow_exception(Thread* THREAD, const char* file, int line, methodHandle method) {
   Handle exception;
   if (!THREAD->has_pending_exception()) {
     klassOop k = SystemDictionary::StackOverflowError_klass();
@@ -215,13 +215,13 @@
     exception = Handle(THREAD, e);  // fill_in_stack trace does gc
     assert(instanceKlass::cast(k)->is_initialized(), "need to increase min_stack_allowed calculation");
     if (StackTraceInThrowable) {
-      java_lang_Throwable::fill_in_stack_trace(exception);
+      java_lang_Throwable::fill_in_stack_trace(exception, method());
     }
   } else {
     // if prior exception, throw that one instead
     exception = Handle(THREAD, THREAD->pending_exception());
   }
-  _throw_oop(THREAD, file, line, exception());
+  _throw(THREAD, file, line, exception);
 }
 
 void Exceptions::fthrow(Thread* thread, const char* file, int line, Symbol* h_name, const char* format, ...) {
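
Two things change in throw_stack_overflow_exception: the method that was about to be entered is now passed through to java_lang_Throwable::fill_in_stack_trace, and the throw goes through _throw with the Handle instead of _throw_oop. Presumably the extra argument lets the synthesized backtrace name the callee even though no frame for it exists yet; the sketch below is only a toy illustration of that idea, under that assumption, with every name invented for the example:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Toy trace builder: the real VM walks physical frames; here the caller
// hands in the frames it already has plus, optionally, the method whose
// entry triggered the overflow check (it has no frame of its own yet).
typedef std::vector<std::string> ToyTrace;

static ToyTrace build_overflow_trace(const std::vector<std::string>& walked_frames,
                                     const std::string* pending_callee) {
  ToyTrace trace;
  if (pending_callee != NULL) {
    trace.push_back(*pending_callee);   // the method that was about to run
  }
  trace.insert(trace.end(), walked_frames.begin(), walked_frames.end());
  return trace;
}

int main() {
  std::vector<std::string> frames;
  frames.push_back("Caller.run()");
  std::string callee = "Deep.recurse()";
  ToyTrace with_callee = build_overflow_trace(frames, &callee);
  ToyTrace without     = build_overflow_trace(frames, NULL);
  assert(with_callee.size() == 2 && with_callee[0] == "Deep.recurse()");
  assert(without.size() == 1 && without[0] == "Caller.run()");
  return 0;
}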
--- a/src/share/vm/utilities/exceptions.hpp	Mon May 02 00:55:09 2011 -0700
+++ b/src/share/vm/utilities/exceptions.hpp	Mon May 02 10:51:36 2011 -0700
@@ -144,7 +144,7 @@
                              const char* message,
                              ExceptionMsgToUtf8Mode to_utf8_safe = safe_to_utf8);
 
-  static void throw_stack_overflow_exception(Thread* thread, const char* file, int line);
+  static void throw_stack_overflow_exception(Thread* thread, const char* file, int line, methodHandle method);
 
   // for AbortVMOnException flag
   NOT_PRODUCT(static void debug_check_abort(Handle exception, const char* message = NULL);)