changeset 577:f9d5cfc2afa2

Merge
author xdono
date Fri, 27 Feb 2009 15:13:00 -0800
parents 69c752d99841 (current diff) dae503d9f04c (diff)
children f5eac45b1641 0fbdb4381b99
files
diffstat 176 files changed, 3069 insertions(+), 995 deletions(-)
--- a/.hgtags	Sat Jan 31 17:19:42 2009 -0800
+++ b/.hgtags	Fri Feb 27 15:13:00 2009 -0800
@@ -18,3 +18,9 @@
 f9d938ede1960d18cb7cf23c645b026519c1a678 jdk7-b41
 ad8c8ca4ab0f4c86e74c061958f44a8f4a930f2c jdk7-b42
 fc6a5ae3fef5ebacfa896dbb3ae37715e388e282 jdk7-b43
+809e899c638bd9b21836abf9d09ab2a30ff3900b jdk7-b44
+945bf754069766e76873c53102fae48abf04cf5b jdk7-b45
+16bb38eeda35b46268eefa4c1f829eb086e0ca46 jdk7-b46
+fcb923bad68e2b10380a030ea83a723f4dc3d4d6 jdk7-b47
+bcb33806d186561c781992e5f4d8a90bb033f9f0 jdk7-b48
+8b22ccb5aba2c6c11bddf6488a7bb7ef5b4bf2be jdk7-b49
--- a/agent/src/os/linux/ps_core.c	Sat Jan 31 17:19:42 2009 -0800
+++ b/agent/src/os/linux/ps_core.c	Fri Feb 27 15:13:00 2009 -0800
@@ -238,8 +238,8 @@
   // Ignore the rest of the FileMapHeader. We don't need those fields here.
 };
 
-static bool read_int(struct ps_prochandle* ph, uintptr_t addr, int* pvalue) {
-   int i;
+static bool read_jboolean(struct ps_prochandle* ph, uintptr_t addr, jboolean* pvalue) {
+   jboolean i;
    if (ps_pdread(ph, (psaddr_t) addr, &i, sizeof(i)) == PS_OK) {
       *pvalue = i;
       return true;
@@ -295,7 +295,7 @@
          int fd = -1, m = 0;
          uintptr_t base = 0, useSharedSpacesAddr = 0;
          uintptr_t sharedArchivePathAddrAddr = 0, sharedArchivePathAddr = 0;
-         int useSharedSpaces = 0;
+         jboolean useSharedSpaces = 0;
          map_info* mi = 0;
 
          memset(classes_jsa, 0, sizeof(classes_jsa));
@@ -306,12 +306,15 @@
             return false;
          }
 
-         if (read_int(ph, useSharedSpacesAddr, &useSharedSpaces) != true) {
+         // HotSpot VM types are not exported to this library, so the
+         // equivalent type jboolean is used to read the value of
+         // UseSharedSpaces, which is the same as the HotSpot type "bool".
+         if (read_jboolean(ph, useSharedSpacesAddr, &useSharedSpaces) != true) {
             print_debug("can't read the value of 'UseSharedSpaces' flag\n");
             return false;
          }
 
-         if (useSharedSpaces == 0) {
+         if ((int)useSharedSpaces == 0) {
             print_debug("UseSharedSpaces is false, assuming -Xshare:off!\n");
             return true;
          }
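
The hunk above reads UseSharedSpaces as a one-byte jboolean rather than a four-byte int, since the flag has the width of the HotSpot "bool" type rather than an int. A minimal standalone sketch of the same idea, with a hypothetical target_read() standing in for ps_pdread():

    #include <cstdint>
    #include <cstring>

    typedef uint8_t jboolean;   // one-byte JNI type, as used in the patch

    // Hypothetical stand-in for ps_pdread(): copy 'size' bytes from a fake
    // target-process image at 'offset' into 'buf'.
    static bool target_read(const unsigned char* image, size_t offset,
                            void* buf, size_t size) {
      std::memcpy(buf, image + offset, size);
      return true;
    }

    int main() {
      // One-byte flag followed by unrelated bytes, as laid out in the target VM.
      const unsigned char image[] = { 1 /* UseSharedSpaces */, 0x7f, 0x00, 0x00 };

      jboolean use_shared_spaces = 0;
      target_read(image, 0, &use_shared_spaces, sizeof(use_shared_spaces));
      return ((int)use_shared_spaces != 0) ? 0 : 1;   // sharing is on here
    }
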
--- a/agent/src/os/solaris/proc/saproc.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/agent/src/os/solaris/proc/saproc.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -502,8 +502,8 @@
 };
 
 static bool
-read_int(struct ps_prochandle* ph, psaddr_t addr, int* pvalue) {
-  int i;
+read_jboolean(struct ps_prochandle* ph, psaddr_t addr, jboolean* pvalue) {
+  jboolean i;
   if (ps_pread(ph, addr, &i, sizeof(i)) == PS_OK) {
     *pvalue = i;
     return true;
@@ -575,10 +575,13 @@
   }
 
   // read the value of the flag "UseSharedSpaces"
-  int value = 0;
-  if (read_int(ph, useSharedSpacesAddr, &value) != true) {
+  // HotSpot types are not available to this library, so the equivalent
+  // type "jboolean" is used to read the value of "UseSharedSpaces",
+  // which is the same as the HotSpot type "bool".
+  jboolean value = 0;
+  if (read_jboolean(ph, useSharedSpacesAddr, &value) != true) {
     THROW_NEW_DEBUGGER_EXCEPTION_("can't read 'UseSharedSpaces' flag", 1);
-  } else if (value == 0) {
+  } else if ((int)value == 0) {
     print_debug("UseSharedSpaces is false, assuming -Xshare:off!\n");
     return 1;
   }
--- a/make/hotspot_version	Sat Jan 31 17:19:42 2009 -0800
+++ b/make/hotspot_version	Fri Feb 27 15:13:00 2009 -0800
@@ -33,9 +33,9 @@
 # Don't put quotes (fail windows build).
 HOTSPOT_VM_COPYRIGHT=Copyright 2008
 
-HS_MAJOR_VER=14
+HS_MAJOR_VER=15
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=10
+HS_BUILD_NUMBER=02
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/src/cpu/sparc/vm/jni_sparc.h	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/sparc/vm/jni_sparc.h	Fri Feb 27 15:13:00 2009 -0800
@@ -28,5 +28,11 @@
 #define JNICALL
 
 typedef int jint;
-typedef long long jlong;
+
+#ifdef _LP64
+  typedef long jlong;
+#else
+  typedef long long jlong;
+#endif
+
 typedef signed char jbyte;
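
The jni_sparc.h change above (and the matching jni_x86.h change later in this diff) keeps jlong 64 bits wide under both the ILP32 and LP64 data models. A standalone sketch of the same conditional typedef with a compile-time size check; the demo name and the C++11 static_assert are not from the JDK header:

    #include <cstdio>

    #ifdef _LP64
      typedef long jlong_demo;        // LP64: long is 64 bits
    #else
      typedef long long jlong_demo;   // ILP32: long long is 64 bits
    #endif

    static_assert(sizeof(jlong_demo) == 8, "jlong must be 64 bits either way");

    int main() {
      std::printf("sizeof(jlong_demo) = %zu\n", sizeof(jlong_demo));
      return 0;
    }
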
--- a/src/cpu/sparc/vm/sparc.ad	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Feb 27 15:13:00 2009 -0800
@@ -762,7 +762,7 @@
     case Assembler::stdf_op3: st_op = Op_StoreD; break;
 
     case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
-    case Assembler::lduh_op3: ld_op = Op_LoadC; break;
+    case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
     case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
     case Assembler::ldx_op3:  // may become LoadP or stay LoadI
     case Assembler::ldsw_op3: // may become LoadP or stay LoadI
@@ -3869,6 +3869,8 @@
   constraint(ALLOC_IN_RC(dflt_reg));
   match(RegD);
 
+  match(regD_low);
+
   format %{ %}
   interface(REG_INTER);
 %}
@@ -3883,7 +3885,7 @@
 
 operand regD_low() %{
   constraint(ALLOC_IN_RC(dflt_low_reg));
-  match(RegD);
+  match(regD);
 
   format %{ %}
   interface(REG_INTER);
@@ -5314,9 +5316,9 @@
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Char (16bit UNsigned) into a Long Register
-instruct loadUCL(iRegL dst, memory mem, immL_FFFF bytemask) %{
-  match(Set dst (AndL (ConvI2L (LoadC mem)) bytemask));
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register
+instruct loadUS2L(iRegL dst, memory mem, immL_FFFF bytemask) %{
+  match(Set dst (AndL (ConvI2L (LoadUS mem)) bytemask));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
@@ -5326,9 +5328,9 @@
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Char (16bit unsigned)
-instruct loadC(iRegI dst, memory mem) %{
-  match(Set dst (LoadC mem));
+// Load Unsigned Short/Char (16bit unsigned)
+instruct loadUS(iRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
--- a/src/cpu/x86/vm/assembler_x86.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -5212,15 +5212,15 @@
 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                            bool clear_pc) {
   // we must set sp to zero to clear frame
-  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
+  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
   // must clear fp, so that compiled frames are not confused; it is
   // possible that we need it only for debugging
   if (clear_fp) {
-    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
+    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
   }
 
   if (clear_pc) {
-    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
+    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
   }
 }
 
@@ -5670,7 +5670,7 @@
   // get oop result if there is one and reset the value in the thread
   if (oop_result->is_valid()) {
     movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
-    movptr(Address(java_thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
     verify_oop(oop_result, "broken oop in call_VM_base");
   }
 }
@@ -6426,13 +6426,13 @@
     get_thread(java_thread);
   }
   // we must set sp to zero to clear frame
-  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
+  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
   if (clear_fp) {
-    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
   }
 
   if (clear_pc)
-    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
 
 }
 
@@ -6943,29 +6943,32 @@
 
   Label slow_case, done;
 
-  // x ?<= pi/4
-  fld_d(ExternalAddress((address)&pi_4));
-  fld_s(1);                // Stack:  X  PI/4  X
-  fabs();                  // Stack: |X| PI/4  X
-  fcmp(tmp);
-  jcc(Assembler::above, slow_case);
-
-  // fastest case: -pi/4 <= x <= pi/4
-  switch(trig) {
-  case 's':
-    fsin();
-    break;
-  case 'c':
-    fcos();
-    break;
-  case 't':
-    ftan();
-    break;
-  default:
-    assert(false, "bad intrinsic");
-    break;
-  }
-  jmp(done);
+  ExternalAddress pi4_adr = (address)&pi_4;
+  if (reachable(pi4_adr)) {
+    // x ?<= pi/4
+    fld_d(pi4_adr);
+    fld_s(1);                // Stack:  X  PI/4  X
+    fabs();                  // Stack: |X| PI/4  X
+    fcmp(tmp);
+    jcc(Assembler::above, slow_case);
+
+    // fastest case: -pi/4 <= x <= pi/4
+    switch(trig) {
+    case 's':
+      fsin();
+      break;
+    case 'c':
+      fcos();
+      break;
+    case 't':
+      ftan();
+      break;
+    default:
+      assert(false, "bad intrinsic");
+      break;
+    }
+    jmp(done);
+  }
 
   // slow case: runtime call
   bind(slow_case);
--- a/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -213,7 +213,7 @@
 
 inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
   /* it's possible we could catch this special case implicitly */
-  if (op1 == 0x80000000 && op2 == -1) return op1;
+  if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
   else return op1 / op2;
 }
 
@@ -231,7 +231,7 @@
 
 inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
   /* it's possible we could catch this special case implicitly */
-  if (op1 == 0x80000000 && op2 == -1) return 0;
+  if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
   else return op1 % op2;
 }
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -779,7 +779,7 @@
     case T_OBJECT:  // fall through
     case T_ARRAY:
       if (c->as_jobject() == NULL) {
-        __ movptr(as_Address(addr), (int32_t)NULL_WORD);
+        __ movptr(as_Address(addr), NULL_WORD);
       } else {
         if (is_literal_address(addr)) {
           ShouldNotReachHere();
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -78,10 +78,10 @@
     movptr(rax, Address(thread, Thread::pending_exception_offset()));
     // make sure that the vm_results are cleared
     if (oop_result1->is_valid()) {
-      movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+      movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
     }
     if (oop_result2->is_valid()) {
-      movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+      movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
     }
     if (frame_size() == no_frame_size) {
       leave();
@@ -96,12 +96,12 @@
   // get oop results if there are any and reset the values in the thread
   if (oop_result1->is_valid()) {
     movptr(oop_result1, Address(thread, JavaThread::vm_result_offset()));
-    movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+    movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
     verify_oop(oop_result1);
   }
   if (oop_result2->is_valid()) {
     movptr(oop_result2, Address(thread, JavaThread::vm_result_2_offset()));
-    movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+    movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
     verify_oop(oop_result2);
   }
   return call_offset;
@@ -728,8 +728,8 @@
 
   // clear exception fields in JavaThread because they are no longer needed
   // (fields must be cleared because they are processed by GC otherwise)
-  __ movptr(Address(thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(thread, JavaThread::exception_oop_offset()), NULL_WORD);
+  __ movptr(Address(thread, JavaThread::exception_pc_offset()), NULL_WORD);
 
   // pop the stub frame off
   __ leave();
@@ -878,7 +878,7 @@
 
     // load and clear pending exception
     __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
-    __ movptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
 
     // check that there is really a valid exception
     __ verify_not_null_oop(rax);
@@ -971,14 +971,14 @@
         // load pending exception oop into rax,
         __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
         // clear pending exception
-        __ movptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+        __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
 
         // load issuing PC (the return address for this stub) into rdx
         __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
 
         // make sure that the vm_results are cleared (may be unnecessary)
-        __ movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
-        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+        __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
+        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
 
         // verify that that there is really a valid exception in rax,
         __ verify_not_null_oop(exception_oop);
@@ -1393,7 +1393,7 @@
         __ ret(0);
 
         __ bind(miss);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 0); // result
+        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -594,7 +594,7 @@
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rax);
 
   // for c++ interpreter can rsi really be munged?
-  __ lea(state, Address(rbp, -sizeof(BytecodeInterpreter)));                               // restore state
+  __ lea(state, Address(rbp, -(int)sizeof(BytecodeInterpreter)));                               // restore state
   __ movptr(rbx, Address(state, byte_offset_of(BytecodeInterpreter, _method)));            // restore method
   __ movptr(rdi, Address(state, byte_offset_of(BytecodeInterpreter, _locals)));            // get locals pointer
 
@@ -658,7 +658,7 @@
     const Address size_of_stack    (rbx, methodOopDesc::max_stack_offset());
     // Always give one monitor to allow us to start interp if sync method.
     // Any additional monitors need a check when moving the expression stack
-    const one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
+    const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
   __ load_unsigned_word(rax, size_of_stack);                            // get size of expression stack in words
   __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), one_monitor));
   __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size));
@@ -1829,7 +1829,7 @@
   Label unwind_and_forward;
 
   // restore state pointer.
-  __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+  __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
 
   __ movptr(rbx, STATE(_method));                       // get method
 #ifdef _LP64
@@ -1877,14 +1877,14 @@
 
   // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
   if (UseSSE < 2) {
-    __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+    __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
     __ movptr(rbx, STATE(_result._to_call._callee));                   // get method just executed
     __ movl(rcx, Address(rbx, methodOopDesc::result_index_offset()));
     __ cmpl(rcx, AbstractInterpreter::BasicType_as_index(T_FLOAT));    // Result stub address array index
     __ jcc(Assembler::equal, do_float);
     __ cmpl(rcx, AbstractInterpreter::BasicType_as_index(T_DOUBLE));    // Result stub address array index
     __ jcc(Assembler::equal, do_double);
-#ifdef COMPILER2
+#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
     __ empty_FPU_stack();
 #endif // COMPILER2
     __ jmp(done_conv);
@@ -1928,7 +1928,7 @@
 
   // Restore rsi/r13 as compiled code may not preserve it
 
-  __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+  __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
 
   // restore stack to what we had when we left (in case i2c extended it)
 
@@ -1942,7 +1942,7 @@
 #else
   __ movptr(rcx, STATE(_thread));                       // get thread
   __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
-#endif / __LP64
+#endif // _LP64
   __ jcc(Assembler::notZero, return_with_exception);
 
   // get method just executed
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -139,7 +139,7 @@
 #ifdef CC_INTERP
 
 inline interpreterState frame::get_interpreterState() const {
-  return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize ));
+  return ((interpreterState)addr_at( -((int)sizeof(BytecodeInterpreter))/wordSize ));
 }
 
 inline intptr_t*    frame::sender_sp()        const {
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -133,7 +133,7 @@
                              + in_ByteSize(wordSize));
   switch (state) {
     case atos: movptr(rax, oop_addr);
-               movptr(oop_addr, (int32_t)NULL_WORD);
+               movptr(oop_addr, NULL_WORD);
                verify_oop(rax, state);                break;
     case ltos:
                movl(rdx, val_addr1);               // fall through
@@ -148,8 +148,8 @@
   }
   // Clean up tos value in the thread object
   movl(tos_addr,  (int32_t) ilgl);
-  movptr(val_addr,  (int32_t)NULL_WORD);
-  NOT_LP64(movl(val_addr1, (int32_t)NULL_WORD));
+  movptr(val_addr,  NULL_WORD);
+  NOT_LP64(movptr(val_addr1, NULL_WORD));
 }
 
 
@@ -944,7 +944,7 @@
     movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes ()));
 
     // Free entry
-    movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD);
+    movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);
 
     if (UseBiasedLocking) {
       biased_locking_exit(obj_reg, header_reg, done);
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -120,7 +120,7 @@
   void empty_expression_stack()                            {
        movptr(rsp, Address(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
       // NULL last_sp until next java call
-      movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+      movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   }
 
   // Tagged stack helpers for swap and dup
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -30,7 +30,7 @@
 
 #ifdef CC_INTERP
 void InterpreterMacroAssembler::get_method(Register reg) {
-  movptr(reg, Address(rbp, -(sizeof(BytecodeInterpreter) + 2 * wordSize)));
+  movptr(reg, Address(rbp, -((int)sizeof(BytecodeInterpreter) + 2 * wordSize)));
   movptr(reg, Address(reg, byte_offset_of(BytecodeInterpreter, _method)));
 }
 #endif // CC_INTERP
--- a/src/cpu/x86/vm/interpreterRT_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/interpreterRT_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -54,7 +54,7 @@
   __ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), (int32_t)NULL_WORD); // do not use temp() to avoid AGI
   Label L;
   __ jcc(Assembler::notZero, L);
-  __ movptr(temp(), ((int32_t)NULL_WORD));
+  __ movptr(temp(), NULL_WORD);
   __ bind(L);
   __ movptr(Address(to(), to_offset * wordSize), temp());
 }
@@ -110,7 +110,7 @@
   virtual void pass_object() {
     // pass address of from
     intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
-    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL : from_addr;
+    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
     debug_only(verify_tag(frame::TagReference));
     _from -= Interpreter::stackElementSize();
    }
--- a/src/cpu/x86/vm/jni_x86.h	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/jni_x86.h	Fri Feb 27 15:13:00 2009 -0800
@@ -32,7 +32,13 @@
   #define JNICALL
 
   typedef int jint;
+
+#ifdef _LP64
+  typedef long jlong;
+#else
   typedef long long jlong;
+#endif
+
 #else
   #define JNIEXPORT __declspec(dllexport)
   #define JNIIMPORT __declspec(dllimport)
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -129,11 +129,11 @@
   // Get the exception pc in case we are deoptimized
   __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
 #ifdef ASSERT
-  __ movptr(Address(rcx, JavaThread::exception_handler_pc_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_handler_pc_offset()), NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
 #endif
   // Clear the exception oop so GC no longer processes it as a root.
-  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
 
   __ pop(rcx);
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -39,6 +39,8 @@
 RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_static_call_blob;
 
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
 class RegisterSaver {
   enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
   // Capture info about frame layout
@@ -1299,7 +1301,7 @@
 
   // Now compute actual number of stack words we need rounding to make
   // stack properly aligned.
-  stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
+  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
@@ -1793,7 +1795,7 @@
   // reset handle block
   __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
 
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
 
   // Any exception pending?
   __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
@@ -1865,7 +1867,7 @@
     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
 
     __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
-    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
 
 
     // should be a peal
@@ -2431,7 +2433,7 @@
   __ get_thread(rdi);
   __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset()));
   __ movptr(Address(rbp, wordSize), rdx);
-  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD);
 
 #ifdef ASSERT
   // verify that there is really an exception oop in JavaThread
@@ -2489,8 +2491,8 @@
   __ jcc(Assembler::notEqual, noException);
   __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset()));
   __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
-  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
 
   __ verify_oop(rax);
 
@@ -2582,7 +2584,7 @@
           rbx); // Make it walkable
 #else /* CC_INTERP */
   // This value is corrected by layout_activation_impl
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
 #endif /* CC_INTERP */
   __ movptr(sp_temp, rsp);              // pass to next frame
@@ -2802,7 +2804,7 @@
           rbx); // Make it walkable
 #else /* CC_INTERP */
   // This value is corrected by layout_activation_impl
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
 #endif /* CC_INTERP */
   __ movptr(sp_temp, rsp);              // pass to next frame
@@ -3020,7 +3022,7 @@
   // exception pending => remove activation and forward to exception handler
 
   __ get_thread(thread);
-  __ movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -39,6 +39,8 @@
 RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_static_call_blob;
 
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
 #define __ masm->
 
 class SimpleRuntimeFrame {
@@ -1286,7 +1288,7 @@
 
   // Now compute actual number of stack words we need rounding to make
   // stack properly aligned.
-  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
+  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
@@ -2954,10 +2956,16 @@
   __ pushptr(Address(rcx, 0));     // Save return address
   __ enter();                      // Save old & set new rbp
   __ subptr(rsp, rbx);             // Prolog
+#ifdef CC_INTERP
+  __ movptr(Address(rbp,
+                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
+            sender_sp); // Make it walkable
+#else // CC_INTERP
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
             sender_sp);            // Make it walkable
   // This value is corrected by layout_activation_impl
   __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+#endif // CC_INTERP
   __ mov(sender_sp, rsp);          // Pass sender_sp to next frame
   __ addptr(rsi, wordSize);        // Bump array pointer (sizes)
   __ addptr(rcx, wordSize);        // Bump array pointer (pcs)
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -407,7 +407,7 @@
     __ get_thread(rcx);
     __ pop(rdx);
     __ movptr(rax, Address(rcx, Thread::pending_exception_offset()));
-    __ movptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+    __ movptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD);
 
 #ifdef ASSERT
     // make sure exception is set
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -472,7 +472,7 @@
     // setup rax & rdx, remove return address & clear pending exception
     __ pop(rdx);
     __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
-    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
+    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
 
 #ifdef ASSERT
     // make sure exception is set
@@ -954,9 +954,9 @@
     __ jcc(Assembler::zero, exit); // if obj is NULL it is OK
     // Check if the oop is in the right area of memory
     __ movptr(c_rarg2, rax);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_oop_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
     __ andptr(c_rarg2, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
     __ cmpptr(c_rarg2, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
@@ -969,9 +969,9 @@
     __ jcc(Assembler::zero, error); // if klass is NULL it is broken
     // Check if the klass is in the right area of memory
     __ mov(c_rarg2, rax);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_mask());
     __ andptr(c_rarg2, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_bits());
     __ cmpptr(c_rarg2, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
@@ -980,9 +980,9 @@
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
     // Check if the klass' klass is in the right area of memory
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_mask());
     __ andptr(rax, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_bits());
     __ cmpptr(rax, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -110,7 +110,7 @@
     if (message != NULL) {
       __ lea(rbx, ExternalAddress((address)message));
     } else {
-      __ movptr(rbx, (int32_t)NULL_WORD);
+      __ movptr(rbx, NULL_WORD);
     }
     __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), rax, rbx);
   }
@@ -123,7 +123,7 @@
 address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
   address entry = __ pc();
   // NULL last_sp until next java call
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ dispatch_next(state);
   return entry;
 }
@@ -176,7 +176,7 @@
   // Restore stack bottom in case i2c adjusted stack
   __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
   // and NULL it as marker that rsp is now tos until next java call
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
 
   __ restore_bcp();
   __ restore_locals();
@@ -211,7 +211,7 @@
 
   // The stack is not extended by deopt but we must NULL last_sp as this
   // entry is like a "return".
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ restore_bcp();
   __ restore_locals();
   // handle exceptions
@@ -382,7 +382,7 @@
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
   // if the compilation did not complete (either went background or bailed out).
-  __ movptr(rax, (int32_t)false);
+  __ movptr(rax, (intptr_t)false);
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rax);
 
   __ movptr(rbx, Address(rbp, method_offset));   // restore methodOop
@@ -1028,7 +1028,7 @@
 
   // reset handle block
   __ movptr(t, Address(thread, JavaThread::active_handles_offset()));
-  __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
 
   // If result was an oop then unbox and save it in the frame
   { Label L;
@@ -1488,7 +1488,7 @@
 
   // Restore sp to interpreter_frame_last_sp even though we are going
   // to empty the expression stack for the exception processing.
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   // rax,: exception
   // rdx: return address/pc that threw exception
   __ restore_bcp();                              // rsi points to call/send
@@ -1608,7 +1608,7 @@
   __ reset_last_Java_frame(rcx, true, true);
   // Restore the last_sp and null it out
   __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
 
   __ restore_bcp();
   __ restore_locals();
@@ -1636,7 +1636,7 @@
   // restore exception
   __ get_thread(rcx);
   __ movptr(rax, Address(rcx, JavaThread::vm_result_offset()));
-  __ movptr(Address(rcx, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::vm_result_offset()), NULL_WORD);
   __ verify_oop(rax);
 
   // Inbetween activations - previous activation type unknown yet
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -137,7 +137,7 @@
         // Do the actual store
         // noreg means NULL
         if (val == noreg) {
-          __ movl(Address(rdx, 0), NULL_WORD);
+          __ movptr(Address(rdx, 0), NULL_WORD);
           // No post barrier for NULL
         } else {
           __ movl(Address(rdx, 0), val);
@@ -152,7 +152,7 @@
     case BarrierSet::CardTableExtension:
       {
         if (val == noreg) {
-          __ movl(obj, NULL_WORD);
+          __ movptr(obj, NULL_WORD);
         } else {
           __ movl(obj, val);
           // flatten object address if needed
@@ -168,7 +168,7 @@
     case BarrierSet::ModRef:
     case BarrierSet::Other:
       if (val == noreg) {
-        __ movl(obj, NULL_WORD);
+        __ movptr(obj, NULL_WORD);
       } else {
         __ movl(obj, val);
       }
--- a/src/cpu/x86/vm/x86_32.ad	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Feb 27 15:13:00 2009 -0800
@@ -3371,7 +3371,7 @@
          masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
          masm.get_thread (scrReg) ; 
          masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2] 
-         masm.movptr(tmpReg, 0);                         // consider: xor vs mov
+         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
          if (os::is_MP()) { masm.lock(); } 
          masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
       } else 
@@ -3387,7 +3387,7 @@
 
          if ((EmitSync & 64) == 0) {
            // Optimistic form: consider XORL tmpReg,tmpReg
-           masm.movptr(tmpReg, 0 ) ; 
+           masm.movptr(tmpReg, NULL_WORD) ; 
          } else { 
            // Can suffer RTS->RTO upgrades on shared or cold $ lines
            // Test-And-CAS instead of CAS
@@ -3587,7 +3587,7 @@
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          masm.jmpb  (DONE_LABEL) ; 
       } else { 
          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
@@ -3596,7 +3596,7 @@
          masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
          masm.jccb  (Assembler::notZero, CheckSucc) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          masm.jmpb  (DONE_LABEL) ; 
       }
 
@@ -3644,7 +3644,7 @@
          // We currently use (3), although it's likely that switching to (2)
          // is correct for the future.
             
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          if (os::is_MP()) { 
             if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
               masm.mfence();
@@ -6413,9 +6413,9 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-// Load Char (16bit unsigned)
-instruct loadC(eRegI dst, memory mem) %{
-  match(Set dst (LoadC mem));
+// Load Unsigned Short/Char (16bit unsigned)
+instruct loadUS(eRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
 
   ins_cost(125);
   format %{ "MOVZX  $dst,$mem" %}
--- a/src/cpu/x86/vm/x86_64.ad	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Feb 27 15:13:00 2009 -0800
@@ -6096,25 +6096,25 @@
 //   ins_pipe(ialu_reg_mem);
 // %}
 
-// Load Char (16 bit UNsigned)
-instruct loadC(rRegI dst, memory mem)
-%{
-  match(Set dst (LoadC mem));
+// Load Unsigned Short/Char (16 bit UNsigned)
+instruct loadUS(rRegI dst, memory mem)
+%{
+  match(Set dst (LoadUS mem));
 
   ins_cost(125);
-  format %{ "movzwl  $dst, $mem\t# char" %}
+  format %{ "movzwl  $dst, $mem\t# ushort/char" %}
   opcode(0x0F, 0xB7);
   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Char (16 bit UNsigned) into long
-// instruct loadC2L(rRegL dst, memory mem)
+// Load Unsigned Short/Char (16 bit UNsigned) into long
+// instruct loadUS2L(rRegL dst, memory mem)
 // %{
-//   match(Set dst (ConvI2L (LoadC mem)));
+//   match(Set dst (ConvI2L (LoadUS mem)));
 
 //   ins_cost(125);
-//   format %{ "movzwl  $dst, $mem\t# char -> long" %}
+//   format %{ "movzwl  $dst, $mem\t# ushort/char -> long" %}
 //   opcode(0x0F, 0xB7);
 //   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
 //   ins_pipe(ialu_reg_mem);
@@ -9490,14 +9490,14 @@
 %{
   match(Set dst (AndL dst src));
 
-  format %{ "movzbq  $dst, $src\t# long & 0xFF" %}
+  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
   opcode(0x0F, 0xB6);
   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
   ins_pipe(ialu_reg);
 %}
 
 // And Register with Immediate 65535
-instruct andL_rReg_imm65535(rRegI dst, immL_65535 src)
+instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
 %{
   match(Set dst (AndL dst src));
 
--- a/src/os/linux/vm/os_linux.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/os/linux/vm/os_linux.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -279,7 +279,11 @@
  *        ...
  *        7: The default directories, normally /lib and /usr/lib.
  */
+#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
+#define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
+#else
 #define DEFAULT_LIBPATH "/lib:/usr/lib"
+#endif
 
 #define EXTENSIONS_DIR  "/lib/ext"
 #define ENDORSED_DIR    "/lib/endorsed"
@@ -1160,7 +1164,10 @@
 
         /*                                     1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2   2   2   2 */
         /*              3  4  5  6  7  8   9   0   1   2   3   4   5   6   7   8   9   0   1   2   3   4   5   6   7   8 */
-        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu",
+        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld "
+                   UINTX_FORMAT UINTX_FORMAT UINTX_FORMAT
+                   " %lu "
+                   UINTX_FORMAT UINTX_FORMAT UINTX_FORMAT,
              &state,          /* 3  %c  */
              &ppid,           /* 4  %d  */
              &pgrp,           /* 5  %d  */
@@ -1180,13 +1187,13 @@
              &nice,           /* 19 %ld  */
              &junk,           /* 20 %ld  */
              &it_real,        /* 21 %ld  */
-             &start,          /* 22 %lu  */
-             &vsize,          /* 23 %lu  */
-             &rss,            /* 24 %ld  */
+             &start,          /* 22 UINTX_FORMAT  */
+             &vsize,          /* 23 UINTX_FORMAT  */
+             &rss,            /* 24 UINTX_FORMAT  */
              &rsslim,         /* 25 %lu  */
-             &scodes,         /* 26 %lu  */
-             &ecode,          /* 27 %lu  */
-             &stack_start);   /* 28 %lu  */
+             &scodes,         /* 26 UINTX_FORMAT  */
+             &ecode,          /* 27 UINTX_FORMAT  */
+             &stack_start);   /* 28 UINTX_FORMAT  */
       }
 
       if (i != 28 - 2) {
@@ -1425,6 +1432,10 @@
   return buf;
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
+  return localtime_r(clock, res);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // runtime exit support
 
@@ -2024,7 +2035,8 @@
                 CAST_FROM_FN_PTR(address, os::jvm_path),
                 dli_fname, sizeof(dli_fname), NULL);
   assert(ret != 0, "cannot locate libjvm");
-  realpath(dli_fname, buf);
+  if (realpath(dli_fname, buf) == NULL)
+    return;
 
   if (strcmp(Arguments::sun_java_launcher(), "gamma") == 0) {
     // Support for the gamma launcher.  Typical value for buf is
@@ -2048,7 +2060,8 @@
         assert(strstr(p, "/libjvm") == p, "invalid library name");
         p = strstr(p, "_g") ? "_g" : "";
 
-        realpath(java_home_var, buf);
+        if (realpath(java_home_var, buf) == NULL)
+          return;
         sprintf(buf + strlen(buf), "/jre/lib/%s", cpu_arch);
         if (0 == access(buf, F_OK)) {
           // Use current module name "libjvm[_g].so" instead of
@@ -2059,7 +2072,8 @@
           sprintf(buf + strlen(buf), "/hotspot/libjvm%s.so", p);
         } else {
           // Go back to path of .so
-          realpath(dli_fname, buf);
+          if (realpath(dli_fname, buf) == NULL)
+            return;
         }
       }
     }
@@ -4184,11 +4198,11 @@
   // Skip blank chars
   do s++; while (isspace(*s));
 
-  count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
-                 &idummy, &idummy, &idummy, &idummy, &idummy, &idummy,
+  count = sscanf(s,"%*c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
+                 &idummy, &idummy, &idummy, &idummy, &idummy,
                  &ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
                  &user_time, &sys_time);
-  if ( count != 13 ) return -1;
+  if ( count != 12 ) return -1;
   if (user_sys_cpu_time) {
     return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
   } else {
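
In the last hunk above, %*c consumes the state character without assigning it, so it no longer counts toward sscanf's return value and the expected count drops from 13 to 12. A standalone sketch with a toy input line (hypothetical, not real /proc data):

    #include <cstdio>

    int main() {
      // A toy line shaped like the tail of /proc/<pid>/stat parsed above.
      const char* s = "S 1 2 3 4 5 6 7 8 9 10 11 12";
      int i1, i2, i3, i4, i5;
      unsigned long l1, l2, l3, l4, l5, l6, l7;

      // %*c skips the state character; suppressed conversions are not counted.
      int count = std::sscanf(s, "%*c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
                              &i1, &i2, &i3, &i4, &i5,
                              &l1, &l2, &l3, &l4, &l5, &l6, &l7);
      std::printf("count = %d (expected 12)\n", count);
      return 0;
    }
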
--- a/src/os/solaris/vm/os_solaris.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -323,6 +323,10 @@
   return (size_t)(base - bottom);
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
+  return localtime_r(clock, res);
+}
+
 // interruptible infrastructure
 
 // setup_interruptible saves the thread state before going into an
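
The os_linux.cpp and os_solaris.cpp hunks add an os::localtime_pd() that forwards to the reentrant localtime_r(), filling a caller-supplied struct tm instead of the shared static buffer used by plain localtime() (the Windows hunk below copies out of localtime() instead). A POSIX-only usage sketch with a demo wrapper name:

    #include <ctime>
    #include <cstdio>

    // Demo wrapper in the style of the patch: thread-friendly local time that
    // writes into the caller's buffer (POSIX localtime_r).
    static struct tm* localtime_pd_demo(const time_t* clock, struct tm* res) {
      return localtime_r(clock, res);
    }

    int main() {
      time_t now = time(NULL);
      struct tm local;
      if (localtime_pd_demo(&now, &local) != NULL) {
        char buf[64];
        strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &local);
        std::puts(buf);
      }
      return 0;
    }
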
--- a/src/os/windows/vm/os_windows.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/os/windows/vm/os_windows.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -327,6 +327,14 @@
   return sz;
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm* res) {
+  const struct tm* time_struct_ptr = localtime(clock);
+  if (time_struct_ptr != NULL) {
+    *res = *time_struct_ptr;
+    return res;
+  }
+  return NULL;
+}
 
 LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo);
 
--- a/src/share/vm/adlc/dict2.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/adlc/dict2.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -316,9 +316,12 @@
   return strcmp((const char *)k1,(const char *)k2);
 }
 
-// Slimey cheap key comparator.
+// Cheap key comparator.
 int cmpkey(const void *key1, const void *key2) {
-  return (int)((intptr_t)key1 - (intptr_t)key2);
+  if (key1 == key2) return 0;
+  intptr_t delta = (intptr_t)key1 - (intptr_t)key2;
+  if (delta > 0) return 1;
+  return -1;
 }
 
 //=============================================================================
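
The dict2.cpp hunk replaces a subtraction-based comparator with an explicit three-way compare: narrowing the pointer difference to int can wrap for keys that are far apart and flip the sign of the result. A standalone sketch of the patched shape; the 64-bit example addresses are hypothetical:

    #include <cstdint>
    #include <cstdio>

    // Three-way comparator in the style of the patched cmpkey(): the pointer
    // difference is never narrowed to int, so the sign cannot be lost.
    static int cmp_key(const void* key1, const void* key2) {
      if (key1 == key2) return 0;
      intptr_t delta = (intptr_t)key1 - (intptr_t)key2;
      return (delta > 0) ? 1 : -1;
    }

    int main() {
      // Two keys whose difference does not fit in a 32-bit int (LP64 only).
      const void* a = (const void*)(intptr_t)0x200000000LL;
      const void* b = (const void*)(intptr_t)0x000000010LL;
      std::printf("%d %d %d\n", cmp_key(a, b), cmp_key(b, a), cmp_key(a, a));
      return 0;
    }
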
--- a/src/share/vm/adlc/forms.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/adlc/forms.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -248,7 +248,7 @@
 // True if 'opType', an ideal name, loads or stores.
 Form::DataType Form::is_load_from_memory(const char *opType) const {
   if( strcmp(opType,"LoadB")==0 )  return Form::idealB;
-  if( strcmp(opType,"LoadC")==0 )  return Form::idealC;
+  if( strcmp(opType,"LoadUS")==0 )  return Form::idealC;
   if( strcmp(opType,"LoadD")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadD_unaligned")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadF")==0 )  return Form::idealF;
--- a/src/share/vm/adlc/formssel.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/adlc/formssel.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -3314,7 +3314,7 @@
     "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
     "StoreB","StoreC","Store" ,"StoreFP",
     "LoadI" ,"LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
-    "LoadB" ,"LoadC" ,"LoadS" ,"Load"   ,
+    "LoadB" ,"LoadUS" ,"LoadS" ,"Load"   ,
     "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
     "Store8B","Store4B","Store8C","Store4C","Store2C",
     "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
--- a/src/share/vm/asm/codeBuffer.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/asm/codeBuffer.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -123,6 +123,10 @@
     // addresses constructed before expansions will not be confused.
     cb->free_blob();
   }
+
+  // free any overflow storage
+  delete _overflow_arena;
+
 #ifdef ASSERT
   Copy::fill_to_bytes(this, sizeof(*this), badResourceValue);
 #endif
--- a/src/share/vm/classfile/classFileParser.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/classFileParser.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -232,7 +232,9 @@
     length >= 1, "Illegal constant pool size %u in class file %s",
     length, CHECK_(nullHandle));
   constantPoolOop constant_pool =
-                      oopFactory::new_constantPool(length, CHECK_(nullHandle));
+                      oopFactory::new_constantPool(length,
+                                                   methodOopDesc::IsSafeConc,
+                                                   CHECK_(nullHandle));
   constantPoolHandle cp (THREAD, constant_pool);
 
   cp->set_partially_loaded();    // Enables heap verify to work on partial constantPoolOops
@@ -1675,7 +1677,8 @@
   // All sizing information for a methodOop is finally available, now create it
   methodOop m_oop  = oopFactory::new_method(
     code_length, access_flags, linenumber_table_length,
-    total_lvt_length, checked_exceptions_length, CHECK_(nullHandle));
+    total_lvt_length, checked_exceptions_length,
+    methodOopDesc::IsSafeConc, CHECK_(nullHandle));
   methodHandle m (THREAD, m_oop);
 
   ClassLoadingService::add_class_method_size(m_oop->size()*HeapWordSize);
--- a/src/share/vm/classfile/javaClasses.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -441,6 +441,7 @@
 
 bool java_lang_Class::offsets_computed = false;
 int  java_lang_Class::classRedefinedCount_offset = -1;
+int  java_lang_Class::parallelCapable_offset = -1;
 
 void java_lang_Class::compute_offsets() {
   assert(!offsets_computed, "offsets should be initialized only once");
@@ -451,6 +452,23 @@
   // so don't go fatal.
   compute_optional_offset(classRedefinedCount_offset,
     k, vmSymbols::classRedefinedCount_name(), vmSymbols::int_signature());
+
+  // The field indicating parallelCapable (parallelLockMap) is only present starting in JDK 7.
+  klassOop k1 = SystemDictionary::classloader_klass();
+  compute_optional_offset(parallelCapable_offset,
+    k1, vmSymbols::parallelCapable_name(), vmSymbols::concurrenthashmap_signature());
+}
+
+// For class loader classes, parallelCapable is determined by a non-null
+// parallelLockMap field. java.lang.ClassLoader writes that field; the VM
+// only reads it and never sets it.
+bool java_lang_Class::parallelCapable(oop class_loader) {
+  if (!JDK_Version::is_gte_jdk17x_version()
+     || parallelCapable_offset == -1) {
+     // Default for backward compatibility is false
+     return false;
+  }
+  return (class_loader->obj_field(parallelCapable_offset) != NULL);
 }
 
 int java_lang_Class::classRedefinedCount(oop the_class_mirror) {
@@ -866,7 +884,7 @@
     }
     nmethod* nm = method->code();
     if (WizardMode && nm != NULL) {
-      sprintf(buf + (int)strlen(buf), "(nmethod %#x)", nm);
+      sprintf(buf + (int)strlen(buf), "(nmethod " PTR_FORMAT ")", (intptr_t)nm);
     }
   }
 
--- a/src/share/vm/classfile/javaClasses.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/javaClasses.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -141,6 +141,7 @@
   static void compute_offsets();
   static bool offsets_computed;
   static int classRedefinedCount_offset;
+  static int parallelCapable_offset;
 
  public:
   // Instance creation
@@ -168,6 +169,8 @@
   // Support for classRedefinedCount field
   static int classRedefinedCount(oop the_class_mirror);
   static void set_classRedefinedCount(oop the_class_mirror, int value);
+  // Support for parallelCapable field
+  static bool parallelCapable(oop the_class_mirror);
   // Debugging
   friend class JavaClasses;
   friend class instanceKlass;   // verification code accesses offsets
--- a/src/share/vm/classfile/systemDictionary.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/systemDictionary.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -90,6 +90,14 @@
 #endif
 
 // ----------------------------------------------------------------------------
+// Parallel class loading check
+
+bool SystemDictionary::is_parallelCapable(Handle class_loader) {
+  if (UnsyncloadClass || class_loader.is_null()) return true;
+  if (AlwaysLockClassLoader) return false;
+  return java_lang_Class::parallelCapable(class_loader());
+}
+// ----------------------------------------------------------------------------
 // Resolving of classes
 
 // Forwards to resolve_or_null
@@ -196,7 +204,8 @@
 // super-class callers:
 //   ClassFileParser - for defineClass & jvmtiRedefineClasses
 //   load_shared_class - while loading a class from shared archive
-//   resolve_instance_class_or_fail:
+//   resolve_instance_class_or_null:
+//     via: handle_parallel_super_load
 //      when resolving a class that has an existing placeholder with
 //      a saved superclass [i.e. a defineClass is currently in progress]
 //      if another thread is trying to resolve the class, it must do
@@ -283,12 +292,9 @@
       if (probe && probe->check_seen_thread(THREAD, PlaceholderTable::LOAD_SUPER)) {
           throw_circularity_error = true;
       }
-
-      // add placeholder entry even if error - callers will remove on error
+    }
+    if (!throw_circularity_error) {
       PlaceholderEntry* newprobe = placeholders()->find_and_add(p_index, p_hash, child_name, class_loader, PlaceholderTable::LOAD_SUPER, class_name, THREAD);
-      if (throw_circularity_error) {
-         newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_SUPER);
-      }
     }
   }
   if (throw_circularity_error) {
@@ -325,7 +331,6 @@
   return superk_h();
 }
 
-
 void SystemDictionary::validate_protection_domain(instanceKlassHandle klass,
                                                   Handle class_loader,
                                                   Handle protection_domain,
@@ -421,7 +426,7 @@
   bool calledholdinglock
       = ObjectSynchronizer::current_thread_holds_lock((JavaThread*)THREAD, lockObject);
   assert(calledholdinglock,"must hold lock for notify");
-  assert(!UnsyncloadClass, "unexpected double_lock_wait");
+  assert((!(lockObject() == _system_loader_lock_obj) && !is_parallelCapable(lockObject)), "unexpected double_lock_wait");
   ObjectSynchronizer::notifyall(lockObject, THREAD);
   intptr_t recursions =  ObjectSynchronizer::complete_exit(lockObject, THREAD);
   SystemDictionary_lock->wait();
@@ -439,7 +444,7 @@
 // even in non-circularity situations.
 // Note: only one thread can define the class, but multiple can resolve
 // Note: must call resolve_super_or_fail even if null super -
-// to force placeholder entry creation for this class
+// to force placeholder entry creation for this class for circularity detection
 // Caller must check for pending exception
 // Returns non-null klassOop if other thread has completed load
 // and we are done,
@@ -477,9 +482,9 @@
     SystemDictionary_lock->notify_all();
   }
 
-  // UnsyncloadClass does NOT wait for parallel superclass loads to complete
-  // Bootstrap classloader does wait for parallel superclass loads
- if (UnsyncloadClass) {
+  // parallelCapable class loaders do NOT wait for parallel superclass loads to complete
+  // Serial class loaders and bootstrap classloader do wait for superclass loads
+ if (!class_loader.is_null() && is_parallelCapable(class_loader)) {
     MutexLocker mu(SystemDictionary_lock, THREAD);
     // Check if classloading completed while we were loading superclass or waiting
     klassOop check = find_class(d_index, d_hash, name, class_loader);
@@ -566,10 +571,10 @@
   // This lock must be acquired here so the waiter will find
   // any successful result in the SystemDictionary and not attempt
   // the define
-  // Classloaders that support parallelism, e.g. bootstrap classloader,
+  // ParallelCapable Classloaders and the bootstrap classloader,
   // or all classloaders with UnsyncloadClass do not acquire lock here
   bool DoObjectLock = true;
-  if (UnsyncloadClass || (class_loader.is_null())) {
+  if (is_parallelCapable(class_loader)) {
     DoObjectLock = false;
   }
 
@@ -627,6 +632,9 @@
     // Five cases:
     // All cases need to prevent modifying bootclasssearchpath
     // in parallel with a classload of same classname
+    // Redefineclasses uses existence of the placeholder for the duration
+    // of the class load to prevent concurrent redefinition of not completely
+    // defined classes.
     // case 1. traditional classloaders that rely on the classloader object lock
     //   - no other need for LOAD_INSTANCE
     // case 2. traditional classloaders that break the classloader object lock
@@ -642,12 +650,13 @@
     //    This classloader supports parallelism at the classloader level,
     //    but only allows a single load of a class/classloader pair.
     //    No performance benefit and no deadlock issues.
-    // case 5. Future: parallel user level classloaders - without objectLocker
+    // case 5. parallelCapable user level classloaders - without objectLocker
+    //    Allow parallel classloading of a class/classloader pair
     symbolHandle nullsymbolHandle;
     bool throw_circularity_error = false;
     {
       MutexLocker mu(SystemDictionary_lock, THREAD);
-      if (!UnsyncloadClass) {
+      if (class_loader.is_null() || !is_parallelCapable(class_loader)) {
         PlaceholderEntry* oldprobe = placeholders()->get_entry(p_index, p_hash, name, class_loader);
         if (oldprobe) {
           // only need check_seen_thread once, not on each loop
@@ -681,25 +690,25 @@
         }
       }
       // All cases: add LOAD_INSTANCE
-      // case 3: UnsyncloadClass: allow competing threads to try
+      // case 3: UnsyncloadClass || case 5: parallelCapable: allow competing threads to try
       // LOAD_INSTANCE in parallel
       // add placeholder entry even if error - callers will remove on error
-      if (!class_has_been_loaded) {
+      if (!throw_circularity_error && !class_has_been_loaded) {
         PlaceholderEntry* newprobe = placeholders()->find_and_add(p_index, p_hash, name, class_loader, PlaceholderTable::LOAD_INSTANCE, nullsymbolHandle, THREAD);
-        if (throw_circularity_error) {
-          newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
-        }
         // For class loaders that do not acquire the classloader object lock,
         // if they did not catch another thread holding LOAD_INSTANCE,
         // need a check analogous to the acquire ObjectLocker/find_class
         // i.e. now that we hold the LOAD_INSTANCE token on loading this class/CL
         // one final check if the load has already completed
+        // class loaders holding the ObjectLock shouldn't find the class here
         klassOop check = find_class(d_index, d_hash, name, class_loader);
         if (check != NULL) {
         // Klass is already loaded, so just return it
           k = instanceKlassHandle(THREAD, check);
           class_has_been_loaded = true;
           newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
+          placeholders()->find_and_remove(p_index, p_hash, name, class_loader, THREAD);
+          SystemDictionary_lock->notify_all();
         }
       }
     }
@@ -714,18 +723,14 @@
       // Do actual loading
       k = load_instance_class(name, class_loader, THREAD);
 
-      // In custom class loaders, the usual findClass calls
-      // findLoadedClass, which directly searches  the SystemDictionary, then
-      // defineClass. If these are not atomic with respect to other threads,
-      // the findLoadedClass can fail, but the defineClass can get a
-      // LinkageError:: duplicate class definition.
+      // For UnsyncloadClass and AllowParallelDefineClass only:
       // If they got a linkageError, check if a parallel class load succeeded.
       // If it did, then for bytecode resolution the specification requires
       // that we return the same result we did for the other thread, i.e. the
       // successfully loaded instanceKlass
-      // Note: Class can not be unloaded as long as any classloader refs exist
       // Should not get here for classloaders that support parallelism
-      // with the new cleaner mechanism, e.g. bootstrap classloader
+      // with the new cleaner mechanism
+      // Bootstrap goes through here to allow for an extra guarantee check
       if (UnsyncloadClass || (class_loader.is_null())) {
         if (k.is_null() && HAS_PENDING_EXCEPTION
           && PENDING_EXCEPTION->is_a(SystemDictionary::linkageError_klass())) {
@@ -841,6 +846,12 @@
                                 Handle protection_domain,
                                 TRAPS) {
 
+  // UseNewReflection
+  // The result of this call should be consistent with the result
+  // of the call to resolve_instance_class_or_null().
+  // See evaluation 6790209 and 4474172 for more details.
+  class_loader = Handle(THREAD, java_lang_ClassLoader::non_reflection_class_loader(class_loader()));
+
   unsigned int d_hash = dictionary()->compute_hash(class_name, class_loader);
   int d_index = dictionary()->hash_to_index(d_hash);
 
@@ -955,10 +966,10 @@
   instanceKlassHandle k = ClassFileParser(st).parseClassFile(class_name,
                                                              class_loader,
                                                              protection_domain,
-                                                             cp_patches,
                                                              parsed_name,
                                                              THREAD);
 
+
   // We don't redefine the class, so we just need to clean up whether there
   // was an error or not (don't want to modify any system dictionary
   // data structures).
@@ -1013,11 +1024,17 @@
                                                ClassFileStream* st,
                                                TRAPS) {
 
-  // Make sure we are synchronized on the class loader before we initiate
-  // loading.
+  // Classloaders that support parallelism, e.g. bootstrap classloader,
+  // or all classloaders with UnsyncloadClass do not acquire lock here
+  bool DoObjectLock = true;
+  if (is_parallelCapable(class_loader)) {
+    DoObjectLock = false;
+  }
+
+  // Make sure we are synchronized on the class loader before we proceed
   Handle lockObject = compute_loader_lock_object(class_loader, THREAD);
   check_loader_lock_contention(lockObject, THREAD);
-  ObjectLocker ol(lockObject, THREAD);
+  ObjectLocker ol(lockObject, THREAD, DoObjectLock);
 
   symbolHandle parsed_name;
 
@@ -1069,7 +1086,13 @@
            "external class name format used internally");
 
     // Add class just loaded
-    define_instance_class(k, THREAD);
+    // If a class loader supports parallel classloading, handle parallel define requests.
+    // find_or_define_instance_class may return a different instanceKlass
+    if (is_parallelCapable(class_loader)) {
+      k = find_or_define_instance_class(class_name, class_loader, k, THREAD);
+    } else {
+      define_instance_class(k, THREAD);
+    }
   }
 
   // If parsing the class file or define_instance_class failed, we
@@ -1299,7 +1322,7 @@
     }
 #endif // KERNEL
 
-    // find_or_define_instance_class may return a different k
+    // find_or_define_instance_class may return a different instanceKlass
     if (!k.is_null()) {
       k = find_or_define_instance_class(class_name, class_loader, k, CHECK_(nh));
     }
@@ -1316,14 +1339,24 @@
 
     KlassHandle spec_klass (THREAD, SystemDictionary::classloader_klass());
 
-    // UnsyncloadClass option means don't synchronize loadClass() calls.
-    // loadClassInternal() is synchronized and public loadClass(String) is not.
-    // This flag is for diagnostic purposes only. It is risky to call
+    // Call the public unsynchronized loadClass(String) directly for all class loaders.
+    // For parallelCapable class loaders in JDK >= 7, loadClass(String, boolean) will
+    // acquire a class-name based lock rather than the class loader object lock.
+    // JDK < 7 already acquires the class loader lock in loadClass(String, boolean),
+    // so the call to loadClassInternal() was not required.
+    //
+    // UnsyncloadClass flag means both call loadClass(String) and do
+    // not acquire the class loader lock even for class loaders that are
+    // not parallelCapable. This was a risky transitional
+    // flag for diagnostic purposes only. It is risky to call
     // custom class loaders without synchronization.
     // WARNING If a custom class loader does NOT synchronize findClass, or callers of
-    // findClass, this flag risks unexpected timing bugs in the field.
+    // findClass, the UnsyncloadClass flag risks unexpected timing bugs in the field.
     // Do NOT assume this will be supported in future releases.
-    if (!UnsyncloadClass && has_loadClassInternal()) {
+    //
+    // Added MustCallLoadClassInternal in case we discover in the field
+    // a customer that counts on this call
+    if (MustCallLoadClassInternal && has_loadClassInternal()) {
       JavaCalls::call_special(&result,
                               class_loader,
                               spec_klass,
@@ -1365,14 +1398,17 @@
 
   Handle class_loader_h(THREAD, k->class_loader());
 
-  // for bootstrap classloader don't acquire lock
-  if (!class_loader_h.is_null()) {
+ // for bootstrap and other parallel classloaders don't acquire lock,
+ // use placeholder token
+ // If a parallelCapable class loader calls define_instance_class instead of
+ // find_or_define_instance_class to get here, we have a timing
+ // hole with systemDictionary updates and check_constraints
+ if (!class_loader_h.is_null() && !is_parallelCapable(class_loader_h)) {
     assert(ObjectSynchronizer::current_thread_holds_lock((JavaThread*)THREAD,
          compute_loader_lock_object(class_loader_h, THREAD)),
          "define called without lock");
   }
 
-
   // Check class-loading constraints. Throw exception if violation is detected.
   // Grabs and releases SystemDictionary_lock
   // The check_constraints/find_class call and update_dictionary sequence
@@ -1427,59 +1463,63 @@
 
 // Support parallel classloading
 // Initial implementation for bootstrap classloader
-// For future:
 // For custom class loaders that support parallel classloading,
-// in case they do not synchronize around
-// FindLoadedClass/DefineClass calls, we check for parallel
+// With AllowParallelDefine flag==true, in case they do not synchronize around
+// FindLoadedClass/DefineClass calls, we check for parallel
 // loading for them, wait if a defineClass is in progress
 // and return the initial requestor's results
+// With AllowParallelDefine flag==false, call through to define_instance_class
+// which will throw LinkageError: duplicate class definition.
 // For better performance, the class loaders should synchronize
-// findClass(), i.e. FindLoadedClass/DefineClass or they
+// findClass(), i.e. FindLoadedClass/DefineClassIfAbsent or they
 // potentially waste time reading and parsing the bytestream.
 // Note: VM callers should ensure consistency of k/class_name,class_loader
 instanceKlassHandle SystemDictionary::find_or_define_instance_class(symbolHandle class_name, Handle class_loader, instanceKlassHandle k, TRAPS) {
 
   instanceKlassHandle nh = instanceKlassHandle(); // null Handle
+  symbolHandle name_h(THREAD, k->name()); // passed in class_name may be null
 
-  unsigned int d_hash = dictionary()->compute_hash(class_name, class_loader);
+  unsigned int d_hash = dictionary()->compute_hash(name_h, class_loader);
   int d_index = dictionary()->hash_to_index(d_hash);
 
 // Hold SD lock around find_class and placeholder creation for DEFINE_CLASS
-  unsigned int p_hash = placeholders()->compute_hash(class_name, class_loader);
+  unsigned int p_hash = placeholders()->compute_hash(name_h, class_loader);
   int p_index = placeholders()->hash_to_index(p_hash);
   PlaceholderEntry* probe;
 
   {
     MutexLocker mu(SystemDictionary_lock, THREAD);
     // First check if class already defined
-    klassOop check = find_class(d_index, d_hash, class_name, class_loader);
+    klassOop check = find_class(d_index, d_hash, name_h, class_loader);
     if (check != NULL) {
       return(instanceKlassHandle(THREAD, check));
     }
 
     // Acquire define token for this class/classloader
     symbolHandle nullsymbolHandle;
-    probe = placeholders()->find_and_add(p_index, p_hash, class_name, class_loader, PlaceholderTable::DEFINE_CLASS, nullsymbolHandle, THREAD);
-    // Check if another thread defining in parallel
-    if (probe->definer() == NULL) {
-      // Thread will define the class
-      probe->set_definer(THREAD);
+    probe = placeholders()->find_and_add(p_index, p_hash, name_h, class_loader, PlaceholderTable::DEFINE_CLASS, nullsymbolHandle, THREAD);
+    // Wait if another thread defining in parallel
+    // All threads wait - even those that will throw duplicate class: otherwise
+    // caller is surprised by LinkageError: duplicate, but findLoadedClass fails
+    // if other thread has not finished updating dictionary
+    while (probe->definer() != NULL) {
+      SystemDictionary_lock->wait();
+    }
+    // Only special cases allow parallel defines and can use other thread's results
+    // Other cases fall through, and may run into duplicate defines
+    // caught by finding an entry in the SystemDictionary
+    if ((UnsyncloadClass || AllowParallelDefineClass) && (probe->instanceKlass() != NULL)) {
+        probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
+        placeholders()->find_and_remove(p_index, p_hash, name_h, class_loader, THREAD);
+        SystemDictionary_lock->notify_all();
+#ifdef ASSERT
+        klassOop check = find_class(d_index, d_hash, name_h, class_loader);
+        assert(check != NULL, "definer missed recording success");
+#endif
+        return(instanceKlassHandle(THREAD, probe->instanceKlass()));
     } else {
-      // Wait for defining thread to finish and return results
-      while (probe->definer() != NULL) {
-        SystemDictionary_lock->wait();
-      }
-      if (probe->instanceKlass() != NULL) {
-        probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
-        return(instanceKlassHandle(THREAD, probe->instanceKlass()));
-      } else {
-        // If definer had an error, try again as any new thread would
-        probe->set_definer(THREAD);
-#ifdef ASSERT
-        klassOop check = find_class(d_index, d_hash, class_name, class_loader);
-        assert(check == NULL, "definer missed recording success");
-#endif
-      }
+      // This thread will define the class (even if earlier thread tried and had an error)
+      probe->set_definer(THREAD);
     }
   }
 
@@ -1490,7 +1530,7 @@
   // definer must notify any waiting threads
   {
     MutexLocker mu(SystemDictionary_lock, THREAD);
-    PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, class_name, class_loader);
+    PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, name_h, class_loader);
     assert(probe != NULL, "DEFINE_CLASS placeholder lost?");
     if (probe != NULL) {
       if (HAS_PENDING_EXCEPTION) {
@@ -1501,6 +1541,7 @@
       }
       probe->set_definer(NULL);
       probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
+      placeholders()->find_and_remove(p_index, p_hash, name_h, class_loader, THREAD);
       SystemDictionary_lock->notify_all();
     }
   }
@@ -1512,7 +1553,6 @@
 
   return k;
 }
-
 Handle SystemDictionary::compute_loader_lock_object(Handle class_loader, TRAPS) {
   // If class_loader is NULL we synchronize on _system_loader_lock_obj
   if (class_loader.is_null()) {
@@ -1902,11 +1942,11 @@
     warning("Cannot find sun/jkernel/DownloadManager");
   }
 #endif // KERNEL
+
   { // Compute whether we should use loadClass or loadClassInternal when loading classes.
     methodOop method = instanceKlass::cast(classloader_klass())->find_method(vmSymbols::loadClassInternal_name(), vmSymbols::string_class_signature());
     _has_loadClassInternal = (method != NULL);
   }
-
   { // Compute whether we should use checkPackageAccess or NOT
     methodOop method = instanceKlass::cast(classloader_klass())->find_method(vmSymbols::checkPackageAccess_name(), vmSymbols::class_protectiondomain_signature());
     _has_checkPackageAccess = (method != NULL);
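
The find_or_define_instance_class() changes above implement a per-(name, class loader) "define token": the first thread to claim the DEFINE_CLASS placeholder becomes the definer, every other thread waits on SystemDictionary_lock until the definer records its result, and (under UnsyncloadClass/AllowParallelDefineClass) waiters may simply reuse that result. Below is a minimal standalone sketch of that wait/notify pattern using std::mutex and std::condition_variable instead of the VM's lock; DefineTable, Placeholder and Klass are the sketch's own names, not HotSpot types.

#include <condition_variable>
#include <map>
#include <mutex>
#include <string>

// Hypothetical stand-in for an instanceKlass result.
struct Klass { std::string name; };

struct Placeholder {
  bool   has_definer = false;   // a thread currently owns the define token
  Klass* result      = nullptr; // filled in by the successful definer
};

class DefineTable {
  std::mutex mu_;
  std::condition_variable cv_;
  std::map<std::string, Placeholder> table_;  // keyed by (name, loader) in the VM

 public:
  // Returns the already-defined Klass, or nullptr if the caller must define it.
  Klass* wait_or_claim(const std::string& name) {
    std::unique_lock<std::mutex> lock(mu_);
    Placeholder& p = table_[name];
    // All threads wait while someone else holds the define token,
    // mirroring the "while (probe->definer() != NULL) wait()" loop above.
    while (p.has_definer) cv_.wait(lock);
    if (p.result != nullptr) return p.result;  // reuse the other thread's result
    p.has_definer = true;                      // this thread becomes the definer
    return nullptr;
  }

  // Definer records its result (or nullptr on failure) and wakes waiters.
  void finish_define(const std::string& name, Klass* k) {
    std::lock_guard<std::mutex> lock(mu_);
    Placeholder& p = table_[name];
    p.result = k;
    p.has_definer = false;
    cv_.notify_all();
  }
};

A waiter that wakes up and finds no recorded result claims the token itself, which mirrors the patch's "this thread will define the class (even if an earlier thread tried and had an error)" path.
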
--- a/src/share/vm/classfile/systemDictionary.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/systemDictionary.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -526,6 +526,7 @@
   static instanceKlassHandle load_instance_class(symbolHandle class_name, Handle class_loader, TRAPS);
   static Handle compute_loader_lock_object(Handle class_loader, TRAPS);
   static void check_loader_lock_contention(Handle loader_lock, TRAPS);
+  static bool is_parallelCapable(Handle class_loader);
 
   static klassOop find_shared_class(symbolHandle class_name);
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -362,6 +362,7 @@
   template(class_signature,                           "Ljava/lang/Class;")                                        \
   template(string_signature,                          "Ljava/lang/String;")                                       \
   template(reference_signature,                       "Ljava/lang/ref/Reference;")                                \
+  template(concurrenthashmap_signature,               "Ljava/util/concurrent/ConcurrentHashMap;")                 \
   /* signature symbols needed by intrinsics */                                                                    \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
                                                                                                                   \
@@ -374,6 +375,9 @@
   /* used by ClassFormatError when class name is not known yet */                                                 \
   template(unknown_class_name,                        "<Unknown>")                                                \
                                                                                                                   \
+  /* used to identify class loaders handling parallel class loading */                                            \
+  template(parallelCapable_name,                      "parallelLockMap;")                                         \
+                                                                                                                  \
   /* JVM monitoring and management support */                                                                     \
   template(java_lang_StackTraceElement_array,          "[Ljava/lang/StackTraceElement;")                          \
   template(java_lang_management_ThreadState,           "java/lang/management/ThreadState")                        \
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -706,6 +706,30 @@
   }
 }
 
+// Apply the given closure to each live object in the space
+//   The usage of CompactibleFreeListSpace
+// by the ConcurrentMarkSweepGeneration for concurrent GC's allows
+// objects in the space to contain references to objects that are no
+// longer valid.  For example, an object may reference another object
+// that has already been swept up (collected).  This method uses
+// obj_is_alive() to determine whether it is safe to apply the closure to
+// an object.  See obj_is_alive() for details on how liveness of an
+// object is decided.
+
+void CompactibleFreeListSpace::safe_object_iterate(ObjectClosure* blk) {
+  assert_lock_strong(freelistLock());
+  NOT_PRODUCT(verify_objects_initialized());
+  HeapWord *cur, *limit;
+  size_t curSize;
+  for (cur = bottom(), limit = end(); cur < limit;
+       cur += curSize) {
+    curSize = block_size(cur);
+    if (block_is_obj(cur) && obj_is_alive(cur)) {
+      blk->do_object(oop(cur));
+    }
+  }
+}
+
 void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
                                                   UpwardsObjectClosure* cl) {
   assert_locked();
@@ -861,7 +885,9 @@
     } else {
       // must read from what 'p' points to in each loop.
       klassOop k = ((volatile oopDesc*)p)->klass_or_null();
-      if (k != NULL && ((oopDesc*)p)->is_parsable()) {
+      if (k != NULL &&
+          ((oopDesc*)p)->is_parsable() &&
+          ((oopDesc*)p)->is_conc_safe()) {
         assert(k->is_oop(), "Should really be klass oop.");
         oop o = (oop)p;
         assert(o->is_oop(), "Should be an oop");
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -481,6 +481,15 @@
   void oop_iterate(OopClosure* cl);
 
   void object_iterate(ObjectClosure* blk);
+  // Apply the closure to each object in the space whose references
+  // point to objects in the heap.  The usage of CompactibleFreeListSpace
+  // by the ConcurrentMarkSweepGeneration for concurrent GC's allows
+  // objects in the space to contain references to objects that are no
+  // longer valid.  For example, an object may reference another object
+  // that has already been swept up (collected).  This method uses
+  // obj_is_alive() to determine whether it is safe to iterate over
+  // an object.
+  void safe_object_iterate(ObjectClosure* blk);
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
 
   // Requires that "mr" be entirely within the space.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -3018,6 +3018,16 @@
 }
 
 void
+ConcurrentMarkSweepGeneration::safe_object_iterate(ObjectClosure* cl) {
+  if (freelistLock()->owned_by_self()) {
+    Generation::safe_object_iterate(cl);
+  } else {
+    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
+    Generation::safe_object_iterate(cl);
+  }
+}
+
+void
 ConcurrentMarkSweepGeneration::pre_adjust_pointers() {
 }
 
@@ -6623,7 +6633,11 @@
   if (_bitMap->isMarked(addr)) {
     // it's marked; is it potentially uninitialized?
     if (p->klass_or_null() != NULL) {
-      if (CMSPermGenPrecleaningEnabled && !p->is_parsable()) {
+      // If is_conc_safe is false, the object may be undergoing
+      // change by the VM outside a safepoint.  Don't try to
+      // scan it, but rather leave it for the remark phase.
+      if (CMSPermGenPrecleaningEnabled &&
+          (!p->is_conc_safe() || !p->is_parsable())) {
         // Signal precleaning to redirty the card since
         // the klass pointer is already installed.
         assert(size == 0, "Initial value");
@@ -7001,7 +7015,6 @@
       _mut->clear_range(mr);
     }
   DEBUG_ONLY(})
-
   // Note: the finger doesn't advance while we drain
   // the stack below.
   PushOrMarkClosure pushOrMarkClosure(_collector,
@@ -8062,9 +8075,13 @@
     #ifdef DEBUG
       if (oop(addr)->klass_or_null() != NULL &&
           (   !_collector->should_unload_classes()
-           || oop(addr)->is_parsable())) {
+           || (oop(addr)->is_parsable()) &&
+               oop(addr)->is_conc_safe())) {
         // Ignore mark word because we are running concurrent with mutators
         assert(oop(addr)->is_oop(true), "live block should be an oop");
+        // is_conc_safe is checked before performing this assertion
+        // because an object that is not is_conc_safe may not yet have
+        // a correct return from size().
         assert(size ==
                CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()),
                "P-mark and computed size do not agree");
@@ -8077,6 +8094,13 @@
            (!_collector->should_unload_classes()
             || oop(addr)->is_parsable()),
            "Should be an initialized object");
+    // Note that there are objects used during class redefinition
+    // (e.g., merge_cp in VM_RedefineClasses::merge_cp_and_rewrite())
+    // which are discarded with their is_conc_safe state still
+    // false.  These objects may be floating garbage so may be
+    // seen here.  If they are floating garbage their size
+    // should be attainable from their klass.  Note that
+    // is_conc_safe() is true for oop(addr).
     // Ignore mark word because we are running concurrent with mutators
     assert(oop(addr)->is_oop(true), "live block should be an oop");
     // Verify that the bit map has no bits marked between
@@ -8484,7 +8508,7 @@
   size_t i = num;
   oop  cur = _overflow_list;
   const markOop proto = markOopDesc::prototype();
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
@@ -8501,45 +8525,131 @@
   return !stack->isEmpty();
 }
 
-// Multi-threaded; use CAS to break off a prefix
+#define BUSY  (oop(0x1aff1aff))
+// (MT-safe) Get a prefix of at most "num" from the list.
+// The overflow list is chained through the mark word of
+// each object in the list. We fetch the entire list,
+// break off a prefix of the right size and return the
+// remainder. If other threads try to take objects from
+// the overflow list at that time, they will wait for
+// some time to see if data becomes available. If (and
+// only if) another thread places one or more object(s)
+// on the global list before we have returned the suffix
+// to the global list, we will walk down our local list
+// to find its end and append the global list to
+// our suffix before returning it. This suffix walk can
+// prove to be expensive (quadratic in the amount of traffic)
+// when there are many objects in the overflow list and
+// there is much producer-consumer contention on the list.
+// *NOTE*: The overflow list manipulation code here and
+// in ParNewGeneration:: are very similar in shape,
+// except that in the ParNew case we use the old (from/eden)
+// copy of the object to thread the list via its klass word.
+// Because of the common code, if you make any changes in
+// the code below, please check the ParNew version to see if
+// similar changes might be needed.
+// CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
                                                OopTaskQueue* work_q) {
-  assert(work_q->size() == 0, "That's the current policy");
+  assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
     return false;
   }
   // Grab the entire list; we'll put back a suffix
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-  if (prefix == NULL) {  // someone grabbed it before we did ...
-    // ... we could spin for a short while, but for now we don't
-    return false;
-  }
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  Thread* tid = Thread::current();
+  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, num/100);
+  // If the list is busy, we spin for a short while,
+  // sleeping between attempts to get the list.
+  for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // Nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+      // Try and grab the prefix
+      prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
+  }
+  // If the list was found to be empty, or we spun long
+  // enough, we give up and return empty-handed. If we leave
+  // the list in the BUSY state below, it must be the case that
+  // some other thread holds the overflow list and will set it
+  // to a non-BUSY state in the future.
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
   size_t i = num;
   oop cur = prefix;
+  // Walk down the first "num" objects, unless we reach the end.
   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
-  if (cur->mark() != NULL) {
+  if (cur->mark() == NULL) {
+    // We have "num" or fewer elements in the list, so there
+    // is nothing to return to the global list.
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+    }
+  } else {
+    // Chop off the suffix and return it to the global list.
+    assert(cur->mark() != BUSY, "Error");
     oop suffix_head = cur->mark(); // suffix will be put back on global list
     cur->set_mark(NULL);           // break off suffix
-    // Find tail of suffix so we can prepend suffix to global list
-    for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
-    oop suffix_tail = cur;
-    assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
-           "Tautology");
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix without incurring the cost
+    // of a walk down the list.
     oop observed_overflow_list = _overflow_list;
-    do {
-      cur = observed_overflow_list;
-      suffix_tail->set_mark(markOop(cur));
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
       observed_overflow_list =
-        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
-    } while (cur != observed_overflow_list);
+        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else sneaked in (at least) an element; we'll need
+      // to do a splice. Find tail of suffix so we can prepend suffix to global
+      // list.
+      for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
+      oop suffix_tail = cur;
+      assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
+             "Tautology");
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          suffix_tail->set_mark(markOop(cur_overflow_list));
+        } else { // cur_overflow_list == BUSY
+          suffix_tail->set_mark(NULL);
+        }
+        // ... and try to place spliced list back on overflow_list ...
+        observed_overflow_list =
+          (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
+      // ... until we have succeeded in doing so.
+    }
   }
 
   // Push the prefix elements on work_q
   assert(prefix != NULL, "control point invariant");
   const markOop proto = markOopDesc::prototype();
   oop next;
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (cur = prefix; cur != NULL; cur = next) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
@@ -8573,11 +8683,16 @@
   oop cur_overflow_list;
   do {
     cur_overflow_list = observed_overflow_list;
-    p->set_mark(markOop(cur_overflow_list));
+    if (cur_overflow_list != BUSY) {
+      p->set_mark(markOop(cur_overflow_list));
+    } else {
+      p->set_mark(NULL);
+    }
     observed_overflow_list =
       (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
   } while (cur_overflow_list != observed_overflow_list);
 }
+#undef BUSY
 
 // Single threaded
 // General Note on GrowableArray: pushes may silently fail
@@ -8586,7 +8701,7 @@
 // a lot of code in the JVM. The prudent thing for GrowableArray
 // to do (for now) is to exit with an error. However, that may
 // be too draconian in some cases because the caller may be
-// able to recover without much harm. For suych cases, we
+// able to recover without much harm. For such cases, we
 // should probably introduce a "soft_push" method which returns
 // an indication of success or failure with the assumption that
 // the caller may be able to recover from a failure; code in
@@ -8594,8 +8709,6 @@
 // failures where possible, thus, incrementally hardening the VM
 // in such low resource situations.
 void CMSCollector::preserve_mark_work(oop p, markOop m) {
-  int PreserveMarkStackSize = 128;
-
   if (_preserved_oop_stack == NULL) {
     assert(_preserved_mark_stack == NULL,
            "bijection with preserved_oop_stack");
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -595,7 +595,7 @@
   size_t        _ser_kac_preclean_ovflw;
   size_t        _ser_kac_ovflw;
   size_t        _par_kac_ovflw;
-  NOT_PRODUCT(size_t _num_par_pushes;)
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // ("Weak") Reference processing support
   ReferenceProcessor*            _ref_processor;
@@ -1212,6 +1212,7 @@
   // More iteration support
   virtual void oop_iterate(MemRegion mr, OopClosure* cl);
   virtual void oop_iterate(OopClosure* cl);
+  virtual void safe_object_iterate(ObjectClosure* cl);
   virtual void object_iterate(ObjectClosure* cl);
 
   // Need to declare the full complement of closures, whether we'll
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -24,7 +24,7 @@
 
 // We need to sort heap regions by collection desirability.
 
-class CSetChooserCache {
+class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
     CacheLength = 16
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -33,7 +33,7 @@
   PYA_cancel     // It's been completed by somebody else: cancel.
 };
 
-class ConcurrentG1Refine {
+class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread* _cg1rThread;
 
   volatile jint _pya;
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -30,7 +30,7 @@
 // A generic CM bit map.  This is essentially a wrapper around the BitMap
 // class, with one bit per (1<<_shifter) HeapWords.
 
-class CMBitMapRO {
+class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
  protected:
   HeapWord* _bmStartWord;      // base address of range covered by map
   size_t    _bmWordSize;       // map size (in #HeapWords covered)
@@ -139,7 +139,7 @@
 
 // Represents a marking stack used by the CM collector.
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
-class CMMarkStack {
+class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
   oop*   _base;      // bottom of stack
   jint   _index;     // one more than last occupied index
@@ -237,7 +237,7 @@
   void oops_do(OopClosure* f);
 };
 
-class CMRegionStack {
+class CMRegionStack VALUE_OBJ_CLASS_SPEC {
   MemRegion* _base;
   jint _capacity;
   jint _index;
@@ -312,7 +312,7 @@
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark {
+class ConcurrentMark: public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -141,7 +141,7 @@
     _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
     _length(0), _scan_only_length(0),
     _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
 {
   guarantee( check_list_empty(false), "just making sure..." );
 }
@@ -159,16 +159,15 @@
 }
 
 void YoungList::add_survivor_region(HeapRegion* hr) {
-  assert(!hr->is_survivor(), "should not already be for survived");
+  assert(hr->is_survivor(), "should be flagged as survivor region");
   assert(hr->get_next_young_region() == NULL, "cause it should!");
 
   hr->set_next_young_region(_survivor_head);
   if (_survivor_head == NULL) {
-    _survivors_tail = hr;
+    _survivor_tail = hr;
   }
   _survivor_head = hr;
 
-  hr->set_survivor();
   ++_survivor_length;
 }
 
@@ -239,7 +238,7 @@
 
   empty_list(_survivor_head);
   _survivor_head = NULL;
-  _survivors_tail = NULL;
+  _survivor_tail = NULL;
   _survivor_length = 0;
 
   _last_sampled_rs_lengths = 0;
@@ -391,6 +390,7 @@
 
   // Add survivor regions to SurvRateGroup.
   _g1h->g1_policy()->note_start_adding_survivor_regions();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
   for (HeapRegion* curr = _survivor_head;
        curr != NULL;
        curr = curr->get_next_young_region()) {
@@ -401,7 +401,7 @@
   if (_survivor_head != NULL) {
     _head           = _survivor_head;
     _length         = _survivor_length + _scan_only_length;
-    _survivors_tail->set_next_young_region(_scan_only_head);
+    _survivor_tail->set_next_young_region(_scan_only_head);
   } else {
     _head           = _scan_only_head;
     _length         = _scan_only_length;
@@ -418,9 +418,9 @@
   _curr_scan_only   = NULL;
 
   _survivor_head    = NULL;
-  _survivors_tail   = NULL;
+  _survivor_tail   = NULL;
   _survivor_length  = 0;
-  _g1h->g1_policy()->finished_recalculating_age_indexes();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(false /* is_survivors */);
 
   assert(check_list_well_formed(), "young list should be well formed");
 }
@@ -553,7 +553,7 @@
   if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
     alloc_region = newAllocRegion_work(word_size, true, zero_filled);
     if (purpose == GCAllocForSurvived && alloc_region != NULL) {
-      _young_list->add_survivor_region(alloc_region);
+      alloc_region->set_survivor();
     }
     ++_gc_alloc_region_counts[purpose];
   } else {
@@ -949,6 +949,10 @@
     GCOverheadReporter::recordSTWEnd(end);
     g1_policy()->record_full_collection_end();
 
+#ifdef TRACESPINNING
+    ParallelTaskTerminator::print_termination_counts();
+#endif
+
     gc_epilogue(true);
 
     // Abandon concurrent refinement.  This must happen last: in the
@@ -1285,7 +1289,9 @@
   _unclean_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
-  _surviving_young_words(NULL)
+  _surviving_young_words(NULL),
+  _in_cset_fast_test(NULL),
+  _in_cset_fast_test_base(NULL)
 {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
@@ -2485,6 +2491,19 @@
     g1_policy()->record_collection_pause_start(start_time_sec,
                                                start_used_bytes);
 
+    guarantee(_in_cset_fast_test == NULL, "invariant");
+    guarantee(_in_cset_fast_test_base == NULL, "invariant");
+    _in_cset_fast_test_length = n_regions();
+    _in_cset_fast_test_base =
+                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+    memset(_in_cset_fast_test_base, false,
+                                     _in_cset_fast_test_length * sizeof(bool));
+    // We're biasing _in_cset_fast_test to avoid subtracting the
+    // beginning of the heap every time we want to index; basically
+    // it's the same as what we do with the card table.
+    _in_cset_fast_test = _in_cset_fast_test_base -
+              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
+
 #if SCAN_ONLY_VERBOSE
     _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
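
The _in_cset_fast_test table introduced above is indexed by heap-region number, and its base pointer is biased by the region index of the start of the reserved heap, so a membership check becomes a single table[addr >> LogOfHRGrainBytes] load with no subtraction; the same trick a card table uses. A small self-contained illustration of that biasing arithmetic follows, with made-up region size and addresses (the out-of-range base pointer is formally undefined behaviour in ISO C++, but it is exactly what the VM relies on).

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

int main() {
  const int            log_region_bytes = 20;                       // pretend 1 MB regions
  const std::uintptr_t heap_start       = std::uintptr_t(64) << 20; // pretend reserved heap base
  const std::size_t    num_regions      = 16;

  bool storage[num_regions];
  std::memset(storage, 0, sizeof(storage));

  // Bias the base so that indexing by (addr >> log_region_bytes) lands in storage[0..n).
  bool* biased = storage - (heap_start >> log_region_bytes);

  std::uintptr_t some_addr =
      heap_start + (std::uintptr_t(3) << log_region_bytes) + 128;   // inside region 3
  biased[some_addr >> log_region_bytes] = true;                     // mark region 3 as in-cset

  assert(storage[3] == true);   // same slot, reached without subtracting heap_start
  assert(&biased[heap_start >> log_region_bytes] == &storage[0]);
  return 0;
}
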
@@ -2553,6 +2572,12 @@
       free_collection_set(g1_policy()->collection_set());
       g1_policy()->clear_collection_set();
 
+      FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+      // this is more for peace of mind; we're nulling them here and
+      // we're expecting them to be null at the beginning of the next GC
+      _in_cset_fast_test = NULL;
+      _in_cset_fast_test_base = NULL;
+
       if (popular_region != NULL) {
         // We have to wait until now, because we don't want the region to
         // be rescheduled for pop-evac during RS update.
@@ -2572,6 +2597,9 @@
         _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
 
+        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+                                             _young_list->first_survivor_region(),
+                                             _young_list->last_survivor_region());
         _young_list->reset_auxilary_lists();
       }
     } else {
@@ -2598,7 +2626,9 @@
 #endif // SCAN_ONLY_VERBOSE
 
     double end_time_sec = os::elapsedTime();
-    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    if (!evacuation_failed()) {
+      g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    }
     GCOverheadReporter::recordSTWEnd(end_time_sec);
     g1_policy()->record_collection_pause_end(popular_region != NULL,
                                              abandoned);
@@ -2621,8 +2651,13 @@
       }
     }
 
-    if (mark_in_progress())
+    if (mark_in_progress()) {
       concurrent_mark()->update_g1_committed();
+    }
+
+#ifdef TRACESPINNING
+    ParallelTaskTerminator::print_termination_counts();
+#endif
 
     gc_epilogue(false);
   }
@@ -2733,6 +2768,13 @@
     _gc_alloc_region_list = r->next_gc_alloc_region();
     r->set_next_gc_alloc_region(NULL);
     r->set_is_gc_alloc_region(false);
+    if (r->is_survivor()) {
+      if (r->is_empty()) {
+        r->set_not_young();
+      } else {
+        _young_list->add_survivor_region(r);
+      }
+    }
     if (r->is_empty()) {
       ++_free_regions;
     }
@@ -3129,6 +3171,20 @@
   return block;
 }
 
+void G1CollectedHeap::retire_alloc_region(HeapRegion* alloc_region,
+                                            bool par) {
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+}
+
 HeapWord*
 G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
                                          HeapRegion*    alloc_region,
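
retire_alloc_region() above relies on the rule spelled out in its comment: before a shared GC allocation region is finalized, the remaining space is consumed (with a garbage/filler object), so a racing par_allocate() can no longer succeed and the per-region bookkeeping sees the final fill level. The following is only a toy bump-pointer sketch of that retire-by-filling idea; Region, par_allocate and retire are illustrative, not the HeapRegion API.

#include <atomic>
#include <cstddef>

// Toy bump-pointer region shared by several GC worker threads.
struct Region {
  char*              bottom;
  char*              end;
  std::atomic<char*> top;

  // Lock-free parallel allocation: bump the top with a CAS loop.
  void* par_allocate(std::size_t bytes) {
    char* old_top = top.load();
    do {
      if (old_top + bytes > end) return nullptr;   // no room left
    } while (!top.compare_exchange_weak(old_top, old_top + bytes));
    return old_top;
  }

  // Retire the region: first swallow whatever space is left with a dummy
  // "filler" allocation, so any racing par_allocate() from now on must fail,
  // then it is safe to record per-region statistics.
  std::size_t retire() {
    char* old_top = top.load();
    while (!top.compare_exchange_weak(old_top, end)) {
      // old_top is refreshed by the failed CAS; retry until we own the remainder.
    }
    std::size_t filler_bytes = static_cast<std::size_t>(end - old_top);
    // A real collector would format [old_top, end) as a dead filler object here.
    return filler_bytes;
  }
};
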
@@ -3146,16 +3202,7 @@
     // Otherwise, continue; this new region is empty, too.
   }
   assert(alloc_region != NULL, "We better have an allocation region");
-  // Another thread might have obtained alloc_region for the given
-  // purpose, and might be attempting to allocate in it, and might
-  // succeed.  Therefore, we can't do the "finalization" stuff on the
-  // region below until we're sure the last allocation has happened.
-  // We ensure this by allocating the remaining space with a garbage
-  // object.
-  if (par) par_allocate_remaining_space(alloc_region);
-  // Now we can do the post-GC stuff on the region.
-  alloc_region->note_end_of_copying();
-  g1_policy()->record_after_bytes(alloc_region->used());
+  retire_alloc_region(alloc_region, par);
 
   if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
     // Cannot allocate more regions for the given purpose.
@@ -3164,7 +3211,7 @@
     if (purpose != alt_purpose) {
       HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
       // Has not the alternative region been aliased?
-      if (alloc_region != alt_region) {
+      if (alloc_region != alt_region && alt_region != NULL) {
         // Try to allocate in the alternative region.
         if (par) {
           block = alt_region->par_allocate(word_size);
@@ -3173,9 +3220,10 @@
         }
         // Make an alias.
         _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
-      }
-      if (block != NULL) {
-        return block;
+        if (block != NULL) {
+          return block;
+        }
+        retire_alloc_region(alt_region, par);
       }
       // Both the allocation region and the alternative one are full
       // and aliased, replace them with a new allocation region.
@@ -3476,6 +3524,7 @@
   OverflowQueue* _overflowed_refs;
 
   G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable           _age_table;
 
   size_t           _alloc_buffer_waste;
   size_t           _undo_waste;
@@ -3517,6 +3566,7 @@
       _refs(g1h->task_queue(queue_num)),
       _hash_seed(17), _queue_num(queue_num),
       _term_attempts(0),
+      _age_table(false),
 #if G1_DETAILED_STATS
       _pushes(0), _pops(0), _steals(0),
       _steal_attempts(0),  _overflow_pushes(0),
@@ -3551,8 +3601,9 @@
 
   RefToScanQueue*   refs()            { return _refs;             }
   OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
-
-  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return &_alloc_buffers[purpose];
   }
 
@@ -3560,6 +3611,9 @@
   size_t undo_waste()                            { return _undo_waste; }
 
   void push_on_queue(oop* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref), "invariant");
+
     if (!refs()->push(ref)) {
       overflowed_refs()->push(ref);
       IF_G1_DETAILED_STATS(note_overflow_push());
@@ -3572,6 +3626,10 @@
     if (!refs()->pop_local(ref)) {
       ref = NULL;
     } else {
+      assert(ref != NULL, "invariant");
+      assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref),
+             "invariant");
+
       IF_G1_DETAILED_STATS(note_pop());
     }
   }
@@ -3601,8 +3659,7 @@
 
       obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, "buffer was definitely big enough...");
-    }
-    else {
+    } else {
       obj = _g1h->par_allocate_during_gc(purpose, word_sz);
     }
     return obj;
@@ -3695,24 +3752,57 @@
     }
   }
 
+private:
+  void deal_with_reference(oop* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
   void trim_queue() {
+    // I've replicated the loop twice, first to drain the overflow
+    // queue, second to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
     while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
-      oop *ref_to_scan = NULL;
-      if (overflowed_refs_to_scan() == 0) {
-        pop_from_queue(ref_to_scan);
-      } else {
+      while (overflowed_refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
         pop_from_overflow_queue(ref_to_scan);
+        assert(ref_to_scan != NULL, "invariant");
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert(ref_to_scan != NULL, "sanity");
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+        deal_with_reference(ref_to_scan);
       }
-      if (ref_to_scan != NULL) {
-        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
-          _partial_scan_cl->do_oop_nv(ref_to_scan);
-        } else {
-          // Note: we can use "raw" versions of "region_containing" because
-          // "obj_to_scan" is definitely in the heap, and is not in a
-          // humongous region.
-          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-          _evac_cl->set_region(r);
-          _evac_cl->do_oop_nv(ref_to_scan);
+
+      while (refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
+        pop_from_queue(ref_to_scan);
+
+        if (ref_to_scan != NULL) {
+          // We shouldn't have pushed it on the queue if it was not
+          // pointing into the CSet.
+          assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+          deal_with_reference(ref_to_scan);
         }
       }
     }
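
trim_queue() above deliberately drains the thread-private overflow queue before the shareable task queue, so the entries other workers cannot steal are retired first while the stealable ones stay visible. A tiny sketch of that drain order, with standard containers standing in for the HotSpot queues (WorkerState and Task are invented for the example):

#include <deque>
#include <iostream>
#include <vector>

struct Task { int id; };

struct WorkerState {
  std::vector<Task> overflow;   // private: invisible to other workers
  std::deque<Task>  task_queue; // shared: other workers may steal from it

  void process(const Task& t) { std::cout << "task " << t.id << '\n'; }

  void trim_queue() {
    // Keep going until both queues are empty; processing a task may push more work.
    while (!overflow.empty() || !task_queue.empty()) {
      // Drain the invisible entries first, leaving the visible ones stealable.
      while (!overflow.empty()) {
        Task t = overflow.back();
        overflow.pop_back();
        process(t);
      }
      while (!task_queue.empty()) {
        Task t = task_queue.front();
        task_queue.pop_front();
        process(t);
      }
    }
  }
};

int main() {
  WorkerState w;
  w.task_queue = {{1}, {2}};
  w.overflow   = {{3}};
  w.trim_queue();   // prints 3, then 1, 2
  return 0;
}
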
@@ -3728,16 +3818,25 @@
 // Should probably be made inline and moved in g1OopClosures.inline.hpp.
 void G1ParScanClosure::do_oop_nv(oop* p) {
   oop obj = *p;
+
   if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        _par_scan_state->push_on_queue(p);
-        return;
-      }
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not going to even bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when we pop it from the queue
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == *p, "the value of *p should not have changed");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
     }
-    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
   }
 }
 
@@ -3765,7 +3864,9 @@
           (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+                                           : m->age();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
   HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
   oop       obj     = oop(obj_ptr);
@@ -3777,13 +3878,39 @@
     return _g1->handle_evacuation_failure_par(cl, old);
   }
 
+  // We're going to allocate linearly, so might as well prefetch ahead.
+  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
+
   oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
-    obj->set_mark(m);
     if (g1p->track_object_age(alloc_purpose)) {
-      obj->incr_age();
+      // We could simply do obj->incr_age(). However, this causes a
+      // performance issue. obj->incr_age() will first check whether
+      // the object has a displaced mark by checking its mark word;
+      // getting the mark word from the new location of the object
+      // stalls. So, given that we already have the mark word and we
+      // are about to install it anyway, it's better to increase the
+      // age on the mark word, when the object does not have a
+      // displaced mark word. We're not expecting many objects to have
+      // a displaced mark word, so that case is not optimized
+      // further (it could be...) and we simply call obj->incr_age().
+
+      if (m->has_displaced_mark_helper()) {
+        // in this case, we have to install the mark word first,
+        // otherwise obj looks to be forwarded (the old mark word,
+        // which contains the forward pointer, was copied)
+        obj->set_mark(m);
+        obj->incr_age();
+      } else {
+        m = m->incr_age();
+        obj->set_mark(m);
+      }
+      _par_scan_state->age_table()->add(obj, word_sz);
+    } else {
+      obj->set_mark(m);
     }
+
     // preserve "next" mark bit
     if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
       if (!use_local_bitmaps ||
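
The copy path above avoids re-reading the mark word from the freshly copied object, which tends to stall on the cache line that was just written; instead it bumps the age on the mark value it already holds and installs it once, falling back to set_mark() plus incr_age() only in the rare displaced-mark case. Here is a small illustration of that "update the value you already hold, then store once" pattern, using an invented header layout (age in the low 4 bits) rather than the real markOop encoding.

#include <cassert>
#include <cstdint>

// Invented header layout for the example: age lives in the low 4 bits.
constexpr std::uint64_t kAgeBits = 0xF;

std::uint64_t incr_age(std::uint64_t header) {
  std::uint64_t age = header & kAgeBits;
  if (age < kAgeBits) ++age;              // saturate instead of wrapping
  return (header & ~kAgeBits) | age;
}

struct Obj { std::uint64_t header; };

// Slower pattern: install the old header, then read it back to bump the age.
void copy_then_reread(Obj* dst, std::uint64_t old_header) {
  dst->header = old_header;               // store ...
  dst->header = incr_age(dst->header);    // ... then immediately re-read (stall-prone)
}

// Pattern used by the patch: bump the age on the value we already hold, store once.
void copy_with_local_age(Obj* dst, std::uint64_t old_header) {
  dst->header = incr_age(old_header);
}

int main() {
  Obj a{}, b{};
  copy_then_reread(&a, 0x100);
  copy_with_local_age(&b, 0x100);
  assert(a.header == b.header && (a.header & kAgeBits) == 1);
  return 0;
}
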
@@ -3805,9 +3932,11 @@
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       arrayOop(old)->set_length(0);
-      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+      _par_scan_state->push_on_queue(set_partial_array_mask(old));
     } else {
-      _scanner->set_region(_g1->heap_region_containing(obj));
+      // No point in using the slower heap_region_containing() method,
+      // given that we know obj is in the heap.
+      _scanner->set_region(_g1->heap_region_containing_raw(obj));
       obj->oop_iterate_backwards(_scanner);
     }
   } else {
@@ -3817,47 +3946,55 @@
   return obj;
 }
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
-void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
+void G1ParCopyClosure<do_gen_barrier, barrier,
+                      do_mark_forwardee, skip_cset_test>::do_oop_work(oop* p) {
   oop obj = *p;
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");
 
-  if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
+  // The only time we skip the cset test is when we're scanning
+  // references popped from the queue. And we only push on the queue
+  // references that we know point into the cset, so no point in
+  // checking again. But we'll leave an assert here for peace of mind.
+  assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
+
+  // here the null check is implicit in the in_cset_fast_test() call
+  if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
 #if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
-                             p, (void*) obj);
+    gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
+                           "into CS.", p, (void*) obj);
 #endif
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        *p = copy_to_survivor_space(obj);
-      }
-      // When scanning the RS, we only care about objs in CS.
-      if (barrier == G1BarrierRS) {
-        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-      }
+    if (obj->is_forwarded()) {
+      *p = obj->forwardee();
+    } else {
+      *p = copy_to_survivor_space(obj);
     }
-    // When scanning moved objs, must look at all oops.
-    if (barrier == G1BarrierEvac) {
+    // When scanning the RS, we only care about objs in CS.
+    if (barrier == G1BarrierRS) {
       _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
     }
-
-    if (do_gen_barrier) {
-      par_do_barrier(p);
-    }
-  }
-}
-
-template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
-
-template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  }
+
+  // When scanning moved objs, must look at all oops.
+  if (barrier == G1BarrierEvac && obj != NULL) {
+    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+  }
+
+  if (do_gen_barrier && obj != NULL) {
+    par_do_barrier(p);
+  }
+}
+
+template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
+
+template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
   oop obj, int start, int end) {
   // process our set of indices (include header in first chunk)
   assert(start < end, "invariant");
   T* const base      = (T*)objArrayOop(obj)->base();
-  T* const start_addr = base + start;
+  T* const start_addr = (start == 0) ? (T*) obj : base + start;
   T* const end_addr   = base + end;
   MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
   _scanner.set_region(_g1->heap_region_containing(obj));
@@ -3866,7 +4003,8 @@
 
 void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
   assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
-  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(has_partial_array_mask(p), "invariant");
+  oop old = clear_partial_array_mask(p);
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
@@ -3884,7 +4022,7 @@
     end = start + ParGCArrayScanChunk;
     arrayOop(old)->set_length(end);
     // Push remainder.
-    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+    _par_scan_state->push_on_queue(set_partial_array_mask(old));
   } else {
     // Restore length so that the heap remains parsable in
     // case of evacuation failure.
@@ -3893,11 +4031,6 @@
 
   // process our set of indices (include header in first chunk)
   process_array_chunk<oop>(obj, start, end);
-  oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr<oop>(start);
-  oop* end_addr   = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length
-  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
-  _scanner.set_region(_g1->heap_region_containing(obj));
-  obj->oop_iterate(&_scanner, mr);
 }
 
 int G1ScanAndBalanceClosure::_nq = 0;
@@ -3931,6 +4064,13 @@
                           pss->hash_seed(),
                           ref_to_scan)) {
         IF_G1_DETAILED_STATS(pss->note_steal());
+
+        // slightly paranoid tests; I'm trying to catch potential
+        // problems before we go into push_on_queue to know where the
+        // problem is coming from
+        assert(ref_to_scan != NULL, "invariant");
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                   _g1h->obj_in_cs(*ref_to_scan), "invariant");
         pss->push_on_queue(ref_to_scan);
         continue;
       }
@@ -3976,10 +4116,10 @@
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState pss(_g1h, i);
-    G1ParScanHeapEvacClosure     scan_evac_cl(_g1h, &pss);
-    G1ParScanHeapEvacClosure     evac_failure_cl(_g1h, &pss);
-    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss);
 
     pss.set_evac_closure(&scan_evac_cl);
     pss.set_evac_failure_closure(&evac_failure_cl);
@@ -4024,6 +4164,9 @@
       _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
       _g1h->g1_policy()->record_termination_time(i, term_ms);
     }
+    if (G1UseSurvivorSpace) {
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+    }
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
     // Clean up any par-expanded rem sets.
@@ -4263,7 +4406,7 @@
   // Is this the right thing to do here?  We don't save marks
   // on individual heap regions when we allocate from
   // them in parallel, so this seems like the correct place for this.
-  all_alloc_regions_note_end_of_copying();
+  retire_all_alloc_regions();
   {
     G1IsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
@@ -4903,7 +5046,7 @@
   return no_allocs;
 }
 
-void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+void G1CollectedHeap::retire_all_alloc_regions() {
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
     if (r != NULL) {
@@ -4916,8 +5059,7 @@
         }
       }
       if (!has_processed_alias) {
-        r->note_end_of_copying();
-        g1_policy()->record_after_bytes(r->used());
+        retire_alloc_region(r, false /* par */);
       }
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -90,7 +90,7 @@
   HeapRegion* _curr_scan_only;
 
   HeapRegion* _survivor_head;
-  HeapRegion* _survivors_tail;
+  HeapRegion* _survivor_tail;
   size_t      _survivor_length;
 
   void          empty_list(HeapRegion* list);
@@ -105,6 +105,7 @@
   bool          is_empty() { return _length == 0; }
   size_t        length() { return _length; }
   size_t        scan_only_length() { return _scan_only_length; }
+  size_t        survivor_length() { return _survivor_length; }
 
   void rs_length_sampling_init();
   bool rs_length_sampling_more();
@@ -120,6 +121,7 @@
   HeapRegion* first_region() { return _head; }
   HeapRegion* first_scan_only_region() { return _scan_only_head; }
   HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* last_survivor_region() { return _survivor_tail; }
   HeapRegion* par_get_next_scan_only_region() {
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
     HeapRegion* ret = _curr_scan_only;
@@ -219,7 +221,7 @@
   // The to-space memory regions into which objects are being copied during
   // a GC.
   HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
-  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+  size_t _gc_alloc_region_counts[GCAllocPurposeCount];
 
   // A list of the regions that have been set to be alloc regions in the
   // current collection.
@@ -247,6 +249,27 @@
   NumberSeq _pop_obj_rc_at_copy;
   void print_popularity_summary_info() const;
 
+  // This is used for a quick test on whether a reference points into
+  // the collection set or not. Basically, we have an array, with one
+  // byte per region, and that byte denotes whether the corresponding
+  // region is in the collection set or not. The entry corresponding
+  // to the bottom of the heap, i.e., region 0, is pointed to by
+  // _in_cset_fast_test_base.  The _in_cset_fast_test field has been
+  // biased so that it actually points to address 0 of the address
+  // space, to make the test as fast as possible (we can simply shift
+  // the address to index into it, instead of having to subtract the
+  // bottom of the heap from the address before shifting it; basically
+  // it works in the same way the card table works).
+  bool* _in_cset_fast_test;
+
+  // The allocated array used for the fast test on whether a reference
+  // points into the collection set or not. This field is also used to
+  // free the array.
+  bool* _in_cset_fast_test_base;
+
+  // The length of the _in_cset_fast_test_base array.
+  size_t _in_cset_fast_test_length;
+
   volatile unsigned _gc_time_stamp;
 
   size_t* _surviving_young_words;
@@ -260,8 +283,8 @@
   // Returns "true" iff none of the gc alloc regions have any allocations
   // since the last call to "save_marks".
   bool all_alloc_regions_no_allocs_since_save_marks();
-  // Calls "note_end_of_copying on all gc alloc_regions.
-  void all_alloc_regions_note_end_of_copying();
+  // Retires all gc alloc regions; called at the end of a GC pause.
+  void retire_all_alloc_regions();
 
   // The number of regions allocated to hold humongous objects.
   int         _num_humongous_regions;
@@ -330,6 +353,10 @@
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
 
+  // Retires an allocation region when it is full or at the end of a
+  // GC pause.
+  void  retire_alloc_region(HeapRegion* alloc_region, bool par);
+
   // Helper function for two callbacks below.
   // "full", if true, indicates that the GC is for a System.gc() request,
   // and should collect the entire heap.  If "clear_all_soft_refs" is true,
@@ -368,6 +395,38 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  // We register a region with the fast "in collection set" test. We
+  // simply set to true the array slot corresponding to this region.
+  void register_region_with_in_cset_fast_test(HeapRegion* r) {
+    assert(_in_cset_fast_test_base != NULL, "sanity");
+    assert(r->in_collection_set(), "invariant");
+    int index = r->hrs_index();
+    assert(0 <= (size_t) index && (size_t) index < _in_cset_fast_test_length,
+           "invariant");
+    assert(!_in_cset_fast_test_base[index], "invariant");
+    _in_cset_fast_test_base[index] = true;
+  }
+
+  // This is a fast test on whether a reference points into the
+  // collection set or not. It does not assume that the reference
+  // points into the heap; if it doesn't, it will return false.
+  bool in_cset_fast_test(oop obj) {
+    assert(_in_cset_fast_test != NULL, "sanity");
+    if (_g1_committed.contains((HeapWord*) obj)) {
+      // no need to subtract the bottom of the heap from obj,
+      // _in_cset_fast_test is biased
+      size_t index = ((size_t) obj) >> HeapRegion::LogOfHRGrainBytes;
+      bool ret = _in_cset_fast_test[index];
+      // let's make sure the result is consistent with what the slower
+      // test returns
+      assert( ret || !obj_in_cs(obj), "sanity");
+      assert(!ret ||  obj_in_cs(obj), "sanity");
+      return ret;
+    } else {
+      return false;
+    }
+  }
+
 protected:
 
   // Shrink the garbage-first heap by at most the given size (in bytes!).
@@ -850,6 +909,7 @@
 
   // Iterate over all objects, calling "cl.do_object" on each.
   virtual void object_iterate(ObjectClosure* cl);
+  virtual void safe_object_iterate(ObjectClosure* cl) { object_iterate(cl); }
 
   // Iterate over all objects allocated since the last collection, calling
   // "cl.do_object" on each.  The heap must have been initialized properly
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -36,8 +36,11 @@
 
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
-  HeapRegion* res = _hrs->addr_to_region(addr);
-  assert(res != NULL, "addr outside of heap?");
+  assert(_g1_reserved.contains(addr), "invariant");
+  size_t index = ((intptr_t) addr - (intptr_t) _g1_reserved.start())
+                                              >> HeapRegion::LogOfHRGrainBytes;
+  HeapRegion* res = _hrs->at(index);
+  assert(res == _hrs->addr_to_region(addr), "sanity");
   return res;
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -196,8 +196,13 @@
   _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
                                                  G1YoungSurvRateNumRegionsSummary)),
   _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
-                                              G1YoungSurvRateNumRegionsSummary))
+                                              G1YoungSurvRateNumRegionsSummary)),
   // add here any more surv rate groups
+  _recorded_survivor_regions(0),
+  _recorded_survivor_head(NULL),
+  _recorded_survivor_tail(NULL),
+  _survivors_age_table(true)
+
 {
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
@@ -272,6 +277,15 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
   _tenuring_threshold = MaxTenuringThreshold;
 
+  if (G1UseSurvivorSpace) {
+    // If G1FixedSurvivorSpaceSize is 0 (i.e., the survivor space size is
+    // not fixed), _max_survivor_regions will be calculated by
+    // calculate_young_list_target_config during initialization.
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  } else {
+    _max_survivor_regions = 0;
+  }
+
   initialize_all();
 }
 
@@ -283,6 +297,9 @@
 void G1CollectorPolicy::initialize_flags() {
   set_min_alignment(HeapRegion::GrainBytes);
   set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  if (SurvivorRatio < 1) {
+    vm_exit_during_initialization("Invalid survivor ratio specified");
+  }
   CollectorPolicy::initialize_flags();
 }
 
@@ -301,6 +318,8 @@
                                   "-XX:+UseConcMarkSweepGC.");
   }
 
+  initialize_gc_policy_counters();
+
   if (G1Gen) {
     _in_young_gc_mode = true;
 
@@ -322,6 +341,12 @@
   }
 }
 
+// Create the jstat counters for the policy.
+void G1CollectorPolicy::initialize_gc_policy_counters()
+{
+  _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen);
+}
+
 void G1CollectorPolicy::calculate_young_list_min_length() {
   _young_list_min_length = 0;
 
@@ -352,6 +377,7 @@
     guarantee( so_length < _young_list_target_length, "invariant" );
     _young_list_so_prefix_length = so_length;
   }
+  calculate_survivors_policy();
 }
 
 // This method calculate the optimal scan-only set for a fixed young
@@ -448,6 +474,9 @@
   if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
     // we are in fully-young mode and there are free regions in the heap
 
+    double survivor_regions_evac_time =
+        predict_survivor_regions_evac_time();
+
     size_t min_so_length = 0;
     size_t max_so_length = 0;
 
@@ -497,9 +526,8 @@
       scanned_cards = predict_non_young_card_num(adj_rs_lengths);
     // calculate this once, so that we don't have to recalculate it in
     // the innermost loop
-    double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
-                                                       scanned_cards);
-
+    double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards)
+                          + survivor_regions_evac_time;
     // the result
     size_t final_young_length = 0;
     size_t final_so_length = 0;
@@ -548,14 +576,14 @@
     bool done = false;
     // this is the outermost loop
     while (!done) {
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
                              ", incr " SIZE_FORMAT ", pass %s",
                              from_so_length, to_so_length, so_length_incr,
                              (pass == pass_type_coarse) ? "coarse" :
                              (pass == pass_type_fine) ? "fine" : "final");
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       size_t so_length = from_so_length;
       size_t init_free_regions =
@@ -651,11 +679,11 @@
           guarantee( so_length_incr == so_coarse_increments, "invariant" );
           guarantee( final_so_length >= min_so_length, "invariant" );
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
           // leave this in for debugging, just in case
           gclog_or_tty->print_cr("  coarse pass: SO length " SIZE_FORMAT,
                                  final_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
           from_so_length =
             (final_so_length - min_so_length > so_coarse_increments) ?
@@ -687,12 +715,12 @@
             // of the optimal
             size_t new_so_length = 950 * final_so_length / 1000;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
             // leave this in for debugging, just in case
             gclog_or_tty->print_cr("  fine pass: SO length " SIZE_FORMAT
                                    ", setting it to " SIZE_FORMAT,
                                     final_so_length, new_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
             from_so_length = new_so_length;
             to_so_length = new_so_length;
@@ -719,7 +747,8 @@
     }
 
     // we should have at least one region in the target young length
-    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    _young_list_target_length =
+        MAX2((size_t) 1, final_young_length + _recorded_survivor_regions);
     if (final_so_length >= final_young_length)
       // and we need to ensure that the S-O length is not greater than
       // the target young length (this is being a bit careful)
@@ -734,7 +763,7 @@
     double end_time_sec = os::elapsedTime();
     double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
                            ", SO = " SIZE_FORMAT ", "
@@ -747,9 +776,9 @@
                            calculations,
                            full_young_gcs() ? "full" : "partial",
                            should_initiate_conc_mark() ? " i-m" : "",
-                           in_marking_window(),
-                           in_marking_window_im());
-#endif // 0
+                           _in_marking_window,
+                           _in_marking_window_im);
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     if (_young_list_target_length < _young_list_min_length) {
       // bummer; this means that, if we do a pause when the optimal
@@ -768,14 +797,14 @@
         // S-O length
         so_length = calculate_optimal_so_length(_young_list_min_length);
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("adjusted target length from "
                              SIZE_FORMAT " to " SIZE_FORMAT
                              ", SO " SIZE_FORMAT,
                              _young_list_target_length, _young_list_min_length,
                              so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       _young_list_target_length =
         MAX2(_young_list_min_length, (size_t)1);
@@ -785,12 +814,12 @@
     // we are in a partially-young mode or we've run out of regions (due
     // to evacuation failure)
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
                            ", SO " SIZE_FORMAT,
                            _young_list_min_length, 0);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     // we'll do the pause as soon as possible and with no S-O prefix
     // (see above for the reasons behind the latter)
@@ -884,6 +913,16 @@
   return true;
 }
 
+double G1CollectorPolicy::predict_survivor_regions_evac_time() {
+  double survivor_regions_evac_time = 0.0;
+  for (HeapRegion * r = _recorded_survivor_head;
+       r != NULL && r != _recorded_survivor_tail->get_next_young_region();
+       r = r->get_next_young_region()) {
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+  }
+  return survivor_regions_evac_time;
+}
+
 void G1CollectorPolicy::check_prediction_validity() {
   guarantee( adaptive_young_list_length(), "should not call this otherwise" );
 
@@ -995,11 +1034,15 @@
   _short_lived_surv_rate_group->start_adding_regions();
   // also call this on any additional surv rate groups
 
+  record_survivor_regions(0, NULL, NULL);
+
   _prev_region_num_young   = _region_num_young;
   _prev_region_num_tenured = _region_num_tenured;
 
   _free_regions_at_end_of_collection = _g1->free_regions();
   _scan_only_regions_at_end_of_collection = 0;
+  // Reset survivors SurvRateGroup.
+  _survivor_surv_rate_group->reset();
   calculate_young_list_min_length();
   calculate_young_list_target_config();
  }
@@ -1104,6 +1147,10 @@
   _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length);
   tag_scan_only(short_lived_so_length);
 
+  if (G1UseSurvivorSpace) {
+    _survivors_age_table.clear();
+  }
+
   assert( verify_young_ages(), "region age verification" );
 }
 
@@ -1965,9 +2012,6 @@
   // </NEW PREDICTION>
 
   _target_pause_time_ms = -1.0;
-
-  // TODO: calculate tenuring threshold
-  _tenuring_threshold = MaxTenuringThreshold;
 }
 
 // <NEW PREDICTION>
@@ -2058,7 +2102,7 @@
     guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
                "invariant" );
     int age = hr->age_in_surv_rate_group();
-    double yg_surv_rate = predict_yg_surv_rate(age);
+    double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
 
@@ -2091,7 +2135,7 @@
   }
 #if PREDICTIONS_VERBOSE
   if (young) {
-    _recorded_young_bytes += hr->asSpace()->used();
+    _recorded_young_bytes += hr->used();
   } else {
     _recorded_marked_bytes += hr->max_live_bytes();
   }
@@ -2119,11 +2163,6 @@
       predict_non_young_card_num(_predicted_rs_lengths);
   _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
 
-  _predicted_young_survival_ratio = 0.0;
-  for (int i = 0; i < _recorded_young_regions; ++i)
-    _predicted_young_survival_ratio += predict_yg_surv_rate(i);
-  _predicted_young_survival_ratio /= (double) _recorded_young_regions;
-
   _predicted_scan_only_scan_time_ms =
     predict_scan_only_time_ms(_recorded_scan_only_regions);
   _predicted_rs_update_time_ms =
@@ -2673,8 +2712,11 @@
   assert(in_young_gc_mode(), "should be in young GC mode");
   bool ret;
   size_t young_list_length = _g1->young_list_length();
-
-  if (young_list_length < _young_list_target_length) {
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  if (young_list_length < young_list_max_length) {
     ret = true;
     ++_region_num_young;
   } else {
@@ -2710,17 +2752,39 @@
 }
 
 
-uint G1CollectorPolicy::max_regions(int purpose) {
+size_t G1CollectorPolicy::max_regions(int purpose) {
   switch (purpose) {
     case GCAllocForSurvived:
-      return G1MaxSurvivorRegions;
+      return _max_survivor_regions;
     case GCAllocForTenured:
-      return UINT_MAX;
+      return REGIONS_UNLIMITED;
     default:
-      return UINT_MAX;
+      ShouldNotReachHere();
+      return REGIONS_UNLIMITED;
   };
 }
 
+// Calculates survivor space parameters.
+void G1CollectorPolicy::calculate_survivors_policy()
+{
+  if (!G1UseSurvivorSpace) {
+    return;
+  }
+  if (G1FixedSurvivorSpaceSize == 0) {
+    _max_survivor_regions = _young_list_target_length / SurvivorRatio;
+  } else {
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  }
+
+  if (G1FixedTenuringThreshold) {
+    _tenuring_threshold = MaxTenuringThreshold;
+  } else {
+    _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
+        HeapRegion::GrainWords * _max_survivor_regions);
+  }
+}
+
+
 void
 G1CollectorPolicy_BestRegionsFirst::
 set_single_region_collection_set(HeapRegion* hr) {
@@ -2743,7 +2807,11 @@
   double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
 
   size_t young_list_length = _g1->young_list_length();
-  bool reached_target_length = young_list_length >= _young_list_target_length;
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  bool reached_target_length = young_list_length >= young_list_max_length;
 
   if (in_young_gc_mode()) {
     if (reached_target_length) {
@@ -2985,6 +3053,7 @@
   _collection_set = hr;
   _collection_set_size++;
   _collection_set_bytes_used_before += hr->used();
+  _g1->register_region_with_in_cset_fast_test(hr);
 }
 
 void
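
calculate_survivors_policy() above sizes the survivor space either from the fixed G1FixedSurvivorSpaceSize or by dividing the young list target by SurvivorRatio. A minimal sketch of just that arithmetic follows; the constants and stand-in flag values are illustrative assumptions, not the real HotSpot globals or flag plumbing.

#include <cstddef>
#include <cstdio>

// Illustrative stand-ins for the flags and constants used above (assumptions,
// not the real HotSpot globals).
static const size_t kGrainBytes             = 1024 * 1024;  // region size
static const size_t kSurvivorRatio          = 8;
static const size_t kFixedSurvivorSpaceSize = 0;            // 0 => not fixed

// Mirrors the shape of G1CollectorPolicy::calculate_survivors_policy():
// either carve the survivor space out of the young target using the ratio,
// or size it directly from a fixed byte count.
static size_t max_survivor_regions(size_t young_list_target_length) {
  if (kFixedSurvivorSpaceSize == 0) {
    return young_list_target_length / kSurvivorRatio;
  } else {
    return kFixedSurvivorSpaceSize / kGrainBytes;
  }
}

int main() {
  size_t young_target = 100;  // regions, illustrative
  // With the values above: 100 / 8 = 12 regions (integer division).
  printf("max survivor regions = %zu\n", max_survivor_regions(young_target));
  return 0;
}
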
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -49,7 +49,7 @@
 class MainBodySummary;
 class PopPreambleSummary;
 
-class PauseSummary {
+class PauseSummary: public CHeapObj {
   define_num_seq(total)
     define_num_seq(other)
 
@@ -58,7 +58,7 @@
   virtual PopPreambleSummary* pop_preamble_summary() { return NULL; }
 };
 
-class MainBodySummary {
+class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
@@ -75,7 +75,7 @@
   define_num_seq(clear_ct)  // parallel only
 };
 
-class PopPreambleSummary {
+class PopPreambleSummary: public CHeapObj {
   define_num_seq(pop_preamble)
     define_num_seq(pop_update_rs)
     define_num_seq(pop_scan_rs)
@@ -557,6 +557,8 @@
     return get_new_neg_prediction(_young_gc_eff_seq);
   }
 
+  double predict_survivor_regions_evac_time();
+
   // </NEW PREDICTION>
 
 public:
@@ -599,8 +601,8 @@
 
   // Returns an estimate of the survival rate of the region at yg-age
   // "yg_age".
-  double predict_yg_surv_rate(int age) {
-    TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age);
+  double predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) {
+    TruncatedSeq* seq = surv_rate_group->get_seq(age);
     if (seq->num() == 0)
       gclog_or_tty->print("BARF! age is %d", age);
     guarantee( seq->num() > 0, "invariant" );
@@ -610,6 +612,10 @@
     return pred;
   }
 
+  double predict_yg_surv_rate(int age) {
+    return predict_yg_surv_rate(age, _short_lived_surv_rate_group);
+  }
+
   double accum_yg_surv_rate_pred(int age) {
     return _short_lived_surv_rate_group->accum_surv_rate_pred(age);
   }
@@ -822,6 +828,9 @@
 
   virtual void init();
 
+  // Create jstat counters for the policy.
+  virtual void initialize_gc_policy_counters();
+
   virtual HeapWord* mem_allocate_work(size_t size,
                                       bool is_tlab,
                                       bool* gc_overhead_limit_was_exceeded);
@@ -1047,8 +1056,12 @@
   // Print stats on young survival ratio
   void print_yg_surv_rate_info() const;
 
-  void finished_recalculating_age_indexes() {
-    _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+  void finished_recalculating_age_indexes(bool is_survivors) {
+    if (is_survivors) {
+      _survivor_surv_rate_group->finished_recalculating_age_indexes();
+    } else {
+      _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+    }
     // do that for any other surv rate groups
   }
 
@@ -1097,6 +1110,17 @@
   // maximum number of survivor regions.
   int _tenuring_threshold;
 
+  // The limit on the number of regions allocated for survivors.
+  size_t _max_survivor_regions;
+
+  // The number of survivor regions after a collection.
+  size_t _recorded_survivor_regions;
+  // List of survivor regions.
+  HeapRegion* _recorded_survivor_head;
+  HeapRegion* _recorded_survivor_tail;
+
+  ageTable _survivors_age_table;
+
 public:
 
   inline GCAllocPurpose
@@ -1116,7 +1140,9 @@
     return GCAllocForTenured;
   }
 
-  uint max_regions(int purpose);
+  static const size_t REGIONS_UNLIMITED = ~(size_t)0;
+
+  size_t max_regions(int purpose);
 
   // The limit on regions for a particular purpose is reached.
   void note_alloc_region_limit_reached(int purpose) {
@@ -1132,6 +1158,23 @@
   void note_stop_adding_survivor_regions() {
     _survivor_surv_rate_group->stop_adding_regions();
   }
+
+  void record_survivor_regions(size_t      regions,
+                               HeapRegion* head,
+                               HeapRegion* tail) {
+    _recorded_survivor_regions = regions;
+    _recorded_survivor_head    = head;
+    _recorded_survivor_tail    = tail;
+  }
+
+  void record_thread_age_table(ageTable* age_table)
+  {
+    _survivors_age_table.merge_par(age_table);
+  }
+
+  // Calculates survivor space parameters.
+  void calculate_survivors_policy();
+
 };
 
 // This encapsulates a particular strategy for a g1 Collector.
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -28,7 +28,7 @@
 /***** ALL TIMES ARE IN SECS!!!!!!! *****/
 
 // this is the "interface"
-class G1MMUTracker {
+class G1MMUTracker: public CHeapObj {
 protected:
   double          _time_slice;
   double          _max_gc_time; // this is per time slice
@@ -67,7 +67,7 @@
   }
 };
 
-class G1MMUTrackerQueueElem {
+class G1MMUTrackerQueueElem VALUE_OBJ_CLASS_SPEC {
 private:
   double _start_time;
   double _end_time;
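
Several classes in this change move onto the C heap (CHeapObj base) while others are marked VALUE_OBJ_CLASS_SPEC because they are only ever embedded in other objects or placed on the stack. The sketch below illustrates that general pattern with generic stand-in base classes; it is not HotSpot's actual CHeapObj/ValueObj implementation.

#include <cstddef>
#include <cstdlib>
#include <new>

// Generic stand-ins for the two allocation styles (assumptions, not the real
// HotSpot base classes): heap-allocated objects get their storage from the
// C heap via overloaded operator new/delete; "value" objects are only ever
// embedded or stack-allocated, so heap allocation is simply disallowed.
class CHeapAllocated {
 public:
  void* operator new(size_t size) {
    void* p = ::malloc(size);
    if (p == NULL) throw std::bad_alloc();
    return p;
  }
  void operator delete(void* p) { ::free(p); }
};

class ValueOnly {
 private:
  void* operator new(size_t);      // not implemented: heap allocation forbidden
  void  operator delete(void*);    // not implemented
};

class Tracker : public CHeapAllocated {   // like G1MMUTracker after this change
 public:
  double max_time;
};

class QueueElem : public ValueOnly {      // like G1MMUTrackerQueueElem
 public:
  double start_time;
  double end_time;
};

int main() {
  Tracker* t = new Tracker();   // fine: storage comes from the C heap
  delete t;

  QueueElem e;                  // fine: stack/embedded use only
  e.start_time = 0.0;
  e.end_time   = 1.0;
  // new QueueElem();           // would not compile: allocation is disallowed
  return 0;
}
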
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -77,6 +77,18 @@
 
 #define G1_PARTIAL_ARRAY_MASK 1
 
+inline bool has_partial_array_mask(oop* ref) {
+  return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK;
+}
+
+inline oop* set_partial_array_mask(oop obj) {
+  return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK);
+}
+
+inline oop clear_partial_array_mask(oop* ref) {
+  return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
+}
+
 class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
   G1ParScanClosure _scanner;
   template <class T> void process_array_chunk(oop obj, int start, int end);
@@ -101,7 +113,8 @@
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
   void do_oop_work(oop* p);
@@ -119,14 +132,22 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
-typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
+typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, false, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true,  false> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, true,  false> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   true,  false> G1ParScanAndMarkHeapRSClosure;
+// This is the only case when we set skip_cset_test. Basically, this
+// closure is (should?) only be called directly while we're draining
+// the overflow and task queues. In that case we know that the
+// reference in question points into the collection set, otherwise we
+// would not have pushed it on the queue.
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+// We need a separate closure to handle references during evacuation
+// failure processing, as it cannot assume that the reference already
+// points to the collection set (like G1ParScanHeapEvacClosure does).
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public OopClosure {
   G1CollectedHeap* _g1;
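
The three inline helpers above encode a "partial object array" work item by setting the low bit of the pointer, relying on object alignment to keep that bit free in ordinary references. A self-contained sketch of the same low-bit tagging idea, using plain void*/intptr_t rather than oop types (illustrative only):

#include <cassert>
#include <cstdint>

static const intptr_t kPartialArrayMask = 1;

// Tag a pointer by setting its lowest bit; valid as long as the pointee is
// at least 2-byte aligned so that bit is otherwise always zero.
inline void* set_mask(void* p)   { return (void*)((intptr_t)p |  kPartialArrayMask); }
inline bool  has_mask(void* p)   { return ((intptr_t)p & kPartialArrayMask) != 0; }
inline void* clear_mask(void* p) { return (void*)((intptr_t)p & ~kPartialArrayMask); }

int main() {
  long  payload = 42;             // long is at least 4-byte aligned here
  void* raw     = &payload;

  assert(!has_mask(raw));

  void* tagged = set_mask(raw);
  assert(has_mask(tagged));

  // Recover the original pointer before dereferencing.
  long* back = (long*)clear_mask(tagged);
  assert(back == &payload && *back == 42);
  return 0;
}
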
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -572,6 +572,9 @@
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  for (uint i = 0; i < n_workers(); ++i) {
+    _cards_scanned[i] = 0;
+  }
   _total_cards_scanned = 0;
 }
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -30,7 +30,7 @@
 class HRInto_G1RemSet;
 class ConcurrentG1Refine;
 
-class G1RemSet {
+class G1RemSet: public CHeapObj {
 protected:
   G1CollectedHeap* _g1;
 
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -28,7 +28,7 @@
 
 #define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
                                                                             \
-  product(intx, ParallelGCG1AllocBufferSize, 4*K,                           \
+  product(intx, ParallelGCG1AllocBufferSize, 8*K,                           \
           "Size of parallel G1 allocation buffers in to-space.")            \
                                                                             \
   product(intx, G1TimeSliceMS, 500,                                         \
@@ -281,7 +281,17 @@
   develop(bool, G1HRRSFlushLogBuffersOnVerify, false,                       \
           "Forces flushing of log buffers before verification.")            \
                                                                             \
-  product(intx, G1MaxSurvivorRegions, 0,                                    \
-          "The maximum number of survivor regions")
+  product(bool, G1UseSurvivorSpace, true,                                   \
+          "When true, use survivor space.")                                 \
+                                                                            \
+  product(bool, G1FixedTenuringThreshold, false,                            \
+          "When set, G1 will not adjust the tenuring threshold")            \
+                                                                            \
+  product(bool, G1FixedEdenSize, false,                                     \
+          "When set, G1 will not allocate unused survivor space regions")   \
+                                                                            \
+  product(uintx, G1FixedSurvivorSpaceSize, 0,                               \
+          "If non-0 is the size of the G1 survivor space, "                 \
+          "otherwise SurvivorRatio is used to determine the size")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -32,11 +32,13 @@
   G1BarrierNone, G1BarrierRS, G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure;
 class G1ParScanClosure;
 
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true>
+                                                      G1ParScanHeapEvacClosure;
 
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -566,7 +566,11 @@
   void note_end_of_copying() {
     assert(top() >= _next_top_at_mark_start,
            "Increase only");
-    _next_top_at_mark_start = top();
+    // Survivor regions will be scanned on the start of concurrent
+    // marking.
+    if (!is_survivor()) {
+      _next_top_at_mark_start = top();
+    }
   }
 
   // Returns "false" iff no object in the region was allocated when the
@@ -829,7 +833,7 @@
 
 // A linked list of heap regions.  It leaves the "next" field
 // unspecified; that's up to subtypes.
-class RegionList {
+class RegionList VALUE_OBJ_CLASS_SPEC {
 protected:
   virtual HeapRegion* get_next(HeapRegion* chr) = 0;
   virtual void set_next(HeapRegion* chr,
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -65,9 +65,11 @@
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
+#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
+#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
@@ -1144,7 +1146,9 @@
   size_t i = _outgoing_region_map.get_next_one_offset(0);
   while (i < _outgoing_region_map.size()) {
     HeapRegion* to_region = g1h->region_at(i);
-    to_region->rem_set()->clear_incoming_entry(hr());
+    if (!to_region->in_collection_set()) {
+      to_region->rem_set()->clear_incoming_entry(hr());
+    }
     i = _outgoing_region_map.get_next_one_offset(i+1);
   }
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -58,7 +58,7 @@
 //      is represented.  If a deleted PRT is re-used, a thread adding a bit,
 //      thinking the PRT is for a different region, does no harm.
 
-class OtherRegionsTable: public CHeapObj {
+class OtherRegionsTable VALUE_OBJ_CLASS_SPEC {
   friend class HeapRegionRemSetIterator;
 
   G1CollectedHeap* _g1h;
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -29,7 +29,7 @@
 
 class PtrQueueSet;
 
-class PtrQueue: public CHeapObj {
+class PtrQueue VALUE_OBJ_CLASS_SPEC {
 
 protected:
   // The ptr queue set to which this queue belongs.
@@ -130,7 +130,7 @@
 // In particular, the individual queues allocate buffers from this shared
 // set, and return completed buffers to the set.
 // All these variables are are protected by the TLOQ_CBL_mon. XXX ???
-class PtrQueueSet: public CHeapObj {
+class PtrQueueSet VALUE_OBJ_CLASS_SPEC {
 
 protected:
 
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -33,7 +33,7 @@
 // old versions synchronously.
 
 
-class SparsePRTEntry {
+class SparsePRTEntry: public CHeapObj {
 public:
   enum SomePublicConstants {
     CardsPerEntry = (short)4,
@@ -167,7 +167,7 @@
 };
 
   // ValueObj because it will be embedded in HRRS iterator.
-class RSHashTableIter: public CHeapObj {
+class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
     short _tbl_ind;
     short _bl_ind;
     short _card_ind;
@@ -213,7 +213,7 @@
 
 class SparsePRTIter;
 
-class SparsePRT : public CHeapObj {
+class SparsePRT VALUE_OBJ_CLASS_SPEC {
   //  Iterations are done on the _cur hash table, since they only need to
   //  see entries visible at the start of a collection pause.
   //  All other operations are done using the _next hash table.
--- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -29,23 +29,14 @@
                              const char* name,
                              size_t summary_surv_rates_len) :
     _g1p(g1p), _name(name),
-    _all_regions_allocated(0),
-    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
-    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
-    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
     _summary_surv_rates_len(summary_surv_rates_len),
     _summary_surv_rates_max_len(0),
-    _summary_surv_rates(NULL) {
-
-  // the following will set up the arrays with length 1
-  _curr_length = 1;
-  stop_adding_regions();
-  guarantee( _array_length == 1, "invariant" );
-  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
-  _surv_rate_pred[0]->add(0.4);
-  all_surviving_words_recorded(false);
-  _curr_length = 0;
-
+    _summary_surv_rates(NULL),
+    _surv_rate(NULL),
+    _accum_surv_rate_pred(NULL),
+    _surv_rate_pred(NULL)
+{
+  reset();
   if (summary_surv_rates_len > 0) {
     size_t length = summary_surv_rates_len;
       _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
@@ -60,61 +51,80 @@
   start_adding_regions();
 }
 
+
+void SurvRateGroup::reset()
+{
+  _all_regions_allocated = 0;
+  _scan_only_prefix      = 0;
+  _setup_seq_num         = 0;
+  _stats_arrays_length   = 0;
+  _accum_surv_rate       = 0.0;
+  _last_pred             = 0.0;
+  // the following will set up the arrays with length 1
+  _region_num            = 1;
+  stop_adding_regions();
+  guarantee( _stats_arrays_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);
+  all_surviving_words_recorded(false);
+  _region_num = 0;
+}
+
+
 void
 SurvRateGroup::start_adding_regions() {
-  _setup_seq_num   = _array_length;
-  _curr_length     = _scan_only_prefix;
+  _setup_seq_num   = _stats_arrays_length;
+  _region_num      = _scan_only_prefix;
   _accum_surv_rate = 0.0;
 
 #if 0
-  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
-                         _setup_seq_num, _curr_length);
+  gclog_or_tty->print_cr("[%s] start adding regions, seq num %d, length %d",
+                         _name, _setup_seq_num, _region_num);
 #endif // 0
 }
 
 void
 SurvRateGroup::stop_adding_regions() {
-  size_t length = _curr_length;
 
 #if 0
-  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+  gclog_or_tty->print_cr("[%s] stop adding regions, length %d", _name, _region_num);
 #endif // 0
 
-  if (length > _array_length) {
+  if (_region_num > _stats_arrays_length) {
     double* old_surv_rate = _surv_rate;
     double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
     TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
 
-    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_surv_rate == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                             "Not enough space for surv rate array.");
     }
-    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_accum_surv_rate_pred == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                          "Not enough space for accum surv rate pred array.");
     }
-    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num);
     if (_surv_rate == NULL) {
-      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * _region_num,
                             "Not enough space for surv rate pred array.");
     }
 
-    for (size_t i = 0; i < _array_length; ++i)
+    for (size_t i = 0; i < _stats_arrays_length; ++i)
       _surv_rate_pred[i] = old_surv_rate_pred[i];
 
 #if 0
-    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
-                  _array_length, length - 1);
+    gclog_or_tty->print_cr("[%s] stop adding regions, new seqs %d to %d",
+                  _name, _stats_arrays_length, _region_num - 1);
 #endif // 0
 
-    for (size_t i = _array_length; i < length; ++i) {
+    for (size_t i = _stats_arrays_length; i < _region_num; ++i) {
       _surv_rate_pred[i] = new TruncatedSeq(10);
       // _surv_rate_pred[i]->add(last_pred);
     }
 
-    _array_length = length;
+    _stats_arrays_length = _region_num;
 
     if (old_surv_rate != NULL)
       FREE_C_HEAP_ARRAY(double, old_surv_rate);
@@ -124,7 +134,7 @@
       FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred);
   }
 
-  for (size_t i = 0; i < _array_length; ++i)
+  for (size_t i = 0; i < _stats_arrays_length; ++i)
     _surv_rate[i] = 0.0;
 }
 
@@ -135,7 +145,7 @@
 
   double ret = _accum_surv_rate;
   if (adjustment > 0) {
-    TruncatedSeq* seq = get_seq(_curr_length+1);
+    TruncatedSeq* seq = get_seq(_region_num+1);
     double surv_rate = _g1p->get_new_prediction(seq);
     ret += surv_rate;
   }
@@ -145,23 +155,23 @@
 
 int
 SurvRateGroup::next_age_index() {
-  TruncatedSeq* seq = get_seq(_curr_length);
+  TruncatedSeq* seq = get_seq(_region_num);
   double surv_rate = _g1p->get_new_prediction(seq);
   _accum_surv_rate += surv_rate;
 
-  ++_curr_length;
+  ++_region_num;
   return (int) ++_all_regions_allocated;
 }
 
 void
 SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
-  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  guarantee( scan_only_prefix <= _region_num, "pre-condition" );
   _scan_only_prefix = scan_only_prefix;
 }
 
 void
 SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
-  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _region_num,
              "pre-condition" );
   guarantee( _surv_rate[age_in_group] <= 0.00001,
              "should only update each slot once" );
@@ -178,15 +188,15 @@
 
 void
 SurvRateGroup::all_surviving_words_recorded(bool propagate) {
-  if (propagate && _curr_length > 0) { // conservative
-    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+  if (propagate && _region_num > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_region_num-1]->last();
 
 #if 0
     gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
                   surv_rate, _curr_length, _array_length - 1);
 #endif // 0
 
-    for (size_t i = _curr_length; i < _array_length; ++i) {
+    for (size_t i = _region_num; i < _stats_arrays_length; ++i) {
       guarantee( _surv_rate[i] <= 0.00001,
                  "the slot should not have been updated" );
       _surv_rate_pred[i]->add(surv_rate);
@@ -195,7 +205,7 @@
 
   double accum = 0.0;
   double pred = 0.0;
-  for (size_t i = 0; i < _array_length; ++i) {
+  for (size_t i = 0; i < _stats_arrays_length; ++i) {
     pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
     if (pred > 1.0) pred = 1.0;
     accum += pred;
@@ -209,8 +219,8 @@
 void
 SurvRateGroup::print() {
   gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
-                _name, _curr_length, _scan_only_prefix);
-  for (size_t i = 0; i < _curr_length; ++i) {
+                _name, _region_num, _scan_only_prefix);
+  for (size_t i = 0; i < _region_num; ++i) {
     gclog_or_tty->print_cr("    age %4d   surv rate %6.2lf %%   pred %6.2lf %%%s",
                   i, _surv_rate[i] * 100.0,
                   _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
--- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -29,7 +29,7 @@
   G1CollectorPolicy* _g1p;
   const char* _name;
 
-  size_t  _array_length;
+  size_t  _stats_arrays_length;
   double* _surv_rate;
   double* _accum_surv_rate_pred;
   double  _last_pred;
@@ -40,7 +40,7 @@
   size_t         _summary_surv_rates_max_len;
 
   int _all_regions_allocated;
-  size_t _curr_length;
+  size_t _region_num;
   size_t _scan_only_prefix;
   size_t _setup_seq_num;
 
@@ -48,6 +48,7 @@
   SurvRateGroup(G1CollectorPolicy* g1p,
                 const char* name,
                 size_t summary_surv_rates_len);
+  void reset();
   void start_adding_regions();
   void stop_adding_regions();
   void record_scan_only_prefix(size_t scan_only_prefix);
@@ -55,22 +56,21 @@
   void all_surviving_words_recorded(bool propagate);
   const char* name() { return _name; }
 
-  size_t region_num() { return _curr_length; }
+  size_t region_num() { return _region_num; }
   size_t scan_only_length() { return _scan_only_prefix; }
   double accum_surv_rate_pred(int age) {
     assert(age >= 0, "must be");
-    if ((size_t)age < _array_length)
+    if ((size_t)age < _stats_arrays_length)
       return _accum_surv_rate_pred[age];
     else {
-      double diff = (double) (age - _array_length + 1);
-      return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
+      double diff = (double) (age - _stats_arrays_length + 1);
+      return _accum_surv_rate_pred[_stats_arrays_length-1] + diff * _last_pred;
     }
   }
 
   double accum_surv_rate(size_t adjustment);
 
   TruncatedSeq* get_seq(size_t age) {
-    guarantee( 0 <= age, "pre-condition" );
     if (age >= _setup_seq_num) {
       guarantee( _setup_seq_num > 0, "invariant" );
       age = _setup_seq_num-1;
--- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Fri Feb 27 15:13:00 2009 -0800
@@ -28,6 +28,7 @@
 binaryTreeDictionary.cpp                binaryTreeDictionary.hpp
 binaryTreeDictionary.cpp                globals.hpp
 binaryTreeDictionary.cpp                ostream.hpp
+binaryTreeDictionary.cpp                space.inline.hpp
 binaryTreeDictionary.cpp                spaceDecorator.hpp
 
 binaryTreeDictionary.hpp                freeBlockDictionary.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Fri Feb 27 15:13:00 2009 -0800
@@ -31,9 +31,10 @@
 cardTableRS.cpp				concurrentMark.hpp
 cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
 
-collectionSetChooser.cpp		g1CollectedHeap.hpp
+collectionSetChooser.cpp		g1CollectedHeap.inline.hpp
 collectionSetChooser.cpp		g1CollectorPolicy.hpp
 collectionSetChooser.cpp		collectionSetChooser.hpp
+collectionSetChooser.cpp		space.inline.hpp
 
 collectionSetChooser.hpp		heapRegion.hpp
 collectionSetChooser.hpp                growableArray.hpp
@@ -42,14 +43,16 @@
 concurrentG1Refine.cpp			concurrentG1Refine.hpp
 concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
 concurrentG1Refine.cpp			copy.hpp
-concurrentG1Refine.cpp			g1CollectedHeap.hpp
+concurrentG1Refine.cpp			g1CollectedHeap.inline.hpp
 concurrentG1Refine.cpp			g1RemSet.hpp
+concurrentG1Refine.cpp			space.inline.hpp
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
+concurrentG1Refine.hpp			allocation.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
 concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
-concurrentG1RefineThread.cpp		g1CollectedHeap.hpp
+concurrentG1RefineThread.cpp		g1CollectedHeap.inline.hpp
 concurrentG1RefineThread.cpp            g1CollectorPolicy.hpp
 concurrentG1RefineThread.cpp		handles.inline.hpp
 concurrentG1RefineThread.cpp		mutexLocker.hpp
@@ -166,10 +169,11 @@
 g1CollectorPolicy.cpp			concurrentMarkThread.inline.hpp
 g1CollectorPolicy.cpp			debug.hpp
 g1CollectorPolicy.cpp			java.hpp
-g1CollectorPolicy.cpp                   g1CollectedHeap.hpp
+g1CollectorPolicy.cpp                   g1CollectedHeap.inline.hpp
 g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
 g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
 g1CollectorPolicy.cpp			mutexLocker.hpp
+g1CollectorPolicy.cpp			gcPolicyCounters.hpp
 
 g1CollectorPolicy.hpp                   collectorPolicy.hpp
 g1CollectorPolicy.hpp                   collectionSetChooser.hpp
@@ -187,7 +191,7 @@
 g1MarkSweep.cpp                         codeCache.hpp
 g1MarkSweep.cpp                         events.hpp
 g1MarkSweep.cpp                         fprofiler.hpp
-g1MarkSweep.hpp                         g1CollectedHeap.hpp
+g1MarkSweep.hpp                         g1CollectedHeap.inline.hpp
 g1MarkSweep.cpp                         g1MarkSweep.hpp
 g1MarkSweep.cpp                         gcLocker.hpp
 g1MarkSweep.cpp                         genCollectedHeap.hpp
@@ -226,7 +230,7 @@
 g1MMUTracker.cpp			mutexLocker.hpp
 
 g1MMUTracker.hpp			debug.hpp
-
+g1MMUTracker.hpp			allocation.hpp
 g1RemSet.cpp				bufferingOopClosure.hpp
 g1RemSet.cpp				concurrentG1Refine.hpp
 g1RemSet.cpp				concurrentG1RefineThread.hpp
@@ -264,12 +268,13 @@
 heapRegion.cpp                          iterator.hpp
 heapRegion.cpp                          oop.inline.hpp
 
-heapRegion.hpp                          space.hpp
+heapRegion.hpp                          space.inline.hpp
 heapRegion.hpp                          spaceDecorator.hpp
 heapRegion.hpp                          g1BlockOffsetTable.inline.hpp
 heapRegion.hpp                          watermark.hpp
 heapRegion.hpp				g1_specialized_oop_closures.hpp
 heapRegion.hpp				survRateGroup.hpp
+heapRegion.hpp				ageTable.hpp
 
 heapRegionRemSet.hpp			sparsePRT.hpp
 
@@ -283,7 +288,7 @@
 heapRegionRemSet.cpp                    space.inline.hpp
 
 heapRegionSeq.cpp                       allocation.hpp
-heapRegionSeq.cpp                       g1CollectedHeap.hpp
+heapRegionSeq.cpp                       g1CollectedHeap.inline.hpp
 heapRegionSeq.cpp                       heapRegionSeq.hpp
 
 heapRegionSeq.hpp                       growableArray.hpp
@@ -334,18 +339,18 @@
 survRateGroup.hpp			numberSeq.hpp
 
 survRateGroup.cpp			allocation.hpp
-survRateGroup.cpp			g1CollectedHeap.hpp
+survRateGroup.cpp			g1CollectedHeap.inline.hpp
 survRateGroup.cpp			g1CollectorPolicy.hpp
 survRateGroup.cpp			heapRegion.hpp
 survRateGroup.cpp			survRateGroup.hpp
 
 thread.cpp				concurrentMarkThread.inline.hpp
 
-universe.cpp                            g1CollectedHeap.hpp
+universe.cpp                            g1CollectedHeap.inline.hpp
 universe.cpp                            g1CollectorPolicy.hpp
 
 vm_operations_g1.hpp			vmGCOperations.hpp
 
 vm_operations_g1.cpp			vm_operations_g1.hpp
-vm_operations_g1.cpp                    g1CollectedHeap.hpp
+vm_operations_g1.cpp                    g1CollectedHeap.inline.hpp
 vm_operations_g1.cpp                    isGCActiveMark.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_parNew	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_parNew	Fri Feb 27 15:13:00 2009 -0800
@@ -29,6 +29,8 @@
 asParNewGeneration.cpp                  cmsAdaptiveSizePolicy.hpp
 asParNewGeneration.cpp                  cmsGCAdaptivePolicyCounters.hpp
 asParNewGeneration.cpp                  defNewGeneration.inline.hpp
+asParNewGeneration.cpp                  markOop.inline.hpp
+asParNewGeneration.cpp                  markSweep.inline.hpp
 asParNewGeneration.cpp                  oop.pcgc.inline.hpp
 asParNewGeneration.cpp                  parNewGeneration.hpp
 asParNewGeneration.cpp                  referencePolicy.hpp
@@ -40,7 +42,7 @@
 parCardTableModRefBS.cpp                java.hpp
 parCardTableModRefBS.cpp                mutexLocker.hpp
 parCardTableModRefBS.cpp                sharedHeap.hpp
-parCardTableModRefBS.cpp                space.hpp
+parCardTableModRefBS.cpp                space.inline.hpp
 parCardTableModRefBS.cpp                universe.hpp
 parCardTableModRefBS.cpp                virtualspace.hpp
 
@@ -77,6 +79,7 @@
 parNewGeneration.cpp                    sharedHeap.hpp
 parNewGeneration.cpp                    space.hpp
 parNewGeneration.cpp                    spaceDecorator.hpp
+parNewGeneration.cpp                    thread.hpp
 parNewGeneration.cpp                    workgroup.hpp
 
 parNewGeneration.hpp                    defNewGeneration.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Fri Feb 27 15:13:00 2009 -0800
@@ -302,6 +302,8 @@
 psOldGen.hpp                            spaceCounters.hpp
 
 psPermGen.cpp                           gcUtil.hpp
+psPermGen.cpp                           markOop.inline.hpp
+psPermGen.cpp                           markSweep.inline.hpp
 psPermGen.cpp                           parallelScavengeHeap.hpp
 psPermGen.cpp                           psMarkSweepDecorator.hpp
 psPermGen.cpp                           psParallelCompact.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_shared	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_shared	Fri Feb 27 15:13:00 2009 -0800
@@ -100,4 +100,4 @@
 spaceCounters.hpp                       perfData.hpp
 spaceCounters.hpp                       generationCounters.hpp
 
-vmGCOperations.cpp                      g1CollectedHeap.hpp
+vmGCOperations.cpp                      g1CollectedHeap.inline.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -404,6 +404,8 @@
     if (terminator()->offer_termination()) break;
     par_scan_state()->end_term_time();
   }
+  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
+         "Broken overflow list?");
   // Finish the last termination pause.
   par_scan_state()->end_term_time();
 }
@@ -456,6 +458,8 @@
   _is_alive_closure(this),
   _plab_stats(YoungPLABSize, PLABWeight)
 {
+  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
+  NOT_PRODUCT(_num_par_pushes = 0;)
   _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 
@@ -993,12 +997,19 @@
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
       if (Verbose && PrintGCDetails) {
         gclog_or_tty->print("queue overflow!\n");
       }
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1110,9 +1121,16 @@
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1135,89 +1153,190 @@
   return forward_ptr;
 }
 
-void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
-  oop cur_overflow_list = _overflow_list;
+#ifndef PRODUCT
+// It's OK to call this multi-threaded;  the worst thing
+// that can happen is that we'll get a bunch of closely
+// spaced simulated overflows, but that's OK, in fact
+// probably good as it would exercise the overflow code
+// under contention.
+bool ParNewGeneration::should_simulate_overflow() {
+  if (_overflow_counter-- <= 0) { // just being defensive
+    _overflow_counter = ParGCWorkQueueOverflowInterval;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif
+
+#define BUSY (oop(0x1aff1aff))
+void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
   // if the object has been forwarded to itself, then we cannot
   // use the klass pointer for the linked list.  Instead we have
   // to allocate an oopDesc in the C-Heap and use that for the linked list.
+  // XXX This is horribly inefficient when a promotion failure occurs
+  // and should be fixed. XXX FIX ME !!!
+#ifndef PRODUCT
+  Atomic::inc_ptr(&_num_par_pushes);
+  assert(_num_par_pushes > 0, "Tautology");
+#endif
   if (from_space_obj->forwardee() == from_space_obj) {
     oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
     listhead->forward_to(from_space_obj);
     from_space_obj = listhead;
   }
-  while (true) {
-    from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
-    oop observed_overflow_list =
+  oop observed_overflow_list = _overflow_list;
+  oop cur_overflow_list;
+  do {
+    cur_overflow_list = observed_overflow_list;
+    if (cur_overflow_list != BUSY) {
+      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
+    } else {
+      from_space_obj->set_klass_to_list_ptr(NULL);
+    }
+    observed_overflow_list =
       (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
-    if (observed_overflow_list == cur_overflow_list) break;
-    // Otherwise...
-    cur_overflow_list = observed_overflow_list;
-  }
+  } while (cur_overflow_list != observed_overflow_list);
 }
 
+// *NOTE*: The overflow list manipulation code here and
+// in CMSCollector:: are very similar in shape,
+// except that in the CMS case we thread the objects
+// directly into the list via their mark word, and do
+// not need to deal with special cases below related
+// to chunking of object arrays and promotion failure
+// handling.
+// CR 6797058 has been filed to attempt consolidation of
+// the common code.
+// Because of the common code, if you make any changes in
+// the code below, please check the CMS version to see if
+// similar changes might be needed.
+// See CMSCollector::par_take_from_overflow_list() for
+// more extensive documentation comments.
 bool
 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
+  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  int objsFromOverflow = MIN2(work_q->max_elems()/4,
-                              (juint)ParGCDesiredObjsFromOverflowList);
+  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+                                 (size_t)ParGCDesiredObjsFromOverflowList);
 
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-
-  if (prefix == NULL) {
-    return false;
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  // Trim off a prefix of at most objsFromOverflow items
+  Thread* tid = Thread::current();
+  size_t spin_count = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
+  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
+    // someone grabbed it before we did ...
+    // ... we spin for a short while...
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+     // try and grab the prefix
+     prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
   }
-  // Trim off a prefix of at most objsFromOverflow items
-  int i = 1;
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
+  size_t i = 1;
   oop cur = prefix;
   while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
     i++; cur = oop(cur->klass());
   }
 
   // Reattach remaining (suffix) to overflow list
-  if (cur->klass_or_null() != NULL) {
-    oop suffix = oop(cur->klass());
-    cur->set_klass_to_list_ptr(NULL);
-
-    // Find last item of suffix list
-    oop last = suffix;
-    while (last->klass_or_null() != NULL) {
-      last = oop(last->klass());
+  if (cur->klass_or_null() == NULL) {
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
     }
-    // Atomically prepend suffix to current overflow list
-    oop cur_overflow_list = _overflow_list;
-    while (true) {
-      last->set_klass_to_list_ptr(cur_overflow_list);
-      oop observed_overflow_list =
-        (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
-      if (observed_overflow_list == cur_overflow_list) break;
-      // Otherwise...
-      cur_overflow_list = observed_overflow_list;
+  } else {
+    assert(cur->klass_or_null() != BUSY, "Error");
+    oop suffix = oop(cur->klass());       // suffix will be put back on global list
+    cur->set_klass_to_list_ptr(NULL);     // break off suffix
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix.
+    oop observed_overflow_list = _overflow_list;
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
+      observed_overflow_list =
+        (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else got in in between; we'll need to do a splice.
+      // Find the last item of suffix list
+      oop last = suffix;
+      while (last->klass_or_null() != NULL) {
+        last = oop(last->klass());
+      }
+      // Atomically prepend suffix to current overflow list
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          last->set_klass_to_list_ptr(cur_overflow_list);
+        } else { // cur_overflow_list == BUSY
+          last->set_klass_to_list_ptr(NULL);
+        }
+        observed_overflow_list =
+          (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
     }
   }
 
   // Push objects on prefix list onto this thread's work queue
-  assert(cur != NULL, "program logic");
+  assert(prefix != NULL && prefix != BUSY, "program logic");
   cur = prefix;
-  int n = 0;
+  ssize_t n = 0;
   while (cur != NULL) {
     oop obj_to_push = cur->forwardee();
     oop next        = oop(cur->klass_or_null());
     cur->set_klass(obj_to_push->klass());
-    if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
-      obj_to_push = cur;
+    // This may be an array object that is self-forwarded. In that case, the list pointer
+    // space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
+    if (!is_in_reserved(cur)) {
+      // This can become a scaling bottleneck when there is work queue overflow coincident
+      // with promotion failure.
+      oopDesc* f = cur;
+      FREE_C_HEAP_ARRAY(oopDesc, f);
+    } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
       assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
     }
-    work_q->push(obj_to_push);
+    bool ok = work_q->push(obj_to_push);
+    assert(ok, "Should have succeeded");
     cur = next;
     n++;
   }
   par_scan_state->note_overflow_refill(n);
+#ifndef PRODUCT
+  assert(_num_par_pushes >= n, "Too many pops?");
+  Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+#endif
   return true;
 }
+#undef BUSY
 
 void ParNewGeneration::ref_processor_init()
 {
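
The push_on_overflow_list()/take_from_overflow_list() changes above implement a lock-free list with a BUSY sentinel: pushers CAS nodes onto the head (linking to NULL when they observe BUSY), while a taker claims the whole list by swapping in BUSY, trims a prefix for itself, and splices any leftover suffix back. Below is a minimal standalone sketch of that protocol using std::atomic rather than HotSpot's Atomic:: API; Node, kBusy, push_overflow and take_prefix are illustrative names, and the sleep/spin back-off and the C-heap list heads for self-forwarded objects are omitted.

#include <atomic>
#include <cstddef>

struct Node { Node* next; };

// Sentinel that marks the global list as "claimed by a taker" (the BUSY above).
static Node* const kBusy = reinterpret_cast<Node*>(0x1);
static std::atomic<Node*> g_overflow_list{nullptr};

// Push one node. If the list currently reads BUSY, link the node to NULL,
// mirroring what push_on_overflow_list() does via set_klass_to_list_ptr().
void push_overflow(Node* n) {
  Node* observed = g_overflow_list.load();
  Node* cur;
  do {
    cur = observed;
    n->next = (cur == kBusy) ? nullptr : cur;
    // On failure compare_exchange_weak refreshes 'observed' with the current head.
  } while (!g_overflow_list.compare_exchange_weak(observed, n));
}

// Claim at most max_take nodes: swap in BUSY, walk a prefix, splice back the rest.
Node* take_prefix(std::size_t max_take) {
  Node* prefix = g_overflow_list.exchange(kBusy);
  if (prefix == nullptr || prefix == kBusy) {
    if (prefix == nullptr) {
      // We replaced an empty list with BUSY; restore NULL if it is still our BUSY.
      Node* expected = kBusy;
      g_overflow_list.compare_exchange_strong(expected, nullptr);
    }
    return nullptr;  // nothing to take (HotSpot additionally spins/sleeps here)
  }
  Node* cur = prefix;
  std::size_t taken = 1;
  while (taken < max_take && cur->next != nullptr) { cur = cur->next; taken++; }
  Node* suffix = cur->next;
  cur->next = nullptr;  // break off the prefix we keep
  if (suffix == nullptr) {
    // No suffix to return: write NULL back in place of our BUSY, if still there.
    Node* expected = kBusy;
    g_overflow_list.compare_exchange_strong(expected, nullptr);
    return prefix;
  }
  // Splice the suffix back onto the global list, treating BUSY like an empty list.
  Node* last = suffix;
  while (last->next != nullptr) last = last->next;
  Node* observed = g_overflow_list.load();
  Node* seen;
  do {
    seen = observed;
    last->next = (seen == kBusy) ? nullptr : seen;
  } while (!g_overflow_list.compare_exchange_weak(observed, suffix));
  return prefix;
}
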
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -278,6 +278,7 @@
   friend class ParNewRefProcTask;
   friend class ParNewRefProcTaskExecutor;
   friend class ParScanThreadStateSet;
+  friend class ParEvacuateFollowersClosure;
 
  private:
   // XXX use a global constant instead of 64!
@@ -296,6 +297,7 @@
   // klass-pointers (klass information already copied to the forwarded
   // image.)  Manipulated with CAS.
   oop _overflow_list;
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // If true, older generation does not support promotion undo, so avoid.
   static bool _avoid_promotion_undo;
@@ -372,8 +374,12 @@
   oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
                              oop obj, size_t obj_sz, markOop m);
 
+  // in support of testing overflow code
+  NOT_PRODUCT(int _overflow_counter;)
+  NOT_PRODUCT(bool should_simulate_overflow();)
+
   // Push the given (from-space) object on the global overflow list.
-  void push_on_overflow_list(oop from_space_obj);
+  void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);
 
   // If the global overflow list is non-empty, move some tasks from it
   // onto "work_q" (which must be empty).  No more than 1/4 of the
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -200,6 +200,7 @@
 
   void oop_iterate(OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
+  void safe_object_iterate(ObjectClosure* cl) { object_iterate(cl); }
   void permanent_oop_iterate(OopClosure* cl);
   void permanent_object_iterate(ObjectClosure* cl);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -362,6 +362,10 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
 }
 
 bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -116,7 +116,7 @@
   // ObjectSpace stuff
   //
 
-  _object_space = new MutableSpace();
+  _object_space = new MutableSpace(virtual_space()->alignment());
 
   if (_object_space == NULL)
     vm_exit_during_initialization("Could not allocate an old gen space");
@@ -385,10 +385,10 @@
   start_array()->set_covered_region(new_memregion);
   Universe::heap()->barrier_set()->resize_covered_region(new_memregion);
 
-  HeapWord* const virtual_space_high = (HeapWord*) virtual_space()->high();
-
   // ALWAYS do this last!!
-  object_space()->set_end(virtual_space_high);
+  object_space()->initialize(new_memregion,
+                             SpaceDecorator::DontClear,
+                             SpaceDecorator::DontMangle);
 
   assert(new_word_size == heap_word_size(object_space()->capacity_in_bytes()),
     "Sanity");
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2203,6 +2203,10 @@
                            collection_exit.ticks());
     gc_task_manager()->print_task_time_stamps();
   }
+
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
 }
 
 bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -615,6 +615,10 @@
     gc_task_manager()->print_task_time_stamps();
   }
 
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
+
   return !promotion_failure_occurred;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -78,7 +78,7 @@
   _special = false;
 }
 
-bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpace::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -92,15 +92,6 @@
     _committed_high_addr += bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
@@ -255,7 +246,7 @@
   DEBUG_ONLY(verify());
 }
 
-bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpaceHighToLow::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -269,15 +260,6 @@
     _committed_low_addr -= bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -80,7 +80,7 @@
   inline  void   set_reserved(char* low_addr, char* high_addr, bool special);
   inline  void   set_reserved(ReservedSpace rs);
   inline  void   set_committed(char* low_addr, char* high_addr);
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
   void           release();
@@ -127,7 +127,7 @@
   PSVirtualSpaceHighToLow(ReservedSpace rs, size_t alignment);
   PSVirtualSpaceHighToLow(ReservedSpace rs);
 
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -64,12 +64,12 @@
   }
 
   if (UseNUMA) {
-    _eden_space = new MutableNUMASpace();
+    _eden_space = new MutableNUMASpace(virtual_space()->alignment());
   } else {
-    _eden_space = new MutableSpace();
+    _eden_space = new MutableSpace(virtual_space()->alignment());
   }
-  _from_space = new MutableSpace();
-  _to_space   = new MutableSpace();
+  _from_space = new MutableSpace(virtual_space()->alignment());
+  _to_space   = new MutableSpace(virtual_space()->alignment());
 
   if (_eden_space == NULL || _from_space == NULL || _to_space == NULL) {
     vm_exit_during_initialization("Could not allocate a young gen space");
--- a/src/share/vm/gc_implementation/shared/ageTable.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/ageTable.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -67,6 +67,12 @@
   }
 }
 
+void ageTable::merge_par(ageTable* subTable) {
+  for (int i = 0; i < table_size; i++) {
+    Atomic::add_ptr(subTable->sizes[i], &sizes[i]);
+  }
+}
+
 int ageTable::compute_tenuring_threshold(size_t survivor_capacity) {
   size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100);
   size_t total = 0;
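
The new merge_par() above folds a per-thread age table into the shared one with atomic adds instead of a lock, so several GC workers can merge at once. A minimal standalone analogue using std::atomic (the real code uses Atomic::add_ptr on plain size_t slots; kTableSize and the struct names are illustrative):

#include <atomic>
#include <cstddef>

const int kTableSize = 16;   // stands in for ageTable::table_size

struct SharedAgeTable {
  std::atomic<std::size_t> sizes[kTableSize];
  SharedAgeTable() { for (int i = 0; i < kTableSize; i++) sizes[i].store(0); }
};

struct LocalAgeTable {
  std::size_t sizes[kTableSize];   // private to one GC worker, no atomics needed
};

// Each worker calls this once; concurrent calls are safe because every slot is
// combined with an atomic fetch_add.
void merge_par(SharedAgeTable* shared, const LocalAgeTable* local) {
  for (int i = 0; i < kTableSize; i++) {
    shared->sizes[i].fetch_add(local->sizes[i], std::memory_order_relaxed);
  }
}
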
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/ageTable.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -56,6 +56,7 @@
   // Merge another age table with the current one.  Used
   // for parallel young generation gc.
   void merge(ageTable* subTable);
+  void merge_par(ageTable* subTable);
 
   // calculate new tenuring threshold based on age information
   int compute_tenuring_threshold(size_t survivor_capacity);
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -27,7 +27,7 @@
 # include "incls/_mutableNUMASpace.cpp.incl"
 
 
-MutableNUMASpace::MutableNUMASpace() {
+MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
   _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true);
   _page_size = os::vm_page_size();
   _adaptation_cycles = 0;
@@ -221,7 +221,7 @@
         }
       }
       if (!found) {
-        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i]));
+        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
       }
     }
 
@@ -443,10 +443,10 @@
   // Is there bottom?
   if (new_region.start() < intersection.start()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(p, new_region.start(), sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(p, intersection.end());
         } else {
@@ -462,10 +462,10 @@
   // Is there top?
   if (intersection.end() < new_region.end()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(new_region.end(), p, sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(intersection.start(), p);
         } else {
@@ -504,12 +504,12 @@
             // That's the only case we have to make an additional bias_region() call.
             HeapWord* start = invalid_region->start();
             HeapWord* end = invalid_region->end();
-            if (UseLargePages && page_size() >= os::large_page_size()) {
-              HeapWord *p = (HeapWord*)round_down((intptr_t) start, os::large_page_size());
+            if (UseLargePages && page_size() >= alignment()) {
+              HeapWord *p = (HeapWord*)round_down((intptr_t) start, alignment());
               if (new_region.contains(p)) {
                 start = p;
               }
-              p = (HeapWord*)round_to((intptr_t) end, os::large_page_size());
+              p = (HeapWord*)round_to((intptr_t) end, alignment());
               if (new_region.contains(end)) {
                 end = p;
               }
@@ -526,7 +526,8 @@
 
 void MutableNUMASpace::initialize(MemRegion mr,
                                   bool clear_space,
-                                  bool mangle_space) {
+                                  bool mangle_space,
+                                  bool setup_pages) {
   assert(clear_space, "Reallocation will destory data!");
   assert(lgrp_spaces()->length() > 0, "There should be at least one space");
 
@@ -538,7 +539,7 @@
 
   // Compute chunk sizes
   size_t prev_page_size = page_size();
-  set_page_size(UseLargePages ? os::large_page_size() : os::vm_page_size());
+  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
   HeapWord* rounded_bottom = (HeapWord*)round_to((intptr_t) bottom(), page_size());
   HeapWord* rounded_end = (HeapWord*)round_down((intptr_t) end(), page_size());
   size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
@@ -666,7 +667,7 @@
     }
 
     // Clear space (set top = bottom) but never mangle.
-    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle);
+    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);
 
     set_adaptation_cycles(samples_count());
  }
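
The round_to/round_down changes above all follow one pattern: to bias or coalesce whole aligned (large-page) units, a region start is rounded up and a region end is rounded down to the space alignment, and the result is only used if it still lies inside the region with at least one full aligned unit available. A standalone sketch of that rounding, assuming a power-of-two alignment; align_up, align_down and aligned_middle are illustrative names, not HotSpot's round_to/round_down:

#include <cassert>
#include <cstdint>

// Round an address up / down to a power-of-two alignment.
inline std::uintptr_t align_up(std::uintptr_t p, std::uintptr_t alignment) {
  assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
  return (p + alignment - 1) & ~(alignment - 1);
}
inline std::uintptr_t align_down(std::uintptr_t p, std::uintptr_t alignment) {
  assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
  return p & ~(alignment - 1);
}

struct Region { std::uintptr_t start, end; };   // [start, end)

// Trim [start, end) to the largest fully aligned subregion, or return an empty
// region if no whole aligned unit fits -- the same guard the patch expresses with
// new_region.contains(p) plus a pointer_delta() >= alignment() check.
Region aligned_middle(std::uintptr_t start, std::uintptr_t end, std::uintptr_t alignment) {
  std::uintptr_t s = align_up(start, alignment);
  std::uintptr_t e = align_down(end, alignment);
  if (s >= e) return Region{start, start};      // nothing aligned fits
  return Region{s, e};
}
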
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -82,8 +82,8 @@
     char* last_page_scanned()            { return _last_page_scanned; }
     void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    public:
-    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
-      _space = new MutableSpace();
+    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
+      _space = new MutableSpace(alignment);
       _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
     }
     ~LGRPSpace() {
@@ -183,10 +183,10 @@
 
  public:
   GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
-  MutableNUMASpace();
+  MutableNUMASpace(size_t alignment);
   virtual ~MutableNUMASpace();
   // Space initialization.
-  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
   // Update space layout if necessary. Do all adaptive resizing job.
   virtual void update();
   // Update allocation rate averages.
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -25,7 +25,10 @@
 # include "incls/_precompiled.incl"
 # include "incls/_mutableSpace.cpp.incl"
 
-MutableSpace::MutableSpace(): ImmutableSpace(), _top(NULL) {
+MutableSpace::MutableSpace(size_t alignment): ImmutableSpace(), _top(NULL), _alignment(alignment) {
+  assert(MutableSpace::alignment() >= 0 &&
+         MutableSpace::alignment() % os::vm_page_size() == 0,
+         "Space should be aligned");
   _mangler = new MutableSpaceMangler(this);
 }
 
@@ -33,16 +36,88 @@
   delete _mangler;
 }
 
+void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) {
+  if (!mr.is_empty()) {
+    size_t page_size = UseLargePages ? alignment() : os::vm_page_size();
+    HeapWord *start = (HeapWord*)round_to((intptr_t) mr.start(), page_size);
+    HeapWord *end =  (HeapWord*)round_down((intptr_t) mr.end(), page_size);
+    if (end > start) {
+      size_t size = pointer_delta(end, start, sizeof(char));
+      if (clear_space) {
+        // Prefer page reallocation to migration.
+        os::free_memory((char*)start, size);
+      }
+      os::numa_make_global((char*)start, size);
+    }
+  }
+}
+
+void MutableSpace::pretouch_pages(MemRegion mr) {
+  for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) {
+    char t = *p; *p = t;
+  }
+}
+
 void MutableSpace::initialize(MemRegion mr,
                               bool clear_space,
-                              bool mangle_space) {
-  HeapWord* bottom = mr.start();
-  HeapWord* end    = mr.end();
+                              bool mangle_space,
+                              bool setup_pages) {
+
+  assert(Universe::on_page_boundary(mr.start()) && Universe::on_page_boundary(mr.end()),
+         "invalid space boundaries");
 
-  assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end),
-         "invalid space boundaries");
-  set_bottom(bottom);
-  set_end(end);
+  if (setup_pages && (UseNUMA || AlwaysPreTouch)) {
+    // The space may move left and right or expand/shrink.
+    // We'd like to enforce the desired page placement.
+    MemRegion head, tail;
+    if (last_setup_region().is_empty()) {
+      // If it's the first initialization don't limit the amount of work.
+      head = mr;
+      tail = MemRegion(mr.end(), mr.end());
+    } else {
+      // Is there an intersection with the address space?
+      MemRegion intersection = last_setup_region().intersection(mr);
+      if (intersection.is_empty()) {
+        intersection = MemRegion(mr.end(), mr.end());
+      }
+      // All the sizes below are in words.
+      size_t head_size = 0, tail_size = 0;
+      if (mr.start() <= intersection.start()) {
+        head_size = pointer_delta(intersection.start(), mr.start());
+      }
+      if (intersection.end() <= mr.end()) {
+        tail_size = pointer_delta(mr.end(), intersection.end());
+      }
+      // Limit the amount of page manipulation if necessary.
+      if (NUMASpaceResizeRate > 0 && !AlwaysPreTouch) {
+        const size_t change_size = head_size + tail_size;
+        const float setup_rate_words = NUMASpaceResizeRate >> LogBytesPerWord;
+        head_size = MIN2((size_t)(setup_rate_words * head_size / change_size),
+                         head_size);
+        tail_size = MIN2((size_t)(setup_rate_words * tail_size / change_size),
+                         tail_size);
+      }
+      head = MemRegion(intersection.start() - head_size, intersection.start());
+      tail = MemRegion(intersection.end(), intersection.end() + tail_size);
+    }
+    assert(mr.contains(head) && mr.contains(tail), "Sanity");
+
+    if (UseNUMA) {
+      numa_setup_pages(head, clear_space);
+      numa_setup_pages(tail, clear_space);
+    }
+
+    if (AlwaysPreTouch) {
+      pretouch_pages(head);
+      pretouch_pages(tail);
+    }
+
+    // Remember where we stopped so that we can continue later.
+    set_last_setup_region(MemRegion(head.start(), tail.end()));
+  }
+
+  set_bottom(mr.start());
+  set_end(mr.end());
 
   if (clear_space) {
     clear(mangle_space);
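
The setup_pages path added above only works on the parts of the new region that were not covered last time: it intersects the new region with the last set-up region, derives a head and a tail slice, and (unless AlwaysPreTouch is set) caps their combined size by NUMASpaceResizeRate so a resize does not stall on page migration. A rough standalone sketch of just that head/tail computation in word-sized addresses; WordRegion, pages_to_setup and budget_words are illustrative, and the actual NUMA binding and pretouch calls are omitted:

#include <algorithm>
#include <cstddef>

struct WordRegion {                 // [start, end) in words
  std::size_t start, end;
  std::size_t size() const { return end > start ? end - start : 0; }
  bool empty() const { return size() == 0; }
};

// Given the region set up last time and the new region, return the head and tail
// slices that still need page setup, limiting the total work to budget_words
// (0 means unlimited, e.g. the AlwaysPreTouch case).
void pages_to_setup(WordRegion last, WordRegion cur, std::size_t budget_words,
                    WordRegion* head, WordRegion* tail) {
  if (last.empty()) {               // first initialization: do everything
    *head = cur;
    *tail = WordRegion{cur.end, cur.end};
    return;
  }
  // Intersection of the old and new regions (may be empty).
  WordRegion inter{std::max(last.start, cur.start), std::min(last.end, cur.end)};
  if (inter.empty()) inter = WordRegion{cur.end, cur.end};

  std::size_t head_size = inter.start > cur.start ? inter.start - cur.start : 0;
  std::size_t tail_size = cur.end > inter.end ? cur.end - inter.end : 0;

  if (budget_words != 0 && head_size + tail_size > 0) {
    std::size_t change = head_size + tail_size;
    head_size = std::min(head_size, budget_words * head_size / change);
    tail_size = std::min(tail_size, budget_words * tail_size / change);
  }
  *head = WordRegion{inter.start - head_size, inter.start};
  *tail = WordRegion{inter.end, inter.end + tail_size};
}
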
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -25,7 +25,10 @@
 // A MutableSpace is a subtype of ImmutableSpace that supports the
 // concept of allocation. This includes the concepts that a space may
 // be only partially full, and the querry methods that go with such
-// an assumption.
+// an assumption. MutableSpace is also responsible for minimizing the
+// page allocation time by having the memory pretouched (with
+// AlwaysPreTouch) and for optimizing page placement on NUMA systems
+// by making the underlying region interleaved (with UseNUMA).
 //
 // Invariant: (ImmutableSpace +) bottom() <= top() <= end()
 // top() is inclusive and end() is exclusive.
@@ -37,15 +40,23 @@
 
   // Helper for mangling unused space in debug builds
   MutableSpaceMangler* _mangler;
-
+  // The last region whose pages had been set up to be interleaved.
+  MemRegion _last_setup_region;
+  size_t _alignment;
  protected:
   HeapWord* _top;
 
   MutableSpaceMangler* mangler() { return _mangler; }
 
+  void numa_setup_pages(MemRegion mr, bool clear_space);
+  void pretouch_pages(MemRegion mr);
+
+  void set_last_setup_region(MemRegion mr) { _last_setup_region = mr;   }
+  MemRegion last_setup_region() const      { return _last_setup_region; }
+
  public:
   virtual ~MutableSpace();
-  MutableSpace();
+  MutableSpace(size_t page_size);
 
   // Accessors
   HeapWord* top() const                    { return _top;    }
@@ -57,13 +68,20 @@
   virtual void set_bottom(HeapWord* value) { _bottom = value; }
   virtual void set_end(HeapWord* value)    { _end = value; }
 
+  size_t alignment()                       { return _alignment; }
+
   // Returns a subregion containing all objects in this space.
   MemRegion used_region() { return MemRegion(bottom(), top()); }
 
+  static const bool SetupPages = true;
+  static const bool DontSetupPages = false;
+
   // Initialization
   virtual void initialize(MemRegion mr,
                           bool clear_space,
-                          bool mangle_space);
+                          bool mangle_space,
+                          bool setup_pages = SetupPages);
+
   virtual void clear(bool mangle_space);
   // Does the usual initialization but optionally resets top to bottom.
 #if 0  // MANGLE_SPACE
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -42,6 +42,7 @@
 class CollectedHeap : public CHeapObj {
   friend class VMStructs;
   friend class IsGCActiveMark; // Block structured external access to _is_gc_active
+  friend class constantPoolCacheKlass; // allocate() method inserts is_conc_safe
 
 #ifdef ASSERT
   static int       _fire_out_of_memory_count;
@@ -82,8 +83,6 @@
   // Reinitialize tlabs before resuming mutators.
   virtual void resize_all_tlabs();
 
-  debug_only(static void check_for_valid_allocation_state();)
-
  protected:
   // Allocate from the current thread's TLAB, with broken-out slow path.
   inline static HeapWord* allocate_from_tlab(Thread* thread, size_t size);
@@ -142,6 +141,7 @@
     PRODUCT_RETURN;
   virtual void check_for_non_bad_heap_word_value(HeapWord* addr, size_t size)
     PRODUCT_RETURN;
+  debug_only(static void check_for_valid_allocation_state();)
 
  public:
   enum Name {
@@ -466,6 +466,10 @@
   // This includes objects in permanent memory.
   virtual void object_iterate(ObjectClosure* cl) = 0;
 
+  // Similar to object_iterate() except iterates only
+  // over live objects.
+  virtual void safe_object_iterate(ObjectClosure* cl) = 0;
+
   // Behaves the same as oop_iterate, except only traverses
   // interior pointers contained in permanent memory. If there
   // is no permanent memory, does nothing.
--- a/src/share/vm/includeDB_compiler2	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/includeDB_compiler2	Fri Feb 27 15:13:00 2009 -0800
@@ -140,6 +140,7 @@
 c2_globals_<os_family>.hpp              macros.hpp
 
 c2_init_<arch>.cpp                      compile.hpp
+c2_init_<arch>.cpp                      node.hpp
 
 c2compiler.cpp                          ad_<arch_model>.hpp
 c2compiler.cpp                          c2compiler.hpp
@@ -839,6 +840,7 @@
 phase.cpp                               compile.hpp
 phase.cpp                               compileBroker.hpp
 phase.cpp                               nmethod.hpp
+phase.cpp                               node.hpp
 phase.cpp                               phase.hpp
 
 phase.hpp                               port.hpp
--- a/src/share/vm/includeDB_core	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/includeDB_core	Fri Feb 27 15:13:00 2009 -0800
@@ -1311,6 +1311,7 @@
 cppInterpreter_<arch>.cpp               debug.hpp
 cppInterpreter_<arch>.cpp               deoptimization.hpp
 cppInterpreter_<arch>.cpp               frame.inline.hpp
+cppInterpreter_<arch>.cpp               interfaceSupport.hpp
 cppInterpreter_<arch>.cpp               interpreterRuntime.hpp
 cppInterpreter_<arch>.cpp               interpreter.hpp
 cppInterpreter_<arch>.cpp               interpreterGenerator.hpp
@@ -2014,7 +2015,7 @@
 instanceKlass.cpp                       vmSymbols.hpp
 
 instanceKlass.hpp                       accessFlags.hpp
-instanceKlass.hpp                       bitMap.hpp
+instanceKlass.hpp                       bitMap.inline.hpp
 instanceKlass.hpp                       constMethodOop.hpp
 instanceKlass.hpp                       constantPoolOop.hpp
 instanceKlass.hpp                       handles.hpp
@@ -3771,6 +3772,7 @@
 
 spaceDecorator.cpp                      copy.hpp
 spaceDecorator.cpp                      spaceDecorator.hpp
+spaceDecorator.cpp                      space.inline.hpp
 
 specialized_oop_closures.cpp            ostream.hpp
 specialized_oop_closures.cpp            specialized_oop_closures.hpp
--- a/src/share/vm/includeDB_features	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/includeDB_features	Fri Feb 27 15:13:00 2009 -0800
@@ -59,6 +59,8 @@
 
 dump_<arch_model>.cpp                   assembler_<arch>.inline.hpp
 dump_<arch_model>.cpp                   compactingPermGenGen.hpp
+dump_<arch_model>.cpp                   generation.inline.hpp
+dump_<arch_model>.cpp                   space.inline.hpp
 
 forte.cpp                               collectedHeap.inline.hpp
 forte.cpp                               debugInfoRec.hpp
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -163,7 +163,7 @@
 #ifdef USELABELS
 // Have to do this dispatch this way in C++ because otherwise gcc complains about crossing an
 // initialization (which is is the initialization of the table pointer...)
-#define DISPATCH(opcode) goto *dispatch_table[opcode]
+#define DISPATCH(opcode) goto *(void*)dispatch_table[opcode]
 #define CONTINUE {                              \
         opcode = *pc;                           \
         DO_UPDATE_INSTRUCTION_COUNT(opcode);    \
@@ -341,7 +341,7 @@
  */
 #undef CHECK_NULL
 #define CHECK_NULL(obj_)                                                 \
-    if ((obj_) == 0) {                                                   \
+    if ((obj_) == NULL) {                                                \
         VM_JAVA_ERROR(vmSymbols::java_lang_NullPointerException(), "");  \
     }
 
@@ -1362,7 +1362,7 @@
 
 #define NULL_COMPARISON_NOT_OP(name)                                         \
       CASE(_if##name): {                                                     \
-          int skip = (!(STACK_OBJECT(-1) == 0))                              \
+          int skip = (!(STACK_OBJECT(-1) == NULL))                           \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
@@ -1372,7 +1372,7 @@
 
 #define NULL_COMPARISON_OP(name)                                             \
       CASE(_if##name): {                                                     \
-          int skip = ((STACK_OBJECT(-1) == 0))                               \
+          int skip = ((STACK_OBJECT(-1) == NULL))                            \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
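
The DISPATCH change above only adds a cast; the underlying mechanism is gcc's labels-as-values ("computed goto") dispatch, where each opcode indexes a table of label addresses. A tiny standalone illustration of that dispatch pattern (gcc/clang extension only; the opcode numbering and toy program here are made up):

#include <cstdio>

// Interprets a toy bytecode: 1 = increment, 2 = decrement, 0 = halt.
int run(const unsigned char* pc) {
  // Table of label addresses, indexed by opcode ("&&label" extension).
  void* const dispatch_table[] = { &&op_halt, &&op_inc, &&op_dec };
  int acc = 0;
  goto *dispatch_table[*pc];
op_inc:  acc++; pc++; goto *dispatch_table[*pc];
op_dec:  acc--; pc++; goto *dispatch_table[*pc];
op_halt: return acc;
}

int main() {
  const unsigned char program[] = {1, 1, 2, 0};   // inc, inc, dec, halt
  std::printf("%d\n", run(program));              // prints 1
}
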
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -66,7 +66,6 @@
 friend class InterpreterGenerator;
 friend class InterpreterMacroAssembler;
 friend class frame;
-friend class SharedRuntime;
 friend class VMStructs;
 
 public:
--- a/src/share/vm/interpreter/rewriter.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/interpreter/rewriter.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -48,9 +48,14 @@
 
 
 // Creates a constant pool cache given an inverse_index_map
+// This creates the constant pool cache initially in a state
+// that is unsafe for concurrent GC processing but sets it to
+// a safe mode before the constant pool cache is returned.
 constantPoolCacheHandle Rewriter::new_constant_pool_cache(intArray& inverse_index_map, TRAPS) {
   const int length = inverse_index_map.length();
-  constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length, CHECK_(constantPoolCacheHandle()));
+  constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length,
+                                             methodOopDesc::IsUnsafeConc,
+                                             CHECK_(constantPoolCacheHandle()));
   cache->initialize(inverse_index_map);
   return constantPoolCacheHandle(THREAD, cache);
 }
--- a/src/share/vm/libadt/dict.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/libadt/dict.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -346,9 +346,12 @@
   return strcmp((const char *)k1,(const char *)k2);
 }
 
-// Slimey cheap key comparator.
+// Cheap key comparator.
 int32 cmpkey(const void *key1, const void *key2) {
-  return (int32)((intptr_t)key1 - (intptr_t)key2);
+  if (key1 == key2) return 0;
+  intptr_t delta = (intptr_t)key1 - (intptr_t)key2;
+  if (delta > 0) return 1;
+  return -1;
 }
 
 //=============================================================================
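
The cmpkey() rewrite above avoids returning a truncated pointer difference, which can report the wrong ordering (or a bogus equality) once two keys are more than 2^31 apart on a 64-bit VM. A standalone demonstration of the failure mode and of the three-way comparison that replaces it (function names are illustrative):

#include <cstdint>
#include <cstdio>

// Old scheme: subtract and truncate to 32 bits.
std::int32_t cmp_by_subtraction(std::intptr_t k1, std::intptr_t k2) {
  return (std::int32_t)(k1 - k2);
}

// New scheme, as in the patch: compare first, then only report the sign.
std::int32_t cmp_three_way(std::intptr_t k1, std::intptr_t k2) {
  if (k1 == k2) return 0;
  std::intptr_t delta = k1 - k2;
  return delta > 0 ? 1 : -1;
}

int main() {
  // Two keys exactly 2^32 apart (64-bit build): the truncated difference is 0,
  // so the old comparator claims they are equal; the new one orders them.
  std::intptr_t k1 = (std::intptr_t)1 << 32;
  std::intptr_t k2 = 0;
  std::printf("old: %d  new: %d\n", (int)cmp_by_subtraction(k1, k2),
              (int)cmp_three_way(k1, k2));
  // prints: old: 0  new: 1
}
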
--- a/src/share/vm/libadt/port.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/libadt/port.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -34,17 +34,6 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
-#undef bzero
-inline void bzero(void *b, int len) { memset(b,0,len); }
-#undef bcopy
-inline void bcopy(const void *s, void *d, size_t len) { memmove(d,s,len); }
-#undef bcmp
-inline int bcmp(const void *s,const void *t,int len) { return memcmp(s,t,len);}
-extern "C" unsigned long strtoul(const char *s, char **end, int base);
-
-// Definition for sys_errlist varies from Sun 4.1 & Solaris.
-// We use the new Solaris definition.
-#include <string.h>
 
 // Access to the C++ class virtual function pointer
 // Put the class in the macro
--- a/src/share/vm/memory/genCollectedHeap.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -610,6 +610,10 @@
     Universe::print_heap_after_gc();
   }
 
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
+
   if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
     tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
     vm_exit(-1);
@@ -910,6 +914,13 @@
   perm_gen()->object_iterate(cl);
 }
 
+void GenCollectedHeap::safe_object_iterate(ObjectClosure* cl) {
+  for (int i = 0; i < _n_gens; i++) {
+    _gens[i]->safe_object_iterate(cl);
+  }
+  perm_gen()->safe_object_iterate(cl);
+}
+
 void GenCollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->object_iterate_since_last_GC(cl);
--- a/src/share/vm/memory/genCollectedHeap.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -215,6 +215,7 @@
   void oop_iterate(OopClosure* cl);
   void oop_iterate(MemRegion mr, OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
+  void safe_object_iterate(ObjectClosure* cl);
   void object_iterate_since_last_GC(ObjectClosure* cl);
   Space* space_containing(const void* addr) const;
 
--- a/src/share/vm/memory/generation.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/generation.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -319,6 +319,21 @@
   space_iterate(&blk);
 }
 
+class GenerationSafeObjIterateClosure : public SpaceClosure {
+ private:
+  ObjectClosure* _cl;
+ public:
+  virtual void do_space(Space* s) {
+    s->safe_object_iterate(_cl);
+  }
+  GenerationSafeObjIterateClosure(ObjectClosure* cl) : _cl(cl) {}
+};
+
+void Generation::safe_object_iterate(ObjectClosure* cl) {
+  GenerationSafeObjIterateClosure blk(cl);
+  space_iterate(&blk);
+}
+
 void Generation::prepare_for_compaction(CompactPoint* cp) {
   // Generic implementation, can be specialized
   CompactibleSpace* space = first_compaction_space();
--- a/src/share/vm/memory/generation.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/generation.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -518,6 +518,11 @@
   // each.
   virtual void object_iterate(ObjectClosure* cl);
 
+  // Iterate over all safe objects in the generation, calling "cl.do_object" on
+  // each.  An object is safe if its references point to other objects in
+  // the heap.  This defaults to object_iterate() unless overridden.
+  virtual void safe_object_iterate(ObjectClosure* cl);
+
   // Iterate over all objects allocated in the generation since the last
   // collection, calling "cl.do_object" on each.  The generation must have
   // been initialized properly to support this function, or else this call
--- a/src/share/vm/memory/heapInspection.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/heapInspection.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -263,6 +263,9 @@
   if (!cit.allocation_failed()) {
     // Iterate over objects in the heap
     RecordInstanceClosure ric(&cit);
+    // If this operation encounters a bad object when using CMS,
+    // consider using safe_object_iterate() which avoids perm gen
+    // objects that may contain bad references.
     Universe::heap()->object_iterate(&ric);
 
     // Report if certain classes are not counted because of
@@ -317,5 +320,8 @@
 
   // Iterate over objects in the heap
   FindInstanceClosure fic(k, result);
+  // If this operation encounters a bad object when using CMS,
+  // consider using safe_object_iterate() which avoids perm gen
+  // objects that may contain bad references.
   Universe::heap()->object_iterate(&fic);
 }
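
As the comments above note, heap walkers that may trip over unparsable perm gen objects under CMS can prefer the new safe_object_iterate() entry point. A hypothetical closure, not part of this patch, showing the intended usage pattern; it assumes the usual HotSpot includes and that the caller runs at a safepoint:

// Counts live objects using the new safe_object_iterate() entry point.
class CountLiveObjectsClosure : public ObjectClosure {
 private:
  size_t _count;
 public:
  CountLiveObjectsClosure() : _count(0) {}
  void do_object(oop obj) { _count++; }
  size_t count() const { return _count; }
};

// Usage, e.g. from a VM operation executed at a safepoint:
//   CountLiveObjectsClosure blk;
//   Universe::heap()->safe_object_iterate(&blk);
//   tty->print_cr("live objects: " SIZE_FORMAT, blk.count());
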
--- a/src/share/vm/memory/oopFactory.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/oopFactory.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -82,15 +82,19 @@
 }
 
 
-constantPoolOop oopFactory::new_constantPool(int length, TRAPS) {
+constantPoolOop oopFactory::new_constantPool(int length,
+                                             bool is_conc_safe,
+                                             TRAPS) {
   constantPoolKlass* ck = constantPoolKlass::cast(Universe::constantPoolKlassObj());
-  return ck->allocate(length, CHECK_NULL);
+  return ck->allocate(length, is_conc_safe, CHECK_NULL);
 }
 
 
-constantPoolCacheOop oopFactory::new_constantPoolCache(int length, TRAPS) {
+constantPoolCacheOop oopFactory::new_constantPoolCache(int length,
+                                                       bool is_conc_safe,
+                                                       TRAPS) {
   constantPoolCacheKlass* ck = constantPoolCacheKlass::cast(Universe::constantPoolCacheKlassObj());
-  return ck->allocate(length, CHECK_NULL);
+  return ck->allocate(length, is_conc_safe, CHECK_NULL);
 }
 
 
@@ -105,11 +109,13 @@
                                            int compressed_line_number_size,
                                            int localvariable_table_length,
                                            int checked_exceptions_length,
+                                           bool is_conc_safe,
                                            TRAPS) {
   klassOop cmkObj = Universe::constMethodKlassObj();
   constMethodKlass* cmk = constMethodKlass::cast(cmkObj);
   return cmk->allocate(byte_code_size, compressed_line_number_size,
                        localvariable_table_length, checked_exceptions_length,
+                       is_conc_safe,
                        CHECK_NULL);
 }
 
@@ -117,14 +123,17 @@
 methodOop oopFactory::new_method(int byte_code_size, AccessFlags access_flags,
                                  int compressed_line_number_size,
                                  int localvariable_table_length,
-                                 int checked_exceptions_length, TRAPS) {
+                                 int checked_exceptions_length,
+                                 bool is_conc_safe,
+                                 TRAPS) {
   methodKlass* mk = methodKlass::cast(Universe::methodKlassObj());
   assert(!access_flags.is_native() || byte_code_size == 0,
          "native methods should not contain byte codes");
   constMethodOop cm = new_constMethod(byte_code_size,
                                       compressed_line_number_size,
                                       localvariable_table_length,
-                                      checked_exceptions_length, CHECK_NULL);
+                                      checked_exceptions_length,
+                                      is_conc_safe, CHECK_NULL);
   constMethodHandle rw(THREAD, cm);
   return mk->allocate(rw, access_flags, CHECK_NULL);
 }
--- a/src/share/vm/memory/oopFactory.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/oopFactory.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -81,8 +81,12 @@
   static symbolHandle    new_symbol_handle(const char* name, TRAPS) { return new_symbol_handle(name, (int)strlen(name), CHECK_(symbolHandle())); }
 
   // Constant pools
-  static constantPoolOop      new_constantPool     (int length, TRAPS);
-  static constantPoolCacheOop new_constantPoolCache(int length, TRAPS);
+  static constantPoolOop      new_constantPool     (int length,
+                                                    bool is_conc_safe,
+                                                    TRAPS);
+  static constantPoolCacheOop new_constantPoolCache(int length,
+                                                    bool is_conc_safe,
+                                                    TRAPS);
 
   // Instance classes
   static klassOop        new_instanceKlass(int vtable_len, int itable_len, int static_field_size,
@@ -93,9 +97,20 @@
   static constMethodOop  new_constMethod(int byte_code_size,
                                          int compressed_line_number_size,
                                          int localvariable_table_length,
-                                         int checked_exceptions_length, TRAPS);
+                                         int checked_exceptions_length,
+                                         bool is_conc_safe,
+                                         TRAPS);
 public:
-  static methodOop       new_method(int byte_code_size, AccessFlags access_flags, int compressed_line_number_size, int localvariable_table_length, int checked_exceptions_length, TRAPS);
+  // Set is_conc_safe for methods which cannot safely be
+  // processed by concurrent GC even after the return of
+  // the method.
+  static methodOop       new_method(int byte_code_size,
+                                    AccessFlags access_flags,
+                                    int compressed_line_number_size,
+                                    int localvariable_table_length,
+                                    int checked_exceptions_length,
+                                    bool is_conc_safe,
+                                    TRAPS);
 
   // Method Data containers
   static methodDataOop   new_methodData(methodHandle method, TRAPS);
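
The is_conc_safe flag threaded through the allocators above must be written before the object becomes visible to a concurrent collector; the cpCacheKlass::allocate() change later in this changeset in-lines the allocation for exactly that reason. A standalone sketch of that publish-after-initialize ordering using std::atomic release/acquire (MetadataObj and g_published are illustrative; HotSpot uses its own barrier primitives rather than std::atomic):

#include <atomic>

struct MetadataObj {
  bool is_conc_safe;          // must be initialized before the object is published
  explicit MetadataObj(bool safe) : is_conc_safe(safe) {}
};

std::atomic<MetadataObj*> g_published{nullptr};

// Allocator side: fully initialize, then publish with a release store so the
// flag written above cannot be reordered after the publishing store.
void allocate_and_publish(bool is_conc_safe) {
  MetadataObj* obj = new MetadataObj(is_conc_safe);
  g_published.store(obj, std::memory_order_release);   // "install the klass"
}

// Concurrent GC side: an acquire load pairs with the release store, so if the
// object is visible, its is_conc_safe value is visible too.
bool published_object_is_conc_safe() {
  MetadataObj* obj = g_published.load(std::memory_order_acquire);
  return obj != nullptr && obj->is_conc_safe;
}
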
--- a/src/share/vm/memory/referenceProcessor.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/referenceProcessor.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -721,12 +721,6 @@
                              iter.obj(), iter.obj()->blueprint()->internal_name());
     }
     assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
-    // If discovery is concurrent, we may have objects with null referents,
-    // being those that were concurrently cleared after they were discovered
-    // (and not subsequently precleaned).
-    assert(   (discovery_is_atomic() && iter.referent()->is_oop())
-           || (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)),
-           "Adding a bad referent");
     iter.next();
   }
   // Remember to keep sentinel pointer around
--- a/src/share/vm/memory/space.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/space.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -569,7 +569,15 @@
   if (prev > mr.start()) {
     region_start_addr = prev;
     blk_start_addr    = prev;
-    assert(blk_start_addr == block_start(region_start_addr), "invariant");
+    // The previous invocation may have pushed "prev" beyond the
+    // last allocated block yet there may be still be blocks
+    // in this region due to a particular coalescing policy.
+    // Relax the assertion so that the case where the unallocated
+    // block is maintained and "prev" is beyond the unallocated
+    // block does not cause the assertion to fire.
+    assert((BlockOffsetArrayUseUnallocatedBlock &&
+            (!is_in(prev))) ||
+           (blk_start_addr == block_start(region_start_addr)), "invariant");
   } else {
     region_start_addr = mr.start();
     blk_start_addr    = block_start(region_start_addr);
@@ -705,6 +713,12 @@
   object_iterate_from(bm, blk);
 }
 
+// For a contiguous space object_iterate() and safe_object_iterate()
+// are the same.
+void ContiguousSpace::safe_object_iterate(ObjectClosure* blk) {
+  object_iterate(blk);
+}
+
 void ContiguousSpace::object_iterate_from(WaterMark mark, ObjectClosure* blk) {
   assert(mark.space() == this, "Mark does not match space");
   HeapWord* p = mark.point();
--- a/src/share/vm/memory/space.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/memory/space.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -193,6 +193,9 @@
   // each.  Objects allocated by applications of the closure are not
   // included in the iteration.
   virtual void object_iterate(ObjectClosure* blk) = 0;
+  // Similar to object_iterate() except only iterates over
+  // objects whose internal references point to objects in the space.
+  virtual void safe_object_iterate(ObjectClosure* blk) = 0;
 
   // Iterate over all objects that intersect with mr, calling "cl->do_object"
   // on each.  There is an exception to this: if this closure has already
@@ -843,6 +846,9 @@
   void oop_iterate(OopClosure* cl);
   void oop_iterate(MemRegion mr, OopClosure* cl);
   void object_iterate(ObjectClosure* blk);
+  // For contiguous spaces this method will iterate safely over objects
+  // in the space (i.e., between bottom and top) when at a safepoint.
+  void safe_object_iterate(ObjectClosure* blk);
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
   // iterates on objects up to the safe limit
   HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
--- a/src/share/vm/oops/constMethodKlass.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constMethodKlass.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -49,10 +49,16 @@
   return constMethodOop(obj)->object_is_parsable();
 }
 
+bool constMethodKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constMethod(), "must be constMethod oop");
+  return constMethodOop(obj)->is_conc_safe();
+}
+
 constMethodOop constMethodKlass::allocate(int byte_code_size,
                                           int compressed_line_number_size,
                                           int localvariable_table_length,
                                           int checked_exceptions_length,
+                                          bool is_conc_safe,
                                           TRAPS) {
 
   int size = constMethodOopDesc::object_size(byte_code_size,
@@ -75,6 +81,7 @@
                                 compressed_line_number_size,
                                 localvariable_table_length);
   assert(cm->size() == size, "wrong size for object");
+  cm->set_is_conc_safe(is_conc_safe);
   cm->set_partially_loaded();
   assert(cm->is_parsable(), "Is safely parsable by gc");
   return cm;
--- a/src/share/vm/oops/constMethodKlass.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constMethodKlass.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -32,12 +32,16 @@
   // Testing
   bool oop_is_constMethod() const { return true; }
   virtual bool oop_is_parsable(oop obj) const;
+  virtual bool oop_is_conc_safe(oop obj) const;
+
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constMethodKlass);
   constMethodOop allocate(int byte_code_size, int compressed_line_number_size,
                           int localvariable_table_length,
-                          int checked_exceptions_length, TRAPS);
+                          int checked_exceptions_length,
+                          bool is_conc_safe,
+                          TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Sizing
--- a/src/share/vm/oops/constMethodOop.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constMethodOop.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -104,6 +104,7 @@
   // loads and stores.  This value may updated and read without a lock by
   // multiple threads, so is volatile.
   volatile uint64_t _fingerprint;
+  volatile bool     _is_conc_safe; // if true, safe for concurrent GC processing
 
 public:
   oop* oop_block_beg() const { return adr_method(); }
@@ -273,6 +274,8 @@
   oop*  adr_method() const             { return (oop*)&_method;          }
   oop*  adr_stackmap_data() const      { return (oop*)&_stackmap_data;   }
   oop*  adr_exception_table() const    { return (oop*)&_exception_table; }
+  bool is_conc_safe() { return _is_conc_safe; }
+  void set_is_conc_safe(bool v) { _is_conc_safe = v; }
 
   // Unique id for the method
   static const u2 MAX_IDNUM;
--- a/src/share/vm/oops/constantPoolKlass.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constantPoolKlass.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -25,7 +25,7 @@
 # include "incls/_precompiled.incl"
 # include "incls/_constantPoolKlass.cpp.incl"
 
-constantPoolOop constantPoolKlass::allocate(int length, TRAPS) {
+constantPoolOop constantPoolKlass::allocate(int length, bool is_conc_safe, TRAPS) {
   int size = constantPoolOopDesc::object_size(length);
   KlassHandle klass (THREAD, as_klassOop());
   constantPoolOop c =
@@ -38,6 +38,9 @@
   c->set_flags(0);
   // only set to non-zero if constant pool is merged by RedefineClasses
   c->set_orig_length(0);
+  // If the constant pool may change during RedefineClasses, it is created
+  // unsafe for GC concurrent processing.
+  c->set_is_conc_safe(is_conc_safe);
   // all fields are initialized; needed for GC
 
   // initialize tag array
@@ -207,6 +210,11 @@
   return size;
 }
 
+bool constantPoolKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constantPool(), "must be constantPool");
+  return constantPoolOop(obj)->is_conc_safe();
+}
+
 #ifndef SERIALGC
 int constantPoolKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) {
   assert (obj->is_constantPool(), "obj must be constant pool");
--- a/src/share/vm/oops/constantPoolKlass.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constantPoolKlass.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -34,7 +34,7 @@
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constantPoolKlass);
-  constantPoolOop allocate(int length, TRAPS);
+  constantPoolOop allocate(int length, bool is_conc_safe, TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Casting from klassOop
@@ -48,6 +48,8 @@
   int object_size() const        { return align_object_size(header_size()); }
 
   // Garbage collection
+  // Returns true if the object is safe for concurrent GC processing.
+  virtual bool oop_is_conc_safe(oop obj) const;
   void oop_follow_contents(oop obj);
   int oop_adjust_pointers(oop obj);
 
--- a/src/share/vm/oops/constantPoolOop.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constantPoolOop.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -962,7 +962,7 @@
       }
       case JVM_CONSTANT_Long: {
         u8 val = Bytes::get_Java_u8(bytes);
-        printf("long         %lldl", *(jlong *) &val);
+        printf("long         "INT64_FORMAT, *(jlong *) &val);
         ent_size = 8;
         idx++; // Long takes two cpool slots
         break;
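
The printf fix above swaps the non-portable "%lld" for HotSpot's INT64_FORMAT macro, which expands to the correct conversion specifier for a jlong on each platform. A standalone sketch of the same idea using the standard <cinttypes> macro PRId64 (an assumption standing in for INT64_FORMAT, which only exists inside the VM sources):

// Standalone sketch (not HotSpot code): printing a 64-bit value portably.
// PRId64 plays the role INT64_FORMAT plays in HotSpot; a hard-coded "%lld"
// is wrong on platforms where int64_t is "long" rather than "long long".
#define __STDC_FORMAT_MACROS   // needed by some older C++ compilers
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t val = -1234567890123456789LL;
  // Adjacent string literals are concatenated, just like
  // "long         "INT64_FORMAT in the patched code above.
  std::printf("long         %" PRId64 "\n", val);
  return 0;
}
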
--- a/src/share/vm/oops/constantPoolOop.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/constantPoolOop.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -43,6 +43,8 @@
   klassOop             _pool_holder;   // the corresponding class
   int                  _flags;         // a few header bits to describe contents for GC
   int                  _length; // number of elements in the array
+  volatile bool        _is_conc_safe; // if true, safe for concurrent
+                                      // GC processing
   // only set to non-zero if constant pool is merged by RedefineClasses
   int                  _orig_length;
 
@@ -379,6 +381,9 @@
   static int object_size(int length)   { return align_object_size(header_size() + length); }
   int object_size()                    { return object_size(length()); }
 
+  bool is_conc_safe()                  { return _is_conc_safe; }
+  void set_is_conc_safe(bool v)        { _is_conc_safe = v; }
+
   friend class constantPoolKlass;
   friend class ClassFileParser;
   friend class SystemDictionary;
--- a/src/share/vm/oops/cpCacheKlass.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/cpCacheKlass.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -32,13 +32,43 @@
 }
 
 
-constantPoolCacheOop constantPoolCacheKlass::allocate(int length, TRAPS) {
+constantPoolCacheOop constantPoolCacheKlass::allocate(int length,
+                                                      bool is_conc_safe,
+                                                      TRAPS) {
   // allocate memory
   int size = constantPoolCacheOopDesc::object_size(length);
+
   KlassHandle klass (THREAD, as_klassOop());
-  constantPoolCacheOop cache = (constantPoolCacheOop)
-    CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
+
+  // This is the original code.  The code from permanent_obj_allocate()
+  // was in-lined to allow the setting of is_conc_safe before the klass
+  // is installed.
+  // constantPoolCacheOop cache = (constantPoolCacheOop)
+  //   CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
+
+  oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
+  constantPoolCacheOop cache = (constantPoolCacheOop) obj;
+  cache->set_is_conc_safe(is_conc_safe);
+  // The store to is_conc_safe must be visible before the klass
+  // is set.  This should be done safely because _is_conc_safe has
+  // been declared volatile.  If there are any problems, consider adding
+  // OrderAccess::storestore();
+  CollectedHeap::post_allocation_install_obj_klass(klass, obj, size);
+  NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj,
+                                                              size));
+
+  // The length field affects the size of the object.  The allocation
+  // above allocates the correct size (see calculation of "size") but
+  // the size() method of the constant pool cache oop will not reflect
+  // that size until the correct length is set.
   cache->set_length(length);
+
+  // The store of the length must be visible before is_conc_safe is
+  // set to a safe state.
+  // This should be done safely because _is_conc_safe has
+  // been declared volatile.  If there are any problems, consider adding
+  // OrderAccess::storestore();
+  cache->set_is_conc_safe(methodOopDesc::IsSafeConc);
   cache->set_constant_pool(NULL);
   return cache;
 }
@@ -114,7 +144,6 @@
   return size;
 }
 
-
 int constantPoolCacheKlass::oop_adjust_pointers(oop obj) {
   assert(obj->is_constantPoolCache(), "obj must be constant pool cache");
   constantPoolCacheOop cache = (constantPoolCacheOop)obj;
@@ -131,6 +160,11 @@
   return size;
 }
 
+bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constantPoolCache(), "should be constant pool cache");
+  return constantPoolCacheOop(obj)->is_conc_safe();
+}
+
 #ifndef SERIALGC
 void constantPoolCacheKlass::oop_copy_contents(PSPromotionManager* pm,
                                                oop obj) {
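
The rewritten allocate() above depends on two ordering constraints: _is_conc_safe must be visible before the klass is installed, and _length must be visible before the flag is finally set to the safe state (the comments suggest OrderAccess::storestore() if volatile alone proves insufficient). A standalone C++ sketch of that publish/observe pattern, using std::atomic purely as an analogy for the volatile field; the types and names below are illustrative, not HotSpot's:

// Standalone sketch (not HotSpot code): initialize the fields a concurrent
// scanner depends on, then publish the "safe" flag last.
#include <atomic>
#include <cassert>

struct FakeCache {
  int length = 0;                         // determines the object's size
  std::atomic<bool> is_conc_safe{false};  // stands in for the volatile flag
};

void allocate_and_publish(FakeCache& c, int len) {
  c.length = len;                                          // set size first
  c.is_conc_safe.store(true, std::memory_order_release);   // then publish
}

bool concurrent_scanner(const FakeCache& c) {
  // A concurrent GC thread only trusts 'length' once the object says it is
  // safe for concurrent processing.
  if (c.is_conc_safe.load(std::memory_order_acquire)) {
    assert(c.length > 0 && "length must be visible once flagged safe");
    return true;
  }
  return false;   // not safe yet: skip the object for now
}

int main() {
  FakeCache c;
  allocate_and_publish(c, 16);
  return concurrent_scanner(c) ? 0 : 1;
}
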
--- a/src/share/vm/oops/cpCacheKlass.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/cpCacheKlass.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -32,7 +32,7 @@
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constantPoolCacheKlass);
-  constantPoolCacheOop allocate(int length, TRAPS);
+  constantPoolCacheOop allocate(int length, bool is_conc_safe, TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Casting from klassOop
@@ -48,6 +48,7 @@
   // Garbage collection
   void oop_follow_contents(oop obj);
   int oop_adjust_pointers(oop obj);
+  virtual bool oop_is_conc_safe(oop obj) const;
 
   // Parallel Scavenge and Parallel Old
   PARALLEL_GC_DECLS
--- a/src/share/vm/oops/cpCacheOop.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/cpCacheOop.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -291,6 +291,9 @@
  private:
   int             _length;
   constantPoolOop _constant_pool;                // the corresponding constant pool
+  // If true, safe for concurrent GC processing.
+  // Set unconditionally in constantPoolCacheKlass::allocate().
+  volatile bool        _is_conc_safe;
 
   // Sizing
   debug_only(friend class ClassVerifier;)
@@ -316,6 +319,12 @@
   constantPoolOop constant_pool() const          { return _constant_pool; }
   ConstantPoolCacheEntry* entry_at(int i) const  { assert(0 <= i && i < length(), "index out of bounds"); return base() + i; }
 
+  // GC support
+  // If the _length field has not been set, the size of the
+  // constantPoolCache cannot be correctly calculated.
+  bool is_conc_safe()                            { return _is_conc_safe; }
+  void set_is_conc_safe(bool v)                  { _is_conc_safe = v; }
+
   // Code generation
   static ByteSize base_offset()                  { return in_ByteSize(sizeof(constantPoolCacheOopDesc)); }
 
--- a/src/share/vm/oops/klass.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/klass.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -606,8 +606,19 @@
   #undef assert_same_query
 
   // Unless overridden, oop is parsable if it has a klass pointer.
+  // Parsability of an object is object specific.
   virtual bool oop_is_parsable(oop obj) const { return true; }
 
+  // Unless overridden, an oop is safe for concurrent GC processing
+  // after its allocation is complete.  The exception to this is the
+  // case where objects are changed after allocation.  Class
+  // redefinition is one of the known exceptions: during class
+  // redefinition, an allocated class can be changed in order to
+  // create a merged class (the combination of the old class
+  // definition that has to be preserved and the new class
+  // definition which is being created).
+  virtual bool oop_is_conc_safe(oop obj) const { return true; }
+
   // Access flags
   AccessFlags access_flags() const         { return _access_flags;  }
   void set_access_flags(AccessFlags flags) { _access_flags = flags; }
--- a/src/share/vm/oops/methodOop.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/methodOop.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -792,15 +792,34 @@
   AccessFlags flags = m->access_flags();
   int checked_exceptions_len = m->checked_exceptions_length();
   int localvariable_len = m->localvariable_table_length();
-  methodOop newm_oop = oopFactory::new_method(new_code_length, flags, new_compressed_linenumber_size, localvariable_len, checked_exceptions_len, CHECK_(methodHandle()));
+  // Allocate newm_oop with the is_conc_safe parameter set
+  // to IsUnsafeConc to indicate that newm_oop is not yet
+  // safe for concurrent processing by a GC.
+  methodOop newm_oop = oopFactory::new_method(new_code_length,
+                                              flags,
+                                              new_compressed_linenumber_size,
+                                              localvariable_len,
+                                              checked_exceptions_len,
+                                              IsUnsafeConc,
+                                              CHECK_(methodHandle()));
   methodHandle newm (THREAD, newm_oop);
   int new_method_size = newm->method_size();
   // Create a shallow copy of methodOopDesc part, but be careful to preserve the new constMethodOop
   constMethodOop newcm = newm->constMethod();
   int new_const_method_size = newm->constMethod()->object_size();
+
   memcpy(newm(), m(), sizeof(methodOopDesc));
   // Create shallow copy of constMethodOopDesc, but be careful to preserve the methodOop
+  // is_conc_safe is set to false because that is the value of
+  // is_conc_safe initialized into newcm and the copy should
+  // not overwrite that value.  During the window in which it is
+  // tagged as unsafe, some extra work could be needed during precleaning
+  // or concurrent marking but those phases will be correct.  Setting and
+  // resetting is done in preference to a careful copying into newcm to
+  // avoid having to know the precise layout of a constMethodOop.
+  m->constMethod()->set_is_conc_safe(false);
   memcpy(newcm, m->constMethod(), sizeof(constMethodOopDesc));
+  m->constMethod()->set_is_conc_safe(true);
   // Reset correct method/const method, method size, and parameter info
   newcm->set_method(newm());
   newm->set_constMethod(newcm);
@@ -831,6 +850,10 @@
            m->localvariable_table_start(),
            localvariable_len * sizeof(LocalVariableTableElement));
   }
+
+  // Only set is_conc_safe to true when changes to newcm are
+  // complete.
+  newcm->set_is_conc_safe(true);
   return newm;
 }
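
The memcpy bracketing above (tag the source constMethodOop unsafe, do the raw copy, tag it safe again, and only mark newcm safe once all fixups are done) keeps the bit-copy from handing the new object a "safe" flag prematurely. A standalone sketch of that discipline with hypothetical names:

// Standalone sketch (not HotSpot code): copying an object whose layout
// contains its own "safe for concurrent scanning" flag.  Clearing the flag
// on the source for the duration of the copy means the destination cannot
// start life claiming to be safe.
#include <cstring>

struct FakeConstMethod {
  volatile bool is_conc_safe;
  int code_length;
  // ... more fields in the real layout ...
};

void copy_then_publish(FakeConstMethod* dst, FakeConstMethod* src) {
  src->is_conc_safe = false;                        // tag source unsafe
  std::memcpy(dst, src, sizeof(FakeConstMethod));   // flag copies over as false
  src->is_conc_safe = true;                         // source unchanged, re-tag it

  dst->code_length += 0;                            // ... fix up the copy here ...

  dst->is_conc_safe = true;                         // publish dst only when complete
}

int main() {
  FakeConstMethod a = {true, 42}, b = {false, 0};
  copy_then_publish(&b, &a);
  return (a.is_conc_safe && b.is_conc_safe && b.code_length == 42) ? 0 : 1;
}
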
 
--- a/src/share/vm/oops/methodOop.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/methodOop.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -129,6 +129,10 @@
   volatile address           _from_interpreted_entry; // Cache of _code ? _adapter->i2c_entry() : _i2i_entry
 
  public:
+
+  static const bool IsUnsafeConc         = false;
+  static const bool IsSafeConc           = true;
+
   // accessors for instance variables
   constMethodOop constMethod() const             { return _constMethod; }
   void set_constMethod(constMethodOop xconst)    { oop_store_without_check((oop*)&_constMethod, (oop)xconst); }
--- a/src/share/vm/oops/oop.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/oop.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -108,6 +108,13 @@
   // installation of their klass pointer.
   bool is_parsable();
 
+  // Some perm gen objects that have been allocated and initialized
+  // can be changed by the VM when not at a safe point (class redefinition
+  // is an example).  Such objects should not be examined by the
+  // concurrent processing of a garbage collector if is_conc_safe()
+  // returns false.
+  bool is_conc_safe();
+
   // type test operations (inlined in oop.inline.h)
   bool is_instance()           const;
   bool is_instanceRef()        const;
--- a/src/share/vm/oops/oop.inline.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/oop.inline.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -435,6 +435,10 @@
   return blueprint()->oop_is_parsable(this);
 }
 
+inline bool oopDesc::is_conc_safe() {
+  return blueprint()->oop_is_conc_safe(this);
+}
+
 inline void update_barrier_set(void* p, oop v) {
   assert(oopDesc::bs() != NULL, "Uninitialized bs in oop!");
   oopDesc::bs()->write_ref_field(p, v);
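
The new oopDesc::is_conc_safe() above follows the same shape as is_parsable(): the per-object query is delegated to the object's klass (its "blueprint"), so only the klasses whose instances can be mutated after allocation (constMethod, constantPool, constantPoolCache in this changeset) need to override the default of "always safe". A standalone sketch of that dispatch shape with made-up names:

// Standalone sketch (not HotSpot code) of the blueprint dispatch behind
// oopDesc::is_conc_safe(): the base klass answers "always safe", while a
// klass whose instances carry a per-object flag consults that flag.
struct FakeOop;

struct FakeKlass {
  virtual ~FakeKlass() {}
  virtual bool oop_is_conc_safe(const FakeOop*) const { return true; }
};

struct FakeOop {
  const FakeKlass* blueprint;
  bool conc_safe_flag;                 // only meaningful for some klasses
  bool is_conc_safe() const { return blueprint->oop_is_conc_safe(this); }
};

struct FakeConstantPoolKlass : FakeKlass {
  bool oop_is_conc_safe(const FakeOop* obj) const { return obj->conc_safe_flag; }
};

int main() {
  FakeKlass plain;
  FakeConstantPoolKlass cp_klass;
  FakeOop ordinary  = { &plain,    false };  // flag ignored: default says safe
  FakeOop merged_cp = { &cp_klass, false };  // mid-RedefineClasses: not yet safe
  return (ordinary.is_conc_safe() && !merged_cp.is_conc_safe()) ? 0 : 1;
}
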
--- a/src/share/vm/oops/oopsHierarchy.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/oops/oopsHierarchy.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -126,8 +126,11 @@
   operator jobject () const           { return (jobject)obj(); }
   // from javaClasses.cpp
   operator JavaThread* () const       { return (JavaThread*)obj(); }
+
+#ifndef _LP64
   // from jvm.cpp
   operator jlong* () const            { return (jlong*)obj(); }
+#endif
 
   // from parNewGeneration and other things that want to get to the end of
   // an oop for stuff (like constMethodKlass.cpp, objArrayKlass.cpp)
--- a/src/share/vm/opto/block.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/block.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -880,6 +880,7 @@
 }
 
 void PhaseCFG::verify( ) const {
+#ifdef ASSERT
   // Verify sane CFG
   for( uint i = 0; i < _num_blocks; i++ ) {
     Block *b = _blocks[i];
@@ -894,10 +895,20 @@
                 "CreateEx must be first instruction in block" );
       }
       for( uint k = 0; k < n->req(); k++ ) {
-        Node *use = n->in(k);
-        if( use && use != n ) {
-          assert( _bbs[use->_idx] || use->is_Con(),
+        Node *def = n->in(k);
+        if( def && def != n ) {
+          assert( _bbs[def->_idx] || def->is_Con(),
                   "must have block; constants for debug info ok" );
+          // Verify that instructions in the block are in the correct order.
+          // Uses must follow their definition if they are in the same block.
+          // Mostly done to check that MachSpillCopy nodes are placed correctly
+          // when CreateEx node is moved in build_ifg_physical().
+          if( _bbs[def->_idx] == b &&
+              !(b->head()->is_Loop() && n->is_Phi()) &&
+              // See (+++) comment in reg_split.cpp
+              !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) {
+            assert( b->find_node(def) < j, "uses must follow definitions" );
+          }
         }
       }
     }
@@ -914,6 +925,7 @@
       assert( b->_num_succs == 2, "Conditional branch must have two targets");
     }
   }
+#endif
 }
 #endif
 
--- a/src/share/vm/opto/c2_globals.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -191,6 +191,9 @@
   notproduct(bool, VerifyHashTableKeys, true,                               \
           "Verify the immutability of keys in the VN hash tables")          \
                                                                             \
+  notproduct(bool, VerifyRegisterAllocator , false,                         \
+          "Verify Register Allocator")                                      \
+                                                                            \
   develop_pd(intx, FLOATPRESSURE,                                           \
           "Number of float LRG's that constitute high register pressure")   \
                                                                             \
--- a/src/share/vm/opto/cfgnode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/cfgnode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -858,12 +858,18 @@
   // convert the one to the other.
   const TypePtr* ttp = _type->make_ptr();
   const TypeInstPtr* ttip = (ttp != NULL) ? ttp->isa_instptr() : NULL;
+  const TypeKlassPtr* ttkp = (ttp != NULL) ? ttp->isa_klassptr() : NULL;
   bool is_intf = false;
   if (ttip != NULL) {
     ciKlass* k = ttip->klass();
     if (k->is_loaded() && k->is_interface())
       is_intf = true;
   }
+  if (ttkp != NULL) {
+    ciKlass* k = ttkp->klass();
+    if (k->is_loaded() && k->is_interface())
+      is_intf = true;
+  }
 
   // Default case: merge all inputs
   const Type *t = Type::TOP;        // Merged type starting value
@@ -921,6 +927,8 @@
     // uplift the type.
     if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() )
       { assert(ft == _type, ""); } // Uplift to interface
+    else if( !t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface() )
+      { assert(ft == _type, ""); } // Uplift to interface
     // Otherwise it's something stupid like non-overlapping int ranges
     // found on dying counted loops.
     else
@@ -936,6 +944,7 @@
     // because the type system doesn't interact well with interfaces.
     const TypePtr *jtp = jt->make_ptr();
     const TypeInstPtr *jtip = (jtp != NULL) ? jtp->isa_instptr() : NULL;
+    const TypeKlassPtr *jtkp = (jtp != NULL) ? jtp->isa_klassptr() : NULL;
     if( jtip && ttip ) {
       if( jtip->is_loaded() &&  jtip->klass()->is_interface() &&
           ttip->is_loaded() && !ttip->klass()->is_interface() ) {
@@ -945,6 +954,14 @@
         jt = ft;
       }
     }
+    if( jtkp && ttkp ) {
+      if( jtkp->is_loaded() &&  jtkp->klass()->is_interface() &&
+          ttkp->is_loaded() && !ttkp->klass()->is_interface() ) {
+        assert(ft == ttkp->cast_to_ptr_type(jtkp->ptr()) ||
+               ft->isa_narrowoop() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), "");
+        jt = ft;
+      }
+    }
     if (jt != ft && jt->base() == ft->base()) {
       if (jt->isa_int() &&
           jt->is_int()->_lo == ft->is_int()->_lo &&
--- a/src/share/vm/opto/chaitin.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/chaitin.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -228,6 +228,11 @@
   // them for real.
   de_ssa();
 
+#ifdef ASSERT
+  // Verify the graph before RA.
+  verify(&live_arena);
+#endif
+
   {
     NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
     _live = NULL;                 // Mark live as being not available
@@ -306,12 +311,6 @@
     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
     if (C->failing())  return;
 
-#ifdef ASSERT
-    if( VerifyOpto ) {
-      _cfg.verify();
-      verify_base_ptrs(&live_arena);
-    }
-#endif
     NOT_PRODUCT( C->verify_graph_edges(); )
 
     compact();                  // Compact LRGs; return new lower max lrg
@@ -340,7 +339,7 @@
     compress_uf_map_for_nodes();
 
 #ifdef ASSERT
-    if( VerifyOpto ) _ifg->verify(this);
+    verify(&live_arena, true);
 #endif
   } else {
     ifg.SquareUp();
@@ -376,12 +375,6 @@
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
     if (C->failing())  return;
-#ifdef ASSERT
-    if( VerifyOpto ) {
-      _cfg.verify();
-      verify_base_ptrs(&live_arena);
-    }
-#endif
 
     compact();                  // Compact LRGs; return new lower max lrg
 
@@ -412,7 +405,7 @@
     }
     compress_uf_map_for_nodes();
 #ifdef ASSERT
-    if( VerifyOpto ) _ifg->verify(this);
+    verify(&live_arena, true);
 #endif
     cache_lrg_info();           // Count degree of LRGs
 
@@ -432,6 +425,11 @@
   // Peephole remove copies
   post_allocate_copy_removal();
 
+#ifdef ASSERT
+  // Verify the graph after RA.
+  verify(&live_arena);
+#endif
+
   // max_reg is past the largest *register* used.
   // Convert that to a frame_slot number.
   if( _max_reg <= _matcher._new_SP )
@@ -956,7 +954,7 @@
       while ((neighbor = elements.next()) != 0) {
         LRG *n = &lrgs(neighbor);
 #ifdef ASSERT
-        if( VerifyOpto ) {
+        if( VerifyOpto || VerifyRegisterAllocator ) {
           assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
         }
 #endif
--- a/src/share/vm/opto/chaitin.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/chaitin.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -491,6 +491,8 @@
   // Verify that base pointers and derived pointers are still sane
   void verify_base_ptrs( ResourceArea *a ) const;
 
+  void verify( ResourceArea *a, bool verify_ifg = false ) const;
+
   void dump_for_spill_split_recycle() const;
 
 public:
--- a/src/share/vm/opto/classes.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/classes.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -129,7 +129,7 @@
 macro(LShiftI)
 macro(LShiftL)
 macro(LoadB)
-macro(LoadC)
+macro(LoadUS)
 macro(LoadD)
 macro(LoadD_unaligned)
 macro(LoadF)
--- a/src/share/vm/opto/compile.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/compile.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2005,7 +2005,7 @@
   case Op_StoreP:
   case Op_StoreN:
   case Op_LoadB:
-  case Op_LoadC:
+  case Op_LoadUS:
   case Op_LoadI:
   case Op_LoadKlass:
   case Op_LoadNKlass:
--- a/src/share/vm/opto/divnode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/divnode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -244,42 +244,73 @@
 
 //---------------------long_by_long_mulhi--------------------------------------
 // Generate ideal node graph for upper half of a 64 bit x 64 bit multiplication
-static Node *long_by_long_mulhi( PhaseGVN *phase, Node *dividend, jlong magic_const) {
+static Node* long_by_long_mulhi(PhaseGVN* phase, Node* dividend, jlong magic_const) {
   // If the architecture supports a 64x64 mulhi, there is
   // no need to synthesize it in ideal nodes.
   if (Matcher::has_match_rule(Op_MulHiL)) {
-    Node *v = phase->longcon(magic_const);
+    Node* v = phase->longcon(magic_const);
     return new (phase->C, 3) MulHiLNode(dividend, v);
   }
 
+  // Taken from Hacker's Delight, Fig. 8-2. Multiply high signed.
+  // (http://www.hackersdelight.org/HDcode/mulhs.c)
+  //
+  // int mulhs(int u, int v) {
+  //    unsigned u0, v0, w0;
+  //    int u1, v1, w1, w2, t;
+  //
+  //    u0 = u & 0xFFFF;  u1 = u >> 16;
+  //    v0 = v & 0xFFFF;  v1 = v >> 16;
+  //    w0 = u0*v0;
+  //    t  = u1*v0 + (w0 >> 16);
+  //    w1 = t & 0xFFFF;
+  //    w2 = t >> 16;
+  //    w1 = u0*v1 + w1;
+  //    return u1*v1 + w2 + (w1 >> 16);
+  // }
+  //
+  // Note: The version above is for 32x32 multiplications, while the
+  // following inline comments are adapted to 64x64.
+
   const int N = 64;
 
-  Node *u_hi = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2)));
-  Node *u_lo = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF)));
+  // u0 = u & 0xFFFFFFFF;  u1 = u >> 32;
+  Node* u0 = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF)));
+  Node* u1 = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2)));
+
+  // v0 = v & 0xFFFFFFFF;  v1 = v >> 32;
+  Node* v0 = phase->longcon(magic_const & 0xFFFFFFFF);
+  Node* v1 = phase->longcon(magic_const >> (N / 2));
 
-  Node *v_hi = phase->longcon(magic_const >> N/2);
-  Node *v_lo = phase->longcon(magic_const & 0XFFFFFFFF);
+  // w0 = u0*v0;
+  Node* w0 = phase->transform(new (phase->C, 3) MulLNode(u0, v0));
 
-  Node *hihi_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_hi));
-  Node *hilo_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_lo));
-  Node *lohi_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_hi));
-  Node *lolo_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_lo));
+  // t = u1*v0 + (w0 >> 32);
+  Node* u1v0 = phase->transform(new (phase->C, 3) MulLNode(u1, v0));
+  Node* temp = phase->transform(new (phase->C, 3) URShiftLNode(w0, phase->intcon(N / 2)));
+  Node* t    = phase->transform(new (phase->C, 3) AddLNode(u1v0, temp));
+
+  // w1 = t & 0xFFFFFFFF;
+  Node* w1 = new (phase->C, 3) AndLNode(t, phase->longcon(0xFFFFFFFF));
 
-  Node *t1 = phase->transform(new (phase->C, 3) URShiftLNode(lolo_product, phase->intcon(N / 2)));
-  Node *t2 = phase->transform(new (phase->C, 3) AddLNode(hilo_product, t1));
+  // w2 = t >> 32;
+  Node* w2 = new (phase->C, 3) RShiftLNode(t, phase->intcon(N / 2));
+
+  // 6732154: Construct both w1 and w2 before transforming, so t
+  // doesn't go dead prematurely.
+  w1 = phase->transform(w1);
+  w2 = phase->transform(w2);
 
-  // Construct both t3 and t4 before transforming so t2 doesn't go dead
-  // prematurely.
-  Node *t3 = new (phase->C, 3) RShiftLNode(t2, phase->intcon(N / 2));
-  Node *t4 = new (phase->C, 3) AndLNode(t2, phase->longcon(0xFFFFFFFF));
-  t3 = phase->transform(t3);
-  t4 = phase->transform(t4);
+  // w1 = u0*v1 + w1;
+  Node* u0v1 = phase->transform(new (phase->C, 3) MulLNode(u0, v1));
+  w1         = phase->transform(new (phase->C, 3) AddLNode(u0v1, w1));
 
-  Node *t5 = phase->transform(new (phase->C, 3) AddLNode(t4, lohi_product));
-  Node *t6 = phase->transform(new (phase->C, 3) RShiftLNode(t5, phase->intcon(N / 2)));
-  Node *t7 = phase->transform(new (phase->C, 3) AddLNode(t3, hihi_product));
+  // return u1*v1 + w2 + (w1 >> 32);
+  Node* u1v1  = phase->transform(new (phase->C, 3) MulLNode(u1, v1));
+  Node* temp1 = phase->transform(new (phase->C, 3) AddLNode(u1v1, w2));
+  Node* temp2 = phase->transform(new (phase->C, 3) RShiftLNode(w1, phase->intcon(N / 2)));
 
-  return new (phase->C, 3) AddLNode(t7, t6);
+  return new (phase->C, 3) AddLNode(temp1, temp2);
 }
 
 
@@ -976,7 +1007,7 @@
 
   // Expand mod
   if( con >= 0 && con < max_jlong && is_power_of_2_long(con+1) ) {
-    uint k = log2_long(con);       // Extract k
+    uint k = exact_log2_long(con+1);  // Extract k
 
     // Basic algorithm by David Detlefs.  See fastmod_long.java for gory details.
     // Used to help a popular random number generator which does a long-mod
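
The rewritten long_by_long_mulhi above follows the Hacker's Delight mulhs routine quoted in its comments, widened from 32x32 to 64x64 halves. A standalone C++ version of the same decomposition; the __int128 cross-check is a GCC/Clang-specific assumption used only to verify the sketch and is not part of the ideal-graph code:

// Standalone sketch (not HotSpot code): high 64 bits of a signed 64x64
// multiply, built from 32-bit halves exactly as long_by_long_mulhi() now
// assembles its ideal nodes (u0/u1, v0/v1, w0, t, w1, w2).
#include <cstdint>
#include <cstdio>

int64_t mulhs64(int64_t u, int64_t v) {
  uint64_t u0 = u & 0xFFFFFFFF;  int64_t u1 = u >> 32;  // arithmetic shift
  uint64_t v0 = v & 0xFFFFFFFF;  int64_t v1 = v >> 32;
  uint64_t w0 = u0 * v0;
  int64_t  t  = u1 * (int64_t)v0 + (int64_t)(w0 >> 32);
  int64_t  w1 = t & 0xFFFFFFFF;
  int64_t  w2 = t >> 32;
  w1 = (int64_t)u0 * v1 + w1;
  return u1 * v1 + w2 + (w1 >> 32);
}

int main() {
  int64_t u = -123456789012345LL, v = 987654321098LL;
  int64_t hi = mulhs64(u, v);
  __int128 full = (__int128)u * v;               // reference result
  int64_t expect = (int64_t)(full >> 64);
  std::printf("mulhs64 = %lld, expected = %lld\n",
              (long long)hi, (long long)expect);
  return hi == expect ? 0 : 1;
}
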
--- a/src/share/vm/opto/gcm.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/gcm.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -29,6 +29,9 @@
 #include "incls/_precompiled.incl"
 #include "incls/_gcm.cpp.incl"
 
+// To avoid float value underflow
+#define MIN_BLOCK_FREQUENCY 1.e-35f
+
 //----------------------------schedule_node_into_block-------------------------
 // Insert node n into block b. Look for projections of n and make sure they
 // are in b also.
@@ -1380,6 +1383,13 @@
     }
   }
 
+#ifdef ASSERT
+  for (uint i = 0; i < _num_blocks; i++ ) {
+    Block *b = _blocks[i];
+    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
+  }
+#endif
+
 #ifndef PRODUCT
   if (PrintCFGBlockFreq) {
     tty->print_cr("CFG Block Frequencies");
@@ -1877,7 +1887,9 @@
   float loop_freq = _freq * trip_count();
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
-    s->_freq *= loop_freq;
+    float block_freq = s->_freq * loop_freq;
+    if (block_freq < MIN_BLOCK_FREQUENCY) block_freq = MIN_BLOCK_FREQUENCY;
+    s->_freq = block_freq;
   }
   CFGLoop* ch = _child;
   while (ch != NULL) {
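
The MIN_BLOCK_FREQUENCY clamp above exists because scaling block frequencies by the trip counts of deeply nested loops can drive a 32-bit float through the denormal range and down to zero, and a zero frequency gives the register allocator nothing to weigh spill costs by. A tiny standalone demonstration of the underflow and the clamp:

// Standalone sketch (not HotSpot code): repeated frequency scaling
// underflows 'float' quickly, which is why the loop-frequency code above
// clamps each block frequency to MIN_BLOCK_FREQUENCY.
#include <cstdio>

int main() {
  const float MIN_BLOCK_FREQUENCY = 1.e-35f;     // same constant as the patch
  float clamped = 1.0f, unclamped = 1.0f;
  for (int depth = 0; depth < 30; depth++) {     // e.g. rarely taken nested paths
    clamped *= 1.0e-3f;
    if (clamped < MIN_BLOCK_FREQUENCY) clamped = MIN_BLOCK_FREQUENCY;
    unclamped *= 1.0e-3f;                        // 1e-90 is not a float value
  }
  std::printf("clamped   = %g\n", clamped);      // stays at 1e-35
  std::printf("unclamped = %g\n", unclamped);    // collapses to 0
  return 0;
}
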
--- a/src/share/vm/opto/graphKit.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1836,10 +1836,7 @@
     (CardTableModRefBS*)(Universe::heap()->barrier_set());
   Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
   // We store into a byte array, so do not bother to left-shift by zero
-  // Get base of card map
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte),
-         "adjust this code");
-  Node *c = makecon(TypeRawPtr::make((address)ct->byte_map_base));
+  Node *c = byte_map_base_node();
   // Combine
   Node *sb_ctl = control();
   Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
@@ -2945,16 +2942,10 @@
 
   // Now generate allocation code
 
-  // With escape analysis, the entire memory state is needed to be able to
-  // eliminate the allocation.  If the allocations cannot be eliminated, this
-  // will be optimized to the raw slice when the allocation is expanded.
-  Node *mem;
-  if (C->do_escape_analysis()) {
-    mem = reset_memory();
-    set_all_memory(mem);
-  } else {
-    mem = memory(Compile::AliasIdxRaw);
-  }
+  // The entire memory state is needed for the slow path of the allocation
+  // since GC and deoptimization can happen.
+  Node *mem = reset_memory();
+  set_all_memory(mem); // Create new memory state
 
   AllocateNode* alloc
     = new (C, AllocateNode::ParmLimit)
@@ -3091,16 +3082,10 @@
 
   // Now generate allocation code
 
-  // With escape analysis, the entire memory state is needed to be able to
-  // eliminate the allocation.  If the allocations cannot be eliminated, this
-  // will be optimized to the raw slice when the allocation is expanded.
-  Node *mem;
-  if (C->do_escape_analysis()) {
-    mem = reset_memory();
-    set_all_memory(mem);
-  } else {
-    mem = memory(Compile::AliasIdxRaw);
-  }
+  // The entire memory state is needed for the slow path of the allocation
+  // since GC and deoptimization can happen.
+  Node *mem = reset_memory();
+  set_all_memory(mem); // Create new memory state
 
   // Create the AllocateArrayNode and its result projections
   AllocateArrayNode* alloc
@@ -3233,12 +3218,11 @@
 
   // Now some of the values
 
-  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
-  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+  Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
 
   // if (!marking)
   __ if_then(marking, BoolTest::ne, zero); {
+    Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
 
     const Type* t1 = adr->bottom_type();
     const Type* t2 = val->bottom_type();
@@ -3246,6 +3230,7 @@
     Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
     // if (orig != NULL)
     __ if_then(orig, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
 
       // load original value
       // alias_idx correct??
@@ -3365,14 +3350,6 @@
 
   const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
 
-  // Get the address of the card table
-  CardTableModRefBS* ct =
-    (CardTableModRefBS*)(Universe::heap()->barrier_set());
-  Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base));
-  // Get base of card map
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-
   // Offsets into the thread
   const int index_offset  = in_bytes(JavaThread::dirty_card_queue_offset() +
                                      PtrQueue::byte_offset_of_index());
@@ -3402,7 +3379,7 @@
   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
 
   // Combine card table base and card offset
-  Node *card_adr = __ AddP(no_base, card_table, card_offset );
+  Node *card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
 
   // If we know the value being stored does it cross regions?
 
--- a/src/share/vm/opto/graphKit.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/graphKit.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -83,6 +83,18 @@
   Node* zerocon(BasicType bt)   const { return _gvn.zerocon(bt); }
   // (See also macro MakeConX in type.hpp, which uses intcon or longcon.)
 
+  // Helper for byte_map_base
+  Node* byte_map_base_node() {
+    // Get base of card map
+    CardTableModRefBS* ct = (CardTableModRefBS*)(Universe::heap()->barrier_set());
+    assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust users of this code");
+    if (ct->byte_map_base != NULL) {
+      return makecon(TypeRawPtr::make((address)ct->byte_map_base));
+    } else {
+      return null();
+    }
+  }
+
   jint  find_int_con(Node* n, jint value_if_unknown) {
     return _gvn.find_int_con(n, value_if_unknown);
   }
--- a/src/share/vm/opto/idealGraphPrinter.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/idealGraphPrinter.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -557,7 +557,7 @@
 
         // max. 2 chars allowed
         if (value >= -9 && value <= 99) {
-          sprintf(buffer, "%d", value);
+          sprintf(buffer, INT64_FORMAT, value);
           print_prop(short_name, buffer);
         } else {
           print_prop(short_name, "L");
--- a/src/share/vm/opto/ifg.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/ifg.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -471,12 +471,28 @@
     // for the "collect_gc_info" phase later.
     IndexSet liveout(_live->live(b));
     uint last_inst = b->end_idx();
-    // Compute last phi index
-    uint last_phi;
-    for( last_phi = 1; last_phi < last_inst; last_phi++ )
-      if( !b->_nodes[last_phi]->is_Phi() )
+    // Compute first nonphi node index
+    uint first_inst;
+    for( first_inst = 1; first_inst < last_inst; first_inst++ )
+      if( !b->_nodes[first_inst]->is_Phi() )
         break;
 
+    // Spills could be inserted before a CreateEx node, which should be the
+    // first instruction in the block after the Phis. Move CreateEx up.
+    for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
+      Node *ex = b->_nodes[insidx];
+      if( ex->is_SpillCopy() ) continue;
+      if( insidx > first_inst && ex->is_Mach() &&
+          ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+        // If the CreateEx isn't above all the MachSpillCopies
+        // then move it to the top.
+        b->_nodes.remove(insidx);
+        b->_nodes.insert(first_inst, ex);
+      }
+      // Stop once a CreateEx or any other node is found
+      break;
+    }
+
     // Reset block's register pressure values for each ifg construction
     uint pressure[2], hrp_index[2];
     pressure[0] = pressure[1] = 0;
@@ -485,7 +501,7 @@
     // Liveout things are presumed live for the whole block.  We accumulate
     // 'area' accordingly.  If they get killed in the block, we'll subtract
     // the unused part of the block from the area.
-    int inst_count = last_inst - last_phi;
+    int inst_count = last_inst - first_inst;
     double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
     assert(!(cost < 0.0), "negative spill cost" );
     IndexSetIterator elements(&liveout);
--- a/src/share/vm/opto/lcm.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/lcm.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -107,7 +107,7 @@
     was_store = false;
     switch( mach->ideal_Opcode() ) {
     case Op_LoadB:
-    case Op_LoadC:
+    case Op_LoadUS:
     case Op_LoadD:
     case Op_LoadF:
     case Op_LoadI:
--- a/src/share/vm/opto/live.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/live.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -271,9 +271,9 @@
 
 //------------------------------verify_base_ptrs-------------------------------
 // Verify that base pointers and derived pointers are still sane.
-// Basically, if a derived pointer is live at a safepoint, then its
-// base pointer must be live also.
 void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
+#ifdef ASSERT
+  Unique_Node_List worklist(a);
   for( uint i = 0; i < _cfg._num_blocks; i++ ) {
     Block *b = _cfg._blocks[i];
     for( uint j = b->end_idx() + 1; j > 1; j-- ) {
@@ -287,28 +287,81 @@
           // Now scan for a live derived pointer
           if (jvms->oopoff() < sfpt->req()) {
             // Check each derived/base pair
-            for (uint idx = jvms->oopoff(); idx < sfpt->req(); idx += 2) {
+            for (uint idx = jvms->oopoff(); idx < sfpt->req(); idx++) {
               Node *check = sfpt->in(idx);
-              uint j = 0;
+              bool is_derived = ((idx - jvms->oopoff()) & 1) == 0;
               // search upwards through spills and spill phis for AddP
-              while(true) {
-                if( !check ) break;
-                int idx = check->is_Copy();
-                if( idx ) {
-                  check = check->in(idx);
-                } else if( check->is_Phi() && check->_idx >= _oldphi ) {
-                  check = check->in(1);
-                } else
-                  break;
-                j++;
-                assert(j < 100000,"Derived pointer checking in infinite loop");
+              worklist.clear();
+              worklist.push(check);
+              uint k = 0;
+              while( k < worklist.size() ) {
+                check = worklist.at(k);
+                assert(check,"Bad base or derived pointer");
+                // See PhaseChaitin::find_base_for_derived() for all cases.
+                int isc = check->is_Copy();
+                if( isc ) {
+                  worklist.push(check->in(isc));
+                } else if( check->is_Phi() ) {
+                  for (uint m = 1; m < check->req(); m++)
+                    worklist.push(check->in(m));
+                } else if( check->is_Con() ) {
+                  if (is_derived) {
+                    // Derived is NULL+offset
+                    assert(!is_derived || check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad derived pointer");
+                  } else {
+                    assert(check->bottom_type()->is_ptr()->_offset == 0,"Bad base pointer");
+                    // Base either ConP(NULL) or loadConP
+                    if (check->is_Mach()) {
+                      assert(check->as_Mach()->ideal_Opcode() == Op_ConP,"Bad base pointer");
+                    } else {
+                      assert(check->Opcode() == Op_ConP &&
+                             check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad base pointer");
+                    }
+                  }
+                } else if( check->bottom_type()->is_ptr()->_offset == 0 ) {
+                  if(check->is_Proj() || check->is_Mach() &&
+                     (check->as_Mach()->ideal_Opcode() == Op_CreateEx ||
+                      check->as_Mach()->ideal_Opcode() == Op_ThreadLocal ||
+                      check->as_Mach()->ideal_Opcode() == Op_CMoveP ||
+                      check->as_Mach()->ideal_Opcode() == Op_CheckCastPP ||
+#ifdef _LP64
+                      UseCompressedOops && check->as_Mach()->ideal_Opcode() == Op_CastPP ||
+                      UseCompressedOops && check->as_Mach()->ideal_Opcode() == Op_DecodeN ||
+#endif
+                      check->as_Mach()->ideal_Opcode() == Op_LoadP ||
+                      check->as_Mach()->ideal_Opcode() == Op_LoadKlass)) {
+                    // Valid nodes
+                  } else {
+                    check->dump();
+                    assert(false,"Bad base or derived pointer");
+                  }
+                } else {
+                  assert(is_derived,"Bad base pointer");
+                  assert(check->is_Mach() && check->as_Mach()->ideal_Opcode() == Op_AddP,"Bad derived pointer");
+                }
+                k++;
+                assert(k < 100000,"Derived pointer checking in infinite loop");
               } // End while
-              assert(check->is_Mach() && check->as_Mach()->ideal_Opcode() == Op_AddP,"Bad derived pointer")
             }
           } // End of check for derived pointers
         } // End of Kcheck for debug info
       } // End of if found a safepoint
     } // End of forall instructions in block
   } // End of forall blocks
+#endif
 }
+
+//------------------------------verify-------------------------------------
+// Verify that graphs and base pointers are still sane.
+void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
+#ifdef ASSERT
+  if( VerifyOpto || VerifyRegisterAllocator ) {
+    _cfg.verify();
+    verify_base_ptrs(a);
+    if(verify_ifg)
+      _ifg->verify(this);
+  }
 #endif
+}
+
+#endif
--- a/src/share/vm/opto/loopnode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/loopnode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2654,7 +2654,7 @@
     case Op_ModF:
     case Op_ModD:
     case Op_LoadB:              // Same with Loads; they can sink
-    case Op_LoadC:              // during loop optimizations.
+    case Op_LoadUS:             // during loop optimizations.
     case Op_LoadD:
     case Op_LoadF:
     case Op_LoadI:
--- a/src/share/vm/opto/macro.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/macro.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -952,13 +952,6 @@
   Node* klass_node        = alloc->in(AllocateNode::KlassNode);
   Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
 
-  // With escape analysis, the entire memory state was needed to be able to
-  // eliminate the allocation.  Since the allocations cannot be eliminated,
-  // optimize it to the raw slice.
-  if (mem->is_MergeMem()) {
-    mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
-  }
-
   assert(ctrl != NULL, "must have control");
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
@@ -1016,6 +1009,11 @@
   Node *slow_mem = mem;  // save the current memory state for slow path
   // generate the fast allocation code unless we know that the initial test will always go slow
   if (!always_slow) {
+    // Fast path modifies only raw memory.
+    if (mem->is_MergeMem()) {
+      mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
+    }
+
     Node* eden_top_adr;
     Node* eden_end_adr;
 
@@ -1239,8 +1237,6 @@
     }
   }
 
-  mem = result_phi_rawmem;
-
   // An allocate node has separate i_o projections for the uses on the control and i_o paths
   // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
   if (_ioproj_fallthrough == NULL) {
--- a/src/share/vm/opto/matcher.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/matcher.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1824,7 +1824,7 @@
         mem_op = true;
         break;
       case Op_LoadB:
-      case Op_LoadC:
+      case Op_LoadUS:
       case Op_LoadD:
       case Op_LoadF:
       case Op_LoadI:
--- a/src/share/vm/opto/memnode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/memnode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -779,14 +779,14 @@
          "use LoadRangeNode instead");
   switch (bt) {
   case T_BOOLEAN:
-  case T_BYTE:    return new (C, 3) LoadBNode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_INT:     return new (C, 3) LoadINode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_CHAR:    return new (C, 3) LoadCNode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_SHORT:   return new (C, 3) LoadSNode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_LONG:    return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long()   );
-  case T_FLOAT:   return new (C, 3) LoadFNode(ctl, mem, adr, adr_type, rt              );
-  case T_DOUBLE:  return new (C, 3) LoadDNode(ctl, mem, adr, adr_type, rt              );
-  case T_ADDRESS: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr()    );
+  case T_BYTE:    return new (C, 3) LoadBNode (ctl, mem, adr, adr_type, rt->is_int()    );
+  case T_INT:     return new (C, 3) LoadINode (ctl, mem, adr, adr_type, rt->is_int()    );
+  case T_CHAR:    return new (C, 3) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int()    );
+  case T_SHORT:   return new (C, 3) LoadSNode (ctl, mem, adr, adr_type, rt->is_int()    );
+  case T_LONG:    return new (C, 3) LoadLNode (ctl, mem, adr, adr_type, rt->is_long()   );
+  case T_FLOAT:   return new (C, 3) LoadFNode (ctl, mem, adr, adr_type, rt              );
+  case T_DOUBLE:  return new (C, 3) LoadDNode (ctl, mem, adr, adr_type, rt              );
+  case T_ADDRESS: return new (C, 3) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr()    );
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
@@ -1076,13 +1076,14 @@
       // of the original value.
       Node* mem_phi = in(Memory);
       Node* offset = in(Address)->in(AddPNode::Offset);
+      Node* region = base->in(0);
 
       Node* in1 = clone();
       Node* in1_addr = in1->in(Address)->clone();
       in1_addr->set_req(AddPNode::Base, base->in(allocation_index));
       in1_addr->set_req(AddPNode::Address, base->in(allocation_index));
       in1_addr->set_req(AddPNode::Offset, offset);
-      in1->set_req(0, base->in(allocation_index));
+      in1->set_req(0, region->in(allocation_index));
       in1->set_req(Address, in1_addr);
       in1->set_req(Memory, mem_phi->in(allocation_index));
 
@@ -1091,7 +1092,7 @@
       in2_addr->set_req(AddPNode::Base, base->in(load_index));
       in2_addr->set_req(AddPNode::Address, base->in(load_index));
       in2_addr->set_req(AddPNode::Offset, offset);
-      in2->set_req(0, base->in(load_index));
+      in2->set_req(0, region->in(load_index));
       in2->set_req(Address, in2_addr);
       in2->set_req(Memory, mem_phi->in(load_index));
 
@@ -1100,7 +1101,7 @@
       in2_addr = phase->transform(in2_addr);
       in2 =      phase->transform(in2);
 
-      PhiNode* result = PhiNode::make_blank(base->in(0), this);
+      PhiNode* result = PhiNode::make_blank(region, this);
       result->set_req(allocation_index, in1);
       result->set_req(load_index, in2);
       return result;
@@ -1303,6 +1304,7 @@
     Node*    base   = AddPNode::Ideal_base_and_offset(address, phase, ignore);
     if (base != NULL
         && phase->type(base)->higher_equal(TypePtr::NOTNULL)
+        && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw
         && all_controls_dominate(base, phase->C->start())) {
       // A method-invariant, non-null address (constant or 'this' argument).
       set_req(MemNode::Control, NULL);
@@ -1356,7 +1358,7 @@
   // Steps (a), (b):  Walk past independent stores to find an exact match.
   if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
     // (c) See if we can fold up on the spot, but don't fold up here.
-    // Fold-up might require truncation (for LoadB/LoadS/LoadC) or
+    // Fold-up might require truncation (for LoadB/LoadS/LoadUS) or
     // just return a prior value, which is done by Identity calls.
     if (can_see_stored_value(prev_mem, phase)) {
       // Make ready for step (d):
@@ -1605,14 +1607,14 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
-//--------------------------LoadCNode::Ideal--------------------------------------
+//--------------------------LoadUSNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
 //  and the value stored was larger than a char, replace this load
 //  with the value stored truncated to a char.  If no truncation is
 //  needed, the replacement is done in LoadNode::Identity().
 //
-Node *LoadCNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+Node *LoadUSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   Node* mem = in(MemNode::Memory);
   Node* value = can_see_stored_value(mem,phase);
   if( value && !phase->type(value)->higher_equal( _type ) )
--- a/src/share/vm/opto/memnode.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/memnode.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -207,11 +207,11 @@
   virtual BasicType memory_type() const { return T_BYTE; }
 };
 
-//------------------------------LoadCNode--------------------------------------
-// Load a char (16bits unsigned) from memory
-class LoadCNode : public LoadNode {
+//------------------------------LoadUSNode-------------------------------------
+// Load an unsigned short/char (16bits unsigned) from memory
+class LoadUSNode : public LoadNode {
 public:
-  LoadCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
+  LoadUSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
     : LoadNode(c,mem,adr,at,ti) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
--- a/src/share/vm/opto/mulnode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/mulnode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -442,16 +442,17 @@
         return load;
     }
     uint lop = load->Opcode();
-    if( lop == Op_LoadC &&
+    if( lop == Op_LoadUS &&
         con == 0x0000FFFF )     // Already zero-extended
       return load;
     // Masking off the high bits of a unsigned-shift-right is not
     // needed either.
     if( lop == Op_URShiftI ) {
       const TypeInt *t12 = phase->type( load->in(2) )->isa_int();
-      if( t12 && t12->is_con() ) {
-        int shift_con = t12->get_con();
-        int mask = max_juint >> shift_con;
+      if( t12 && t12->is_con() ) {  // Shift is by a constant
+        int shift = t12->get_con();
+        shift &= BitsPerJavaInteger - 1;  // semantics of Java shifts
+        int mask = max_juint >> shift;
         if( (mask&con) == mask )  // If AND is useless, skip it
           return load;
       }
@@ -470,19 +471,19 @@
   uint lop = load->Opcode();
 
   // Masking bits off of a Character?  Hi bits are already zero.
-  if( lop == Op_LoadC &&
+  if( lop == Op_LoadUS &&
       (mask & 0xFFFF0000) )     // Can we make a smaller mask?
     return new (phase->C, 3) AndINode(load,phase->intcon(mask&0xFFFF));
 
   // Masking bits off of a Short?  Loading a Character does some masking
   if( lop == Op_LoadS &&
       (mask & 0xFFFF0000) == 0 ) {
-    Node *ldc = new (phase->C, 3) LoadCNode(load->in(MemNode::Control),
+    Node *ldus = new (phase->C, 3) LoadUSNode(load->in(MemNode::Control),
                                   load->in(MemNode::Memory),
                                   load->in(MemNode::Address),
                                   load->adr_type());
-    ldc = phase->transform(ldc);
-    return new (phase->C, 3) AndINode(ldc,phase->intcon(mask&0xFFFF));
+    ldus = phase->transform(ldus);
+    return new (phase->C, 3) AndINode(ldus, phase->intcon(mask&0xFFFF));
   }
 
   // Masking sign bits off of a Byte?  Let the matcher use an unsigned load
@@ -579,9 +580,10 @@
     // needed either.
     if( lop == Op_URShiftL ) {
       const TypeInt *t12 = phase->type( usr->in(2) )->isa_int();
-      if( t12 && t12->is_con() ) {
-        int shift_con = t12->get_con();
-        jlong mask = max_julong >> shift_con;
+      if( t12 && t12->is_con() ) {  // Shift is by a constant
+        int shift = t12->get_con();
+        shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
+        jlong mask = max_julong >> shift;
         if( (mask&con) == mask )  // If AND is useless, skip it
           return usr;
       }
@@ -605,8 +607,8 @@
     const TypeInt *t12 = phase->type(rsh->in(2))->isa_int();
     if( t12 && t12->is_con() ) { // Shift is by a constant
       int shift = t12->get_con();
-      shift &= (BitsPerJavaInteger*2)-1;  // semantics of Java shifts
-      const jlong sign_bits_mask = ~(((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - shift)) -1);
+      shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
+      const jlong sign_bits_mask = ~(((jlong)CONST64(1) << (jlong)(BitsPerJavaLong - shift)) -1);
       // If the AND'ing of the 2 masks has no bits, then only original shifted
       // bits survive.  NO sign-extension bits survive the maskings.
       if( (sign_bits_mask & mask) == 0 ) {
@@ -786,7 +788,7 @@
 
   // Check for ((x & ((CONST64(1)<<(64-c0))-1)) << c0) which ANDs off high bits
   // before shifting them away.
-  const jlong bits_mask = ((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) - CONST64(1);
+  const jlong bits_mask = ((jlong)CONST64(1) << (jlong)(BitsPerJavaLong - con)) - CONST64(1);
   if( add1_op == Op_AndL &&
       phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
     return new (phase->C, 3) LShiftLNode( add1->in(1), in(2) );
@@ -820,7 +822,7 @@
     return TypeLong::LONG;
 
   uint shift = r2->get_con();
-  shift &= (BitsPerJavaInteger*2)-1;  // semantics of Java shifts
+  shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
   // Shift by a multiple of 64 does nothing:
   if (shift == 0)  return t1;
 
@@ -913,7 +915,7 @@
       set_req(2, phase->intcon(0));
       return this;
     }
-    else if( ld->Opcode() == Op_LoadC )
+    else if( ld->Opcode() == Op_LoadUS )
       // Replace zero-extension-load with sign-extension-load
       return new (phase->C, 3) LoadSNode( ld->in(MemNode::Control),
                                 ld->in(MemNode::Memory),
@@ -1235,7 +1237,7 @@
   if ( con == 0 ) return NULL;  // let Identity() handle a 0 shift count
                               // note: mask computation below does not work for 0 shift count
   // We'll be wanting the right-shift amount as a mask of that many bits
-  const jlong mask = (((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) -1);
+  const jlong mask = (((jlong)CONST64(1) << (jlong)(BitsPerJavaLong - con)) -1);
 
   // Check for ((x << z) + Y) >>> z.  Replace with x + con>>>z
   // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
@@ -1302,7 +1304,7 @@
 
   if (r2->is_con()) {
     uint shift = r2->get_con();
-    shift &= (2*BitsPerJavaInteger)-1;  // semantics of Java shifts
+    shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
     // Shift by a multiple of 64 does nothing:
     if (shift == 0)  return t1;
     // Calculate reasonably aggressive bounds for the result.
@@ -1325,7 +1327,7 @@
     const TypeLong* tl = TypeLong::make(lo, hi, MAX2(r1->_widen,r2->_widen));
     #ifdef ASSERT
     // Make sure we get the sign-capture idiom correct.
-    if (shift == (2*BitsPerJavaInteger)-1) {
+    if (shift == BitsPerJavaLong - 1) {
       if (r1->_lo >= 0) assert(tl == TypeLong::ZERO, ">>>63 of + is 0");
       if (r1->_hi < 0)  assert(tl == TypeLong::ONE,  ">>>63 of - is +1");
     }
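
Several of the fixes above mask a constant shift count with BitsPerJavaLong - 1 (or BitsPerJavaInteger - 1) before using it, matching Java semantics where a long shift uses only the low six bits of the count; without the mask, an out-of-range constant would both compute the wrong mask and feed an undefined shift to the C++ '>>'. A standalone sketch of the rule, with the HotSpot constant name spelled out as a plain value:

// Standalone sketch (not HotSpot code): Java shift-count semantics.
// Java int shifts use the low 5 bits of the count, long shifts the low
// 6 bits, so (x >>> 64) on a long is x, not 0.
#include <cstdint>
#include <cstdio>

uint64_t java_ulong_shr(uint64_t x, int count) {
  const int BitsPerJavaLong = 64;     // value of the HotSpot constant
  count &= BitsPerJavaLong - 1;       // the masking added in the patch
  return x >> count;                  // now always well-defined in C++
}

int main() {
  uint64_t x = 0xFFFF0000FFFF0000ULL;
  // A count of 64 behaves like a count of 0 under Java rules; without the
  // mask, 'x >> 64' would be undefined behavior in C/C++.
  std::printf("x >>> 64 == x      ? %s\n", java_ulong_shr(x, 64) == x        ? "yes" : "no");
  std::printf("x >>> 65 == x >>> 1? %s\n", java_ulong_shr(x, 65) == (x >> 1) ? "yes" : "no");
  return 0;
}
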
--- a/src/share/vm/opto/superword.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/superword.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1444,7 +1444,7 @@
 // (Start, end] half-open range defining which operands are vector
 void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
   switch (n->Opcode()) {
-  case Op_LoadB:   case Op_LoadC:
+  case Op_LoadB:   case Op_LoadUS:
   case Op_LoadI:   case Op_LoadL:
   case Op_LoadF:   case Op_LoadD:
   case Op_LoadP:
--- a/src/share/vm/opto/type.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/type.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2471,6 +2471,8 @@
   const Type* ft = join(kills);
   const TypeInstPtr* ftip = ft->isa_instptr();
   const TypeInstPtr* ktip = kills->isa_instptr();
+  const TypeKlassPtr* ftkp = ft->isa_klassptr();
+  const TypeKlassPtr* ktkp = kills->isa_klassptr();
 
   if (ft->empty()) {
     // Check for evil case of 'this' being a class and 'kills' expecting an
@@ -2484,6 +2486,8 @@
     // uplift the type.
     if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface())
       return kills;             // Uplift to interface
+    if (!empty() && ktkp != NULL && ktkp->klass()->is_loaded() && ktkp->klass()->is_interface())
+      return kills;             // Uplift to interface
 
     return Type::TOP;           // Canonical empty value
   }
@@ -2499,6 +2503,12 @@
     // Happens in a CTW of rt.jar, 320-341, no extra flags
     return ktip->cast_to_ptr_type(ftip->ptr());
   }
+  if (ftkp != NULL && ktkp != NULL &&
+      ftkp->is_loaded() &&  ftkp->klass()->is_interface() &&
+      ktkp->is_loaded() && !ktkp->klass()->is_interface()) {
+    // Happens in a CTW of rt.jar, 320-341, no extra flags
+    return ktkp->cast_to_ptr_type(ftkp->ptr());
+  }
 
   return ft;
 }
@@ -3657,7 +3667,7 @@
 
 //------------------------------cast_to_ptr_type-------------------------------
 const Type *TypeKlassPtr::cast_to_ptr_type(PTR ptr) const {
-  assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
+  assert(_base == KlassPtr, "subclass must override cast_to_ptr_type");
   if( ptr == _ptr ) return this;
   return make(ptr, _klass, _offset);
 }
--- a/src/share/vm/opto/type.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/type.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -882,6 +882,8 @@
 public:
   ciSymbol* name()  const { return _klass->name(); }
 
+  bool  is_loaded() const { return _klass->is_loaded(); }
+
   // ptr to klass 'k'
   static const TypeKlassPtr *make( ciKlass* k ) { return make( TypePtr::Constant, k, 0); }
   // ptr to klass 'k' with offset
--- a/src/share/vm/opto/vectornode.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/opto/vectornode.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -239,7 +239,7 @@
     return Op_XorV;
 
   case Op_LoadB:
-  case Op_LoadC:
+  case Op_LoadUS:
   case Op_LoadS:
   case Op_LoadI:
   case Op_LoadL:
@@ -269,7 +269,7 @@
     case 16:       return Op_Load16B;
     }
     break;
-  case Op_LoadC:
+  case Op_LoadUS:
     switch (vlen) {
     case  2:       return Op_Load2C;
     case  4:       return Op_Load4C;
--- a/src/share/vm/prims/jni.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/prims/jni.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2691,8 +2691,13 @@
 
     directBufferSupportInitializeEnded = 1;
   } else {
-    ThreadInVMfromNative tivn(thread); // set state as yield_all can call os:sleep
     while (!directBufferSupportInitializeEnded && !directBufferSupportInitializeFailed) {
+      // Set state as yield_all can call os::sleep. On Solaris, yield_all calls
+      // os::sleep which requires the VM state transition. On other platforms, it
+      // is not necessary. The following call to change the VM state is purposely
+      // put inside the loop to avoid potential deadlock when multiple threads
+      // try to call this method. See 6791815 for more details.
+      ThreadInVMfromNative tivn(thread);
       os::yield_all();
     }
   }
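
The comment above is the whole point of the change: the scoped transition is now entered and left once per loop iteration instead of being held across the entire wait. A generic sketch of that RAII-inside-the-loop pattern (the guard type here is hypothetical, standing in for ThreadInVMfromNative):

    #include <thread>

    // Hypothetical stand-in for a scoped VM-state transition.
    struct ScopedStateGuard {
      explicit ScopedStateGuard(int& state) : _state(state) { _state = 1; }  // enter transitional state
      ~ScopedStateGuard()                                    { _state = 0; } // dropped every iteration
      int& _state;
    };

    void wait_for_init(volatile bool& done, int& state) {
      while (!done) {
        // Constructing the guard inside the loop means no thread holds the
        // transition across the whole wait, which is what avoided the deadlock.
        ScopedStateGuard guard(state);
        std::this_thread::yield();
      }
    }
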
--- a/src/share/vm/prims/jvm.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/prims/jvm.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -2475,7 +2475,8 @@
   if (Arguments::vfprintf_hook() != NULL) {
     jio_fprintf(defaultStream::output_stream(), "%s", s);
   } else {
-    ::write(defaultStream::output_fd(), s, (int)strlen(s));
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = ::write(defaultStream::output_fd(), s, (int)strlen(s));
   }
 }
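
The unused count local is the usual way to quiet gcc 4.x when a function such as write(2) is declared warn_unused_result. A minimal sketch of the same workaround outside HotSpot (illustrative; an explicit (void) cast is added here for compilers that also warn about unused locals):

    #include <unistd.h>
    #include <cstring>

    static void write_msg(int fd, const char* s) {
      // Binding the result to a local silences -Wunused-result without
      // changing behaviour; the cast silences -Wunused-variable as well.
      ssize_t count = ::write(fd, s, strlen(s));
      (void) count;
    }
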
 
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1230,8 +1230,14 @@
 
   // Constant pools are not easily reused so we allocate a new one
   // each time.
+  // merge_cp is created unsafe for concurrent GC processing.  It
+  // should be marked safe before discarding it because, even if it is
+  // garbage, it may still be scanned if it crosses a card boundary, in
+  // order to find the start of the first complete object on the card.
   constantPoolHandle merge_cp(THREAD,
-    oopFactory::new_constantPool(merge_cp_length, THREAD));
+    oopFactory::new_constantPool(merge_cp_length,
+                                 methodOopDesc::IsUnsafeConc,
+                                 THREAD));
   int orig_length = old_cp->orig_length();
   if (orig_length == 0) {
     // This old_cp is an actual original constant pool. We save
@@ -1274,6 +1280,7 @@
       // rewriting so we can't use the old constant pool with the new
       // class.
 
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();  // toss the merged constant pool
     } else if (old_cp->length() < scratch_cp->length()) {
       // The old constant pool has fewer entries than the new constant
@@ -1283,6 +1290,7 @@
       // rewriting so we can't use the new constant pool with the old
       // class.
 
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();  // toss the merged constant pool
     } else {
       // The old constant pool has more entries than the new constant
@@ -1296,6 +1304,7 @@
       set_new_constant_pool(scratch_class, merge_cp, merge_cp_length, true,
         THREAD);
       // drop local ref to the merged constant pool
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();
     }
   } else {
@@ -1325,7 +1334,10 @@
     // GCed.
     set_new_constant_pool(scratch_class, merge_cp, merge_cp_length, true,
       THREAD);
+    merge_cp()->set_is_conc_safe(true);
   }
+  assert(old_cp()->is_conc_safe(), "Just checking");
+  assert(scratch_cp()->is_conc_safe(), "Just checking");
 
   return JVMTI_ERROR_NONE;
 } // end merge_cp_and_rewrite()
@@ -2314,13 +2326,16 @@
     // worst case merge situation. We want to associate the minimum
     // sized constant pool with the klass to save space.
     constantPoolHandle smaller_cp(THREAD,
-      oopFactory::new_constantPool(scratch_cp_length, THREAD));
+      oopFactory::new_constantPool(scratch_cp_length,
+                                   methodOopDesc::IsUnsafeConc,
+                                   THREAD));
     // preserve orig_length() value in the smaller copy
     int orig_length = scratch_cp->orig_length();
     assert(orig_length != 0, "sanity check");
     smaller_cp->set_orig_length(orig_length);
     scratch_cp->copy_cp_to(1, scratch_cp_length - 1, smaller_cp, 1, THREAD);
     scratch_cp = smaller_cp;
+    smaller_cp()->set_is_conc_safe(true);
   }
 
   // attach new constant pool to klass
@@ -2516,6 +2531,7 @@
 
     rewrite_cp_refs_in_stack_map_table(method, THREAD);
   } // end for each method
+  assert(scratch_cp()->is_conc_safe(), "Just checking");
 } // end set_new_constant_pool()
 
 
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1320,6 +1320,9 @@
     }
 
     // do the iteration
+    // If this operation encounters a bad object when using CMS,
+    // consider using safe_object_iterate() which avoids perm gen
+    // objects that may contain bad references.
     Universe::heap()->object_iterate(_blk);
 
     // when sharing is enabled we must iterate over the shared spaces
--- a/src/share/vm/runtime/arguments.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -229,6 +229,7 @@
 
   inline void set_base(const char* base);
   inline void add_prefix(const char* prefix);
+  inline void add_suffix_to_prefix(const char* suffix);
   inline void add_suffix(const char* suffix);
   inline void reset_path(const char* base);
 
@@ -290,6 +291,10 @@
   _items[_scp_prefix] = add_to_path(_items[_scp_prefix], prefix, true);
 }
 
+inline void SysClassPath::add_suffix_to_prefix(const char* suffix) {
+  _items[_scp_prefix] = add_to_path(_items[_scp_prefix], suffix, false);
+}
+
 inline void SysClassPath::add_suffix(const char* suffix) {
   _items[_scp_suffix] = add_to_path(_items[_scp_suffix], suffix, false);
 }
@@ -512,7 +517,6 @@
   return CommandLineFlags::boolAtPut(name, &value, origin);
 }
 
-
 static bool set_fp_numeric_flag(char* name, char* value, FlagValueOrigin origin) {
   double v;
   if (sscanf(value, "%lf", &v) != 1) {
@@ -525,7 +529,6 @@
   return false;
 }
 
-
 static bool set_numeric_flag(char* name, char* value, FlagValueOrigin origin) {
   julong v;
   intx intx_v;
@@ -555,7 +558,6 @@
   return false;
 }
 
-
 static bool set_string_flag(char* name, const char* value, FlagValueOrigin origin) {
   if (!CommandLineFlags::ccstrAtPut(name, &value, origin))  return false;
   // Contract:  CommandLineFlags always returns a pointer that needs freeing.
@@ -591,7 +593,6 @@
   return true;
 }
 
-
 bool Arguments::parse_argument(const char* arg, FlagValueOrigin origin) {
 
   // range of acceptable characters spelled out for portability reasons
@@ -652,7 +653,6 @@
   return false;
 }
 
-
 void Arguments::add_string(char*** bldarray, int* count, const char* arg) {
   assert(bldarray != NULL, "illegal argument");
 
@@ -756,7 +756,6 @@
   return true;
 }
 
-
 bool Arguments::process_settings_file(const char* file_name, bool should_exist, jboolean ignore_unrecognized) {
   FILE* stream = fopen(file_name, "rb");
   if (stream == NULL) {
@@ -932,7 +931,6 @@
   }
 }
 
-
 // Conflict: required to use shared spaces (-Xshare:on), but
 // incompatible command line options were chosen.
 
@@ -946,7 +944,6 @@
   }
 }
 
-
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explicitly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1361,7 +1358,7 @@
 
     // Feed the cache size setting into the JDK
     char buffer[1024];
-    sprintf(buffer, "java.lang.Integer.IntegerCache.high=%d", AutoBoxCacheMax);
+    sprintf(buffer, "java.lang.Integer.IntegerCache.high=" INTX_FORMAT, AutoBoxCacheMax);
     add_property(buffer);
   }
   if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
@@ -1714,6 +1711,21 @@
     return result;
   }
 
+  if (AggressiveOpts) {
+    // Insert alt-rt.jar between user-specified bootclasspath
+    // prefix and the default bootclasspath.  os::set_boot_path()
+    // uses meta_index_dir as the default bootclasspath directory.
+    const char* altclasses_jar = "alt-rt.jar";
+    size_t altclasses_path_len = strlen(get_meta_index_dir()) + 1 +
+                                 strlen(altclasses_jar);
+    char* altclasses_path = NEW_C_HEAP_ARRAY(char, altclasses_path_len);
+    strcpy(altclasses_path, get_meta_index_dir());
+    strcat(altclasses_path, altclasses_jar);
+    scp.add_suffix_to_prefix(altclasses_path);
+    scp_assembly_required = true;
+    FREE_C_HEAP_ARRAY(char, altclasses_path);
+  }
+
   // Parse _JAVA_OPTIONS environment variable (if present) (mimics classic VM)
   result = parse_java_options_environment_variable(&scp, &scp_assembly_required);
   if (result != JNI_OK) {
@@ -1729,7 +1741,6 @@
   return JNI_OK;
 }
 
-
 jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args,
                                        SysClassPath* scp_p,
                                        bool* scp_assembly_required_p,
@@ -1795,7 +1806,7 @@
       *scp_assembly_required_p = true;
     // -Xrun
     } else if (match_option(option, "-Xrun", &tail)) {
-      if(tail != NULL) {
+      if (tail != NULL) {
         const char* pos = strchr(tail, ':');
         size_t len = (pos == NULL) ? strlen(tail) : pos - tail;
         char* name = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len + 1), tail, len);
@@ -2478,7 +2489,7 @@
     vm_args.version = JNI_VERSION_1_2;
     vm_args.options = options;
     vm_args.nOptions = i;
-    vm_args.ignoreUnrecognized = false;
+    vm_args.ignoreUnrecognized = IgnoreUnrecognizedVMOptions;
 
     if (PrintVMOptions) {
       const char* tail;
@@ -2525,13 +2536,12 @@
 
   // If flag "-XX:Flags=flags-file" is used it will be the first option to be processed.
   bool settings_file_specified = false;
+  const char* flags_file;
   int index;
   for (index = 0; index < args->nOptions; index++) {
     const JavaVMOption *option = args->options + index;
     if (match_option(option, "-XX:Flags=", &tail)) {
-      if (!process_settings_file(tail, true, args->ignoreUnrecognized)) {
-        return JNI_EINVAL;
-      }
+      flags_file = tail;
       settings_file_specified = true;
     }
     if (match_option(option, "-XX:+PrintVMOptions", &tail)) {
@@ -2540,6 +2550,24 @@
     if (match_option(option, "-XX:-PrintVMOptions", &tail)) {
       PrintVMOptions = false;
     }
+    if (match_option(option, "-XX:+IgnoreUnrecognizedVMOptions", &tail)) {
+      IgnoreUnrecognizedVMOptions = true;
+    }
+    if (match_option(option, "-XX:-IgnoreUnrecognizedVMOptions", &tail)) {
+      IgnoreUnrecognizedVMOptions = false;
+    }
+  }
+
+  if (IgnoreUnrecognizedVMOptions) {
+    // uncast const to modify the flag args->ignoreUnrecognized
+    *(jboolean*)(&args->ignoreUnrecognized) = true;
+  }
+
+  // Parse specified settings file
+  if (settings_file_specified) {
+    if (!process_settings_file(flags_file, true, args->ignoreUnrecognized)) {
+      return JNI_EINVAL;
+    }
   }
 
   // Parse default .hotspotrc settings file
@@ -2558,7 +2586,6 @@
     }
   }
 
-
   // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS
   jint result = parse_vm_init_args(args);
   if (result != JNI_OK) {
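
The option-processing change above is a two-pass scan: the first pass only records -XX:Flags= and the IgnoreUnrecognizedVMOptions setting, and the flags file is parsed afterwards so the leniency flag already applies to it. A rough standalone sketch of that ordering (helper names here are hypothetical):

    #include <cstring>
    #include <vector>

    struct Option { const char* optionString; };

    // Hypothetical stub; the real parser reads and applies the file contents.
    static bool process_settings_file(const char* /*path*/, bool /*ignore_unrecognized*/) {
      return true;
    }

    bool parse_options(const std::vector<Option>& options) {
      const char* flags_file = NULL;
      bool ignore_unrecognized = false;

      // Pass 1: only the options that change how everything else is parsed.
      for (size_t i = 0; i < options.size(); i++) {
        const char* s = options[i].optionString;
        if (strncmp(s, "-XX:Flags=", 10) == 0) {
          flags_file = s + 10;
        } else if (strcmp(s, "-XX:+IgnoreUnrecognizedVMOptions") == 0) {
          ignore_unrecognized = true;
        } else if (strcmp(s, "-XX:-IgnoreUnrecognizedVMOptions") == 0) {
          ignore_unrecognized = false;
        }
      }

      // Pass 2: the settings file now sees the final leniency setting.
      if (flags_file != NULL &&
          !process_settings_file(flags_file, ignore_unrecognized)) {
        return false;
      }
      // ... remaining options would be parsed here ...
      return true;
    }
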
--- a/src/share/vm/runtime/globals.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/globals.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -835,8 +835,21 @@
           "Prints the system dictionary at exit")                           \
                                                                             \
   diagnostic(bool, UnsyncloadClass, false,                                  \
-          "Unstable: VM calls loadClass unsynchronized. Custom classloader "\
-          "must call VM synchronized for findClass & defineClass")          \
+          "Unstable: VM calls loadClass unsynchronized. Custom "            \
+          "class loader  must call VM synchronized for findClass "          \
+          "and defineClass.")                                               \
+                                                                            \
+  product(bool, AlwaysLockClassLoader, false,                               \
+          "Require the VM to acquire the class loader lock before calling " \
+          "loadClass() even for class loaders registering "                 \
+          "as parallel capable. Default false. ")                           \
+                                                                            \
+  product(bool, AllowParallelDefineClass, false,                            \
+          "Allow parallel defineClass requests for class loaders "          \
+          "registering as parallel capable. Default false")                 \
+                                                                            \
+  product(bool, MustCallLoadClassInternal, false,                           \
+          "Call loadClassInternal() rather than loadClass().Default false") \
                                                                             \
   product_pd(bool, DontYieldALot,                                           \
           "Throw away obvious excess yield calls (for SOLARIS only)")       \
@@ -1294,7 +1307,14 @@
   product(intx, ParGCArrayScanChunk, 50,                                    \
           "Scan a subset and push remainder, if array is bigger than this") \
                                                                             \
-  product(intx, ParGCDesiredObjsFromOverflowList, 20,                       \
+  notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
+          "Whether we should simulate work queue overflow in ParNew")       \
+                                                                            \
+  notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000,                   \
+          "An `interval' counter that determines how frequently"            \
+          " we simulate overflow; a smaller number increases frequency")    \
+                                                                            \
+  product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
   product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
@@ -1406,18 +1426,18 @@
   develop(bool, CMSOverflowEarlyRestoration, false,                         \
           "Whether preserved marks should be restored early")               \
                                                                             \
-  product(uintx, CMSMarkStackSize, 32*K,                                    \
+  product(uintx, CMSMarkStackSize, NOT_LP64(32*K) LP64_ONLY(4*M),           \
           "Size of CMS marking stack")                                      \
                                                                             \
-  product(uintx, CMSMarkStackSizeMax, 4*M,                                  \
+  product(uintx, CMSMarkStackSizeMax, NOT_LP64(4*M) LP64_ONLY(512*M),       \
           "Max size of CMS marking stack")                                  \
                                                                             \
   notproduct(bool, CMSMarkStackOverflowALot, false,                         \
           "Whether we should simulate frequent marking stack / work queue"  \
           " overflow")                                                      \
                                                                             \
-  notproduct(intx, CMSMarkStackOverflowInterval, 1000,                      \
-          "A per-thread `interval' counter that determines how frequently"  \
+  notproduct(uintx, CMSMarkStackOverflowInterval, 1000,                     \
+          "An `interval' counter that determines how frequently"            \
           " we simulate overflow; a smaller number increases frequency")    \
                                                                             \
   product(uintx, CMSMaxAbortablePrecleanLoops, 0,                           \
@@ -1635,7 +1655,14 @@
   develop(uintx, WorkStealingYieldsBeforeSleep, 1000,                       \
           "Number of yields before a sleep is done during workstealing")    \
                                                                             \
-  product(uintx, PreserveMarkStackSize, 40,                                 \
+  develop(uintx, WorkStealingHardSpins, 4096,                               \
+          "Number of iterations in a spin loop between checks on "          \
+          "time out of hard spin")                                          \
+                                                                            \
+  develop(uintx, WorkStealingSpinToYieldRatio, 10,                          \
+          "Ratio of hard spins to calls to yield")                          \
+                                                                            \
+  product(uintx, PreserveMarkStackSize, 1024,                               \
            "Size for stack used in promotion failure handling")             \
                                                                             \
   product_pd(bool, UseTLAB, "Use thread-local object allocation")           \
@@ -2167,6 +2194,9 @@
   product(bool, PrintVMOptions, trueInDebug,                                \
          "print VM flag settings")                                          \
                                                                             \
+  product(bool, IgnoreUnrecognizedVMOptions, false,                         \
+         "Ignore unrecognized VM options")                                  \
+                                                                            \
   diagnostic(bool, SerializeVMOutput, true,                                 \
          "Use a mutex to serialize output to tty and hotspot.log")          \
                                                                             \
--- a/src/share/vm/runtime/memprofiler.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/memprofiler.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -104,21 +104,22 @@
   }
 
   // Print trace line in log
-  fprintf(_log_fp, "%6.1f,%5d,%5d,%6ld,%6ld,%6ld,%6ld,",
-      os::elapsedTime(),
-      Threads::number_of_threads(),
-      SystemDictionary::number_of_classes(),
-      Universe::heap()->used() / K,
-      Universe::heap()->capacity() / K,
-      Universe::heap()->permanent_used() / HWperKB,
-      Universe::heap()->permanent_capacity() / HWperKB);
+  fprintf(_log_fp, "%6.1f,%5d,%5d," UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ","
+          UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ",",
+          os::elapsedTime(),
+          Threads::number_of_threads(),
+          SystemDictionary::number_of_classes(),
+          Universe::heap()->used() / K,
+          Universe::heap()->capacity() / K,
+          Universe::heap()->permanent_used() / HWperKB,
+          Universe::heap()->permanent_capacity() / HWperKB);
 
-  fprintf(_log_fp, "%6ld,", CodeCache::capacity() / K);
+  fprintf(_log_fp, UINTX_FORMAT_W(6) ",", CodeCache::capacity() / K);
 
-  fprintf(_log_fp, "%6ld,%6ld,%6ld\n",
-      handles_memory_usage / K,
-      resource_memory_usage / K,
-      OopMapCache::memory_usage() / K);
+  fprintf(_log_fp, UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ",%6ld\n",
+          handles_memory_usage / K,
+          resource_memory_usage / K,
+          OopMapCache::memory_usage() / K);
   fflush(_log_fp);
 }
 
--- a/src/share/vm/runtime/os.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/os.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -74,13 +74,11 @@
   const int milliseconds_after_second =
     milliseconds_since_19700101 % milliseconds_per_microsecond;
   // Convert the time value to a tm and timezone variable
-  const struct tm *time_struct_temp = localtime(&seconds_since_19700101);
-  if (time_struct_temp == NULL) {
-    assert(false, "Failed localtime");
+  struct tm time_struct;
+  if (localtime_pd(&seconds_since_19700101, &time_struct) == NULL) {
+    assert(false, "Failed localtime_pd");
     return NULL;
   }
-  // Save the results of localtime
-  const struct tm time_struct = *time_struct_temp;
   const time_t zone = timezone;
 
   // If daylight savings time is in effect,
@@ -93,10 +91,10 @@
     UTC_to_local = UTC_to_local - seconds_per_hour;
   }
   // Compute the time zone offset.
-  //    localtime(3C) sets timezone to the difference (in seconds)
+  //    localtime_pd() sets timezone to the difference (in seconds)
   //    between UTC and local time.
   //    ISO 8601 says we need the difference between local time and UTC,
-  //    we change the sign of the localtime(3C) result.
+  //    we change the sign of the localtime_pd() result.
   const time_t local_to_UTC = -(UTC_to_local);
   // Then we have to figure out if we are ahead (+) or behind (-) UTC.
   char sign_local_to_UTC = '+';
--- a/src/share/vm/runtime/os.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/os.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -120,7 +120,8 @@
   // Return current local time in a string (YYYY-MM-DD HH:MM:SS).
   // It is MT safe, but not async-safe, as reading time zone
   // information may require a lock on some platforms.
-  static char* local_time_string(char *buf, size_t buflen);
+  static char*      local_time_string(char *buf, size_t buflen);
+  static struct tm* localtime_pd     (const time_t* clock, struct tm*  res);
   // Fill in buffer with current local time as an ISO-8601 string.
   // E.g., YYYY-MM-DDThh:mm:ss.mmm+zzzz.
   // Returns buffer, or NULL if it failed.
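
localtime_pd is declared here as a reentrant, platform-dependent replacement for localtime(3), which returns a pointer into shared static storage. On a POSIX platform one would expect a thin wrapper over localtime_r; a plausible sketch (an assumption, not the actual per-platform implementation):

    #include <time.h>

    // Plausible POSIX-style implementation: fill the caller-supplied buffer
    // instead of handing back localtime()'s shared static struct tm.
    struct tm* os_localtime_pd(const time_t* clock, struct tm* res) {
      return localtime_r(clock, res);   // reentrant; returns res, or NULL on failure
    }
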
--- a/src/share/vm/runtime/safepoint.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/safepoint.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -730,7 +730,7 @@
   if (DieOnSafepointTimeout) {
     char msg[1024];
     VM_Operation *op = VMThread::vm_operation();
-    sprintf(msg, "Safepoint sync time longer than %d ms detected when executing %s.",
+    sprintf(msg, "Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
             SafepointTimeoutDelay,
             op != NULL ? op->name() : "no vm operation");
     fatal(msg);
--- a/src/share/vm/runtime/sharedRuntime.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -192,64 +192,46 @@
 
 
 JRT_LEAF(jint, SharedRuntime::f2i(jfloat  x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  jint ltmp   = (jint)lltmp;
-  if (ltmp == lltmp) {
-    return ltmp;
-  } else {
-    if (x < 0) {
-      return min_jint;
-    } else {
-      return max_jint;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jfloat) max_jint)
+    return max_jint;
+  if (x <= (jfloat) min_jint)
+    return min_jint;
+  return (jint) x;
 JRT_END
 
 
 JRT_LEAF(jlong, SharedRuntime::f2l(jfloat  x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  if (lltmp != min_jlong) {
-    return lltmp;
-  } else {
-    if (x < 0) {
-      return min_jlong;
-    } else {
-      return max_jlong;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jfloat) max_jlong)
+    return max_jlong;
+  if (x <= (jfloat) min_jlong)
+    return min_jlong;
+  return (jlong) x;
 JRT_END
 
 
 JRT_LEAF(jint, SharedRuntime::d2i(jdouble x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  jint ltmp   = (jint)lltmp;
-  if (ltmp == lltmp) {
-    return ltmp;
-  } else {
-    if (x < 0) {
-      return min_jint;
-    } else {
-      return max_jint;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jdouble) max_jint)
+    return max_jint;
+  if (x <= (jdouble) min_jint)
+    return min_jint;
+  return (jint) x;
 JRT_END
 
 
 JRT_LEAF(jlong, SharedRuntime::d2l(jdouble x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  if (lltmp != min_jlong) {
-    return lltmp;
-  } else {
-    if (x < 0) {
-      return min_jlong;
-    } else {
-      return max_jlong;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jdouble) max_jlong)
+    return max_jlong;
+  if (x <= (jdouble) min_jlong)
+    return min_jlong;
+  return (jlong) x;
 JRT_END
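
The rewritten conversions above encode the Java narrowing rules directly instead of leaning on an out-of-range C++ cast, which is undefined behaviour: NaN maps to 0 and out-of-range values saturate at the integer type's min/max. A small self-contained check of those expected results (it mirrors the new logic rather than calling HotSpot):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Java-style float -> int narrowing: NaN -> 0, otherwise saturate.
    static int32_t java_f2i(float x) {
      if (std::isnan(x)) return 0;
      if (x >= static_cast<float>(std::numeric_limits<int32_t>::max()))
        return std::numeric_limits<int32_t>::max();
      if (x <= static_cast<float>(std::numeric_limits<int32_t>::min()))
        return std::numeric_limits<int32_t>::min();
      return static_cast<int32_t>(x);
    }

    int main() {
      assert(java_f2i(std::nanf("")) == 0);
      assert(java_f2i(1e30f)  == std::numeric_limits<int32_t>::max());
      assert(java_f2i(-1e30f) == std::numeric_limits<int32_t>::min());
      assert(java_f2i(-1.9f)  == -1);   // truncation toward zero for in-range values
      return 0;
    }
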
 
 
--- a/src/share/vm/runtime/synchronizer.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/runtime/synchronizer.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -424,7 +424,7 @@
 // asserts is that error message -- often something about negative array
 // indices -- is opaque.
 
-#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @%X\n", tag); }
+#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); }
 
 void ObjectMonitor::ctAsserts() {
   CTASSERT(offset_of (ObjectMonitor, _header) == 0);
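
CTASSERT is the classic negative-array-size compile-time assert: when x is false, 1-(2*!(x)) evaluates to -1 and the declaration no longer compiles; the printf of the tag address merely keeps the array from being optimised away. A minimal sketch of a common typedef variant of the same trick:

    // Pre-C++11 compile-time assert: a false condition yields an array of
    // size -1, which is ill-formed, so the build fails at the offending line.
    #define CT_ASSERT(cond) typedef char ct_assert_failed_[1 - 2 * !(cond)]

    CT_ASSERT(sizeof(int) >= 2);    // compiles
    // CT_ASSERT(sizeof(int) == 1); // would be rejected by the compiler
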
--- a/src/share/vm/services/heapDumper.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/services/heapDumper.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1700,7 +1700,7 @@
   // The HPROF_GC_CLASS_DUMP and HPROF_GC_INSTANCE_DUMP are the vast bulk
   // of the heap dump.
   HeapObjectDumper obj_dumper(this, writer());
-  Universe::heap()->object_iterate(&obj_dumper);
+  Universe::heap()->safe_object_iterate(&obj_dumper);
 
   // HPROF_GC_ROOT_THREAD_OBJ + frames + jni locals
   do_threads();
--- a/src/share/vm/utilities/globalDefinitions.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,6 +74,7 @@
 extern int BitsPerHeapOop;
 
 const int BitsPerJavaInteger = 32;
+const int BitsPerJavaLong    = 64;
 const int BitsPerSize_t      = size_tSize * BitsPerByte;
 
 // Size of a char[] needed to represent a jint as a string in decimal.
@@ -906,6 +907,14 @@
   return log2_intptr(x);
 }
 
+//* the argument must be exactly a power of 2
+inline int exact_log2_long(jlong x) {
+  #ifdef ASSERT
+    if (!is_power_of_2_long(x)) basic_fatal("x must be a power of 2");
+  #endif
+  return log2_long(x);
+}
+
 
 // returns integer round-up to the nearest multiple of s (s must be a power of two)
 inline intptr_t round_to(intptr_t x, uintx s) {
@@ -1087,15 +1096,24 @@
 // Format macros that allow the field width to be specified.  The width must be
 // a string literal (e.g., "8") or a macro that evaluates to one.
 #ifdef _LP64
+#define UINTX_FORMAT_W(width)   UINT64_FORMAT_W(width)
 #define SSIZE_FORMAT_W(width)   INT64_FORMAT_W(width)
 #define SIZE_FORMAT_W(width)    UINT64_FORMAT_W(width)
 #else
+#define UINTX_FORMAT_W(width)   UINT32_FORMAT_W(width)
 #define SSIZE_FORMAT_W(width)   INT32_FORMAT_W(width)
 #define SIZE_FORMAT_W(width)    UINT32_FORMAT_W(width)
 #endif // _LP64
 
 // Format pointers and size_t (or size_t-like integer types) which change size
-// between 32- and 64-bit.
+// between 32- and 64-bit. The pointer format theoretically should be "%p",
+// however, it has different output on different platforms. On Windows, the data
+// will be padded with zeros automatically. On Solaris, we can use "%016p" &
+// "%08p" on 64 bit & 32 bit platforms to make the data padded with extra zeros.
+// On Linux, "%016p" or "%08p" is not be allowed, at least on the latest GCC
+// 4.3.2. So we have to use "%016x" or "%08x" to simulate the printing format.
+// GCC 4.3.2, however requires the data to be converted to "intptr_t" when
+// using "%x".
 #ifdef  _LP64
 #define PTR_FORMAT    PTR64_FORMAT
 #define UINTX_FORMAT  UINT64_FORMAT
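
UINTX_FORMAT_W(width) follows the surrounding pattern: the width must be a string literal because the final printf format is built by adjacent string-literal concatenation. A hedged sketch of how such a macro typically expands (the underlying *_FORMAT_W definitions live in the per-compiler headers, so the names below are illustrative):

    #include <cinttypes>
    #include <cstdio>

    // Illustrative 64-bit definitions in the spirit of the real macros.
    #define MY_UINT64_FORMAT_W(width) "%" width PRIu64
    #define MY_UINTX_FORMAT_W(width)  MY_UINT64_FORMAT_W(width)

    int main() {
      uint64_t used_kb = 12345;
      // "%" "6" "llu" concatenates into one format string at compile time,
      // which is why the width argument has to be a string literal.
      printf("used: " MY_UINTX_FORMAT_W("6") " KB\n", used_kb);
      return 0;
    }
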
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -116,7 +116,9 @@
   #ifdef _LP64
     #define NULL_WORD  0L
   #else
-    #define NULL_WORD  0
+    // Cast 0 to intptr_t rather than int32_t since they are not the same type
+    // on platforms such as Mac OS X.
+    #define NULL_WORD  ((intptr_t)0)
   #endif
 #else
   #define NULL_WORD  NULL
--- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -115,7 +115,9 @@
   #ifdef _LP64
     #define NULL_WORD  0L
   #else
-    #define NULL_WORD  0
+    // Cast 0 to intptr_t rather than int32_t since they are not the same type
+    // on some platforms.
+    #define NULL_WORD  ((intptr_t)0)
   #endif
 #else
   #define NULL_WORD  NULL
--- a/src/share/vm/utilities/ostream.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/ostream.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -300,7 +300,10 @@
 }
 
 void fileStream::write(const char* s, size_t len) {
-  if (_file != NULL)  fwrite(s, 1, len, _file);
+  if (_file != NULL)  {
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = fwrite(s, 1, len, _file);
+  }
   update_position(s, len);
 }
 
@@ -328,7 +331,10 @@
 }
 
 void fdStream::write(const char* s, size_t len) {
-  if (_fd != -1) ::write(_fd, s, (int)len);
+  if (_fd != -1) {
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = ::write(_fd, s, (int)len);
+  }
   update_position(s, len);
 }
 
--- a/src/share/vm/utilities/taskqueue.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/taskqueue.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -25,6 +25,12 @@
 # include "incls/_precompiled.incl"
 # include "incls/_taskqueue.cpp.incl"
 
+#ifdef TRACESPINNING
+uint ParallelTaskTerminator::_total_yields = 0;
+uint ParallelTaskTerminator::_total_spins = 0;
+uint ParallelTaskTerminator::_total_peeks = 0;
+#endif
+
 bool TaskQueueSuper::peek() {
   return _bottom != _age.top();
 }
@@ -69,15 +75,62 @@
 ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   Atomic::inc(&_offered_termination);
 
-  juint yield_count = 0;
+  uint yield_count = 0;
+  // Number of hard spin loops done since last yield
+  uint hard_spin_count = 0;
+  // Number of iterations in the hard spin loop.
+  uint hard_spin_limit = WorkStealingHardSpins;
+
+  // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
+  // If it is greater than 0, then start with a small number
+  // of spins and increase number with each turn at spinning until
+  // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
+  // Then do a yield() call and start spinning afresh.
+  if (WorkStealingSpinToYieldRatio > 0) {
+    hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+    hard_spin_limit = MAX2(hard_spin_limit, 1U);
+  }
+  // Remember the initial spin limit.
+  uint hard_spin_start = hard_spin_limit;
+
+  // Loop waiting for all threads to offer termination or
+  // more work.
   while (true) {
+    // Are all threads offering termination?
     if (_offered_termination == _n_threads) {
-      //inner_termination_loop();
       return true;
     } else {
+      // Look for more work.
+      // Periodically sleep() instead of yield() to give threads
+      // waiting on the cores the chance to grab this code
       if (yield_count <= WorkStealingYieldsBeforeSleep) {
+        // Do a yield or hardspin.  For purposes of deciding whether
+        // to sleep, count this as a yield.
         yield_count++;
-        yield();
+
+        // Periodically call yield() instead of spinning.
+        // After WorkStealingSpinToYieldRatio spins, do a yield() call
+        // and reset the counts and starting limit.
+        if (hard_spin_count > WorkStealingSpinToYieldRatio) {
+          yield();
+          hard_spin_count = 0;
+          hard_spin_limit = hard_spin_start;
+#ifdef TRACESPINNING
+          _total_yields++;
+#endif
+        } else {
+          // Hard spin this time
+          // Increase the hard spinning period but only up to a limit.
+          hard_spin_limit = MIN2(2*hard_spin_limit,
+                                 (uint) WorkStealingHardSpins);
+          for (uint j = 0; j < hard_spin_limit; j++) {
+            SpinPause();
+          }
+          hard_spin_count++;
+#ifdef TRACESPINNING
+          _total_spins++;
+#endif
+        }
       } else {
         if (PrintGCDetails && Verbose) {
          gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() "
@@ -92,6 +145,9 @@
         sleep(WorkStealingSleepMillis);
       }
 
+#ifdef TRACESPINNING
+      _total_peeks++;
+#endif
       if (peek_in_queue_set() ||
           (terminator != NULL && terminator->should_exit_termination())) {
         Atomic::dec(&_offered_termination);
@@ -101,6 +157,16 @@
   }
 }
 
+#ifdef TRACESPINNING
+void ParallelTaskTerminator::print_termination_counts() {
+  gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %lld  "
+    "Total spins: %lld  Total peeks: %lld",
+    total_yields(),
+    total_spins(),
+    total_peeks());
+}
+#endif
+
 void ParallelTaskTerminator::reset_for_reuse() {
   if (_offered_termination != 0) {
     assert(_offered_termination == _n_threads,
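
The new offer_termination loop above layers three waiting strategies: a bounded hard spin whose limit doubles up to WorkStealingHardSpins, a yield() after every WorkStealingSpinToYieldRatio spin rounds, and finally sleep() once WorkStealingYieldsBeforeSleep yields have been used. A condensed standalone sketch of that backoff progression (parameter names echo the flags; the termination checks are reduced to a single done flag):

    #include <algorithm>
    #include <chrono>
    #include <thread>

    void wait_with_backoff(volatile bool& done,
                           unsigned hard_spins,           // cf. WorkStealingHardSpins
                           unsigned spin_to_yield_ratio,  // cf. WorkStealingSpinToYieldRatio
                           unsigned yields_before_sleep)  // cf. WorkStealingYieldsBeforeSleep
    {
      unsigned yield_count = 0;
      unsigned spin_rounds = 0;
      unsigned spin_limit  = std::max(hard_spins >> spin_to_yield_ratio, 1u);

      while (!done) {
        if (yield_count <= yields_before_sleep) {
          yield_count++;
          if (spin_rounds > spin_to_yield_ratio) {
            std::this_thread::yield();                       // occasional real yield
            spin_rounds = 0;
            spin_limit  = std::max(hard_spins >> spin_to_yield_ratio, 1u);
          } else {
            spin_limit = std::min(2 * spin_limit, hard_spins);
            for (volatile unsigned j = 0; j < spin_limit; j++) { /* spin pause */ }
            spin_rounds++;
          }
        } else {
          std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
      }
    }
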
--- a/src/share/vm/utilities/taskqueue.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/taskqueue.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -22,67 +22,76 @@
  *
  */
 
+#ifdef LP64
+typedef juint TAG_TYPE;
+// for a taskqueue size of 4M
+#define LOG_TASKQ_SIZE 22
+#else
+typedef jushort TAG_TYPE;
+// for a taskqueue size of 16K
+#define LOG_TASKQ_SIZE 14
+#endif
+
 class TaskQueueSuper: public CHeapObj {
 protected:
   // The first free element after the last one pushed (mod _n).
-  // (For now we'll assume only 32-bit CAS).
-  volatile juint _bottom;
+  volatile uint _bottom;
 
   // log2 of the size of the queue.
   enum SomeProtectedConstants {
-    Log_n = 14
+    Log_n = LOG_TASKQ_SIZE
   };
+#undef LOG_TASKQ_SIZE
 
   // Size of the queue.
-  juint n() { return (1 << Log_n); }
+  uint n() { return (1 << Log_n); }
   // For computing "x mod n" efficiently.
-  juint n_mod_mask() { return n() - 1; }
+  uint n_mod_mask() { return n() - 1; }
 
   struct Age {
-    jushort _top;
-    jushort _tag;
+    TAG_TYPE _top;
+    TAG_TYPE _tag;
 
-    jushort tag() const { return _tag; }
-    jushort top() const { return _top; }
+    TAG_TYPE tag() const { return _tag; }
+    TAG_TYPE top() const { return _top; }
 
     Age() { _tag = 0; _top = 0; }
 
     friend bool operator ==(const Age& a1, const Age& a2) {
       return a1.tag() == a2.tag() && a1.top() == a2.top();
     }
-
   };
   Age _age;
   // These make sure we do single atomic reads and writes.
   Age get_age() {
-    jint res = *(volatile jint*)(&_age);
+    uint res = *(volatile uint*)(&_age);
     return *(Age*)(&res);
   }
   void set_age(Age a) {
-    *(volatile jint*)(&_age) = *(int*)(&a);
+    *(volatile uint*)(&_age) = *(uint*)(&a);
   }
 
-  jushort get_top() {
+  TAG_TYPE get_top() {
     return get_age().top();
   }
 
   // These both operate mod _n.
-  juint increment_index(juint ind) {
+  uint increment_index(uint ind) {
     return (ind + 1) & n_mod_mask();
   }
-  juint decrement_index(juint ind) {
+  uint decrement_index(uint ind) {
     return (ind - 1) & n_mod_mask();
   }
 
   // Returns a number in the range [0.._n).  If the result is "n-1", it
   // should be interpreted as 0.
-  juint dirty_size(juint bot, juint top) {
-    return ((jint)bot - (jint)top) & n_mod_mask();
+  uint dirty_size(uint bot, uint top) {
+    return ((int)bot - (int)top) & n_mod_mask();
   }
 
   // Returns the size corresponding to the given "bot" and "top".
-  juint size(juint bot, juint top) {
-    juint sz = dirty_size(bot, top);
+  uint size(uint bot, uint top) {
+    uint sz = dirty_size(bot, top);
     // Has the queue "wrapped", so that bottom is less than top?
     // There's a complicated special case here.  A pair of threads could
     // perform pop_local and pop_global operations concurrently, starting
@@ -94,7 +103,7 @@
     // owner performs pop_local's, and several concurrent threads
     // attempting to perform the pop_global will all perform the same CAS,
     // and only one can succeed.  Any stealing thread that reads after
-    // either the increment or decrement will seen an empty queue, and will
+    // either the increment or decrement will see an empty queue, and will
     // not join the competitors.  The "sz == -1 || sz == _n-1" state will
     // not be modified  by concurrent queues, so the owner thread can reset
     // the state to _bottom == top so subsequent pushes will be performed
@@ -112,11 +121,11 @@
   // Return an estimate of the number of elements in the queue.
   // The "careful" version admits the possibility of pop_local/pop_global
   // races.
-  juint size() {
+  uint size() {
     return size(_bottom, get_top());
   }
 
-  juint dirty_size() {
+  uint dirty_size() {
     return dirty_size(_bottom, get_top());
   }
 
@@ -127,15 +136,15 @@
 
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
-  juint max_elems() { return n() - 2; }
+  uint max_elems() { return n() - 2; }
 
 };
 
 template<class E> class GenericTaskQueue: public TaskQueueSuper {
 private:
   // Slow paths for push, pop_local.  (pop_global has no fast path.)
-  bool push_slow(E t, juint dirty_n_elems);
-  bool pop_local_slow(juint localBot, Age oldAge);
+  bool push_slow(E t, uint dirty_n_elems);
+  bool pop_local_slow(uint localBot, Age oldAge);
 
 public:
   // Initializes the queue to empty.
@@ -170,7 +179,7 @@
 
 template<class E>
 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
-  assert(sizeof(Age) == sizeof(jint), "Depends on this.");
+  assert(sizeof(Age) == sizeof(int), "Depends on this.");
 }
 
 template<class E>
@@ -182,9 +191,9 @@
 template<class E>
 void GenericTaskQueue<E>::oops_do(OopClosure* f) {
   // tty->print_cr("START OopTaskQueue::oops_do");
-  int iters = size();
-  juint index = _bottom;
-  for (int i = 0; i < iters; ++i) {
+  uint iters = size();
+  uint index = _bottom;
+  for (uint i = 0; i < iters; ++i) {
     index = decrement_index(index);
     // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
     //            index, &_elems[index], _elems[index]);
@@ -198,10 +207,10 @@
 
 
 template<class E>
-bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
+bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
   if (dirty_n_elems == n() - 1) {
     // Actually means 0, so do the push.
-    juint localBot = _bottom;
+    uint localBot = _bottom;
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
     return true;
@@ -211,7 +220,7 @@
 
 template<class E>
 bool GenericTaskQueue<E>::
-pop_local_slow(juint localBot, Age oldAge) {
+pop_local_slow(uint localBot, Age oldAge) {
   // This queue was observed to contain exactly one element; either this
   // thread will claim it, or a competing "pop_global".  In either case,
   // the queue will be logically empty afterwards.  Create a new Age value
@@ -230,9 +239,8 @@
     Age tempAge;
     // No competing pop_global has yet incremented "top"; we'll try to
     // install new_age, thus claiming the element.
-    assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint),
-           "Assumption about CAS unit.");
-    *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
+    assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
+    *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
     if (tempAge == oldAge) {
       // We win.
       assert(dirty_size(localBot, get_top()) != n() - 1,
@@ -253,8 +261,8 @@
 bool GenericTaskQueue<E>::pop_global(E& t) {
   Age newAge;
   Age oldAge = get_age();
-  juint localBot = _bottom;
-  juint n_elems = size(localBot, oldAge.top());
+  uint localBot = _bottom;
+  uint n_elems = size(localBot, oldAge.top());
   if (n_elems == 0) {
     return false;
   }
@@ -263,7 +271,7 @@
   newAge._top = increment_index(newAge.top());
   if ( newAge._top == 0 ) newAge._tag++;
   Age resAge;
-  *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
+  *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
   // Note that using "_bottom" here might fail, since a pop_local might
   // have decremented it.
   assert(dirty_size(localBot, newAge._top) != n() - 1,
@@ -287,7 +295,7 @@
 
 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper {
 private:
-  int _n;
+  uint _n;
   GenericTaskQueue<E>** _queues;
 
 public:
@@ -300,51 +308,51 @@
     }
   }
 
-  bool steal_1_random(int queue_num, int* seed, E& t);
-  bool steal_best_of_2(int queue_num, int* seed, E& t);
-  bool steal_best_of_all(int queue_num, int* seed, E& t);
+  bool steal_1_random(uint queue_num, int* seed, E& t);
+  bool steal_best_of_2(uint queue_num, int* seed, E& t);
+  bool steal_best_of_all(uint queue_num, int* seed, E& t);
 
-  void register_queue(int i, GenericTaskQueue<E>* q);
+  void register_queue(uint i, GenericTaskQueue<E>* q);
 
-  GenericTaskQueue<E>* queue(int n);
+  GenericTaskQueue<E>* queue(uint n);
 
   // The thread with queue number "queue_num" (and whose random number seed
   // is at "seed") is trying to steal a task from some other queue.  (It
   // may try several queues, according to some configuration parameter.)
   // If some steal succeeds, returns "true" and sets "t" the stolen task,
   // otherwise returns false.
-  bool steal(int queue_num, int* seed, E& t);
+  bool steal(uint queue_num, int* seed, E& t);
 
   bool peek();
 };
 
 template<class E>
-void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) {
-  assert(0 <= i && i < _n, "index out of range.");
+void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) {
+  assert(i < _n, "index out of range.");
   _queues[i] = q;
 }
 
 template<class E>
-GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) {
+GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) {
   return _queues[i];
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) {
-  for (int i = 0; i < 2 * _n; i++)
+bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) {
+  for (uint i = 0; i < 2 * _n; i++)
     if (steal_best_of_2(queue_num, seed, t))
       return true;
   return false;
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     int best_k;
-    jint best_sz = 0;
-    for (int k = 0; k < _n; k++) {
+    uint best_sz = 0;
+    for (uint k = 0; k < _n; k++) {
       if (k == queue_num) continue;
-      jint sz = _queues[k]->size();
+      uint sz = _queues[k]->size();
       if (sz > best_sz) {
         best_sz = sz;
         best_k = k;
@@ -362,9 +370,9 @@
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
-    int k = queue_num;
+    uint k = queue_num;
     while (k == queue_num) k = randomParkAndMiller(seed) % _n;
     return _queues[2]->pop_global(t);
   } else if (_n == 2) {
@@ -378,20 +386,20 @@
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
-    int k1 = queue_num;
+    uint k1 = queue_num;
     while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
-    int k2 = queue_num;
+    uint k2 = queue_num;
     while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n;
     // Sample both and try the larger.
-    juint sz1 = _queues[k1]->size();
-    juint sz2 = _queues[k2]->size();
+    uint sz1 = _queues[k1]->size();
+    uint sz2 = _queues[k2]->size();
     if (sz2 > sz1) return _queues[k2]->pop_global(t);
     else return _queues[k1]->pop_global(t);
   } else if (_n == 2) {
     // Just try the other one.
-    int k = (queue_num + 1) % 2;
+    uint k = (queue_num + 1) % 2;
     return _queues[k]->pop_global(t);
   } else {
     assert(_n == 1, "can't be zero.");
@@ -402,7 +410,7 @@
 template<class E>
 bool GenericTaskQueueSet<E>::peek() {
   // Try all the queues.
-  for (int j = 0; j < _n; j++) {
+  for (uint j = 0; j < _n; j++) {
     if (_queues[j]->peek())
       return true;
   }
@@ -418,11 +426,19 @@
 // A class to aid in the termination of a set of parallel tasks using
 // TaskQueueSet's for work stealing.
 
+#undef TRACESPINNING
+
 class ParallelTaskTerminator: public StackObj {
 private:
   int _n_threads;
   TaskQueueSetSuper* _queue_set;
-  jint _offered_termination;
+  int _offered_termination;
+
+#ifdef TRACESPINNING
+  static uint _total_yields;
+  static uint _total_spins;
+  static uint _total_peeks;
+#endif
 
   bool peek_in_queue_set();
 protected:
@@ -454,13 +470,19 @@
   // the terminator is finished.
   void reset_for_reuse();
 
+#ifdef TRACESPINNING
+  static uint total_yields() { return _total_yields; }
+  static uint total_spins() { return _total_spins; }
+  static uint total_peeks() { return _total_peeks; }
+  static void print_termination_counts();
+#endif
 };
 
 #define SIMPLE_STACK 0
 
 template<class E> inline bool GenericTaskQueue<E>::push(E t) {
 #if SIMPLE_STACK
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   if (_bottom < max_elems()) {
     _elems[localBot] = t;
     _bottom = localBot + 1;
@@ -469,10 +491,10 @@
     return false;
   }
 #else
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
-  jushort top = get_top();
-  juint dirty_n_elems = dirty_size(localBot, top);
+  TAG_TYPE top = get_top();
+  uint dirty_n_elems = dirty_size(localBot, top);
   assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
          "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
@@ -487,19 +509,19 @@
 
 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
 #if SIMPLE_STACK
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   assert(localBot > 0, "precondition.");
   localBot--;
   t = _elems[localBot];
   _bottom = localBot;
   return true;
 #else
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   // This value cannot be n-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
   // resets the size to 0 before the next call (which is sequential,
   // since this is pop_local.)
-  juint dirty_n_elems = dirty_size(localBot, get_top());
+  uint dirty_n_elems = dirty_size(localBot, get_top());
   assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
   if (dirty_n_elems == 0) return false;
   localBot = decrement_index(localBot);
@@ -512,7 +534,7 @@
   // If there's still at least one element in the queue, based on the
   // "_bottom" and "age" we've read, then there can be no interference with
   // a "pop_global" operation, and we're done.
-  juint tp = get_top();
+  TAG_TYPE tp = get_top();    // XXX
   if (size(localBot, tp) > 0) {
     assert(dirty_size(localBot, tp) != n() - 1,
            "Shouldn't be possible...");
@@ -581,7 +603,7 @@
   bool is_empty();
   bool stealable_is_empty();
   bool overflow_is_empty();
-  juint stealable_size() { return _region_queue.size(); }
+  uint stealable_size() { return _region_queue.size(); }
   RegionTaskQueue* task_queue() { return &_region_queue; }
 };
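
Two ideas carry most of the taskqueue changes above: queue indices live in a power-of-two ring so "mod n" is a bit-mask, and the (top, tag) Age pair is sized to fit one machine word so it can be read, written and compare-and-swapped as a unit. A compact sketch of the packing idea using standard atomics (illustrative only; the real code reinterprets the struct for Atomic::cmpxchg):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    // Two 16-bit fields packed into one 32-bit word so a single CAS updates both.
    struct Age {
      uint16_t top;
      uint16_t tag;
    };

    static uint32_t pack(Age a)        { uint32_t w; std::memcpy(&w, &a, sizeof(w)); return w; }
    static Age      unpack(uint32_t w) { Age a;      std::memcpy(&a, &w, sizeof(a)); return a; }

    // Claim one element by bumping top, analogous to pop_global's CAS on _age.
    bool try_bump_top(std::atomic<uint32_t>& age_word, unsigned log_n) {
      uint32_t old_w = age_word.load();
      Age old_age = unpack(old_w);
      Age new_age = old_age;
      new_age.top = (old_age.top + 1) & ((1u << log_n) - 1);   // index mod 2^log_n via mask
      if (new_age.top == 0) new_age.tag++;                     // wrapping bumps the tag (ABA guard)
      return age_word.compare_exchange_strong(old_w, pack(new_age));
    }
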
 
--- a/src/share/vm/utilities/vmError.cpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/vmError.cpp	Fri Feb 27 15:13:00 2009 -0800
@@ -674,6 +674,11 @@
     reset_signal_handlers();
 
   } else {
+    // If UseOSErrorReporting is set, we call this for each level of the call stack
+    // while searching for the exception handler.  Only the first level needs
+    // to be reported.
+    if (UseOSErrorReporting && log_done) return;
+
     // This is not the first error, see if it happened in a different thread
     // or in the same thread during error reporting.
     if (first_error_tid != mytid) {
--- a/src/share/vm/utilities/vmError.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/vmError.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -50,7 +50,7 @@
 
   // additional info for VM internal errors
   const char * _filename;
-  int          _lineno;
+  size_t       _lineno;
 
   // used by fatal error handler
   int          _current_step;
--- a/src/share/vm/utilities/workgroup.hpp	Sat Jan 31 17:19:42 2009 -0800
+++ b/src/share/vm/utilities/workgroup.hpp	Fri Feb 27 15:13:00 2009 -0800
@@ -32,7 +32,7 @@
 
 // An abstract task to be worked on by a gang.
 // You subclass this to supply your own work() method
-class AbstractGangTask: public CHeapObj {
+class AbstractGangTask VALUE_OBJ_CLASS_SPEC {
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
--- a/test/Makefile	Sat Jan 31 17:19:42 2009 -0800
+++ b/test/Makefile	Fri Feb 27 15:13:00 2009 -0800
@@ -28,9 +28,9 @@
 
 # Get OS/ARCH specifics
 OSNAME = $(shell uname -s)
-SLASH_JAVA = /java
 ifeq ($(OSNAME), SunOS)
   PLATFORM = solaris
+  SLASH_JAVA = /java
   ARCH = $(shell uname -p)
   ifeq ($(ARCH), i386)
     ARCH=i586
@@ -38,6 +38,7 @@
 endif
 ifeq ($(OSNAME), Linux)
   PLATFORM = linux
+  SLASH_JAVA = /java
   ARCH = $(shell uname -m)
   ifeq ($(ARCH), i386)
     ARCH = i586
@@ -62,6 +63,10 @@
   EXESUFFIX = .exe
 endif
 
+ifdef ALT_SLASH_JAVA
+  SLASH_JAVA = $(ALT_SLASH_JAVA)
+endif
+
 # Utilities used
 CD    = cd
 CP    = cp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6603011/Test.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6603011
+ * @summary long/int division by constant
+ *
+ * @run main/othervm -Xcomp -Xbatch -XX:-Inline Test
+ */
+
+//
+// -XX:-Inline is essential to this test so that verification functions
+//   divi, modi, divl and modl generate "plain" divides.
+// -Xcomp -Xbatch are also useful to ensure the full range of
+//   dividend and divisor combinations are tested
+//
+
+import java.net.*;
+
+class s {
+  static int  divi(int  dividend, int  divisor) { return dividend / divisor; }
+  static int  modi(int  dividend, int  divisor) { return dividend % divisor; }
+  static long divl(long dividend, long divisor) { return dividend / divisor; }
+  static long modl(long dividend, long divisor) { return dividend % divisor; }
+}
+
+public class Test implements Runnable {
+  // Report verbose messages on failure; turn off to suppress
+  // too much output with gross numbers of failures.
+  static final boolean VERBOSE = true;
+
+  // Initialize DIVISOR so that it is final in this class.
+  static final int DIVISOR;
+  static {
+    int value = 0;
+    try {
+      value = Integer.decode(System.getProperty("divisor"));
+    } catch (Throwable e) {
+    }
+    DIVISOR = value;
+  }
+
+  // The methods of interest. We want the JIT to compile these
+  // and convert the divide into a multiply.
+  public int divbyI (int dividend)   { return dividend / DIVISOR; }
+  public int modbyI (int dividend)   { return dividend % DIVISOR; }
+  public long divbyL (long dividend) { return dividend / DIVISOR; }
+  public long modbyL (long dividend) { return dividend % DIVISOR; }
+
+  public int divisor() { return DIVISOR; }
+
+  public boolean checkI (int dividend) {
+    int quo = divbyI(dividend);
+    int rem = modbyI(dividend);
+    int quo0 = s.divi(dividend, divisor());
+    int rem0 = s.modi(dividend, divisor());
+
+    if (quo != quo0 || rem != rem0) {
+      if (VERBOSE) {
+        System.out.println("Computed: " + dividend + " / " + divisor() + " = " +
+                           quo  + ", " + dividend + " % " + divisor() + " = " + rem );
+        System.out.println("expected: " + dividend + " / " + divisor() + " = " +
+                           quo0 + ", " + dividend + " % " + divisor() + " = " + rem0);
+        // Report sign of rem failure
+        if (rem != 0 && (rem ^ dividend) < 0) {
+          System.out.println("  rem & dividend have different signs");
+        }
+        // Report range of rem failure
+        if (java.lang.Math.abs(rem) >= java.lang.Math.abs(divisor())) {
+          System.out.println("  remainder out of range");
+        }
+        // Report quo/rem identity relationship failure
+        if ((quo * divisor()) + rem != dividend) {
+          System.out.println("  quotien/remainder invariant broken");
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  public boolean checkL (long dividend) {
+    long quo = divbyL(dividend);
+    long rem = modbyL(dividend);
+    long quo0 = s.divl(dividend, divisor());
+    long rem0 = s.modl(dividend, divisor());
+
+    if (quo != quo0 || rem != rem0) {
+      if (VERBOSE) {
+        System.out.println("  " + dividend + " / " + divisor() + " = " +
+                           quo + ", " + dividend + " % " + divisor() + " = " + rem);
+        // Report sign of rem failure
+        if (rem != 0 && (rem ^ dividend) < 0) {
+          System.out.println("  rem & dividend have different signs");
+        }
+        // Report range of rem failure
+        if (java.lang.Math.abs(rem) >= java.lang.Math.abs(divisor())) {
+          System.out.println("  remainder out of range");
+        }
+        // Report quo/rem identity relationship failure
+        if ((quo * divisor()) + rem != dividend) {
+          System.out.println(" (" + quo + " * " + divisor() + ") + " + rem + " != "
+                             + dividend);
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  public void run() {
+    // Don't try to divide by zero
+    if (divisor() == 0) return;
+
+    // Range of dividends to check. Try dividends from start to end
+    // inclusive, as well as variations on those values as shifted
+    // left.
+    int start = -1024;
+    int end = 1024;
+
+    // Test int division using a variety of dividends.
+    int wrong = 0;
+    int total = 0;
+
+    outerloop:
+    for (int i = start; i <= end; i++) {
+      for (int s = 0; s < 32; s += 4) {
+        total++;
+        int dividend = i << s;
+        if (!checkI(dividend)) {
+          wrong++;
+          // Stop on the first failure
+          // break outerloop;
+        }
+      }
+    }
+    if (wrong > 0) {
+      System.out.println("divisor " + divisor() + ": " +
+                         wrong + "/" + total + " wrong int divisions");
+    }
+
+    // Test long division using a variety of dividends.
+    wrong = 0;
+    total = 0;
+
+    outerloop:
+    for (int i = start; i <= end; i++) {
+      for (int s = 0; s < 64; s += 4) {
+        total++;
+        long dividend = i << s;
+        if (!checkL(dividend)) {
+          wrong++;
+          // Stop on the first failure
+          // break outerloop;
+        }
+      }
+    }
+    if (wrong > 0) {
+      System.out.println("divisor " + divisor() + ": " +
+                         wrong + "/" + total + " wrong long divisions");
+    }
+
+  }
+
+  // Reload this class with the "divisor" property set to the input parameter.
+  // This allows the JIT to see q.DIVISOR as a final constant, and change
+  // any divisions or mod operations into multiplies.
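+  // (Each reloaded copy should therefore be compiled with the strength-reduced
+  // multiply/shift sequence for its constant divisor; checkI/checkL compare
+  // that against divi/modi and divl/modl, which take the divisor as a
+  // parameter and so presumably keep the generic divide instructions.)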
+  public static void test_divisor(int divisor,
+                                  URLClassLoader apploader) throws Exception {
+    System.setProperty("divisor", "" + divisor);
+    ClassLoader loader = new URLClassLoader(apploader.getURLs(),
+                                            apploader.getParent());
+    Class c = loader.loadClass("Test");
+    Runnable r = (Runnable)c.newInstance();
+    r.run();
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class cl = Class.forName("Test");
+    URLClassLoader apploader = (URLClassLoader)cl.getClassLoader();
+
+    // Test every divisor between -100 and 100.
+    for (int i = -100; i <= 100; i++) {
+      test_divisor(i, apploader);
+    }
+
+    // Try a few divisors outside the typical range.
+    // The values below have been observed in rt.jar.
+    test_divisor(101, apploader);
+    test_divisor(400, apploader);
+    test_divisor(1000, apploader);
+    test_divisor(3600, apploader);
+    test_divisor(9973, apploader);
+    test_divisor(86400, apploader);
+    test_divisor(1000000, apploader);
+  }
+
+}
--- a/test/compiler/6775880/Test.java	Sat Jan 31 17:19:42 2009 -0800
+++ b/test/compiler/6775880/Test.java	Fri Feb 27 15:13:00 2009 -0800
@@ -27,7 +27,7 @@
  * @bug 6775880
  * @summary EA +DeoptimizeALot: assert(mon_info->owner()->is_locked(),"object must be locked now")
  * @compile -source 1.4 -target 1.4 Test.java
- * @run main/othervm -server -Xbatch -XX:+DoEscapeAnalysis -XX:+DeoptimizeALot -XX:CompileCommand=exclude,java.lang.AbstractStringBuilder::append Test
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+DoEscapeAnalysis -XX:+DeoptimizeALot -XX:CompileCommand=exclude,java.lang.AbstractStringBuilder::append Test
  */
 
 public class Test {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6778657/Test.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6778657
+ * @summary Casts in SharedRuntime::f2i, f2l, d2i and d2l rely on undefined C++ behaviour
+ */
+
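+/*
+ * Doubling Integer/Long MIN_VALUE or MAX_VALUE pushes the float/double value
+ * out of the target type's range, so a correct narrowing conversion must
+ * saturate back to MIN_VALUE/MAX_VALUE (JLS 5.1.3).  The checks below fail
+ * if the runtime conversion helpers named in the bug summary do not do so.
+ */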
+public class Test {
+  public static void check_f2i(int expect) {
+    float check = expect;
+    check *= 2;
+    int actual = (int) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_f2l(long expect) {
+    float check = expect;
+    check *= 2;
+    long actual = (long) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_d2i(int expect) {
+    double check = expect;
+    check *= 2;
+    int actual = (int) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_d2l(long expect) {
+    double check = expect;
+    check *= 2;
+    long actual = (long) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void main(String[] args) {
+    check_f2i(Integer.MAX_VALUE);
+    check_f2i(Integer.MIN_VALUE);
+    check_f2l(Long.MAX_VALUE);
+    check_f2l(Long.MIN_VALUE);
+    check_d2i(Integer.MAX_VALUE);
+    check_d2i(Integer.MIN_VALUE);
+    check_d2l(Long.MAX_VALUE);
+    check_d2l(Long.MIN_VALUE);
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6795161/Test.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6795161
+ * @summary Escape analysis leads to data corruption
+ * @run main/othervm -server -Xcomp -XX:CompileOnly=Test -XX:+DoEscapeAnalysis Test
+ */
+
+class Test_Class_1 {
+    static String var_1;
+
+    static void badFunc(int size)
+    {
+        try {
+          for (int i = 0; i < 1; (new byte[size-i])[0] = 0, i++) {}
+        } catch (Exception e) {
+          // Keep the println below commented out; uncommenting it changes the
+          // compiled code enough that the bug no longer reproduces.
+          //System.out.println("Got exception: " + e);
+        }
+    }
+}
+
+public class Test {
+    static String var_1_copy = Test_Class_1.var_1;
+
+    static byte var_check;
+
+    public static void main(String[] args)
+    {
+        var_check = 1;
+
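+        // size == -1 makes "new byte[size-i]" throw NegativeArraySizeException,
+        // which badFunc swallows; with the escape-analysis bug that exception
+        // path apparently corrupted var_check, so main checks it is still 1.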
+        Test_Class_1.badFunc(-1);
+
+        System.out.println("EATester.var_check = " + Test.var_check + " (expected 1)\n");
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6795362/Test6795362.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6795362
+ * @summary 32bit server compiler leads to wrong results on solaris-x86
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6795362.sub Test6795362
+ */
+
+public class Test6795362 {
+    public static void main(String[] args)
+    {
+        sub();
+
+        if (var_bad != 0)
+            throw new InternalError(var_bad + " != 0");
+    }
+
+    static long var_bad = -1L;
+
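+    // Long shift counts are taken mod 64 (JLS 15.19), so ">>= 65" shifts by 1,
+    // leaving var_bad == -1; "-1 / 65" is then 0, which main verifies.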
+    static void sub()
+    {
+        var_bad >>= 65;
+        var_bad /= 65;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6799693/Test.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6799693
+ * @summary Server compiler leads to data corruption when expression throws an Exception
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test Test
+ */
+
+public class Test {
+   static int var_bad = 1;
+
+   public static void main(String[] args)
+   {
+      var_bad++;
+
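+      // "(byte)-1 << i" is always negative, so the allocation throws
+      // NegativeArraySizeException on the first iteration; with the compiler
+      // bug the increment of var_bad was apparently lost along that path.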
+      try {
+         for (int i = 0; i < 10; i++) (new byte[((byte)-1 << i)])[0]  = 0;
+      }
+      catch (Exception e) { System.out.println("Got " + e); }
+
+      System.out.println("Test.var_bad = " +  var_bad + " (expected 2)\n");
+   }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6800154/Test6800154.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6800154
+ * @summary Add comments to long_by_long_mulhi() for better understandability
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6800154.divcomp Test6800154
+ */
+
+import java.net.URLClassLoader;
+
+public class Test6800154 implements Runnable {
+    static final long[] DIVIDENDS = {
+        0,
+        1,
+        2,
+        1423487,
+        4444441,
+        4918923241323L,
+        -1,
+        -24351,
+        0x3333,
+        0x0000000080000000L,
+        0x7fffffffffffffffL,
+        0x8000000000000000L
+    };
+
+    static final long[] DIVISORS = {
+        1,
+        2,
+        17,
+        12342,
+        24123,
+        143444,
+        123444442344L,
+        -1,
+        -2,
+        -4423423234231423L,
+        0x0000000080000000L,
+        0x7fffffffffffffffL,
+        0x8000000000000000L
+    };
+
+    // Initialize DIVISOR so that it is final in this class.
+    static final long DIVISOR;
+
+    static {
+        long value = 0;
+        try {
+            value = Long.decode(System.getProperty("divisor"));
+        } catch (Throwable e) {
+        }
+        DIVISOR = value;
+    }
+
+    public static void main(String[] args) throws Exception
+    {
+        Class cl = Class.forName("Test6800154");
+        URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
+
+        // Iterate over all divisors.
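+        // Reloading the class with the "divisor" property set makes DIVISOR a
+        // compile-time constant in the fresh copy, so divcomp() presumably takes
+        // the constant-divisor path (long_by_long_mulhi) named in the summary.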
+        for (int i = 0; i < DIVISORS.length; i++) {
+            System.setProperty("divisor", "" + DIVISORS[i]);
+            ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
+            Class c = loader.loadClass("Test6800154");
+            Runnable r = (Runnable) c.newInstance();
+            r.run();
+        }
+    }
+
+    public void run()
+    {
+        // Iterate over all dividends.
+        for (int i = 0; i < DIVIDENDS.length; i++) {
+            long dividend = DIVIDENDS[i];
+
+            long expected = divint(dividend);
+            long result = divcomp(dividend);
+
+            if (result != expected)
+                throw new InternalError(dividend + " / " + DIVISOR + " failed: " + result + " != " + expected);
+        }
+    }
+
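+    // Only divcomp is compiled (-XX:CompileOnly=Test6800154.divcomp above), so
+    // divint provides the reference result computed without the
+    // constant-divisor optimization.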
+    static long divint(long a)  { return a / DIVISOR; }
+    static long divcomp(long a) { return a / DIVISOR; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6805724/Test6805724.java	Fri Feb 27 15:13:00 2009 -0800
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6805724
+ * @summary ModLNode::Ideal() generates functionally incorrect graph when divisor is any (2^k-1) constant.
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6805724.fcomp Test6805724
+ */
+
+import java.net.URLClassLoader;
+
+public class Test6805724 implements Runnable {
+    // Initialize DIVISOR so that it is final in this class.
+    static final long DIVISOR;  // 2^k-1 constant
+
+    static {
+        long value = 0;
+        try {
+            value = Long.decode(System.getProperty("divisor"));
+        } catch (Throwable t) {
+            // Swallow the failure so that the Class.forName() in main() can
+            // still load this class when the "divisor" property is not set.
+        }
+        DIVISOR = value;
+    }
+
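+    // Only fcomp is compiled (-XX:CompileOnly=Test6805724.fcomp above), so fint
+    // provides the reference value for each 2^k-1 modulus without ModLNode's
+    // constant-modulus transformation.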
+    static long fint(long x) {
+        return x % DIVISOR;
+    }
+
+    static long fcomp(long x) {
+        return x % DIVISOR;
+    }
+
+    public void run() {
+        long a = 0x617981E1L;
+
+        long expected = fint(a);
+        long result = fcomp(a);
+
+        if (result != expected)
+            throw new InternalError(result + " != " + expected);
+    }
+
+    public static void main(String args[]) throws Exception {
+        Class cl = Class.forName("Test6805724");
+        URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
+
+        // Iterate over all 2^k-1 divisors.
+        for (int k = 1; k < Long.SIZE; k++) {
+            long divisor = (1L << k) - 1;
+            System.setProperty("divisor", "" + divisor);
+            ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
+            Class c = loader.loadClass("Test6805724");
+            Runnable r = (Runnable) c.newInstance();
+            r.run();
+        }
+    }
+}