Mercurial > hg > gc-bench

/*
 * Copyright (c) 2017, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.openjdk.gcbench.runtime.reads;

import org.openjdk.jmh.annotations.*;

import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that walks an {@code int} array and hands every element to a
 * non-inlined, empty method.
 *
 * <p>The array length is controlled by the {@code size} parameter. Reference
 * results and the hot-loop disassembly for several collectors are recorded in
 * the comment at the bottom of this class.
 */
@State(Scope.Benchmark)
@Threads(1)
@Fork(1)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
public class ReadBarriersArrays {

    /** Number of elements in {@link #target}; supplied by JMH for each run. */
    @Param({"1", "1000", "1000000", "1000000000"})
    private int size;

    /** Array that the benchmark reads; allocated in {@link #setup()}. */
    int[] target;

    /**
     * Allocates a fresh, zero-filled array of {@code size} elements.
     */
    @Setup
    public void setup() {
        this.target = new int[this.size];
    }

    /**
     * Reads every element of {@link #target} in index order and passes it to
     * {@link #sink(int)}.
     *
     * <p>The array reference and its length are read once up front, which is
     * exactly what an enhanced {@code for} over an array does.
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public void plain() {
        final int[] values = target;
        final int length = values.length;
        for (int idx = 0; idx < length; idx++) {
            sink(values[idx]);
        }
    }

    /**
     * Intentionally empty consumer for the values read in {@link #plain()}.
     * It is excluded from inlining, so the loop performs a real call per element.
     *
     * @param value the array element just read; ignored
     */
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    private void sink(int value) {
        // Deliberately does nothing.
    }

    /*
       i7 4790K, 4.0 GHz, Linux x86_64, JDK 9 (Shenandoah, 2016-09-05)

            Benchmark                         Mode  Cnt  Score   Error  Units

            # Shenandoah
            ReadBarriersArrays.plain           1  avgt   25        0.005 ±      0.001  us/op
            ReadBarriersArrays.plain        1000  avgt   25        1.970 ±      0.003  us/op
            ReadBarriersArrays.plain     1000000  avgt   25     1869.969 ±      0.554  us/op
            ReadBarriersArrays.plain  1000000000  avgt   25  3772318.787 ± 114008.238  us/op

            # G1
            ReadBarriersArrays.plain           1  avgt   25        0.004 ±     0.001  us/op
            ReadBarriersArrays.plain        1000  avgt   25        1.993 ±     0.003  us/op
            ReadBarriersArrays.plain     1000000  avgt   25     1803.248 ±     2.122  us/op
            ReadBarriersArrays.plain  1000000000  avgt   25  1821469.162 ± 10974.715  us/op

            # Parallel
            ReadBarriersArrays.plain           1  avgt   25        0.004 ±     0.001  us/op
            ReadBarriersArrays.plain        1000  avgt   25        2.000 ±     0.006  us/op
            ReadBarriersArrays.plain     1000000  avgt   25     1817.009 ±    35.630  us/op
            ReadBarriersArrays.plain  1000000000  avgt   25  1825045.442 ±  9787.079  us/op

        In Shenandoah, the hottest loop looks like this:

             13.59%   12.98%  ↗  0x00007f33c95428a0: mov    (%rsp),%r9
                              │  0x00007f33c95428a4: mov    -0x8(%r9),%r10      ; <--- read barrier
              0.04%    0.01%  │  0x00007f33c95428a8: mov    %r9,(%rsp)
                              │  0x00007f33c95428ac: mov    0x10(%r10,%rbp,4),%edx ; array access
             36.49%   35.45%  │  0x00007f33c95428b1: mov    0x8(%rsp),%rsi
              0.03%           │  0x00007f33c95428b6: nop
                              │  0x00007f33c95428b7: callq  0x00007f33c1a80f80  ; call sink();
              0.52%    0.52%  │  0x00007f33c95428bc: inc    %ebp                ; increment and test loop counter
              0.01%    0.01%  │  0x00007f33c95428be: cmp    0x10(%rsp),%ebp
                              ╰  0x00007f33c95428c2: jl     0x00007f33c95428a0

        In G1 and Parallel it looks like this:

             14.51%   13.22%  ↗  0x00007fa49c6ceaa0: mov    (%rsp),%r10
              0.01%           │  0x00007fa49c6ceaa4: mov    0x10(%r10,%rbp,4),%edx  ; array access
             31.43%   32.02%  │  0x00007fa49c6ceaa9: mov    %r10,(%rsp)
              0.84%    0.79%  │  0x00007fa49c6ceaad: mov    0x8(%rsp),%rsi
              1.59%    1.65%  │  0x00007fa49c6ceab2: nop
                              │  0x00007fa49c6ceab3: callq  0x00007fa494c0bf80  ; call sink();
              5.25%    4.46%  │  0x00007fa49c6ceab8: inc    %ebp                ; increment and test loop counter
              0.05%    0.03%  │  0x00007fa49c6ceaba: cmp    0x10(%rsp),%ebp
              0.03%           ╰  0x00007fa49c6ceabe: jl     0x00007fa49c6ceaa0

        So, the difference is in the read barrier. It does not affect performance much.

        With the 1G array, Shenandoah nose-dives into excessive mark (?):

            ....[Hottest Methods (after inlining)]..............................................................
             37.40%    0.61%        libc-2.23.so  __memset_avx2
             23.92%   38.48%           libjvm.so  ParallelTaskTerminator::offer_termination
             12.42%   18.48%           libjvm.so  SpinPause
              9.59%   13.47%         C2, level 4  org.openjdk.shenandoah.reads.ReadBarriersArrays::plain, version 691
              4.31%    8.85%         C1, level 1  org.openjdk.shenandoah.reads.ReadBarriersArrays::sink, version 647
              2.62%    5.14%           libjvm.so  GenericTaskQueueSet<Padded<OverflowTaskQueue<ObjArrayTask, (MemoryType)5, 131072u>, 128ul>, (MemoryType)5>::peek
              1.74%    1.33%           libjvm.so  ShenandoahInitMarkRootsClosure::do_oop
              1.66%    0.21%           libjvm.so  ShenandoahHeapRegionSet::claim_next
              1.24%    1.90%           [unknown]  [unknown]
              1.06%    0.15%           libjvm.so  ResetBitmapTask::work
              0.79%    2.81%           libjvm.so  StringTable::possibly_parallel_oops_do
              0.72%    1.69%           libjvm.so  ShenandoahConcurrentMark::mark_and_push
              0.40%    0.20%           libjvm.so  ShenandoahHeapRegion::top_at_mark_start
              0.39%    0.36%           libjvm.so  BitMap::at_put_range
              0.26%    0.27%           libjvm.so  CMBitMap::clear_range
              0.15%    2.35%           libjvm.so  nmethod::oops_do
              0.09%    0.01%           libjvm.so  SCMConcurrentMarkingTask::work
              0.09%    0.22%           libjvm.so  CodeHeap::next_used
              0.07%    0.25%           libjvm.so  ShenandoahHeapRegion::init_top_at_mark_start
              0.06%    0.25%           libjvm.so  SafepointSynchronize::begin
              1.03%    2.83%  <...other 257 warm methods...>
            ........................................................................

        Setting -Xmx8g -Xms8g alleviates this problem.
     */

}
author	shade
date	Wed, 22 Nov 2017 15:58:02 +0100
parents	f8496889e1ac
children