view src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s @ 10923:f79e943d15a7

Merge jdk8u292-b05
author Andrew John Hughes <gnu_andrew@member.fsf.org>
date Sun, 25 Apr 2021 18:18:49 +0100
parents eeb08cfebded f57189b7648d
children
line wrap: on
line source

/*
 * Copyright (c) 2016, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
        // Entry points exported from this file.
        .global _Copy_conjoint_words
        .global _Copy_disjoint_words

// Register aliases used by both copy routines.
//
// s, d and count live in x0..x2 and are modified in place as the copy
// progresses.  t0..t7 (x3..x10) are temporaries holding up to eight
// words in flight between their load and their store.  None of these
// registers is saved or restored by the code in this file.
s       .req    x0              // source address
d       .req    x1              // destination address
count   .req    x2              // number of 8-byte words still to copy
t0      .req    x3
t1      .req    x4
t2      .req    x5
t3      .req    x6
t4      .req    x7
t5      .req    x8
t6      .req    x9
t7      .req    x10

        .align  6
// ---------------------------------------------------------------------
// _Copy_disjoint_words
//
// Copies 8-byte words from s to d in ascending address order.
//
// In:    s     (x0) = source address
//        d     (x1) = destination address
//        count (x2) = number of 8-byte words
// Uses:  s, d, count, t0..t7 (x0..x10) and the condition flags.
//        The stack is not touched.
//
// Outline:
//   1. If bit 3 of s is set, one word is copied so that the following
//      ldp pairs read from addresses with bit 3 clear.
//   2. Eight words are preloaded into t0..t7.  The main loop then
//      stores those eight words while loading the next eight.
//   3. The last eight preloaded words are stored ("drain") and the
//      remaining 0..7 words are copied by jumping into a table of
//      32-byte code fragments indexed by the (negative) count.
//   Steps 2 and 3 exist twice: the fwd_copy_* path is taken when bit 3
//   of d is clear, the unal_fwd_copy_* path when it is set.  The latter
//   begins each group with a single-word store so that its stp stores
//   land on addresses with bit 3 clear.
//
// NOTE(review): eight words are loaded and stored unconditionally after
// step 1, so count has to be at least 8 (9 when bit 3 of s is set).
// Presumably callers handle shorter copies themselves -- confirm.
// NOTE(review): the alignment reasoning assumes s and d are 8-byte
// aligned; nothing in this file checks that.
// ---------------------------------------------------------------------
_Copy_disjoint_words:
        // Ensure 2 word aligned: copy one leading word if bit 3 of s is set.
        tbz     s, #3, fwd_copy_aligned
        ldr     t0, [s], #8
        str     t0, [d], #8
        sub     count, count, #1

fwd_copy_aligned:
        // Preload the first eight words.  The last ldp writes back
        // s += 48, leaving s 16 bytes below the next unread word.
        ldp     t0, t1, [s, #0]
        ldp     t2, t3, [s, #16]
        ldp     t4, t5, [s, #32]
        ldp     t6, t7, [s, #48]!       // Source now biased by -16

        // Bit 3 of d set: use the variant that leads with a single str.
        tbnz    d, #3, unal_fwd_copy
        sub     d, d, #16               // and bias dest

        // count -= 16: eight words are already in registers and the loop
        // needs eight more to load.  A borrow means fewer than eight
        // words remain beyond the preloaded ones, so skip the loop.
        subs    count, count, #16
        blo     fwd_copy_drain

fwd_copy_again:
        // Store the eight words loaded previously while loading the next
        // eight.  Each register pair is stored before it is reloaded.
        // The final stp/ldp write back +64 to d and s respectively.
        prfm    pldl1keep, [s, #256]
        stp     t0, t1, [d, #16]
        ldp     t0, t1, [s, #16]
        stp     t2, t3, [d, #32]
        ldp     t2, t3, [s, #32]
        stp     t4, t5, [d, #48]
        ldp     t4, t5, [s, #48]
        stp     t6, t7, [d, #64]!
        ldp     t6, t7, [s, #64]!
        subs    count, count, #8
        bhs     fwd_copy_again          // loop while at least 8 more words follow

fwd_copy_drain:
        // Store the eight words still held in t0..t7.
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        stp     t6, t7, [d, #64]!

        // count is now -8..-1 for 0..7 words to copy
        // Jump to (label 0) + count * 32.  Each tail fragment below is
        // padded to 32 bytes by .align 5, so this selects the fragment
        // for the remaining word count.  s and d are still biased by -16,
        // hence the #16 base offset in every fragment.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #16]            // -7 == 1 word
        str     t0, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -6 = 2 words
        stp     t0, t1, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -5 = 3 words
        ldr     t2, [s, #32]
        stp     t0, t1, [d, #16]
        str     t2, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -4 = 4 words
        ldp     t2, t3, [s, #32]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -3 = 5 words
        ldp     t2, t3, [s, #32]
        ldr     t4, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        str     t4, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -2 = 6 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -1 = 7 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldr     t6, [s, #64]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        str     t6, [d, #64]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        // (The eight instructions above fill the 32-byte slot exactly,
        // so the shared ret sits at label 0.)
        .align  5
0:
        ret

unal_fwd_copy:
        // Destination has bit 3 set.  t0..t7 already hold the first
        // eight words and s is biased by -16 (see fwd_copy_aligned).
        // Bias dest so we only pre index on the last copy
        // (d ends up 8 bytes below the next word to be written).
        sub     d, d, #8
        subs    count, count, #16
        blo     unal_fwd_copy_drain

unal_fwd_copy_again:
        // Same pipelining as fwd_copy_again, but the eight words are
        // written as str / stp / stp / stp / str so that the stp stores
        // target d+16, d+32 and d+48.  Registers are therefore paired
        // as (t1,t2), (t3,t4), (t5,t6); each one is stored before the
        // ldp that overwrites it.
        prfm    pldl1keep, [s, #256]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        ldp     t0, t1, [s, #16]
        stp     t3, t4, [d, #32]
        ldp     t2, t3, [s, #32]
        stp     t5, t6, [d, #48]
        ldp     t4, t5, [s, #48]
        str     t7, [d, #64]!
        ldp     t6, t7, [s, #64]!
        subs    count, count, #8
        bhs     unal_fwd_copy_again

unal_fwd_copy_drain:
        // Store the eight words still held in t0..t7.
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        stp     t5, t6, [d, #48]
        str     t7, [d, #64]!

        // count is now -8..-1 for 0..7 words to copy
        // Same computed jump as in the aligned path.  Here s is biased
        // by -16 and d by -8, so fragments read from [s, #16] onwards
        // and write from [d, #8] onwards.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #16]            // -7 == 1 word
        str     t0, [d, #8]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -6 = 2 words
        str     t0, [d, #8]
        str     t1, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -5 = 3 words
        ldr     t2, [s, #32]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -4 = 4 words
        ldp     t2, t3, [s, #32]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        str     t3, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -3 = 5 words
        ldp     t2, t3, [s, #32]
        ldr     t4, [s, #48]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -2 = 6 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        str     t5, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -1 = 7 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldr     t6, [s, #64]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        stp     t5, t6, [d, #48]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        // (The eight instructions above fill the 32-byte slot exactly,
        // so the shared ret sits at label 0.)
        .align  5
0:
        ret

        .align  6
// ---------------------------------------------------------------------
// _Copy_conjoint_words
//
// Copies 8-byte words from s to d where the two ranges may overlap.
//
// In:    s     (x0) = source address
//        d     (x1) = destination address
//        count (x2) = number of 8-byte words
// Uses:  s, d, count, t0..t7 (x0..x10) and the condition flags.
//        The stack is not touched.
//
// If (d - s), taken as an unsigned value, is at least count * 8 the
// destination does not start inside the source range, so an ascending
// copy is safe and control is transferred to _Copy_disjoint_words.
// Otherwise the copy runs in descending address order, starting from
// the end of both ranges, with the same structure as the forward copy:
// optional single word to clear bit 3 of s, eight preloaded words, a
// pipelined store/load loop, a drain, and a computed jump into a table
// of 32-byte fragments for the last 0..7 words.  The bwd_copy_* path is
// used when bit 3 of d is clear, unal_bwd_copy_* when it is set.
//
// NOTE(review): eight words are loaded and stored unconditionally on
// the backward path, so count has to be at least 8 (9 when bit 3 of the
// end-of-source address is set).  Presumably callers handle shorter
// copies themselves -- confirm.
// NOTE(review): the alignment reasoning assumes s and d are 8-byte
// aligned; nothing in this file checks that.
// ---------------------------------------------------------------------
_Copy_conjoint_words:
        // Unsigned (d - s) >= count * 8  =>  forward copy is safe.
        sub     t0, d, s
        cmp     t0, count, lsl #3
        bhs     _Copy_disjoint_words

        // Point s and d one past the last word of their ranges.
        add     s, s, count, lsl #3
        add     d, d, count, lsl #3

        // Ensure 2 word aligned: copy one trailing word if bit 3 of s is set.
        tbz     s, #3, bwd_copy_aligned
        ldr     t0, [s, #-8]!
        str     t0, [d, #-8]!
        sub     count, count, #1

bwd_copy_aligned:
        // Preload the last eight words.  The final ldp writes back
        // s -= 64, so s points just above the next unread word and no
        // extra bias is needed on this path.
        ldp     t0, t1, [s, #-16]
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldp     t6, t7, [s, #-64]!

        // Bit 3 of d set: use the variant that leads with a single str.
        tbnz    d, #3, unal_bwd_copy

        // count -= 16: eight words are already in registers and the loop
        // needs eight more to load.  A borrow skips the loop.
        subs    count, count, #16
        blo     bwd_copy_drain

bwd_copy_again:
        // Store the eight words loaded previously while loading the next
        // (lower) eight.  Each pair is stored before it is reloaded.
        // prfum is required here: the offset is negative, which the
        // scaled unsigned-offset form of prfm cannot encode.
        prfum   pldl1keep, [s, #-256]
        stp     t0, t1, [d, #-16]
        ldp     t0, t1, [s, #-16]
        stp     t2, t3, [d, #-32]
        ldp     t2, t3, [s, #-32]
        stp     t4, t5, [d, #-48]
        ldp     t4, t5, [s, #-48]
        stp     t6, t7, [d, #-64]!
        ldp     t6, t7, [s, #-64]!
        subs    count, count, #8
        bhs     bwd_copy_again          // loop while at least 8 more words follow

bwd_copy_drain:
        // Store the eight words still held in t0..t7.
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        stp     t6, t7, [d, #-64]!

        // count is now -8..-1 for 0..7 words to copy
        // Jump to (label 0) + count * 32; each fragment below occupies
        // one 32-byte slot thanks to .align 5.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #-8]            // -7 == 1 word
        str     t0, [d, #-8]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -6 = 2 words
        stp     t0, t1, [d, #-16]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -5 = 3 words
        ldr     t2, [s, #-24]
        stp     t0, t1, [d, #-16]
        str     t2, [d, #-24]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -4 = 4 words
        ldp     t2, t3, [s, #-32]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -3 = 5 words
        ldp     t2, t3, [s, #-32]
        ldr     t4, [s, #-40]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        str     t4, [d, #-40]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -2 = 6 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -1 = 7 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldr     t6, [s, #-56]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        str     t6, [d, #-56]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        // (The eight instructions above fill the 32-byte slot exactly,
        // so the shared ret sits at label 0.)
        .align  5
0:
        ret

unal_bwd_copy:
        // Destination has bit 3 set.  t0..t7 already hold the last
        // eight words; in descending address order they are
        // t1, t0, t3, t2, t5, t4, t7, t6.
        subs    count, count, #16
        blo     unal_bwd_copy_drain

unal_bwd_copy_again:
        // Same pipelining as bwd_copy_again, but the eight words are
        // written as str / stp / stp / stp / str so that the stp stores
        // target d-24, d-40 and d-56.  Registers are therefore paired
        // across ldp boundaries as (t3,t0), (t5,t2), (t7,t4); each one
        // is stored before the ldp that overwrites it.
        // prfum (not prfm) is used because the prefetch offset is
        // negative; this matches bwd_copy_again.
        prfum   pldl1keep, [s, #-256]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        ldp     t0, t1, [s, #-16]
        stp     t5, t2, [d, #-40]
        ldp     t2, t3, [s, #-32]
        stp     t7, t4, [d, #-56]
        ldp     t4, t5, [s, #-48]
        str     t6, [d, #-64]!
        ldp     t6, t7, [s, #-64]!
        subs    count, count, #8
        bhs     unal_bwd_copy_again

unal_bwd_copy_drain:
        // Store the eight words still held in t0..t7.
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        stp     t7, t4, [d, #-56]
        str     t6, [d, #-64]!

        // count is now -8..-1 for 0..7 words to copy
        // Same computed jump as in the aligned backward path.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #-8]            // -7 == 1 word
        str     t0, [d, #-8]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -6 = 2 words
        str     t1, [d, #-8]
        str     t0, [d, #-16]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -5 = 3 words
        ldr     t2, [s, #-24]
        str     t1, [d, #-8]
        stp     t2, t0, [d, #-24]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -4 = 4 words
        ldp     t2, t3, [s, #-32]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        str     t2, [d, #-32]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -3 = 5 words
        ldp     t2, t3, [s, #-32]
        ldr     t4, [s, #-40]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t4, t2, [d, #-40]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -2 = 6 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        str     t4, [d, #-48]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -1 = 7 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldr     t6, [s, #-56]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        stp     t6, t4, [d, #-56]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        // (The eight instructions above fill the 32-byte slot exactly,
        // so the shared ret sits at label 0.)
        .align  5
0:
        ret