src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
rev 9988 : 8148328: aarch64: redundant lsr instructions in stub code.
Summary: avoid redundant lsr instructions in jbyte_arraycopy and jbyte_disjoint_arraycopy.
Reviewed-by: duke
Contributed-by: [email protected]
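The redundancy in question: copy_memory converts a byte adjustment into an element count with an lsr by exact_log2(granularity). For the jbyte stubs the element size is 1 byte, so the shift amount is 0 and the old code emitted a do-nothing "lsr rscratch2, rscratch2, #0". A minimal sketch of the generator change, abbreviated from the two listings below (not a complete stub):

    // Before: the shift is emitted unconditionally, even when it is a no-op.
    __ lsr(rscratch2, rscratch2, exact_log2(granularity));
    __ sub(count, count, rscratch2);

    // After: compute the shift once and emit the lsr only when it does something.
    int shift = exact_log2(granularity);   // 0 for jbyte, 1 jshort, 2 jint, 3 jlong
    if (shift > 0) {
      __ lsr(rscratch2, rscratch2, shift); // bytes -> elements
    }
    __ sub(count, count, rscratch2);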
------- Old version (before 8148328) -------
937 __ tbz(count, 0, Lbyte);
938 __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
939 __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
940 __ bind(Lbyte);
941 }
942 }
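// The tbz/ldrb/strb sequence above is the final, single-byte step of the
// small-copy tail: tbz branches past it when bit 0 of count is clear, so
// exactly one extra byte is moved when count is odd.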
944 Label copy_f, copy_b;
946 // All-singing all-dancing memory copy.
947 //
948 // Copy count units of memory from s to d. The size of a unit is
949 // step, which can be positive or negative depending on the direction
950 // of copy. If is_aligned is false, we align the source address.
951 //
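// In C terms the contract is roughly the following (a hedged sketch; the real
// routine additionally aligns s on a 2-word boundary and copies in 2-word
// chunks):
//
//   void copy_memory_sketch(char *s, char *d, size_t count, int step) {
//     size_t size = step < 0 ? -step : step;      // element size in bytes
//     if (step > 0) {                             // forward copy
//       for (size_t i = 0; i < count * size; i++) d[i] = s[i];
//     } else {                                    // backward copy (overlapping case)
//       for (size_t i = count * size; i-- > 0; )  d[i] = s[i];
//     }
//   }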
953 void copy_memory(bool is_aligned, Register s, Register d,
954 Register count, Register tmp, int step) {
955 copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
956 bool is_backwards = step < 0;
957 int granularity = uabs(step);
958 const Register t0 = r3, t1 = r4;
960 if (is_backwards) {
961 __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
962 __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
963 }
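// For the backward case the two lea instructions above advance s and d to one
// past the last element: s += count << exact_log2(|step|), i.e. count * |step|
// bytes, so the copy can walk downwards through the region.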
965 Label done, tail;
967 __ cmp(count, 16/granularity);
968 __ br(Assembler::LO, tail);
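// count is in elements, so 16/granularity is the number of elements that fit
// in 16 bytes; anything smaller than that is handed straight to the tail copy.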
970 // Now that we've got the small case out of the way, we can align the
971 // source address on a 2-word boundary.
973 Label aligned;
975 if (is_aligned) {
976 // We may have to adjust by 1 word to get s 2-word-aligned.
977 __ tbz(s, exact_log2(wordSize), aligned);
978 __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
979 __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
980 __ sub(count, count, wordSize/granularity);
981 } else {
982 if (is_backwards) {
983 __ andr(rscratch2, s, 2 * wordSize - 1);
984 } else {
985 __ neg(rscratch2, s);
986 __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
987 }
988 // rscratch2 is the byte adjustment needed to align s.
989 __ cbz(rscratch2, aligned);
990 __ lsr(rscratch2, rscratch2, exact_log2(granularity));
991 __ sub(count, count, rscratch2);
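// Worked example of the redundancy fixed by 8148328: in jbyte_arraycopy and
// jbyte_disjoint_arraycopy, granularity == 1, so exact_log2(granularity) == 0
// and the lsr above is "lsr rscratch2, rscratch2, #0" -- a shift by zero that
// changes nothing. For jshort/jint/jlong copies the shift amounts are 1, 2 and
// 3 respectively, and the instruction is genuinely needed.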
993 #if 0
994 // ?? This code is only correct for a disjoint copy. It may or
995 // may not make sense to use it in that case.
997 // Copy the first pair; s and d may not be aligned.
998 __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0));
999 __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0));
1001 // Align s and d, adjust count
1002 if (is_backwards) {
1003 __ sub(s, s, rscratch2);
1004 __ sub(d, d, rscratch2);
1005 } else {
1006 __ add(s, s, rscratch2);
1007 __ add(d, d, rscratch2);
1008 }
1009 #else
1010 copy_memory_small(s, d, rscratch2, rscratch1, step);
------- New version (rev 9988, with 8148328 applied) -------
937 __ tbz(count, 0, Lbyte);
938 __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
939 __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
940 __ bind(Lbyte);
941 }
942 }
944 Label copy_f, copy_b;
946 // All-singing all-dancing memory copy.
947 //
948 // Copy count units of memory from s to d. The size of a unit is
949 // step, which can be positive or negative depending on the direction
950 // of copy. If is_aligned is false, we align the source address.
951 //
953 void copy_memory(bool is_aligned, Register s, Register d,
954 Register count, Register tmp, int step) {
955 copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
956 bool is_backwards = step < 0;
957 int shift, granularity = uabs(step);
958 const Register t0 = r3, t1 = r4;
960 if (is_backwards) {
961 __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
962 __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
963 }
965 Label tail;
967 __ cmp(count, 16/granularity);
968 __ br(Assembler::LO, tail);
970 // Now that we've got the small case out of the way, we can align the
971 // source address on a 2-word boundary.
973 Label aligned;
975 if (is_aligned) {
976 // We may have to adjust by 1 word to get s 2-word-aligned.
977 __ tbz(s, exact_log2(wordSize), aligned);
978 __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
979 __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
980 __ sub(count, count, wordSize/granularity);
981 } else {
982 if (is_backwards) {
983 __ andr(rscratch2, s, 2 * wordSize - 1);
984 } else {
985 __ neg(rscratch2, s);
986 __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
987 }
988 shift = exact_log2(granularity);
989 // rscratch2 is the byte adjustment needed to align s.
990 __ cbz(rscratch2, aligned);
991 if (shift > 0) {
992 __ lsr(rscratch2, rscratch2, shift);
993 }
994 __ sub(count, count, rscratch2);
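// With the guard above, the byte-element stubs (shift == 0) no longer emit the
// useless "lsr rscratch2, rscratch2, #0": the byte adjustment in rscratch2 is
// already an element count, so only the sub is generated. Wider elements
// (shift 1, 2 or 3) still get the lsr.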
996 #if 0
997 // ?? This code is only correct for a disjoint copy. It may or
998 // may not make sense to use it in that case.
1000 // Copy the first pair; s and d may not be aligned.
1001 __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0));
1002 __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0));
1004 // Align s and d, adjust count
1005 if (is_backwards) {
1006 __ sub(s, s, rscratch2);
1007 __ sub(d, d, rscratch2);
1008 } else {
1009 __ add(s, s, rscratch2);
1010 __ add(d, d, rscratch2);
1011 }
1012 #else
1013 copy_memory_small(s, d, rscratch2, rscratch1, step);