Skip to content

Commit 04eaa4d

Browse files
authored
Merge pull request #19294 from pshipton/latin0.44.45
(0.45) Fix String creation to treat a modified UTF8 zero as ASCII
2 parents 3406f22 + 27edb76 commit 04eaa4d

File tree

2 files changed: 52 additions (+52) and 6 deletions (-6)

2 files changed: 52 additions (+52) and 6 deletions (-6)

runtime/gc_base/StringTable.cpp

Lines changed: 47 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -556,6 +556,7 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
556556
bool anonClassName = J9_ARE_ANY_BITS_SET(stringFlags, J9_STR_ANON_CLASS_NAME);
557557
bool internString = J9_ARE_ANY_BITS_SET(stringFlags, J9_STR_INTERN);
558558
UDATA unicodeLength = 0;
559+
UDATA zerosFound = 0;
559560

560561
Trc_MM_createJavaLangString_Entry(vmThread, length, data, stringFlags);
561562

@@ -596,6 +597,12 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
596597
} else {
597598
for (UDATA i = 0; i < length; ++i) {
598599
if (data[i] > 0x7F) {
600+
/* Check for 0 in modified UTF8. */
601+
if ((0xC0 == data[i]) && ((i + 1) < length) && (0x80 == data[i + 1])) {
602+
zerosFound += 1;
603+
i += 1;
604+
continue;
605+
}
599606
isASCII = false;
600607
if (compressStrings && (J2SE_VERSION(vm) >= J2SE_V17)) {
601608
U_8 *dataTmp = data + i;
@@ -623,7 +630,13 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
623630

624631
if (isASCII) {
625632
for (UDATA i = 0; i < length; ++i) {
626-
hash = (hash << 5) - hash + data[i];
633+
U_8 c = data[i];
634+
/* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
635+
if (0xC0 == c) {
636+
c = 0;
637+
i += 1;
638+
}
639+
hash = (hash << 5) - hash + c;
627640
}
628641
} else {
629642
hash = VM_VMHelpers::computeHashForUTF8(data, length);
@@ -653,7 +666,7 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
653666

654667
if (!isUnicode) {
655668
if (isASCII) {
656-
unicodeLength = length;
669+
unicodeLength = length - zerosFound;
657670
} else {
658671
UDATA tempLength = length;
659672
U_8 *tempData = data;
@@ -707,21 +720,35 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
707720
UDATA lastSlash = 0;
708721
if (translateSlashes) {
709722
if (isASCII) {
723+
UDATA storeIndex = 0;
710724
for (UDATA i = 0; i < length; ++i) {
711725
U_8 c = data[i];
712726
if ('/' == c) {
713727
lastSlash = i;
714728
c = '.';
729+
/* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
730+
} else if (0xC0 == c) {
731+
c = 0;
732+
i += 1;
715733
}
716-
J9JAVAARRAYOFBYTE_STORE(vmThread, charArray, i, c);
734+
J9JAVAARRAYOFBYTE_STORE(vmThread, charArray, storeIndex, c);
735+
storeIndex += 1;
717736
}
718737
} else {
719738
lastSlash = storeLatin1ByteArrayhelper(vmThread, data, length, charArray, true);
720739
}
721740
} else {
722741
if (isASCII) {
742+
UDATA storeIndex = 0;
723743
for (UDATA i = 0; i < length; ++i) {
724-
J9JAVAARRAYOFBYTE_STORE(vmThread, charArray, i, data[i]);
744+
U_8 c = data[i];
745+
/* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
746+
if (0xC0 == c) {
747+
c = 0;
748+
i += 1;
749+
}
750+
J9JAVAARRAYOFBYTE_STORE(vmThread, charArray, storeIndex, c);
751+
storeIndex += 1;
725752
}
726753
} else {
727754
lastSlash = storeLatin1ByteArrayhelper(vmThread, data, length, charArray, false);
@@ -738,17 +765,31 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
738765
UDATA lastSlash = 0;
739766
if (isASCII) {
740767
if (translateSlashes) {
768+
IDATA storeIndex = 0;
741769
for (UDATA i = 0; i < length; ++i) {
742770
U_8 c = data[i];
743771
if ('/' == c) {
744772
lastSlash = i;
745773
c = '.';
774+
/* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
775+
} else if (0xC0 == c) {
776+
c = 0;
777+
i += 1;
746778
}
747-
J9JAVAARRAYOFCHAR_STORE(vmThread, charArray, i, c);
779+
J9JAVAARRAYOFCHAR_STORE(vmThread, charArray, storeIndex, c);
780+
storeIndex += 1;
748781
}
749782
} else {
783+
UDATA storeIndex = 0;
750784
for (UDATA i = 0; i < length; ++i) {
751-
J9JAVAARRAYOFCHAR_STORE(vmThread, charArray, i, data[i]);
785+
U_8 c = data[i];
786+
/* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
787+
if (0xC0 == c) {
788+
c = 0;
789+
i += 1;
790+
}
791+
J9JAVAARRAYOFCHAR_STORE(vmThread, charArray, storeIndex, c);
792+
storeIndex += 1;
752793
}
753794
}
754795
} else {

test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1007,6 +1007,11 @@ public void test_lastIndexOf2() {
10071007
public void test_lastIndexOf3() {
10081008
AssertJUnit.assertTrue("Returned incorrect index", hw1.lastIndexOf("World") == 5);
10091009
AssertJUnit.assertTrue("Found String outside of index", hw1.lastIndexOf("HeKKKKKKKK") == -1);
1010+
1011+
/* test https://github.com/eclipse-openj9/openj9/issues/19273 */
1012+
String s1 = "a";
1013+
String s2 = "b";
1014+
AssertJUnit.assertTrue("Incorrect index of \\u0000", (s1 + "\u0000" + s2).lastIndexOf("\u0000") == 1);
10101015
}
10111016

10121017
/**

0 commit comments

Comments
 (0)