@@ -556,6 +556,7 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
556
556
bool anonClassName = J9_ARE_ANY_BITS_SET (stringFlags, J9_STR_ANON_CLASS_NAME);
557
557
bool internString = J9_ARE_ANY_BITS_SET (stringFlags, J9_STR_INTERN);
558
558
UDATA unicodeLength = 0 ;
559
+ UDATA zerosFound = 0 ;
559
560
560
561
Trc_MM_createJavaLangString_Entry (vmThread, length, data, stringFlags);
561
562
@@ -596,6 +597,12 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
596
597
} else {
597
598
for (UDATA i = 0 ; i < length; ++i) {
598
599
if (data[i] > 0x7F ) {
600
+ /* Check for the modified UTF8 encoding of 0, i.e. the two-byte sequence 0xC0 0x80. */
601
+ if ((0xC0 == data[i]) && ((i + 1 ) < length) && (0x80 == data[i + 1 ])) {
602
+ zerosFound += 1 ;
603
+ i += 1 ;
604
+ continue ;
605
+ }
599
606
isASCII = false ;
600
607
if (compressStrings && (J2SE_VERSION (vm) >= J2SE_V17)) {
601
608
U_8 *dataTmp = data + i;
@@ -623,7 +630,13 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
623
630
624
631
if (isASCII) {
625
632
for (UDATA i = 0 ; i < length; ++i) {
626
- hash = (hash << 5 ) - hash + data[i];
633
+ U_8 c = data[i];
634
+ /* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
635
+ if (0xC0 == c) {
636
+ c = 0 ;
637
+ i += 1 ;
638
+ }
639
+ hash = (hash << 5 ) - hash + c;
627
640
}
628
641
} else {
629
642
hash = VM_VMHelpers::computeHashForUTF8 (data, length);
@@ -653,7 +666,7 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
653
666
654
667
if (!isUnicode) {
655
668
if (isASCII) {
656
- unicodeLength = length;
669
+ unicodeLength = length - zerosFound ;
657
670
} else {
658
671
UDATA tempLength = length;
659
672
U_8 *tempData = data;
@@ -707,21 +720,35 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
707
720
UDATA lastSlash = 0 ;
708
721
if (translateSlashes) {
709
722
if (isASCII) {
723
+ UDATA storeIndex = 0 ;
710
724
for (UDATA i = 0 ; i < length; ++i) {
711
725
U_8 c = data[i];
712
726
if (' /' == c) {
713
727
lastSlash = i;
714
728
c = ' .' ;
729
+ /* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
730
+ } else if (0xC0 == c) {
731
+ c = 0 ;
732
+ i += 1 ;
715
733
}
716
- J9JAVAARRAYOFBYTE_STORE (vmThread, charArray, i, c);
734
+ J9JAVAARRAYOFBYTE_STORE (vmThread, charArray, storeIndex, c);
735
+ storeIndex += 1 ;
717
736
}
718
737
} else {
719
738
lastSlash = storeLatin1ByteArrayhelper (vmThread, data, length, charArray, true );
720
739
}
721
740
} else {
722
741
if (isASCII) {
742
+ UDATA storeIndex = 0 ;
723
743
for (UDATA i = 0 ; i < length; ++i) {
724
- J9JAVAARRAYOFBYTE_STORE (vmThread, charArray, i, data[i]);
744
+ U_8 c = data[i];
745
+ /* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
746
+ if (0xC0 == c) {
747
+ c = 0 ;
748
+ i += 1 ;
749
+ }
750
+ J9JAVAARRAYOFBYTE_STORE (vmThread, charArray, storeIndex, c);
751
+ storeIndex += 1 ;
725
752
}
726
753
} else {
727
754
lastSlash = storeLatin1ByteArrayhelper (vmThread, data, length, charArray, false );
@@ -738,17 +765,31 @@ j9gc_createJavaLangString(J9VMThread *vmThread, U_8 *data, UDATA length, UDATA s
738
765
UDATA lastSlash = 0 ;
739
766
if (isASCII) {
740
767
if (translateSlashes) {
768
+ IDATA storeIndex = 0 ;
741
769
for (UDATA i = 0 ; i < length; ++i) {
742
770
U_8 c = data[i];
743
771
if (' /' == c) {
744
772
lastSlash = i;
745
773
c = ' .' ;
774
+ /* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
775
+ } else if (0xC0 == c) {
776
+ c = 0 ;
777
+ i += 1 ;
746
778
}
747
- J9JAVAARRAYOFCHAR_STORE (vmThread, charArray, i, c);
779
+ J9JAVAARRAYOFCHAR_STORE (vmThread, charArray, storeIndex, c);
780
+ storeIndex += 1 ;
748
781
}
749
782
} else {
783
+ UDATA storeIndex = 0 ;
750
784
for (UDATA i = 0 ; i < length; ++i) {
751
- J9JAVAARRAYOFCHAR_STORE (vmThread, charArray, i, data[i]);
785
+ U_8 c = data[i];
786
+ /* Look for the start of the modified UTF8 sequence for 0, which is validated when isASCII is set. */
787
+ if (0xC0 == c) {
788
+ c = 0 ;
789
+ i += 1 ;
790
+ }
791
+ J9JAVAARRAYOFCHAR_STORE (vmThread, charArray, storeIndex, c);
792
+ storeIndex += 1 ;
752
793
}
753
794
}
754
795
} else {
0 commit comments