और मैंने पैच किया, मैंने एसजे
अगेन, फिर से, फिर से पैच किया ...
ओह, मैं आपसे कैसे थक गया
मेरा प्रयास नंबर पांच है।
मेरे सिर में घूम रहा है
यह छोटासंकुचित लाइनों को लाने की कोशिश के StringJoiner
साथ-साथ मेरे रास्ते में आने वाली कठिनाइयों के बारे में एक शानदार कहानी । चेतावनी: विच्छेदन और आंतों के अंदर, मॉनिटर से बच्चों और प्रभावशाली व्यक्तियों को हटा दें।
समस्या का निरूपण
यदि आप एक जावा डेवलपर हैं, तो आप संभवतः संकुचित लाइनों ( जेईपी 254 ) के बारे में जानते हैं जो जावा 9 में दिखाई देता है। यह अत्यंत सरल है कि इस सुधार को निम्नानुसार वर्णित किया गया है: पुराने संस्करणों में (8 समावेशी तक) java.lang.String
लाइन की सामग्री के अंदर के रूप में प्रस्तुत किए गए थे char[]
, जैसे - बाद में byte[]
, - जो स्ट्रिंग्स द्वारा मेमोरी की खपत को कम करने की अनुमति देता है, जिनमें से सभी अक्षर ASCII के हैं (यानी, 1 बाइट उनका प्रतिनिधित्व करने के लिए पर्याप्त है)। अधिक विवरण और एक सुलभ रूप में यहां स्थापित किया गया है । इसके अलावा परिवर्तन के दौर से गुजर रहा है java.lang.AbstractStringBuilder
, जिनमें से वारिस हैं java.lang.StringBuilder
और java.lang.StringBuffer
।
लेकिन java.util.StringJoiner
-थ के साथ , सब कुछ अलग है। जावा 9 से शुरू करते हुए , अंदर एक तार की एक सरणी होती है जिसे एक विधि में विभाजक के माध्यम से समतल किया जाता है toString()
:
String delimiter = this.delimiter;
char[] chars = new char[this.len + addLen];
int k = getChars(this.prefix, chars, 0);
if (size > 0) {
k += getChars(elts[0], chars, k);
for(int i = 1; i < size; ++i) {
k += getChars(delimiter, chars, k);
k += getChars(elts[i], chars, k);
}
}
k += getChars(this.suffix, chars, k);
return new String(chars);
, ASCII ( 2 ), . ? : byte[]
char[]
( 0), : . . .
, StringJoiner
(JDK 14, ):
public final class StringJoiner {
private final String prefix;
private final String delimiter;
private final String suffix;
private String[] elts;
private int size;
private int len;
public StringJoiner(CharSequence delimiter,
CharSequence prefix,
CharSequence suffix) {
this.prefix = prefix.toString();
this.delimiter = delimiter.toString();
this.suffix = suffix.toString();
}
public StringJoiner add(CharSequence newElement) {
final String elt = String.valueOf(newElement);
if (elts == null) {
elts = new String[8];
} else {
if (size == elts.length)
elts = Arrays.copyOf(elts, 2 * size);
len += delimiter.length();
}
len += elt.length();
elts[size++] = elt;
return this;
}
}
: , len
, — size
. toString()
. - , . 2 : ( char[]
) "" ( byte[]
) : allLatin1
:
public final class StringJoiner {
private boolean allLatin1;
public StringJoiner(CharSequence delimiter,
CharSequence prefix,
CharSequence suffix) {
this.allLatin1 = this.prefix.isLatin1() && this.delimiter.isLatin1() && this.suffix.isLatin1();
}
public StringJoiner add(CharSequence newElement) {
final String elt = String.valueOf(newElement);
this.allLatin1 &= elt.isLatin1();
return this;
}
}
, :). toString()
:
@Override
public String toString() {
final String[] elts = this.elts;
if (elts == null && emptyValue != null) {
return emptyValue;
}
final int size = this.size;
final int addLen = prefix.length() + suffix.length();
if (addLen == 0) {
compactElts();
return size == 0 ? "" : elts[0];
}
if (allLatin1) {
return bytesToString(elts, size, addLen);
}
return charsToString(elts, size, addLen);
}
charsToString()
, - bytesToString()
, byte[]
:
private String bytesToString(String[] elts, int size, int addLen) {
final byte[] bytes = new byte[len + addLen];
int k = getBytes(prefix, bytes, 0);
if (size > 0) {
final String delimiter = this.delimiter;
k += getBytes(elts[0], bytes, k);
for (int i = 1; i < size; i++) {
k += getBytes(delimiter, bytes, k);
k += getBytes(elts[i], bytes, k);
}
}
getBytes(suffix, bytes, k);
return new String(bytes);
}
@SuppressWarnings("deprecation")
private static int getBytes(String s, byte[] bytes, int start) {
int len = s.length();
s.getBytes(0, len, bytes, start);
return len;
}
— char[]
byte[]
. , :
public final class String {
boolean isLatin1() {
return COMPACT_STRINGS && coder == LATIN1;
}
}
, StringJoiner
java.util
, String.isLatin1()
java.lang
. — :
public final class String {
private boolean isLatin1() {
return COMPACT_STRINGS && coder == LATIN1;
}
}
, ?
public final class String {
public boolean isLatin1() {
return COMPACT_STRINGS && coder == LATIN1;
}
}
, , . , core-libs-dev ( ).
?(, ), latin = true ( latin1, ):
count length Original Patched Units
sj 1 1 26.7 ± 1.3 38.2 ± 1.1 ns/op !
sj 1 5 27.4 ± 0.0 40.5 ± 2.2 ns/op !
sj 1 10 29.6 ± 1.9 38.4 ± 1.9 ns/op !
sj 1 100 61.1 ± 6.9 47.6 ± 0.6 ns/op
sj 5 1 91.1 ± 6.7 83.6 ± 2.0 ns/op
sj 5 5 96.1 ± 10.7 85.6 ± 1.1 ns/op
sj 5 10 105.5 ± 14.3 84.7 ± 1.1 ns/op
sj 5 100 266.6 ± 30.1 139.6 ± 14.0 ns/op
sj 10 1 190.7 ± 23.0 162.0 ± 2.9 ns/op
sj 10 5 200.0 ± 16.9 167.5 ± 11.0 ns/op
sj 10 10 216.4 ± 12.4 164.8 ± 1.7 ns/op
sj 10 100 545.3 ± 49.7 282.2 ± 12.0 ns/op
sj 100 1 1467.0 ± 90.3 1302.0 ± 18.5 ns/op
sj 100 5 1491.8 ± 166.2 1493.0 ± 135.4 ns/op
sj 100 10 1768.8 ± 160.6 1760.8 ± 111.4 ns/op
sj 100 100 3654.3 ± 113.1 3120.9 ± 175.9 ns/op
sj:·gc.alloc.rate.norm 1 1 120.0 ± 0.0 120.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 1 5 128.0 ± 0.0 120.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 1 10 144.0 ± 0.0 136.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 1 100 416.0 ± 0.0 312.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 5 1 144.0 ± 0.0 136.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 5 5 200.0 ± 0.0 168.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 5 10 272.0 ± 0.0 216.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 5 100 1632.0 ± 0.0 1128.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 10 1 256.0 ± 0.0 232.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 10 5 376.0 ± 0.0 312.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 10 10 520.0 ± 0.0 408.0 ± 0.0 B/op
sj:·gc.alloc.rate.norm 10 100 3224.1 ± 0.0 2216.1 ± 0.0 B/op
sj:·gc.alloc.rate.norm 100 1 1760.2 ± 14.9 1544.2 ± 0.0 B/op
sj:·gc.alloc.rate.norm 100 5 2960.3 ± 14.9 2344.2 ± 0.0 B/op
sj:·gc.alloc.rate.norm 100 10 4440.4 ± 0.0 3336.3 ± 0.0 B/op
sj:·gc.alloc.rate.norm 100 100 31449.3 ± 12.2 21346.7 ± 14.7 B/op
, 1 , : , . "" ( , ).
, , . : String.isLatin1()
, - :
package java.lang;
public class StringHelper {
public static boolean isLatin1(String str) {
return str.isLatin1();
}
}
:
You can not change the implementation anymore, by example if instead of having a split between latin1 and non latin1, we decide in the future to split between utf8 and non utf8.
If you want to optimize StringJoiner, the best way to do it is to use the shared secret mechanism so a java.util class can see implementation details of a java.lang class without exposing those details publicly. As an example, take a look to EnumSet and its implementations.
: , , . latin1 / latin1, (, UTF-8 / UTF-8). — SharedSecrets
.
! !
— jdk.internal.access.SharedSecrets
( jdk.internal.misc.SharedSecrets
). :
, , , "" , , :
yes!
crossing package boundary in a non public way is not free, but given that StringJoiner is used quite often (directly or indirectly using Collectors.joining()), it may worth the cost.
. , "" :
package jdk.internal.access;
public interface JavaLangStringAccess {
boolean isLatin1(String str);
}
( ):
class StringAccess implements jdk.internal.access.JavaLangStringAccess {
static {
SharedSecrets.setJavaLangStringAccess(new StringAccess());
}
private final Method isLatin1;
StringAccess() {
this.isLatin1 = initIsLatin1Method();
}
@Override
public boolean isLatin1(String str) {
try {
return (boolean) isLatin1.invoke(str);
} catch (IllegalAccessException | InvocationTargetException e) {
throw new RuntimeException(e);
}
}
private static Method initIsLatin1Method() {
try {
Method isLatin1 = String.class.getDeclaredMethod("isLatin1");
isLatin1.setAccessible(true);
return isLatin1;
} catch (NoSuchMethodException e) {
throw new Error(e);
}
}
}
SharedSecrets
( StringAccess
):
public class SharedSecrets {
private static final Unsafe unsafe = Unsafe.getUnsafe();
private static JavaLangStringAccess javaLangStringAccess;
public static JavaLangStringAccess getJavaLangStringAccess() {
if (javaLangStringAccess == null) {
unsafe.ensureClassInitialized(StringAccess.class);
}
return javaLangStringAccess;
}
public static void setJavaLangStringAccess(JavaLangStringAccess stringAccess) {
javaLangStringAccess = stringAccess;
}
}
, StringJoiner.add()
, JavaLangStringAccess.isLatin1()
:
public final class StringJoiner {
private boolean allLatin1;
public StringJoiner add(CharSequence newElement) {
final String elt = String.valueOf(newElement);
this.allLatin1 &= isLatin1(elt);
return this;
}
private static boolean isLatin1(String str) {
return SharedSecrets.getJavaLangStringAccess().isLatin1(str);
}
}
, , — . , , . . "". , :
public final class StringJoiner {
private boolean allLatin1;
public StringJoiner(CharSequence delimiter,
CharSequence prefix,
CharSequence suffix) {
this.allLatin1 = isLatin1(this.prefix) && isLatin1(this.delimiter) && isLatin1(this.suffix);
}
public StringJoiner add(CharSequence newElement) {
final String elt = String.valueOf(newElement);
this.allLatin1 &= isLatin1(elt);
return this;
}
}
, 10 . allLatin1
false
, , . char[]
.
, : allLatin1
toString()
. false
, :
private static boolean allLatin1(String[] strings, int size) {
for (int i = 0; i < size; i++) {
String str = strings[i];
if (!SharedSecrets.getJavaLangStringAccess().isLatin1(str)) {
return false;
}
}
return true;
}
private boolean psdLatin1() {
return SharedSecrets.getJavaLangStringAccess().isLatin1(delimiter)
&& SharedSecrets.getJavaLangStringAccess().isLatin1(prefix)
&& SharedSecrets.getJavaLangStringAccess().isLatin1(suffix)
}
toString()
:
@Override
public String toString() {
final String[] elts = this.elts;
if (elts == null && emptyValue != null) {
return emptyValue;
}
final int size = this.size;
final int addLen = prefix.length() + suffix.length();
if (addLen == 0) {
compactElts();
return size == 0 ? "" : elts[0];
}
if (psdLatin1() && allLatin1(elts, size)) {
return bytesToString(elts, size, addLen);
}
return charsToString(elts, size, addLen);
}
, :
- , ...Benchmark count latin len Original Patched
sj 1 true 1 26.9 ± 0.7 48.8 ± 2.2 ns/op !
sj 1 true 5 30.5 ± 1.0 46.1 ± 2.1 ns/op !
sj 1 true 10 31.2 ± 0.6 47.3 ± 1.3 ns/op !
sj 1 true 100 62.5 ± 3.3 79.9 ± 4.8 ns/op !
sj 5 true 1 78.2 ± 1.6 110.3 ± 2.9 ns/op !
sj 5 true 5 94.2 ± 8.7 116.6 ± 0.7 ns/op !
sj 5 true 10 95.3 ± 6.9 100.1 ± 0.4 ns/op
sj 5 true 100 188.0 ± 10.2 136.0 ± 0.4 ns/op
sj 10 true 1 160.3 ± 4.5 172.9 ± 0.8 ns/op
sj 10 true 5 169.0 ± 4.7 180.2 ± 9.1 ns/op
sj 10 true 10 205.7 ± 16.4 182.7 ± 1.1 ns/op
sj 10 true 100 366.5 ± 17.0 284.5 ± 3.1 ns/op
sj 100 true 1 1117.6 ± 11.1 2123.7 ± 11.1 ns/op !
sj 100 true 5 1270.7 ± 40.2 2163.6 ± 12.4 ns/op !
sj 100 true 10 1364.4 ± 14.0 2283.8 ± 16.1 ns/op !
sj 100 true 100 3592.9 ± 164.8 3535.2 ± 29.9 ns/op
sj 1 false 1 35.6 ± 1.2 59.1 ± 3.0 ns/op !
sj 1 false 5 39.3 ± 1.2 52.6 ± 2.5 ns/op !
sj 1 false 10 42.2 ± 1.6 53.6 ± 0.3 ns/op !
sj 1 false 100 70.5 ± 1.8 86.4 ± 0.4 ns/op !
sj 5 false 1 89.0 ± 3.5 102.2 ± 1.0 ns/op !
sj 5 false 5 87.6 ± 0.7 106.5 ± 1.2 ns/op !
sj 5 false 10 109.0 ± 5.6 116.5 ± 1.2 ns/op !
sj 5 false 100 324.0 ± 16.5 221.9 ± 0.5 ns/op
sj 10 false 1 183.9 ± 5.9 204.7 ± 5.5 ns/op !
sj 10 false 5 198.7 ± 9.7 202.4 ± 1.5 ns/op !
sj 10 false 10 196.7 ± 6.9 226.7 ± 6.4 ns/op !
sj 10 false 100 535.8 ± 2.3 553.0 ± 5.6 ns/op !
sj 100 false 1 1674.6 ± 122.1 1940.8 ± 16.2 ns/op !
sj 100 false 5 1791.9 ± 58.1 2158.1 ± 12.0 ns/op !
sj 100 false 10 2124.1 ± 193.3 2364.0 ± 25.2 ns/op !
sj 100 false 100 4323.4 ± 29.2 4675.5 ± 11.8 ns/op !
sj:·gc.a.r.n 1 true 1 120.0 ± 0.0 120.0 ± 0.0 B/op
sj:·gc.a.r.n 1 true 5 128.0 ± 0.0 120.0 ± 0.0 B/op
sj:·gc.a.r.n 1 true 10 144.0 ± 0.0 136.0 ± 0.0 B/op
sj:·gc.a.r.n 1 true 100 416.0 ± 0.0 312.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 1 144.0 ± 0.0 136.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 5 200.0 ± 0.0 168.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 10 272.0 ± 0.0 216.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 100 1632.0 ± 0.0 1128.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 1 256.0 ± 0.0 232.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 5 376.0 ± 0.0 316.8 ± 4.9 B/op
sj:·gc.a.r.n 10 true 10 520.0 ± 0.0 408.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 100 3224.1 ± 0.0 2236.9 ± 21.2 B/op
sj:·gc.a.r.n 100 true 1 1748.1 ± 4.0 1592.2 ± 0.0 B/op
sj:·gc.a.r.n 100 true 5 2948.2 ± 4.0 2392.3 ± 0.0 B/op
sj:·gc.a.r.n 100 true 10 4444.3 ± 4.0 3384.3 ± 0.0 B/op
sj:·gc.a.r.n 100 true 100 1441.4 ± 0.0 21385.4 ± 0.0 B/op
sj:·gc.a.r.n 1 false 1 144.0 ± 0.0 144.0 ± 0.0 B/op
sj:·gc.a.r.n 1 false 5 160.0 ± 0.0 160.0 ± 0.0 B/op
sj:·gc.a.r.n 1 false 10 184.0 ± 0.0 184.0 ± 0.0 B/op
sj:·gc.a.r.n 1 false 100 640.0 ± 0.0 640.0 ± 0.0 B/op
sj:·gc.a.r.n 5 false 1 184.0 ± 0.0 184.0 ± 0.0 B/op
sj:·gc.a.r.n 5 false 5 280.0 ± 0.0 280.0 ± 0.0 B/op
sj:·gc.a.r.n 5 false 10 400.0 ± 0.0 400.0 ± 0.0 B/op
sj:·gc.a.r.n 5 false 100 2664.1 ± 0.0 2664.1 ± 0.0 B/op
sj:·gc.a.r.n 10 false 1 320.0 ± 0.0 334.4 ± 7.4 B/op
sj:·gc.a.r.n 10 false 5 520.0 ± 0.0 520.0 ± 0.0 B/op
sj:·gc.a.r.n 10 false 10 760.0 ± 0.0 769.6 ± 6.5 B/op
sj:·gc.a.r.n 10 false 100 5264.2 ± 0.0 5273.8 ± 6.5 B/op
sj:·gc.a.r.n 100 false 1 2204.2 ± 4.0 2216.3 ± 0.0 B/op
sj:·gc.a.r.n 100 false 5 4196.3 ± 6.2 4216.4 ± 0.0 B/op
sj:·gc.a.r.n 100 false 10 6696.5 ± 5.4 6712.6 ± 0.0 B/op
sj:·gc.a.r.n 100 false 100 1702.0 ± 4.1 51714.4 ± 0.0 B/op
, , . , "" .
- lany . ( ):
Hello!
In many tests, I see little or no performance improvements. E.g.:
stringJoiner 100 10 1768.8 ±160.6 1760.8 ± 111.4 ns/op
How would you explain that this code change doesn't improve the performance for given count and length?
Also, you are using new String(bytes)
assuming that the platform charset is latin1-compatible. This is not always true, so your code would produce incorrect results depending on this setting. In general, decoding via charset is tons of extra work. Instead, I would experiment with pre-sized StringBuilder inside compactElts, instead of char[] array. While it does some extra checks, StringBuilder already can use the compact string representation, so if it's properly pre-sized, no extra memory will be allocated.
Finally, if you optimize for the case when X is true you should always benchmark what happens when X is false. It's great that in some cases we see a speedup for latin1 strings. But what if not all of the strings are latin1? Is there any performance degradation? If yes, can we tolerate it?
. , java.lang.String
, :
public final class String {
private final byte coder;
static final boolean COMPACT_STRINGS;
static {
COMPACT_STRINGS = true;
}
boolean isLatin1() {
return COMPACT_STRINGS && coder == LATIN1;
}
}
, ? -XX:-CompactStrings
String.COMPACT_STRINGS
( true
). , — String.COMPACT_STRINGS
false
. ,
public final class String {
boolean isLatin1() {
return COMPACT_STRINGS && coder == LATIN1;
}
}
. . isLatin1()
true
, . : bytesToString()
, , , , . :
In general, decoding via charset is tons of extra work. Instead, I would experiment with pre-sized StringBuilder inside compactElts, instead of char[] array. While it does some extra checks, StringBuilder already can use the compact string representation, so if it's properly pre-sized, no extra memory will be allocated.
— char[]
/ byte[]
, StringBuilder
-, java.lang
.
, :
@Override
public String toString() {
final String[] elts = this.elts;
if (elts == null && emptyValue != null) {
return emptyValue;
}
final int size = this.size;
final int addLen = prefix.length() + suffix.length();
if (addLen == 0) {
compactElts();
return size == 0 ? "" : elts[0];
}
StringBuilder sb = new StringBuilder(len + addLen).append(prefix);
if (size > 0) {
sb.append(elts[0]);
String delimiter = this.delimiter;
for (int i = 1; i < size; i++) {
sb.append(delimiter).append(elts[i]);
}
}
return sb.append(suffix).toString();
}
, . , :
public AbstractStringBuilder append(String str) {
if (str == null) {
return appendNull();
}
int len = str.length();
ensureCapacityInternal(count + len);
putStringAt(count, str);
count += len;
return this;
}
private final void putStringAt(int index, String str) {
if (getCoder() != str.coder()) {
inflate();
}
str.getBytes(value, index, coder);
}
. :
Benchmark count latin len Original StringBuilder
sj 1 true 1 26.9 ± 0.7 33.4 ± 0.2 ns/op !
sj 1 true 5 30.5 ± 1.0 33.0 ± 0.1 ns/op !
sj 1 true 10 31.2 ± 0.6 34.3 ± 0.3 ns/op !
sj 1 true 100 62.5 ± 3.3 44.4 ± 0.1 ns/op
sj 5 true 1 78.2 ± 1.6 87.8 ± 0.8 ns/op !
sj 5 true 5 94.2 ± 8.7 88.2 ± 0.9 ns/op
sj 5 true 10 95.3 ± 6.9 91.6 ± 0.6 ns/op
sj 5 true 100 188.0 ± 10.2 126.1 ± 0.7 ns/op
sj 10 true 1 160.3 ± 4.5 177.6 ± 0.8 ns/op !
sj 10 true 5 169.0 ± 4.7 179.4 ± 1.0 ns/op !
sj 10 true 10 205.7 ± 16.4 189.5 ± 1.2 ns/op
sj 10 true 100 366.5 ± 17.0 290.0 ± 0.8 ns/op
sj 100 true 1 1117.6 ± 11.1 1563.8 ± 2.8 ns/op !
sj 100 true 5 1270.7 ± 40.2 1592.4 ± 4.0 ns/op !
sj 100 true 10 1364.4 ± 14.0 1773.7 ± 57.7 ns/op !
sj 100 true 100 3592.9 ± 164.8 2899.2 ± 51.0 ns/op
sj 1 false 1 35.6 ± 1.2 52.7 ± 1.2 ns/op !
sj 1 false 5 39.3 ± 1.2 54.4 ± 1.6 ns/op !
sj 1 false 10 42.2 ± 1.6 52.2 ± 1.0 ns/op !
sj 1 false 100 70.5 ± 1.8 78.6 ± 1.2 ns/op !
sj 5 false 1 89.0 ± 3.5 116.3 ± 3.8 ns/op !
sj 5 false 5 87.6 ± 0.7 115.2 ± 2.9 ns/op !
sj 5 false 10 109.0 ± 5.6 126.5 ± 0.5 ns/op !
sj 5 false 100 324.0 ± 16.5 288.9 ± 0.5 ns/op
sj 10 false 1 183.9 ± 5.9 261.2 ± 7.7 ns/op !
sj 10 false 5 198.7 ± 9.7 253.3 ± 6.7 ns/op !
sj 10 false 10 196.7 ± 6.9 274.3 ± 7.0 ns/op !
sj 10 false 100 535.8 ± 2.3 677.3 ± 6.4 ns/op !
sj 100 false 1 1674.6 ± 122.1 2212.5 ± 32.2 ns/op !
sj 100 false 5 1791.9 ± 58.1 2492.8 ± 30.9 ns/op !
sj 100 false 10 2124.1 ± 193.3 2611.7 ± 17.5 ns/op !
sj 100 false 100 4323.4 ± 29.2 5501.3 ± 21.1 ns/op !
sj:·gc.a.r.n 1 true 1 120.0 ± 0.0 144.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 true 5 128.0 ± 0.0 144.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 true 10 144.0 ± 0.0 160.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 true 100 416.0 ± 0.0 336.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 1 144.0 ± 0.0 160.0 ± 0.0 B/op !
sj:·gc.a.r.n 5 true 5 200.0 ± 0.0 192.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 10 272.0 ± 0.0 240.0 ± 0.0 B/op
sj:·gc.a.r.n 5 true 100 1632.0 ± 0.0 1152.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 1 256.0 ± 0.0 256.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 5 376.0 ± 0.0 336.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 10 520.0 ± 0.0 432.0 ± 0.0 B/op
sj:·gc.a.r.n 10 true 100 3224.1 ± 0.0 2240.1 ± 0.0 B/op
sj:·gc.a.r.n 100 true 1 1748.1 ± 4.0 1568.2 ± 0.0 B/op
sj:·gc.a.r.n 100 true 5 2948.2 ± 4.0 2368.2 ± 0.0 B/op
sj:·gc.a.r.n 100 true 10 4444.3 ± 4.0 3364.3 ± 4.0 B/op
sj:·gc.a.r.n 100 true 100 31441.4 ± 0.0 21365.1 ± 4.1 B/op
sj:·gc.a.r.n 1 false 1 144.0 ± 0.0 192.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 false 5 160.0 ± 0.0 208.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 false 10 184.0 ± 0.0 240.0 ± 0.0 B/op !
sj:·gc.a.r.n 1 false 100 640.0 ± 0.0 784.0 ± 0.0 B/op !
sj:·gc.a.r.n 5 false 1 184.0 ± 0.0 240.0 ± 0.0 B/op !
sj:·gc.a.r.n 5 false 5 280.0 ± 0.0 349.6 ± 2.4 B/op !
sj:·gc.a.r.n 5 false 10 400.0 ± 0.0 496.0 ± 0.0 B/op !
sj:·gc.a.r.n 5 false 100 2664.1 ± 0.0 3216.1 ± 0.0 B/op !
sj:·gc.a.r.n 10 false 1 320.0 ± 0.0 384.0 ± 0.0 B/op !
sj:·gc.a.r.n 10 false 5 520.0 ± 0.0 624.0 ± 0.0 B/op !
sj:·gc.a.r.n 10 false 10 760.0 ± 0.0 912.0 ± 0.0 B/op !
sj:·gc.a.r.n 10 false 100 5264.2 ± 0.0 6320.3 ± 0.0 B/op !
sj:·gc.a.r.n 100 false 1 2204.2 ± 4.0 2436.3 ± 6.8 B/op !
sj:·gc.a.r.n 100 false 5 4196.3 ± 6.2 4832.4 ± 6.6 B/op !
sj:·gc.a.r.n 100 false 10 6696.5 ± 5.4 7844.7 ± 4.0 B/op !
sj:·gc.a.r.n 100 false 100 51702.0 ± 4.1 61838.6 ± 6.2 B/op !
, . ( ), .
, . String.isLatin1()
, — , StringBuilder
- , . ? , .
— JEP 254:
As a point of interest, some investigation of updating StringJoiner for CompactStrings was done a while back.
See https://bugs.openjdk.java.net/browse/JDK-8148937
2016 :
This is seems to be a performance optimization, but it clashes with Compact Strings which now have to re-compress the resulting char[] array into byte[]. We may want to extend this mechanism by figuring out the coders for arguments, and creating byte[] with appropriate coder, then using a private String constructor that does not re-compress.
. :
1) StringJoiner
- SharedSecrets
( 89 "")
2) . char[]
, / .
3) JavaLangAccess
, , . . SharedSecrets
-> JavaLangAccess
-> java.lang.System
, String.isLatin1()
System
!
, , , :
public interface JavaLangAccess {
boolean isLatin1(String str);
}
public final class System {
public boolean isLatin1(String str) {
return str.isLatin1();
}
}
. . :
In general, decoding via charset is tons of extra work.
, new String(byte[])
:
private String bytesToString(String[] elts, int size, int addLen) {
final byte[] bytes = new byte[len + addLen];
int k = getBytes(prefix, bytes, 0);
if (size > 0) {
final String delimiter = this.delimiter;
k += getBytes(elts[0], bytes, k);
for (int i = 1; i < size; i++) {
k += getBytes(delimiter, bytes, k);
k += getBytes(elts[i], bytes, k);
}
}
getBytes(suffix, bytes, k);
return new String(bytes);
}
- … :
public String(byte[] bytes) {
this(bytes, 0, bytes.length);
}
public String(byte bytes[], int offset, int length) {
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
}
new String(byte[])
. , , StringBuilder
:
@Override
@HotSpotIntrinsicCandidate
public String toString() {
return isLatin1()
? StringLatin1.newString(value, 0, count)
: StringUTF16.newString(value, 0, count);
}
final class StringLatin1 {
public static String newString(byte[] val, int index, int len) {
return new String(Arrays.copyOfRange(val, index, index + len), LATIN1);
}
}
, , . . StringBuilder.toString()
. , StringBuilder
. StringJoiner.toString()
, . StringLatin1.newString(byte[])
:
final class StringLatin1 {
public static String newString(byte[] val, int index, int len) {
byte[] copy = Arrays.copyOfRange(val, index, index + len);
return newString(copy);
}
public static String newString(byte[] val) {
return new String(val, LATIN1);
}
}
, ( ). , !
: . JDK 14, .
latin cnt len Original Mine ASh
sj true 1 1 27.7 ± 1.2 22.1 ± 0.2 19.7 ± 0.2 ns/op
sj true 1 5 27.7 ± 0.1 22.3 ± 0.0 19.8 ± 0.0 ns/op
sj true 1 10 29.0 ± 0.6 23.2 ± 0.0 20.1 ± 0.1 ns/op
sj true 1 100 51.4 ± 0.1 31.4 ± 0.1 32.9 ± 0.1 ns/op
sj true 5 1 71.1 ± 0.4 64.9 ± 0.3 53.8 ± 0.1 ns/op
sj true 5 5 79.5 ± 0.3 65.5 ± 0.3 54.8 ± 0.1 ns/op
sj true 5 10 81.2 ± 0.3 67.7 ± 0.2 56.1 ± 0.1 ns/op
sj true 5 100 150.0 ± 0.4 86.5 ± 0.5 78.5 ± 0.3 ns/op
sj true 10 1 145.5 ± 0.7 142.5 ± 0.4 127.1 ± 0.5 ns/op
sj true 10 5 160.0 ± 0.8 145.6 ± 0.2 130.8 ± 0.3 ns/op
sj true 10 10 165.6 ± 0.4 150.8 ± 0.3 138.9 ± 0.4 ns/op
sj true 10 100 340.2 ± 1.2 189.3 ± 0.4 175.2 ± 0.7 ns/op
sj true 100 1 1114.3 ± 15.9 1372.7 ± 53.0 1197.4 ± 34.0 ns/op !
sj true 100 5 1184.2 ± 18.5 1385.9 ± 56.0 1159.0 ± 32.3 ns/op
sj true 100 10 1345.8 ± 13.9 1369.0 ± 2.7 1233.5 ± 32.9 ns/op
sj true 100 100 3156.1 ± 19.1 1844.1 ± 4.1 1727.9 ± 37.6 ns/op
sj false 1 1 33.5 ± 0.4 34.9 ± 0.1 19.6 ± 0.0 ns/op
sj false 1 5 33.3 ± 0.3 35.9 ± 0.1 20.1 ± 0.1 ns/op
sj false 1 10 33.8 ± 0.1 35.9 ± 0.1 20.6 ± 0.0 ns/op
sj false 1 100 57.5 ± 0.2 58.3 ± 0.1 39.6 ± 0.1 ns/op
sj false 5 1 82.9 ± 0.5 82.0 ± 0.8 64.2 ± 1.3 ns/op
sj false 5 5 86.0 ± 0.5 84.8 ± 0.9 66.6 ± 0.8 ns/op
sj false 5 10 96.9 ± 0.5 92.5 ± 0.6 76.9 ± 0.7 ns/op
sj false 5 100 224.2 ± 0.6 226.7 ± 0.7 125.1 ± 0.6 ns/op
sj false 10 1 165.7 ± 0.9 167.6 ± 3.2 152.1 ± 1.0 ns/op
sj false 10 5 178.4 ± 0.7 179.6 ± 2.1 163.7 ± 0.9 ns/op
sj false 10 10 191.7 ± 0.9 195.9 ± 2.2 172.5 ± 0.8 ns/op
sj false 10 100 534.4 ± 1.4 534.5 ± 1.3 305.2 ± 1.1 ns/op
sj false 100 1 1435.9 ± 9.5 1428.2 ± 3.2 1333.0 ± 4.7 ns/op
sj false 100 5 1618.5 ± 14.3 1595.2 ± 3.7 1379.7 ± 10.4 ns/op
sj false 100 10 1898.1 ± 9.4 1860.8 ± 4.7 1531.5 ± 13.7 ns/op
sj false 100 100 4247.4 ± 60.7 4150.1 ± 9.1 2423.8 ± 11.9 ns/op
g.a.r.n true 1 1 124.0 ± 4.0 96.0 ± 0.0 96.0 ± 0.0 B/op
g.a.r.n true 1 5 128.0 ± 0.0 96.0 ± 0.0 96.0 ± 0.0 B/op
g.a.r.n true 1 10 144.0 ± 0.0 104.0 ± 0.0 104.0 ± 0.0 B/op
g.a.r.n true 1 100 416.0 ± 0.0 192.0 ± 0.0 192.0 ± 0.0 B/op
g.a.r.n true 5 1 144.0 ± 0.0 104.0 ± 0.0 96.0 ± 0.0 B/op
g.a.r.n true 5 5 200.0 ± 0.0 120.0 ± 0.0 120.0 ± 0.0 B/op
g.a.r.n true 5 10 272.0 ± 0.0 144.0 ± 0.0 144.0 ± 0.0 B/op
g.a.r.n true 5 100 1632.0 ± 0.0 600.0 ± 0.0 592.0 ± 0.0 B/op
g.a.r.n true 10 1 256.0 ± 0.0 192.0 ± 0.0 184.0 ± 0.0 B/op
g.a.r.n true 10 5 376.0 ± 0.0 232.0 ± 0.0 224.0 ± 0.0 B/op
g.a.r.n true 10 10 520.0 ± 0.0 280.0 ± 0.0 272.0 ± 0.0 B/op
g.a.r.n true 10 100 3224.1 ± 0.0 1184.0 ± 0.0 1168.0 ± 0.0 B/op
g.a.r.n true 100 1 1752.1 ± 5.4 1328.2 ± 5.4 1244.1 ± 9.5 B/op
g.a.r.n true 100 5 2952.2 ± 5.4 1728.2 ± 5.4 1627.3 ± 7.6 B/op
g.a.r.n true 100 10 4444.4 ± 4.0 2216.2 ± 0.0 2140.2 ± 9.5 B/op
g.a.r.n true 100 100 31445.6 ± 4.0 11216.7 ± 0.0 11135.0 ± 9.3 B/op
g.a.r.n false 1 1 144.0 ± 0.0 144.0 ± 0.0 96.0 ± 0.0 B/op
g.a.r.n false 1 5 160.0 ± 0.0 160.0 ± 0.0 104.0 ± 0.0 B/op
g.a.r.n false 1 10 184.0 ± 0.0 184.0 ± 0.0 112.0 ± 0.0 B/op
g.a.r.n false 1 100 640.0 ± 0.0 640.0 ± 0.0 288.0 ± 0.0 B/op
g.a.r.n false 5 1 184.0 ± 0.0 184.0 ± 0.0 104.0 ± 0.0 B/op
g.a.r.n false 5 5 280.0 ± 0.0 280.0 ± 0.0 144.0 ± 0.0 B/op
g.a.r.n false 5 10 400.0 ± 0.0 400.0 ± 0.0 192.0 ± 0.0 B/op
g.a.r.n false 5 100 2664.1 ± 0.0 2664.1 ± 0.0 1088.0 ± 0.0 B/op
g.a.r.n false 10 1 320.0 ± 0.0 320.0 ± 0.0 192.0 ± 0.0 B/op
g.a.r.n false 10 5 520.0 ± 0.0 520.0 ± 0.0 272.0 ± 0.0 B/op
g.a.r.n false 10 10 760.0 ± 0.0 760.0 ± 0.0 368.0 ± 0.0 B/op
g.a.r.n false 10 100 5264.2 ± 0.0 5264.2 ± 0.0 2168.1 ± 0.0 B/op
g.a.r.n false 100 1 2208.2 ± 0.0 2168.2 ± 0.0 1317.7 ± 5.7 B/op
g.a.r.n false 100 5 4196.3 ± 6.2 4168.3 ± 0.0 2123.4 ± 7.6 B/op
g.a.r.n false 100 10 6704.6 ± 0.0 6664.5 ± 0.0 3123.4 ± 7.6 B/op
g.a.r.n false 100 100 51698.1 ± 5.4 51666.3 ± 0.0 21124.3 ± 7.6 B/op
, , ! JDK-8148937 , ( ) JDK-8149758. . , JDK-8148937. , 1,5-2 - .
- — . , ( ) . — . , , ,
- जब आप जेडीके में कुछ भरने की कोशिश करते हैं, तो प्रूफरीडिंग निर्दयी होगी। हमारे प्रस्ताव हमें सबसे सच्चे लगते हैं, लेकिन अक्सर यह एक भ्रम है। आलोचना, यहां तक कि विनाशकारी, को दिल से न लें
- यहां तक कि सबसे अच्छे लाभ डेवलपर्स के ध्यान का वादा नहीं करते हैं। पिछले लेखों में से एक में, मैंने उपयोगी और एक ही समय में छोटे और 100% विश्वसनीय बदलावों का वर्णन किया है, जो कि कम महत्व या एक अलग प्राथमिकता के कारण केवल उस समय के लिए भी हटा दिए जाते हैं
एक आफ्टरवर्ड के बजाय
एक हफ्ते में, वसंत थोड़ा शुरू हो जाएगा, मैं पहले गर्मी के साथ ताकत का एक उछाल महसूस करना चाहता हूं और साहसपूर्वक जटिल कार्यों को ले सकता हूं, अपनी गलतियों को स्वीकार करने और सीखने, अध्ययन करने और फिर से अध्ययन करने में सक्षम हो :)