java.lang.String的细微差别

问候


在整理了积累下来的 java.lang.String 相关材料之后,我决定挑选一些高效(以及不那么高效)使用它的示例。


任何字符串转换都会产生新的字符串


这是关于字符串的主要误解之一。实际上,并非总是如此。假设我们有一个仅包含小写字母的字符串:


var str = "str";

现在这段代码


jshell> var str = "str";
jshell> System.out.println(str.toLowerCase() == str);

将输出


true

换句话说,这里调用 toLowerCase() 返回的正是调用它的那个字符串本身。尽管文档中并未描述这一行为,但 StringLatin1.toLowerCase() 的代码让人毫无疑问(此处及下文的代码均来自 https://hg.openjdk.java.net/jdk/jdk/):


/**
 * Lower-cases a Latin-1 encoded string (excerpt: the actual conversion path is elided).
 *
 * @param str    returned unchanged when no byte needs lower-casing
 *               (presumably the String backed by {@code value} — confirm against the caller)
 * @param value  the bytes to scan
 * @param locale the locale to use; must not be null
 * @throws NullPointerException if {@code locale} is null
 */
public static String toLowerCase(String str, byte[] value, Locale locale) {
  if (locale == null) {
    throw new NullPointerException();
  }
  int first;
  final int len = value.length;
  // Now check if there are any characters that need to be changed
  for (first = 0 ; first < len; first++) {
    // Mask to read the byte as an unsigned value (0..255).
    int cp = value[first] & 0xff;
    // no need to check Character.ERROR
    if (cp != CharacterDataLatin1.instance.toLowerCase(cp)) {
      break;
    }
  }
  // The scan reached the end without finding a byte that lower-casing would change.
  if (first == len)
    return str;     // <-- the very instance that was passed in; no new String is created
  //...
}

类似地,String.trim() 和 String.strip() 在没有可去除的内容时也会返回原字符串本身:


// Note: the Javadoc shown below for strip() does not mention returning this,
// whereas the Javadoc for trim() does ("or this string if it has no leading or trailing space").

/**
 *
 * @return  a string whose value is this string, with all leading
 *          and trailing space removed, or this string if it
 *          has no leading or trailing space.
 */
public String trim() {
  // Dispatch on the internal encoding of this string.
  String ret = isLatin1() ? StringLatin1.trim(value)
                          : StringUTF16.trim(value);
  // A null result from the helper is mapped to this very instance.
  return ret == null ? this : ret;
}

/**
 * @return  a string whose value is this string, with all leading
 *          and trailing white space removed
 *
 * @see Character#isWhitespace(int)
 *
 * @since 11
 */
public String strip() {
  // Dispatch on the internal encoding of this string.
  String ret = isLatin1() ? StringLatin1.strip(value)
                          : StringUTF16.strip(value);
  // A null result from the helper is mapped to this very instance,
  // even though the Javadoc above does not say so.
  return ret == null ? this : ret;
}

:


boolean isUpperCase = name.toUpperCase().equals(name);

- StringUtils, ( ""). / /, , name.toUpperCase() name, ?


boolean isUpperCase = name.toUpperCase() == name; // 

, , String.toUpperCase() . ( , ) o.a.c.l.StringUtils.isAllUpperCase().



boolean eq = aString.toUpperCase().equals(anotherString);


boolean eq = aString.equalsIgnoreCase(anotherString);

, "" , "".


String.toLowerCase()


String.toLowerCase() / String.toUpperCase() , . :


boolean isEmpty = someStr.toLowerCase().isEmpty();

, . , / . , isEmpty() true. false, . . 1 , .
, :


boolean isEmpty = someStr.isEmpty();

. String.isEmpty() :


/** Reports whether the backing {@code value} array has zero length. */
public boolean isEmpty() {
  return value.length == 0;
}


int len = someStr.toLowerCase().length();


int len = someStr.length();

, ?


String s = "!";


String s = "!";

, , . . — . , toLowerCase() / toUpperCase() , . , . , :


// Demonstrates that lower-casing can change the length of a string.
@Test
void toLowerCase() {
  String str = "\u00cc"; // Ì

  // The input is a single char.
  assert str.length() == 1;

  // "lt" is the language code for Lithuanian.
  String strLowerCase = str.toLowerCase(new Locale("lt"));

  // Under this locale the lower-cased result is three chars long.
  assert strLowerCase.length() == 3; // i̇̀
}

, : " ?" 1 , ( — 6 (!) ). :


/**
 * Converts all of the characters in this {@code String} to lower
 * case using the rules of the given {@code Locale}.  Case mapping is based
 * on the Unicode Standard version specified by the {@link java.lang.Character Character}
 * class. Since case mappings are not always 1:1 char mappings, the resulting
 * {@code String} may be a different length than the original {@code String}.
 */
public String toLowerCase(Locale locale) {
  //...
}

:


//StringLatin1

// Excerpt: only the locale-specific branch is shown; `first` is computed in the elided part.
public static String toLowerCase(String str, byte[] value, Locale locale) {
  // ...
  String lang = locale.getLanguage();
  // Reference comparison (==) of strings, not equals().
  // NOTE(review): presumably relies on getLanguage() returning interned strings — confirm.
  if (lang == "tr" || lang == "az" || lang == "lt") {        // !!!
    // Turkish, Azerbaijani and Lithuanian take the separate toLowerCaseEx path.
    return toLowerCaseEx(str, value, first, locale, true);
  }
  //...
}

, , :)



1 — String.substring(n, n+1) — , , , 1. :


boolean startsWithUnderline = message.substring(0, 1).equals("_");


boolean startsWithUnderline = message.charAt(0) == '_';

, . :


String s = "xxx" + name.substring(n, n + 1);


String s = "xxx" + name.charAt(n);

, . . . , .


— :


boolean startsWithUrl = content.substring(index, index + 4).equals("url(");


boolean startsWithUrl = content.startsWith("url(", index);

. , ( ):


/**
 * Returns the text after the first '(' with the last character dropped.
 * This version calls substring() twice, producing an intermediate string on the way.
 */
private String findPerClause(String str) {
  // Step 1: drop everything up to and including the first '('.
  str = str.substring(str.indexOf('(') + 1);
  // Step 2: drop the last character of the intermediate result.
  str = str.substring(0, str.length() - 1);
  return str;
}

, , :


 (  ,   )
-->
  ,   

, , :


/**
 * Returns the text after the first '(' with the last character dropped,
 * using a single substring() call with both bounds computed up front.
 */
private String findPerClause(String str) {
  // First character after the first '('.
  int beginIndex = str.indexOf('(') + 1;
  // Exclusive end: stops before the last character of the input.
  int endIndex = str.length() - 1;
  return str.substring(beginIndex, endIndex);
}

, :


int idx = path.substring(2).indexOf('/');

这里可以改用 String.indexOf(int ch, int fromIndex),从而避免创建子字符串:


int idx = path.indexOf('/', 2);

. , '/' 2, . . :


// indexOf(ch, fromIndex) reports a position relative to the whole string, whereas
// substring(2).indexOf(ch) is relative to the substring. Shift a hit back by 2 and
// leave the "not found" result (-1) untouched.
int idx = path.indexOf('/', 2);
if (idx != -1) {
  idx -= 2;
}

, .


JDK. ,


someStr.substring(n, n);

, n :


// String

/**
 * Returns the part of this string starting at {@code beginIndex} and spanning
 * {@code endIndex - beginIndex} characters.
 */
public String substring(int beginIndex, int endIndex) {
  int length = length();
  checkBoundsBeginEnd(beginIndex, endIndex, length);
  int subLen = endIndex - beginIndex;
  // The only shortcut: a range covering the whole string returns this instance.
  if (beginIndex == 0 && endIndex == length) {
    return this;
  }
  // Every other range, including an empty one (subLen == 0), goes to newString().
  return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
                    : StringUTF16.newString(value, beginIndex, subLen);
}

// StringLatin1

/**
 * Copies {@code len} bytes of {@code val} starting at {@code index} into a new String.
 * There is no special case for {@code len == 0}: a new String is constructed regardless.
 */
public static String newString(byte[] val, int index, int len) {
  return new String(Arrays.copyOfRange(val, index, index + len), LATIN1);
}

当 beginIndex 等于 endIndex 时,subLen 为 0,而 StringLatin1.newString() 仍然会创建一个新的空字符串。可以这样修正:


// StringLatin1

/**
 * Copies {@code len} bytes of {@code val} starting at {@code index} into a new String,
 * except that a zero length returns the {@code ""} literal without copying.
 */
public static String newString(byte[] val, int index, int len) {
  // Empty range: skip the array copy and the String construction.
  if (len == 0) {
      return "";
  }
  return new String(Arrays.copyOfRange(val, index, index + len), LATIN1);
}

StringLatin1.stripLeading() / stripTrailing() StringUTF16. .


, :


// StringLatin1 (version with the explicit empty-result check)
public static String stripLeading(byte[] value) {
  int left = indexOfNonWhitespace(value);
  // left reached the end of the array: the result is the "" literal.
  if (left == value.length) {
    return "";
  }
  // left == 0 yields null; otherwise the bytes from left onward are copied.
  return (left != 0) ? newString(value, left, value.length - left) : null;
}

value.length == 0 . left == value.length newString,


// Simplified version: no explicit left == value.length check in this method.
public static String stripLeading(byte[] value) {
  int left = indexOfNonWhitespace(value);
  // left == 0 yields null; every other value of left is delegated to newString().
  return (left != 0) ? newString(value, left, value.length - left) : null;
}

null! String.stripLeading() , this, . , . :


// before the change to stripLeading()
boolean b= new String("").stripLeading() == ""; // true

// after the change to stripLeading()
boolean b= new String("").stripLeading() == ""; // false !

, ?


, :)
From a compatibility point of view I think this should be fine, as
the identity of the returned empty string isn't specified.

https://mail.openjdk.java.net/pipermail/core-libs-dev/2020-February/064957.html


!


, :


/**
 * JMH benchmark that measures the average time of an empty-range
 * {@code substring(1, 1)} call on a constant string.
 */
@Warmup(iterations = 10, time = 1)
@Measurement(iterations = 10, time = 1)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgsAppend = {"-Xms4g", "-Xmx4g", "-XX:+UseParallelGC"})
public class SubstringBenchmark {
    private static final String str = "Tolstoy";

    @Benchmark
    public String substring() {
        // beginIndex == endIndex, so the requested range is empty.
        return str.substring(1, 1);
    }
}

:




                                            Mode    Score    Error   Units
substring                                   avgt      5.8 ±  0.066   ns/op
substring:·gc.alloc.rate                    avgt   4325.9 ± 47.259  MB/sec
substring:·gc.alloc.rate.norm               avgt     40.0 ±  0.001    B/op
substring:·gc.churn.G1_Eden_Space           avgt   4338.8 ± 86.555  MB/sec
substring:·gc.churn.G1_Eden_Space.norm      avgt     40.1 ±  0.647    B/op
substring:·gc.churn.G1_Survivor_Space       avgt      0.0 ±  0.003  MB/sec
substring:·gc.churn.G1_Survivor_Space.norm  avgt   ≈ 10⁻⁴             B/op
substring:·gc.count                         avgt    557.0           counts
substring:·gc.time                          avgt    387.0               ms



substring                                   avgt      2.4 ±  0.172   ns/op
substring:·gc.alloc.rate                    avgt      0.0 ±  0.001  MB/sec
substring:·gc.alloc.rate.norm               avgt   ≈ 10⁻⁵             B/op
substring:·gc.count                         avgt      ≈ 0           counts

也就是说,修改之后 String.substring(n, n) 不再分配内存,执行速度也更快。



, , , , . , AnnotationMetadataReadingVisitor-:


// Excerpt: collects the entries of every converted attribute set into allAttributes.
MultiValueMap<String, Object> getAllAnnotationAttributes(String annotationName, boolean classValAsStr) {
  // ...
  // Built once here, but not used in the visible part of the method.
  String annotatedElement = "class '" + getClassName() + "'";
  for (AnnotationAttributes raw : attributes) {
    // The same "class '...'" concatenation is repeated below, once per element of
    // attributes, instead of reusing annotatedElement.
    for (Map.Entry<String, Object> entry : convertClassValues(
      "class '" + getClassName() + "'", classLoader, raw, classValAsStr).entrySet()) {
      allAttributes.add(entry.getKey(), entry.getValue());
    }
  }
  return allAttributes;
}

表达式 "class '" + getClassName() + "'" 每次求值的结果都相同,我们并不希望在双重循环中反复创建同一个字符串,因此最好在循环之外只创建一次。以前,发现这类例子全凭运气:这个例子就是我在调试自己的应用程序时偶然进入源码才看到的。现在,借助 IDEA-230889,这一检查可以自动完成。当然,循环中创建的字符串并非每次都完全相同,但即便如此,我们也往往能从中分离出固定不变的部分:


// org.springframework.beans.factory.support.BeanDefinitionReaderUtils

/**
 * Produces a bean name for which {@code registry.containsBeanDefinition} returns false,
 * by appending GENERATED_BEAN_NAME_SEPARATOR and an increasing counter (starting at 0)
 * to {@code beanName}.
 */
public static String uniqueBeanName(String beanName, BeanDefinitionRegistry registry) {
  String id = beanName;
  // -1 forces at least one pass through the loop, so a suffix is always appended.
  int counter = -1;

  // Increase counter until the id is unique.
  while (counter == -1 || registry.containsBeanDefinition(id)) {
    counter++;
    // beanName + GENERATED_BEAN_NAME_SEPARATOR does not change between iterations;
    // only the counter suffix does.
    id = beanName + GENERATED_BEAN_NAME_SEPARATOR + counter;
  }
  return id;
}

此处的前缀 beanName + GENERATED_BEAN_NAME_SEPARATOR 始终相同,因此可以将其提取到循环之外。


就是这些了,欢迎在评论中写下您的示例——我们一起来分析。


All Articles