首页 > 学院 > 开发设计 > 正文

深入Java—String源代码

2019-11-14 23:23:24
字体:
来源:转载
供稿:网友
深入java—String源代码

/* * @(#)String.java1.204 06/06/09 * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */

package java.lang;

import java.io.ObjectStreamClass;import java.io.ObjectStreamField;import java.io.UnsupportedEncodingException;import java.nio.charset.Charset;import java.util.ArrayList;import java.util.Arrays;import java.util.Comparator;import java.util.Formatter;import java.util.Locale;import java.util.regex.Matcher;import java.util.regex.Pattern;import java.util.regex.PatternSyntaxException;

/** * The <code>String</code> class represents character strings. All * string literals in Java programs, such as <code>"abc"</code>, are * implemented as instances of this class. * <p> * Strings are constant; their values cannot be changed after they * are created. String buffers support mutable strings. * Because String objects are immutable they can be shared. For example: * <p><blockquote><pre> * String str = "abc"; * </pre></blockquote><p> * is equivalent to: * <p><blockquote><pre> * char data[] = {'a', 'b', 'c'}; * String str = new String(data); * </pre></blockquote><p> * Here are some more examples of how strings can be used: * <p><blockquote><pre> * System.out.println("abc"); * String cde = "cde"; * System.out.println("abc" + cde); * String c = "abc".substring(2,3); * String d = cde.substring(1, 2); * </pre></blockquote> * <p> * The class <code>String</code> includes methods for examining * individual characters of the sequence, for comparing strings, for * searching strings, for extracting substrings, and for creating a * copy of a string with all characters translated to uppercase or to * lowercase. Case mapping is based on the Unicode Standard version * specified by the {@link java.lang.Character Character} class. * <p> * The Java language provides special support for the string * concatenation Operator (&nbsp;+&nbsp;), and for conversion of * other objects to strings. String concatenation is implemented * through the <code>StringBuilder</code>(or <code>StringBuffer</code>) * class and its <code>append</code> method. * String conversions are implemented through the method * <code>toString</code>, defined by <code>Object</code> and * inherited by all classes in Java. For additional information on * string concatenation and conversion, see Gosling, Joy, and Steele, * <i>The Java Language Specification</i>. * * <p> Unless otherwise noted, passing a <tt>null</tt> argument to a constructor * or method in this class will cause a {@link NullPointerException} to be * thrown. * * <p>A <code>String</code> represents a string in the UTF-16 format * in which <em>supplementary characters</em> are represented by <em>surrogate * pairs</em> (see the section <a href="Character.html#unicode">Unicode * Character Representations</a> in the <code>Character</code> class for * more information). * Index values refer to <code>char</code> code units, so a supplementary * character uses two positions in a <code>String</code>. * <p>The <code>String</code> class provides methods for dealing with * Unicode code points (i.e., characters), in addition to those for * dealing with Unicode code units (i.e., <code>char</code> values). * * @author Lee Boynton * @author Arthur van Hoff * @version 1.204, 06/09/06 * @see java.lang.Object#toString() * @see java.lang.StringBuffer * @see java.lang.StringBuilder * @see java.nio.charset.Charset * @since JDK1.0 */

public final class String implements java.io.Serializable, Comparable<String>, CharSequence{ /** The value is used for character storage. */ private final char value[];

/** The offset is the first index of the storage that is used. */ private final int offset;

/** The count is the number of characters in the String. */ private final int count;

/** Cache the hash code for the string */ private int hash; // Default to 0

/** use serialVersionUID from JDK 1.0.2 for interoperability */ private static final long serialVersionUID = -6849794470754667710L;

/** * Class String is special cased within the Serialization Stream Protocol. * * A String instance is written initially into an ObjectOutputStream in the * following format: * <pre> * <code>TC_STRING</code> (utf String) * </pre> * The String is written by method <code>DataOutput.writeUTF</code>. * A new handle is generated to refer to all future references to the * string instance within the stream. */ private static final ObjectStreamField[] serialPersistentFields = new ObjectStreamField[0];

/** * Initializes a newly created {@code String} object so that it represents * an empty character sequence. Note that use of this constructor is * unnecessary since Strings are immutable. */ public String() {this.offset = 0;this.count = 0;this.value = new char[0]; }

/** * Initializes a newly created {@code String} object so that it represents * the same sequence of characters as the argument; in other Words, the * newly created string is a copy of the argument string. Unless an * explicit copy of {@code original} is needed, use of this constructor is * unnecessary since Strings are immutable. * * @param original * A {@code String} */ public String(String original) {int size = original.count;char[] originalValue = original.value;char[] v; if (originalValue.length > size) { // The array representing the String is bigger than the new // String itself. Perhaps this constructor is being called // in order to trim the baggage, so make a copy of the array. int off = original.offset; v = Arrays.copyOfRange(originalValue, off, off+size); } else { // The array representing the String is the same // size as the String, so no point in making a copy. v = originalValue; }this.offset = 0;this.count = size;this.value = v; }

/** * Allocates a new {@code String} so that it represents the sequence of * characters currently contained in the character array argument. The * contents of the character array are copied; subsequent modification of * the character array does not affect the newly created string. * * @param value * The initial value of the string */ public String(char value[]) {int size = value.length;this.offset = 0;this.count = size;this.value = Arrays.copyOf(value, size); }

/** * Allocates a new {@code String} that contains characters from a subarray * of the character array argument. The {@code offset} argument is the * index of the first character of the subarray and the {@code count} * argument specifies the length of the subarray. The contents of the * subarray are copied; subsequent modification of the character array does * not affect the newly created string. * * @param value * Array that is the source of characters * * @param offset * The initial offset * * @param count * The length * * @throws IndexOutOfBoundsException * If the {@code offset} and {@code count} arguments index * characters outside the bounds of the {@code value} array */ public String(char value[], int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > value.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } this.offset = 0; this.count = count; this.value = Arrays.copyOfRange(value, offset, offset+count); }

/** * Allocates a new {@code String} that contains characters from a subarray * of the Unicode code point array argument. The {@code offset} argument * is the index of the first code point of the subarray and the * {@code count} argument specifies the length of the subarray. The * contents of the subarray are converted to {@code char}s; subsequent * modification of the {@code int} array does not affect the newly created * string. * * @param codePoints * Array that is the source of Unicode code points * * @param offset * The initial offset * * @param count * The length * * @throws IllegalArgumentException * If any invalid Unicode code point is found in {@code * codePoints} * * @throws IndexOutOfBoundsException * If the {@code offset} and {@code count} arguments index * characters outside the bounds of the {@code codePoints} array * * @since 1.5 */ public String(int[] codePoints, int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > codePoints.length - count) { throw new StringIndexOutOfBoundsException(offset + count); }

int expansion = 0;int margin = 1;char[] v = new char[count + margin];int x = offset;int j = 0;for (int i = 0; i < count; i++) { int c = codePoints[x++]; if (c < 0) {throw new IllegalArgumentException(); } if (margin <= 0 && (j+1) >= v.length) {if (expansion == 0) { expansion = (((-margin + 1) * count) << 10) / i; expansion >>= 10; if (expansion <= 0) {expansion = 1; }} else { expansion *= 2;} int newLen = Math.min(v.length+expansion, count*2);margin = (newLen - v.length) - (count - i); v = Arrays.copyOf(v, newLen); } if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {v[j++] = (char) c; } else if (c <= Character.MAX_CODE_POINT) {Character.toSurrogates(c, v, j);j += 2;margin--; } else {throw new IllegalArgumentException(); }}this.offset = 0;this.value = v;this.count = j; }

/** * Allocates a new {@code String} constructed from a subarray of an array * of 8-bit integer values. * * <p> The {@code offset} argument is the index of the first byte of the * subarray, and the {@code count} argument specifies the length of the * subarray. * * <p> Each {@code byte} in the subarray is converted to a {@code char} as * specified in the method above. * * @deprecated This method does not properly convert bytes into characters. * As of JDK&nbsp;1.1, the preferred way to do this is via the * {@code String} constructors that take a {@link * java.nio.charset.Charset}, charset name, or that use the platform's * default charset. * * @param ascii * The bytes to be converted to characters * * @param hibyte * The top 8 bits of each 16-bit Unicode code unit * * @param offset * The initial offset * @param count * The length * * @throws IndexOutOfBoundsException * If the {@code offset} or {@code count} argument is invalid * * @see #String(byte[], int) * @see #String(byte[], int, int, java.lang.String) * @see #String(byte[], int, int, java.nio.charset.Charset) * @see #String(byte[], int, int) * @see #String(byte[], java.lang.String) * @see #String(byte[], java.nio.charset.Charset) * @see #String(byte[]) */ @Deprecated public String(byte ascii[], int hibyte, int offset, int count) {checkBounds(ascii, offset, count); char value[] = new char[count];

if (hibyte == 0) { for (int i = count ; i-- > 0 ;) { value[i] = (char) (ascii[i + offset] & 0xff); } } else { hibyte <<= 8; for (int i = count ; i-- > 0 ;) { value[i] = (char) (hibyte | (ascii[i + offset] & 0xff)); } }this.offset = 0;this.count = count;this.value = value; }

/** * Allocates a new {@code String} containing characters constructed from * an array of 8-bit integer values. Each character <i>c</i>in the * resulting string is constructed from the corresponding component * <i>b</i> in the byte array such that: * * <blockquote><pre> * <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8) * | (<b><i>b</i></b> &amp; 0xff)) * </pre></blockquote> * * @deprecated This method does not properly convert bytes into * characters. As of JDK&nbsp;1.1, the preferred way to do this is via the * {@code String} constructors that take a {@link * java.nio.charset.Charset}, charset name, or that use the platform's * default charset. * * @param ascii * The bytes to be converted to characters * * @param hibyte * The top 8 bits of each 16-bit Unicode code unit * * @see #String(byte[], int, int, java.lang.String) * @see #String(byte[], int, int, java.nio.charset.Charset) * @see #String(byte[], int, int) * @see #String(byte[], java.lang.String) * @see #String(byte[], java.nio.charset.Charset) * @see #String(byte[]) */ @Deprecated public String(byte ascii[], int hibyte) { this(ascii, hibyte, 0, ascii.length); }

/* Common private utility method used to bounds check the byte array * and requested offset & length values used by the String(byte[],..) * constructors. */ private static void checkBounds(byte[] bytes, int offset, int length) {if (length < 0) throw new StringIndexOutOfBoundsException(length);if (offset < 0) throw new StringIndexOutOfBoundsException(offset);if (offset > bytes.length - length) throw new StringIndexOutOfBoundsException(offset + length); }

/** * Constructs a new {@code String} by decoding the specified subarray of * bytes using the specified charset. The length of the new {@code String} * is a function of the charset, and hence may not be equal to the length * of the subarray. * * <p> The behavior of this constructor when the given bytes are not valid * in the given charset is unspecified. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @param offset * The index of the first byte to decode * * @param length * The number of bytes to decode

* @param charsetName * The name of a supported {@linkplain java.nio.charset.Charset * charset} * * @throws UnsupportedEncodingException * If the named charset is not supported * * @throws IndexOutOfBoundsException * If the {@code offset} and {@code length} arguments index * characters outside the bounds of the {@code bytes} array * * @since JDK1.1 */ public String(byte bytes[], int offset, int length, String charsetName)throws UnsupportedEncodingException {if (charsetName == null) throw new NullPointerException("charsetName");checkBounds(bytes, offset, length);char[] v = StringCoding.decode(charsetName, bytes, offset, length);this.offset = 0;this.count = v.length;this.value = v; }

/** * Constructs a new {@code String} by decoding the specified subarray of * bytes using the specified {@linkplain java.nio.charset.Charset charset}. * The length of the new {@code String} is a function of the charset, and * hence may not be equal to the length of the subarray. * * <p> This method always replaces malformed-input and unmappable-character * sequences with this charset's default replacement string. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @param offset * The index of the first byte to decode * * @param length * The number of bytes to decode * * @param charset * The {@linkplain java.nio.charset.Charset charset} to be used to * decode the {@code bytes} * * @throws IndexOutOfBoundsException * If the {@code offset} and {@code length} arguments index * characters outside the bounds of the {@code bytes} array * * @since 1.6 */ public String(byte bytes[], int offset, int length, Charset charset) {if (charset == null) throw new NullPointerException("charset");checkBounds(bytes, offset, length);char[] v = StringCoding.decode(charset, bytes, offset, length);this.offset = 0;this.count = v.length;this.value = v; }

/** * Constructs a new {@code String} by decoding the specified array of bytes * using the specified {@linkplain java.nio.charset.Charset charset}. The * length of the new {@code String} is a function of the charset, and hence * may not be equal to the length of the byte array. * * <p> The behavior of this constructor when the given bytes are not valid * in the given charset is unspecified. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @param charsetName * The name of a supported {@linkplain java.nio.charset.Charset * charset} * * @throws UnsupportedEncodingException * If the named charset is not supported * * @since JDK1.1 */ public String(byte bytes[], String charsetName)throws UnsupportedEncodingException {this(bytes, 0, bytes.length, charsetName); }

/** * Constructs a new {@code String} by decoding the specified array of * bytes using the specified {@linkplain java.nio.charset.Charset charset}. * The length of the new {@code String} is a function of the charset, and * hence may not be equal to the length of the byte array. * * <p> This method always replaces malformed-input and unmappable-character * sequences with this charset's default replacement string. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @param charset * The {@linkplain java.nio.charset.Charset charset} to be used to * decode the {@code bytes} * * @since 1.6 */ public String(byte bytes[], Charset charset) {this(bytes, 0, bytes.length, charset); }

/** * Constructs a new {@code String} by decoding the specified subarray of * bytes using the platform's default charset. The length of the new * {@code String} is a function of the charset, and hence may not be equal * to the length of the subarray. * * <p> The behavior of this constructor when the given bytes are not valid * in the default charset is unspecified. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @param offset * The index of the first byte to decode * * @param length * The number of bytes to decode * * @throws IndexOutOfBoundsException * If the {@code offset} and the {@code length} arguments index * characters outside the bounds of the {@code bytes} array * * @since JDK1.1 */ public String(byte bytes[], int offset, int length) {checkBounds(bytes, offset, length);char[] v = StringCoding.decode(bytes, offset, length);this.offset = 0;this.count = v.length;this.value = v; }

/** * Constructs a new {@code String} by decoding the specified array of bytes * using the platform's default charset. The length of the new {@code * String} is a function of the charset, and hence may not be equal to the * length of the byte array. * * <p> The behavior of this constructor when the given bytes are not valid * in the default charset is unspecified. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. * * @param bytes * The bytes to be decoded into characters * * @since JDK1.1 */ public String(byte bytes[]) {this(bytes, 0, bytes.length); }

/** * Allocates a new string that contains the sequence of characters * currently contained in the string buffer argument. The contents of the * string buffer are copied; subsequent modification of the string buffer * does not affect the newly created string. * * @param buffer * A {@code StringBuffer} */ public String(StringBuffer buffer) { String result = buffer.toString(); this.value = result.value; this.count = result.count; this.offset = result.offset; }

/** * Allocates a new string that contains the sequence of characters * currently contained in the string builder argument. The contents of the * string builder are copied; subsequent modification of the string builder * does not affect the newly created string. * * <p> This constructor is provided to ease migration to {@code * StringBuilder}. Obtaining a string from a string builder via the {@code * toString} method is likely to run faster and is generally preferred. * * @param builder * A {@code StringBuilder} * * @since 1.5 */ public String(StringBuilder builder) { String result = builder.toString(); this.value = result.value; this.count = result.count; this.offset = result.offset; }

// Package private constructor which shares value array for speed. String(int offset, int count, char value[]) {this.value = value;this.offset = offset;this.count = count; }

/** * Returns the length of this string. * The length is equal to the number of <a href="Character.html#unicode">Unicode * code units</a> in the string. * * @return the length of the sequence of characters represented by this * object. */ public int length() { return count; }

/** * Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>. * * @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise * <tt>false</tt> * * @since 1.6 */ public boolean isEmpty() {return count == 0; }

/** * Returns the <code>char</code> value at the * specified index. An index ranges from <code>0</code> to * <code>length() - 1</code>. The first <code>char</code> value of the sequence * is at index <code>0</code>, the next at index <code>1</code>, * and so on, as for array indexing. * * <p>If the <code>char</code> value specified by the index is a * <a href="Character.html#unicode">surrogate</a>, the surrogate * value is returned. * * @param index the index of the <code>char</code> value. * @return the <code>char</code> value at the specified index of this string. * The first <code>char</code> value is at index <code>0</code>. * @exception IndexOutOfBoundsException if the <code>index</code> * argument is negative or not less than the length of this * string. */ public char charAt(int index) { if ((index < 0) || (index >= count)) { throw new StringIndexOutOfBoundsException(index); } return value[index + offset]; }

/** * Returns the character (Unicode code point) at the specified * index. The index refers to <code>char</code> values * (Unicode code units) and ranges from <code>0</code> to * {@link #length()}<code> - 1</code>. * * <p> If the <code>char</code> value specified at the given index * is in the high-surrogate range, the following index is less * than the length of this <code>String</code>, and the * <code>char</code> value at the following index is in the * low-surrogate range, then the supplementary code point * corresponding to this surrogate pair is returned. Otherwise, * the <code>char</code> value at the given index is returned. * * @param index the index to the <code>char</code> values * @return the code point value of the character at the * <code>index</code> * @exception IndexOutOfBoundsException if the <code>index</code> * argument is negative or not less than the length of this * string. * @since 1.5 */ public int codePointAt(int index) { if ((index < 0) || (index >= count)) { throw new StringIndexOutOfBoundsException(index); } return Character.codePointAtImpl(value, offset + index, offset + count); }

/** * Returns the character (Unicode code point) before the specified * index. The index refers to <code>char</code> values * (Unicode code units) and ranges from <code>1</code> to {@link * CharSequence#length() length}. * * <p> If the <code>char</code> value at <code>(index - 1)</code> * is in the low-surrogate range, <code>(index - 2)</code> is not * negative, and the <code>char</code> value at <code>(index - * 2)</code> is in the high-surrogate range, then the * supplementary code point value of the surrogate pair is * returned. If the <code>char</code> value at <code>index - * 1</code> is an unpaired low-surrogate or a high-surrogate, the * surrogate value is returned. * * @param index the index following the code point that should be returned * @return the Unicode code point value before the given index. * @exception IndexOutOfBoundsException if the <code>index</code> * argument is less than 1 or greater than the length * of this string. * @since 1.5 */ public int codePointBefore(int index) {int i = index - 1; if ((i < 0) || (i >= count)) { throw new StringIndexOutOfBoundsException(index); } return Character.codePointBeforeImpl(value, offset + index, offset); }

/** * Returns the number of Unicode code points in the specified text * range of this <code>String</code>. The text range begins at the * specified <code>beginIndex</code> and extends to the * <code>char</code> at index <code>endIndex - 1</code>. Thus the * length (in <code>char</code>s) of the text range is * <code>endIndex-beginIndex</code>. Unpaired surrogates within * the text range count as one code point each. * * @param beginIndex the index to the first <code>char</code> of * the text range. * @param endIndex the index after the last <code>char</code> of * the text range. * @return the number of Unicode code points in the specified text * range * @exception IndexOutOfBoundsException if the * <code>beginIndex</code> is negative, or <code>endIndex</code> * is larger than the length of this <code>String</code>, or * <code>beginIndex</code> is larger than <code>endIndex</code>. * @since 1.5 */ public int codePointCount(int beginIndex, int endIndex) {if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) { throw new IndexOutOfBoundsException();}return Character.codePointCountImpl(value, offset+beginIndex, endIndex-beginIndex); }

/** * Returns the index within this <code>String</code> that is * offset from the given <code>index</code> by * <code>codePointOffset</code> code points. Unpaired surrogates * within the text range given by <code>index</code> and * <code>codePointOffset</code> count as one code point each. * * @param index the index to be offset * @param codePointOffset the offset in code points * @return the index within this <code>String</code> * @exception IndexOutOfBoundsException if <code>index</code> * is negative or larger then the length of this * <code>String</code>, or if <code>codePointOffset</code> is positive * and the substring starting with <code>index</code> has fewer * than <code>codePointOffset</code> code points, * or if <code>codePointOffset</code> is negative and the substring * before <code>index</code> has fewer than the absolute value * of <code>codePointOffset</code> code points. * @since 1.5 */ public int offsetByCodePoints(int index, int codePointOffset) {if (index < 0 || index > count) { throw new IndexOutOfBoundsException();}return Character.offsetByCodePointsImpl(value, offset, count,offset+index, codePointOffset) - offset; }

/** * Copy characters from this string into dst starting at dstBegin. * This method doesn't perform any range checking. */ void getChars(char dst[], int dstBegin) { System.arraycopy(value, offset, dst, dstBegin, count); }

/** * Copies characters from this string into the destination character * array. * <p> * The first character to be copied is at index <code>srcBegin</code>; * the last character to be copied is at index <code>srcEnd-1</code> * (thus the total number of characters to be copied is * <code>srcEnd-srcBegin</code>). The characters are copied into the * subarray of <code>dst</code> starting at index <code>dstBegin</code> * and ending at index: * <p><blockquote><pre> * dstbegin + (srcEnd-srcBegin) - 1 * </pre></blockquote> * * @param srcBegin index of the first character in the string * to copy. * @param srcEnd index after the last character in the string * to copy. * @param dst the destination array. * @param dstBegin the start offset in the destination array. * @exception IndexOutOfBoundsException If any of the following * is true: * <ul><li><code>srcBegin</code> is negative. * <li><code>srcBegin</code> is greater than <code>srcEnd</code> * <li><code>srcEnd</code> is greater t

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表