|  | /* java.lang.Character -- Wrapper class for char, and Unicode subsets | 
|  | Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc. | 
|  |  | 
|  | This file is part of GNU Classpath. | 
|  |  | 
|  | GNU Classpath is free software; you can redistribute it and/or modify | 
|  | it under the terms of the GNU General Public License as published by | 
|  | the Free Software Foundation; either version 2, or (at your option) | 
|  | any later version. | 
|  |  | 
|  | GNU Classpath is distributed in the hope that it will be useful, but | 
|  | WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | General Public License for more details. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License | 
|  | along with GNU Classpath; see the file COPYING.  If not, write to the | 
|  | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | 
|  | 02110-1301 USA. | 
|  |  | 
|  | Linking this library statically or dynamically with other modules is | 
|  | making a combined work based on this library.  Thus, the terms and | 
|  | conditions of the GNU General Public License cover the whole | 
|  | combination. | 
|  |  | 
|  | As a special exception, the copyright holders of this library give you | 
|  | permission to link this library with independent modules to produce an | 
|  | executable, regardless of the license terms of these independent | 
|  | modules, and to copy and distribute the resulting executable under | 
|  | terms of your choice, provided that you also meet, for each linked | 
|  | independent module, the terms and conditions of the license of that | 
|  | module.  An independent module is a module which is not derived from | 
|  | or based on this library.  If you modify this library, you may extend | 
|  | this exception to your version of the library, but you are not | 
|  | obligated to do so.  If you do not wish to do so, delete this | 
|  | exception statement from your version. */ | 
|  |  | 
|  |  | 
|  | package java.lang; | 
|  |  | 
|  | import gnu.java.lang.CharData; | 
|  |  | 
|  | import java.io.Serializable; | 
|  | import java.text.Collator; | 
|  | import java.util.Locale; | 
|  |  | 
|  | /** | 
|  | * Wrapper class for the primitive char data type.  In addition, this class | 
|  | * allows one to retrieve property information and perform transformations | 
|  | * on the defined characters in the Unicode Standard, Version 4.0.0. | 
|  | * java.lang.Character is designed to be very dynamic, and as such, it | 
|  | * retrieves information on the Unicode character set from a separate | 
|  | * database, gnu.java.lang.CharData, which can be easily upgraded. | 
|  | * | 
|  | * <p>For predicates, boundaries are used to describe | 
|  | * the set of characters for which the method will return true. | 
|  | * This syntax uses fairly normal regular expression notation. | 
|  | * See 5.13 of the Unicode Standard, Version 4.0, for the | 
|  | * boundary specification. | 
|  | * | 
|  | * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a> | 
|  | * for more information on the Unicode Standard. | 
|  | * | 
|  | * @author Tom Tromey (tromey@cygnus.com) | 
|  | * @author Paul N. Fisher | 
|  | * @author Jochen Hoenicke | 
|  | * @author Eric Blake (ebb9@email.byu.edu) | 
|  | * @author Andrew John Hughes (gnu_andrew@member.fsf.org) | 
|  | * @see CharData | 
|  | * @since 1.0 | 
|  | * @status partly updated to 1.5; some things still missing | 
|  | */ | 
|  | public final class Character implements Serializable, Comparable<Character> | 
|  | { | 
|  | /** | 
|  | * A subset of Unicode blocks. | 
|  | * | 
|  | * @author Paul N. Fisher | 
|  | * @author Eric Blake (ebb9@email.byu.edu) | 
|  | * @since 1.2 | 
|  | */ | 
|  | public static class Subset | 
|  | { | 
|  | /** The name of the subset. */ | 
|  | private final String name; | 
|  |  | 
|  | /** | 
|  | * Construct a new subset of characters. | 
|  | * | 
|  | * @param name the name of the subset | 
|  | * @throws NullPointerException if name is null | 
|  | */ | 
|  | protected Subset(String name) | 
|  | { | 
|  | // Note that name.toString() is name, unless name was null. | 
|  | this.name = name.toString(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Compares two Subsets for equality. This is <code>final</code>, and | 
|  | * restricts the comparison on the <code>==</code> operator, so it returns | 
|  | * true only for the same object. | 
|  | * | 
|  | * @param o the object to compare | 
|  | * @return true if o is this | 
|  | */ | 
|  | public final boolean equals(Object o) | 
|  | { | 
|  | return o == this; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Makes the original hashCode of Object final, to be consistent with | 
|  | * equals. | 
|  | * | 
|  | * @return the hash code for this object | 
|  | */ | 
|  | public final int hashCode() | 
|  | { | 
|  | return super.hashCode(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the name of the subset. | 
|  | * | 
|  | * @return the name | 
|  | */ | 
|  | public final String toString() | 
|  | { | 
|  | return name; | 
|  | } | 
|  | } // class Subset | 
|  |  | 
|  | /** | 
|  | * A family of character subsets in the Unicode specification. A character | 
|  | * is in at most one of these blocks. | 
|  | * | 
|  | * This inner class was generated automatically from | 
|  | * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts. | 
|  | * This Unicode definition file can be found on the | 
|  | * <a href="http://www.unicode.org">http://www.unicode.org</a> website. | 
|  | * JDK 1.5 uses Unicode version 4.0.0. | 
|  | * | 
|  | * @author scripts/unicode-blocks.pl (written by Eric Blake) | 
|  | * @since 1.2 | 
|  | */ | 
|  | public static final class UnicodeBlock extends Subset | 
|  | { | 
|  | /** The start of the subset. */ | 
|  | private final int start; | 
|  |  | 
|  | /** The end of the subset. */ | 
|  | private final int end; | 
|  |  | 
|  | /** The canonical name of the block according to the Unicode standard. */ | 
|  | private final String canonicalName; | 
|  |  | 
|  | /** Enumeration for the <code>forName()</code> method */ | 
|  | private enum NameType { CANONICAL, NO_SPACES, CONSTANT; } | 
|  |  | 
|  | /** | 
|  | * Constructor for strictly defined blocks. | 
|  | * | 
|  | * @param start the start character of the range | 
|  | * @param end the end character of the range | 
|  | * @param name the block name | 
|  | * @param canonicalName the name of the block as defined in the Unicode | 
|  | *        standard. | 
|  | */ | 
|  | private UnicodeBlock(int start, int end, String name, | 
|  | String canonicalName) | 
|  | { | 
|  | super(name); | 
|  | this.start = start; | 
|  | this.end = end; | 
|  | this.canonicalName = canonicalName; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode character block which a character belongs to. | 
|  | * <strong>Note</strong>: This method does not support the use of | 
|  | * supplementary characters.  For such support, <code>of(int)</code> | 
|  | * should be used instead. | 
|  | * | 
|  | * @param ch the character to look up | 
|  | * @return the set it belongs to, or null if it is not in one | 
|  | */ | 
|  | public static UnicodeBlock of(char ch) | 
|  | { | 
|  | return of((int) ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode character block which a code point belongs to. | 
|  | * | 
|  | * @param codePoint the character to look up | 
|  | * @return the set it belongs to, or null if it is not in one. | 
|  | * @throws IllegalArgumentException if the specified code point is | 
|  | *         invalid. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static UnicodeBlock of(int codePoint) | 
|  | { | 
|  | if (codePoint > MAX_CODE_POINT) | 
|  | throw new IllegalArgumentException("The supplied integer value is " + | 
|  | "too large to be a codepoint."); | 
|  | // Simple binary search for the correct block. | 
|  | int low = 0; | 
|  | int hi = sets.length - 1; | 
|  | while (low <= hi) | 
|  | { | 
|  | int mid = (low + hi) >> 1; | 
|  | UnicodeBlock b = sets[mid]; | 
|  | if (codePoint < b.start) | 
|  | hi = mid - 1; | 
|  | else if (codePoint > b.end) | 
|  | low = mid + 1; | 
|  | else | 
|  | return b; | 
|  | } | 
|  | return null; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * <p> | 
|  | * Returns the <code>UnicodeBlock</code> with the given name, as defined | 
|  | * by the Unicode standard.  The version of Unicode in use is defined by | 
|  | * the <code>Character</code> class, and the names are given in the | 
|  | * <code>Blocks-<version>.txt</code> file corresponding to that version. | 
|  | * The name may be specified in one of three ways: | 
|  | * </p> | 
|  | * <ol> | 
|  | * <li>The canonical, human-readable name used by the Unicode standard. | 
|  | * This is the name with all spaces and hyphens retained.  For example, | 
|  | * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li> | 
|  | * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li> | 
|  | * <li>The name used for the constants specified by this class, which | 
|  | * is the canonical name with all spaces and hyphens replaced with | 
|  | * underscores e.g. `BASIC_LATIN'</li> | 
|  | * </ol> | 
|  | * <p> | 
|  | * The names are compared case-insensitively using the case comparison | 
|  | * associated with the U.S. English locale.  The method recognises the | 
|  | * previous names used for blocks as well as the current ones.  At | 
|  | * present, this simply means that the deprecated `SURROGATES_AREA' | 
|  | * will be recognised by this method (the <code>of()</code> methods | 
|  | * only return one of the three new surrogate blocks). | 
|  | * </p> | 
|  | * | 
|  | * @param blockName the name of the block to look up. | 
|  | * @return the specified block. | 
|  | * @throws NullPointerException if the <code>blockName</code> is | 
|  | *         <code>null</code>. | 
|  | * @throws IllegalArgumentException if the name does not match any Unicode | 
|  | *         block. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock forName(String blockName) | 
|  | { | 
|  | NameType type; | 
|  | if (blockName.indexOf(' ') != -1) | 
|  | type = NameType.CANONICAL; | 
|  | else if (blockName.indexOf('_') != -1) | 
|  | type = NameType.CONSTANT; | 
|  | else | 
|  | type = NameType.NO_SPACES; | 
|  | Collator usCollator = Collator.getInstance(Locale.US); | 
|  | usCollator.setStrength(Collator.PRIMARY); | 
|  | /* Special case for deprecated blocks not in sets */ | 
|  | switch (type) | 
|  | { | 
|  | case CANONICAL: | 
|  | if (usCollator.compare(blockName, "Surrogates Area") == 0) | 
|  | return SURROGATES_AREA; | 
|  | break; | 
|  | case NO_SPACES: | 
|  | if (usCollator.compare(blockName, "SurrogatesArea") == 0) | 
|  | return SURROGATES_AREA; | 
|  | break; | 
|  | case CONSTANT: | 
|  | if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) | 
|  | return SURROGATES_AREA; | 
|  | break; | 
|  | } | 
|  | /* Other cases */ | 
|  | switch (type) | 
|  | { | 
|  | case CANONICAL: | 
|  | for (UnicodeBlock block : sets) | 
|  | if (usCollator.compare(blockName, block.canonicalName) == 0) | 
|  | return block; | 
|  | break; | 
|  | case NO_SPACES: | 
|  | for (UnicodeBlock block : sets) | 
|  | { | 
|  | String nsName = block.canonicalName.replaceAll(" ",""); | 
|  | if (usCollator.compare(blockName, nsName) == 0) | 
|  | return block; | 
|  | } | 
|  | break; | 
|  | case CONSTANT: | 
|  | for (UnicodeBlock block : sets) | 
|  | if (usCollator.compare(blockName, block.toString()) == 0) | 
|  | return block; | 
|  | break; | 
|  | } | 
|  | throw new IllegalArgumentException("No Unicode block found for " + | 
|  | blockName + "."); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Basic Latin. | 
|  | * 0x0000 - 0x007F. | 
|  | */ | 
|  | public static final UnicodeBlock BASIC_LATIN | 
|  | = new UnicodeBlock(0x0000, 0x007F, | 
|  | "BASIC_LATIN", | 
|  | "Basic Latin"); | 
|  |  | 
|  | /** | 
|  | * Latin-1 Supplement. | 
|  | * 0x0080 - 0x00FF. | 
|  | */ | 
|  | public static final UnicodeBlock LATIN_1_SUPPLEMENT | 
|  | = new UnicodeBlock(0x0080, 0x00FF, | 
|  | "LATIN_1_SUPPLEMENT", | 
|  | "Latin-1 Supplement"); | 
|  |  | 
|  | /** | 
|  | * Latin Extended-A. | 
|  | * 0x0100 - 0x017F. | 
|  | */ | 
|  | public static final UnicodeBlock LATIN_EXTENDED_A | 
|  | = new UnicodeBlock(0x0100, 0x017F, | 
|  | "LATIN_EXTENDED_A", | 
|  | "Latin Extended-A"); | 
|  |  | 
|  | /** | 
|  | * Latin Extended-B. | 
|  | * 0x0180 - 0x024F. | 
|  | */ | 
|  | public static final UnicodeBlock LATIN_EXTENDED_B | 
|  | = new UnicodeBlock(0x0180, 0x024F, | 
|  | "LATIN_EXTENDED_B", | 
|  | "Latin Extended-B"); | 
|  |  | 
|  | /** | 
|  | * IPA Extensions. | 
|  | * 0x0250 - 0x02AF. | 
|  | */ | 
|  | public static final UnicodeBlock IPA_EXTENSIONS | 
|  | = new UnicodeBlock(0x0250, 0x02AF, | 
|  | "IPA_EXTENSIONS", | 
|  | "IPA Extensions"); | 
|  |  | 
|  | /** | 
|  | * Spacing Modifier Letters. | 
|  | * 0x02B0 - 0x02FF. | 
|  | */ | 
|  | public static final UnicodeBlock SPACING_MODIFIER_LETTERS | 
|  | = new UnicodeBlock(0x02B0, 0x02FF, | 
|  | "SPACING_MODIFIER_LETTERS", | 
|  | "Spacing Modifier Letters"); | 
|  |  | 
|  | /** | 
|  | * Combining Diacritical Marks. | 
|  | * 0x0300 - 0x036F. | 
|  | */ | 
|  | public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS | 
|  | = new UnicodeBlock(0x0300, 0x036F, | 
|  | "COMBINING_DIACRITICAL_MARKS", | 
|  | "Combining Diacritical Marks"); | 
|  |  | 
|  | /** | 
|  | * Greek. | 
|  | * 0x0370 - 0x03FF. | 
|  | */ | 
|  | public static final UnicodeBlock GREEK | 
|  | = new UnicodeBlock(0x0370, 0x03FF, | 
|  | "GREEK", | 
|  | "Greek"); | 
|  |  | 
|  | /** | 
|  | * Cyrillic. | 
|  | * 0x0400 - 0x04FF. | 
|  | */ | 
|  | public static final UnicodeBlock CYRILLIC | 
|  | = new UnicodeBlock(0x0400, 0x04FF, | 
|  | "CYRILLIC", | 
|  | "Cyrillic"); | 
|  |  | 
|  | /** | 
|  | * Cyrillic Supplementary. | 
|  | * 0x0500 - 0x052F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY | 
|  | = new UnicodeBlock(0x0500, 0x052F, | 
|  | "CYRILLIC_SUPPLEMENTARY", | 
|  | "Cyrillic Supplementary"); | 
|  |  | 
|  | /** | 
|  | * Armenian. | 
|  | * 0x0530 - 0x058F. | 
|  | */ | 
|  | public static final UnicodeBlock ARMENIAN | 
|  | = new UnicodeBlock(0x0530, 0x058F, | 
|  | "ARMENIAN", | 
|  | "Armenian"); | 
|  |  | 
|  | /** | 
|  | * Hebrew. | 
|  | * 0x0590 - 0x05FF. | 
|  | */ | 
|  | public static final UnicodeBlock HEBREW | 
|  | = new UnicodeBlock(0x0590, 0x05FF, | 
|  | "HEBREW", | 
|  | "Hebrew"); | 
|  |  | 
|  | /** | 
|  | * Arabic. | 
|  | * 0x0600 - 0x06FF. | 
|  | */ | 
|  | public static final UnicodeBlock ARABIC | 
|  | = new UnicodeBlock(0x0600, 0x06FF, | 
|  | "ARABIC", | 
|  | "Arabic"); | 
|  |  | 
|  | /** | 
|  | * Syriac. | 
|  | * 0x0700 - 0x074F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock SYRIAC | 
|  | = new UnicodeBlock(0x0700, 0x074F, | 
|  | "SYRIAC", | 
|  | "Syriac"); | 
|  |  | 
|  | /** | 
|  | * Thaana. | 
|  | * 0x0780 - 0x07BF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock THAANA | 
|  | = new UnicodeBlock(0x0780, 0x07BF, | 
|  | "THAANA", | 
|  | "Thaana"); | 
|  |  | 
|  | /** | 
|  | * Devanagari. | 
|  | * 0x0900 - 0x097F. | 
|  | */ | 
|  | public static final UnicodeBlock DEVANAGARI | 
|  | = new UnicodeBlock(0x0900, 0x097F, | 
|  | "DEVANAGARI", | 
|  | "Devanagari"); | 
|  |  | 
|  | /** | 
|  | * Bengali. | 
|  | * 0x0980 - 0x09FF. | 
|  | */ | 
|  | public static final UnicodeBlock BENGALI | 
|  | = new UnicodeBlock(0x0980, 0x09FF, | 
|  | "BENGALI", | 
|  | "Bengali"); | 
|  |  | 
|  | /** | 
|  | * Gurmukhi. | 
|  | * 0x0A00 - 0x0A7F. | 
|  | */ | 
|  | public static final UnicodeBlock GURMUKHI | 
|  | = new UnicodeBlock(0x0A00, 0x0A7F, | 
|  | "GURMUKHI", | 
|  | "Gurmukhi"); | 
|  |  | 
|  | /** | 
|  | * Gujarati. | 
|  | * 0x0A80 - 0x0AFF. | 
|  | */ | 
|  | public static final UnicodeBlock GUJARATI | 
|  | = new UnicodeBlock(0x0A80, 0x0AFF, | 
|  | "GUJARATI", | 
|  | "Gujarati"); | 
|  |  | 
|  | /** | 
|  | * Oriya. | 
|  | * 0x0B00 - 0x0B7F. | 
|  | */ | 
|  | public static final UnicodeBlock ORIYA | 
|  | = new UnicodeBlock(0x0B00, 0x0B7F, | 
|  | "ORIYA", | 
|  | "Oriya"); | 
|  |  | 
|  | /** | 
|  | * Tamil. | 
|  | * 0x0B80 - 0x0BFF. | 
|  | */ | 
|  | public static final UnicodeBlock TAMIL | 
|  | = new UnicodeBlock(0x0B80, 0x0BFF, | 
|  | "TAMIL", | 
|  | "Tamil"); | 
|  |  | 
|  | /** | 
|  | * Telugu. | 
|  | * 0x0C00 - 0x0C7F. | 
|  | */ | 
|  | public static final UnicodeBlock TELUGU | 
|  | = new UnicodeBlock(0x0C00, 0x0C7F, | 
|  | "TELUGU", | 
|  | "Telugu"); | 
|  |  | 
|  | /** | 
|  | * Kannada. | 
|  | * 0x0C80 - 0x0CFF. | 
|  | */ | 
|  | public static final UnicodeBlock KANNADA | 
|  | = new UnicodeBlock(0x0C80, 0x0CFF, | 
|  | "KANNADA", | 
|  | "Kannada"); | 
|  |  | 
|  | /** | 
|  | * Malayalam. | 
|  | * 0x0D00 - 0x0D7F. | 
|  | */ | 
|  | public static final UnicodeBlock MALAYALAM | 
|  | = new UnicodeBlock(0x0D00, 0x0D7F, | 
|  | "MALAYALAM", | 
|  | "Malayalam"); | 
|  |  | 
|  | /** | 
|  | * Sinhala. | 
|  | * 0x0D80 - 0x0DFF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock SINHALA | 
|  | = new UnicodeBlock(0x0D80, 0x0DFF, | 
|  | "SINHALA", | 
|  | "Sinhala"); | 
|  |  | 
|  | /** | 
|  | * Thai. | 
|  | * 0x0E00 - 0x0E7F. | 
|  | */ | 
|  | public static final UnicodeBlock THAI | 
|  | = new UnicodeBlock(0x0E00, 0x0E7F, | 
|  | "THAI", | 
|  | "Thai"); | 
|  |  | 
|  | /** | 
|  | * Lao. | 
|  | * 0x0E80 - 0x0EFF. | 
|  | */ | 
|  | public static final UnicodeBlock LAO | 
|  | = new UnicodeBlock(0x0E80, 0x0EFF, | 
|  | "LAO", | 
|  | "Lao"); | 
|  |  | 
|  | /** | 
|  | * Tibetan. | 
|  | * 0x0F00 - 0x0FFF. | 
|  | */ | 
|  | public static final UnicodeBlock TIBETAN | 
|  | = new UnicodeBlock(0x0F00, 0x0FFF, | 
|  | "TIBETAN", | 
|  | "Tibetan"); | 
|  |  | 
|  | /** | 
|  | * Myanmar. | 
|  | * 0x1000 - 0x109F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock MYANMAR | 
|  | = new UnicodeBlock(0x1000, 0x109F, | 
|  | "MYANMAR", | 
|  | "Myanmar"); | 
|  |  | 
|  | /** | 
|  | * Georgian. | 
|  | * 0x10A0 - 0x10FF. | 
|  | */ | 
|  | public static final UnicodeBlock GEORGIAN | 
|  | = new UnicodeBlock(0x10A0, 0x10FF, | 
|  | "GEORGIAN", | 
|  | "Georgian"); | 
|  |  | 
|  | /** | 
|  | * Hangul Jamo. | 
|  | * 0x1100 - 0x11FF. | 
|  | */ | 
|  | public static final UnicodeBlock HANGUL_JAMO | 
|  | = new UnicodeBlock(0x1100, 0x11FF, | 
|  | "HANGUL_JAMO", | 
|  | "Hangul Jamo"); | 
|  |  | 
|  | /** | 
|  | * Ethiopic. | 
|  | * 0x1200 - 0x137F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock ETHIOPIC | 
|  | = new UnicodeBlock(0x1200, 0x137F, | 
|  | "ETHIOPIC", | 
|  | "Ethiopic"); | 
|  |  | 
|  | /** | 
|  | * Cherokee. | 
|  | * 0x13A0 - 0x13FF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock CHEROKEE | 
|  | = new UnicodeBlock(0x13A0, 0x13FF, | 
|  | "CHEROKEE", | 
|  | "Cherokee"); | 
|  |  | 
|  | /** | 
|  | * Unified Canadian Aboriginal Syllabics. | 
|  | * 0x1400 - 0x167F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS | 
|  | = new UnicodeBlock(0x1400, 0x167F, | 
|  | "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", | 
|  | "Unified Canadian Aboriginal Syllabics"); | 
|  |  | 
|  | /** | 
|  | * Ogham. | 
|  | * 0x1680 - 0x169F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock OGHAM | 
|  | = new UnicodeBlock(0x1680, 0x169F, | 
|  | "OGHAM", | 
|  | "Ogham"); | 
|  |  | 
|  | /** | 
|  | * Runic. | 
|  | * 0x16A0 - 0x16FF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock RUNIC | 
|  | = new UnicodeBlock(0x16A0, 0x16FF, | 
|  | "RUNIC", | 
|  | "Runic"); | 
|  |  | 
|  | /** | 
|  | * Tagalog. | 
|  | * 0x1700 - 0x171F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock TAGALOG | 
|  | = new UnicodeBlock(0x1700, 0x171F, | 
|  | "TAGALOG", | 
|  | "Tagalog"); | 
|  |  | 
|  | /** | 
|  | * Hanunoo. | 
|  | * 0x1720 - 0x173F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock HANUNOO | 
|  | = new UnicodeBlock(0x1720, 0x173F, | 
|  | "HANUNOO", | 
|  | "Hanunoo"); | 
|  |  | 
|  | /** | 
|  | * Buhid. | 
|  | * 0x1740 - 0x175F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock BUHID | 
|  | = new UnicodeBlock(0x1740, 0x175F, | 
|  | "BUHID", | 
|  | "Buhid"); | 
|  |  | 
|  | /** | 
|  | * Tagbanwa. | 
|  | * 0x1760 - 0x177F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock TAGBANWA | 
|  | = new UnicodeBlock(0x1760, 0x177F, | 
|  | "TAGBANWA", | 
|  | "Tagbanwa"); | 
|  |  | 
|  | /** | 
|  | * Khmer. | 
|  | * 0x1780 - 0x17FF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock KHMER | 
|  | = new UnicodeBlock(0x1780, 0x17FF, | 
|  | "KHMER", | 
|  | "Khmer"); | 
|  |  | 
|  | /** | 
|  | * Mongolian. | 
|  | * 0x1800 - 0x18AF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock MONGOLIAN | 
|  | = new UnicodeBlock(0x1800, 0x18AF, | 
|  | "MONGOLIAN", | 
|  | "Mongolian"); | 
|  |  | 
|  | /** | 
|  | * Limbu. | 
|  | * 0x1900 - 0x194F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock LIMBU | 
|  | = new UnicodeBlock(0x1900, 0x194F, | 
|  | "LIMBU", | 
|  | "Limbu"); | 
|  |  | 
|  | /** | 
|  | * Tai Le. | 
|  | * 0x1950 - 0x197F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock TAI_LE | 
|  | = new UnicodeBlock(0x1950, 0x197F, | 
|  | "TAI_LE", | 
|  | "Tai Le"); | 
|  |  | 
|  | /** | 
|  | * Khmer Symbols. | 
|  | * 0x19E0 - 0x19FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock KHMER_SYMBOLS | 
|  | = new UnicodeBlock(0x19E0, 0x19FF, | 
|  | "KHMER_SYMBOLS", | 
|  | "Khmer Symbols"); | 
|  |  | 
|  | /** | 
|  | * Phonetic Extensions. | 
|  | * 0x1D00 - 0x1D7F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock PHONETIC_EXTENSIONS | 
|  | = new UnicodeBlock(0x1D00, 0x1D7F, | 
|  | "PHONETIC_EXTENSIONS", | 
|  | "Phonetic Extensions"); | 
|  |  | 
|  | /** | 
|  | * Latin Extended Additional. | 
|  | * 0x1E00 - 0x1EFF. | 
|  | */ | 
|  | public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL | 
|  | = new UnicodeBlock(0x1E00, 0x1EFF, | 
|  | "LATIN_EXTENDED_ADDITIONAL", | 
|  | "Latin Extended Additional"); | 
|  |  | 
|  | /** | 
|  | * Greek Extended. | 
|  | * 0x1F00 - 0x1FFF. | 
|  | */ | 
|  | public static final UnicodeBlock GREEK_EXTENDED | 
|  | = new UnicodeBlock(0x1F00, 0x1FFF, | 
|  | "GREEK_EXTENDED", | 
|  | "Greek Extended"); | 
|  |  | 
|  | /** | 
|  | * General Punctuation. | 
|  | * 0x2000 - 0x206F. | 
|  | */ | 
|  | public static final UnicodeBlock GENERAL_PUNCTUATION | 
|  | = new UnicodeBlock(0x2000, 0x206F, | 
|  | "GENERAL_PUNCTUATION", | 
|  | "General Punctuation"); | 
|  |  | 
|  | /** | 
|  | * Superscripts and Subscripts. | 
|  | * 0x2070 - 0x209F. | 
|  | */ | 
|  | public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS | 
|  | = new UnicodeBlock(0x2070, 0x209F, | 
|  | "SUPERSCRIPTS_AND_SUBSCRIPTS", | 
|  | "Superscripts and Subscripts"); | 
|  |  | 
|  | /** | 
|  | * Currency Symbols. | 
|  | * 0x20A0 - 0x20CF. | 
|  | */ | 
|  | public static final UnicodeBlock CURRENCY_SYMBOLS | 
|  | = new UnicodeBlock(0x20A0, 0x20CF, | 
|  | "CURRENCY_SYMBOLS", | 
|  | "Currency Symbols"); | 
|  |  | 
|  | /** | 
|  | * Combining Marks for Symbols. | 
|  | * 0x20D0 - 0x20FF. | 
|  | */ | 
|  | public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS | 
|  | = new UnicodeBlock(0x20D0, 0x20FF, | 
|  | "COMBINING_MARKS_FOR_SYMBOLS", | 
|  | "Combining Marks for Symbols"); | 
|  |  | 
|  | /** | 
|  | * Letterlike Symbols. | 
|  | * 0x2100 - 0x214F. | 
|  | */ | 
|  | public static final UnicodeBlock LETTERLIKE_SYMBOLS | 
|  | = new UnicodeBlock(0x2100, 0x214F, | 
|  | "LETTERLIKE_SYMBOLS", | 
|  | "Letterlike Symbols"); | 
|  |  | 
|  | /** | 
|  | * Number Forms. | 
|  | * 0x2150 - 0x218F. | 
|  | */ | 
|  | public static final UnicodeBlock NUMBER_FORMS | 
|  | = new UnicodeBlock(0x2150, 0x218F, | 
|  | "NUMBER_FORMS", | 
|  | "Number Forms"); | 
|  |  | 
|  | /** | 
|  | * Arrows. | 
|  | * 0x2190 - 0x21FF. | 
|  | */ | 
|  | public static final UnicodeBlock ARROWS | 
|  | = new UnicodeBlock(0x2190, 0x21FF, | 
|  | "ARROWS", | 
|  | "Arrows"); | 
|  |  | 
|  | /** | 
|  | * Mathematical Operators. | 
|  | * 0x2200 - 0x22FF. | 
|  | */ | 
|  | public static final UnicodeBlock MATHEMATICAL_OPERATORS | 
|  | = new UnicodeBlock(0x2200, 0x22FF, | 
|  | "MATHEMATICAL_OPERATORS", | 
|  | "Mathematical Operators"); | 
|  |  | 
|  | /** | 
|  | * Miscellaneous Technical. | 
|  | * 0x2300 - 0x23FF. | 
|  | */ | 
|  | public static final UnicodeBlock MISCELLANEOUS_TECHNICAL | 
|  | = new UnicodeBlock(0x2300, 0x23FF, | 
|  | "MISCELLANEOUS_TECHNICAL", | 
|  | "Miscellaneous Technical"); | 
|  |  | 
|  | /** | 
|  | * Control Pictures. | 
|  | * 0x2400 - 0x243F. | 
|  | */ | 
|  | public static final UnicodeBlock CONTROL_PICTURES | 
|  | = new UnicodeBlock(0x2400, 0x243F, | 
|  | "CONTROL_PICTURES", | 
|  | "Control Pictures"); | 
|  |  | 
|  | /** | 
|  | * Optical Character Recognition. | 
|  | * 0x2440 - 0x245F. | 
|  | */ | 
|  | public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION | 
|  | = new UnicodeBlock(0x2440, 0x245F, | 
|  | "OPTICAL_CHARACTER_RECOGNITION", | 
|  | "Optical Character Recognition"); | 
|  |  | 
|  | /** | 
|  | * Enclosed Alphanumerics. | 
|  | * 0x2460 - 0x24FF. | 
|  | */ | 
|  | public static final UnicodeBlock ENCLOSED_ALPHANUMERICS | 
|  | = new UnicodeBlock(0x2460, 0x24FF, | 
|  | "ENCLOSED_ALPHANUMERICS", | 
|  | "Enclosed Alphanumerics"); | 
|  |  | 
|  | /** | 
|  | * Box Drawing. | 
|  | * 0x2500 - 0x257F. | 
|  | */ | 
|  | public static final UnicodeBlock BOX_DRAWING | 
|  | = new UnicodeBlock(0x2500, 0x257F, | 
|  | "BOX_DRAWING", | 
|  | "Box Drawing"); | 
|  |  | 
|  | /** | 
|  | * Block Elements. | 
|  | * 0x2580 - 0x259F. | 
|  | */ | 
|  | public static final UnicodeBlock BLOCK_ELEMENTS | 
|  | = new UnicodeBlock(0x2580, 0x259F, | 
|  | "BLOCK_ELEMENTS", | 
|  | "Block Elements"); | 
|  |  | 
|  | /** | 
|  | * Geometric Shapes. | 
|  | * 0x25A0 - 0x25FF. | 
|  | */ | 
|  | public static final UnicodeBlock GEOMETRIC_SHAPES | 
|  | = new UnicodeBlock(0x25A0, 0x25FF, | 
|  | "GEOMETRIC_SHAPES", | 
|  | "Geometric Shapes"); | 
|  |  | 
|  | /** | 
|  | * Miscellaneous Symbols. | 
|  | * 0x2600 - 0x26FF. | 
|  | */ | 
|  | public static final UnicodeBlock MISCELLANEOUS_SYMBOLS | 
|  | = new UnicodeBlock(0x2600, 0x26FF, | 
|  | "MISCELLANEOUS_SYMBOLS", | 
|  | "Miscellaneous Symbols"); | 
|  |  | 
|  | /** | 
|  | * Dingbats. | 
|  | * 0x2700 - 0x27BF. | 
|  | */ | 
|  | public static final UnicodeBlock DINGBATS | 
|  | = new UnicodeBlock(0x2700, 0x27BF, | 
|  | "DINGBATS", | 
|  | "Dingbats"); | 
|  |  | 
|  | /** | 
|  | * Miscellaneous Mathematical Symbols-A. | 
|  | * 0x27C0 - 0x27EF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A | 
|  | = new UnicodeBlock(0x27C0, 0x27EF, | 
|  | "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", | 
|  | "Miscellaneous Mathematical Symbols-A"); | 
|  |  | 
|  | /** | 
|  | * Supplemental Arrows-A. | 
|  | * 0x27F0 - 0x27FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A | 
|  | = new UnicodeBlock(0x27F0, 0x27FF, | 
|  | "SUPPLEMENTAL_ARROWS_A", | 
|  | "Supplemental Arrows-A"); | 
|  |  | 
|  | /** | 
|  | * Braille Patterns. | 
|  | * 0x2800 - 0x28FF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock BRAILLE_PATTERNS | 
|  | = new UnicodeBlock(0x2800, 0x28FF, | 
|  | "BRAILLE_PATTERNS", | 
|  | "Braille Patterns"); | 
|  |  | 
|  | /** | 
|  | * Supplemental Arrows-B. | 
|  | * 0x2900 - 0x297F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B | 
|  | = new UnicodeBlock(0x2900, 0x297F, | 
|  | "SUPPLEMENTAL_ARROWS_B", | 
|  | "Supplemental Arrows-B"); | 
|  |  | 
|  | /** | 
|  | * Miscellaneous Mathematical Symbols-B. | 
|  | * 0x2980 - 0x29FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B | 
|  | = new UnicodeBlock(0x2980, 0x29FF, | 
|  | "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", | 
|  | "Miscellaneous Mathematical Symbols-B"); | 
|  |  | 
|  | /** | 
|  | * Supplemental Mathematical Operators. | 
|  | * 0x2A00 - 0x2AFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS | 
|  | = new UnicodeBlock(0x2A00, 0x2AFF, | 
|  | "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", | 
|  | "Supplemental Mathematical Operators"); | 
|  |  | 
|  | /** | 
|  | * Miscellaneous Symbols and Arrows. | 
|  | * 0x2B00 - 0x2BFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS | 
|  | = new UnicodeBlock(0x2B00, 0x2BFF, | 
|  | "MISCELLANEOUS_SYMBOLS_AND_ARROWS", | 
|  | "Miscellaneous Symbols and Arrows"); | 
|  |  | 
|  | /** | 
|  | * CJK Radicals Supplement. | 
|  | * 0x2E80 - 0x2EFF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT | 
|  | = new UnicodeBlock(0x2E80, 0x2EFF, | 
|  | "CJK_RADICALS_SUPPLEMENT", | 
|  | "CJK Radicals Supplement"); | 
|  |  | 
|  | /** | 
|  | * Kangxi Radicals. | 
|  | * 0x2F00 - 0x2FDF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock KANGXI_RADICALS | 
|  | = new UnicodeBlock(0x2F00, 0x2FDF, | 
|  | "KANGXI_RADICALS", | 
|  | "Kangxi Radicals"); | 
|  |  | 
|  | /** | 
|  | * Ideographic Description Characters. | 
|  | * 0x2FF0 - 0x2FFF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS | 
|  | = new UnicodeBlock(0x2FF0, 0x2FFF, | 
|  | "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", | 
|  | "Ideographic Description Characters"); | 
|  |  | 
|  | /** | 
|  | * CJK Symbols and Punctuation. | 
|  | * 0x3000 - 0x303F. | 
|  | */ | 
|  | public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION | 
|  | = new UnicodeBlock(0x3000, 0x303F, | 
|  | "CJK_SYMBOLS_AND_PUNCTUATION", | 
|  | "CJK Symbols and Punctuation"); | 
|  |  | 
|  | /** | 
|  | * Hiragana. | 
|  | * 0x3040 - 0x309F. | 
|  | */ | 
|  | public static final UnicodeBlock HIRAGANA | 
|  | = new UnicodeBlock(0x3040, 0x309F, | 
|  | "HIRAGANA", | 
|  | "Hiragana"); | 
|  |  | 
|  | /** | 
|  | * Katakana. | 
|  | * 0x30A0 - 0x30FF. | 
|  | */ | 
|  | public static final UnicodeBlock KATAKANA | 
|  | = new UnicodeBlock(0x30A0, 0x30FF, | 
|  | "KATAKANA", | 
|  | "Katakana"); | 
|  |  | 
|  | /** | 
|  | * Bopomofo. | 
|  | * 0x3100 - 0x312F. | 
|  | */ | 
|  | public static final UnicodeBlock BOPOMOFO | 
|  | = new UnicodeBlock(0x3100, 0x312F, | 
|  | "BOPOMOFO", | 
|  | "Bopomofo"); | 
|  |  | 
|  | /** | 
|  | * Hangul Compatibility Jamo. | 
|  | * 0x3130 - 0x318F. | 
|  | */ | 
|  | public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO | 
|  | = new UnicodeBlock(0x3130, 0x318F, | 
|  | "HANGUL_COMPATIBILITY_JAMO", | 
|  | "Hangul Compatibility Jamo"); | 
|  |  | 
|  | /** | 
|  | * Kanbun. | 
|  | * 0x3190 - 0x319F. | 
|  | */ | 
|  | public static final UnicodeBlock KANBUN | 
|  | = new UnicodeBlock(0x3190, 0x319F, | 
|  | "KANBUN", | 
|  | "Kanbun"); | 
|  |  | 
|  | /** | 
|  | * Bopomofo Extended. | 
|  | * 0x31A0 - 0x31BF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock BOPOMOFO_EXTENDED | 
|  | = new UnicodeBlock(0x31A0, 0x31BF, | 
|  | "BOPOMOFO_EXTENDED", | 
|  | "Bopomofo Extended"); | 
|  |  | 
|  | /** | 
|  | * Katakana Phonetic Extensions. | 
|  | * 0x31F0 - 0x31FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS | 
|  | = new UnicodeBlock(0x31F0, 0x31FF, | 
|  | "KATAKANA_PHONETIC_EXTENSIONS", | 
|  | "Katakana Phonetic Extensions"); | 
|  |  | 
|  | /** | 
|  | * Enclosed CJK Letters and Months. | 
|  | * 0x3200 - 0x32FF. | 
|  | */ | 
|  | public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS | 
|  | = new UnicodeBlock(0x3200, 0x32FF, | 
|  | "ENCLOSED_CJK_LETTERS_AND_MONTHS", | 
|  | "Enclosed CJK Letters and Months"); | 
|  |  | 
|  | /** | 
|  | * CJK Compatibility. | 
|  | * 0x3300 - 0x33FF. | 
|  | */ | 
|  | public static final UnicodeBlock CJK_COMPATIBILITY | 
|  | = new UnicodeBlock(0x3300, 0x33FF, | 
|  | "CJK_COMPATIBILITY", | 
|  | "CJK Compatibility"); | 
|  |  | 
|  | /** | 
|  | * CJK Unified Ideographs Extension A. | 
|  | * 0x3400 - 0x4DBF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A | 
|  | = new UnicodeBlock(0x3400, 0x4DBF, | 
|  | "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", | 
|  | "CJK Unified Ideographs Extension A"); | 
|  |  | 
|  | /** | 
|  | * Yijing Hexagram Symbols. | 
|  | * 0x4DC0 - 0x4DFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS | 
|  | = new UnicodeBlock(0x4DC0, 0x4DFF, | 
|  | "YIJING_HEXAGRAM_SYMBOLS", | 
|  | "Yijing Hexagram Symbols"); | 
|  |  | 
|  | /** | 
|  | * CJK Unified Ideographs. | 
|  | * 0x4E00 - 0x9FFF. | 
|  | */ | 
|  | public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS | 
|  | = new UnicodeBlock(0x4E00, 0x9FFF, | 
|  | "CJK_UNIFIED_IDEOGRAPHS", | 
|  | "CJK Unified Ideographs"); | 
|  |  | 
|  | /** | 
|  | * Yi Syllables. | 
|  | * 0xA000 - 0xA48F. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock YI_SYLLABLES | 
|  | = new UnicodeBlock(0xA000, 0xA48F, | 
|  | "YI_SYLLABLES", | 
|  | "Yi Syllables"); | 
|  |  | 
|  | /** | 
|  | * Yi Radicals. | 
|  | * 0xA490 - 0xA4CF. | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final UnicodeBlock YI_RADICALS | 
|  | = new UnicodeBlock(0xA490, 0xA4CF, | 
|  | "YI_RADICALS", | 
|  | "Yi Radicals"); | 
|  |  | 
|  | /** | 
|  | * Hangul Syllables. | 
|  | * 0xAC00 - 0xD7AF. | 
|  | */ | 
|  | public static final UnicodeBlock HANGUL_SYLLABLES | 
|  | = new UnicodeBlock(0xAC00, 0xD7AF, | 
|  | "HANGUL_SYLLABLES", | 
|  | "Hangul Syllables"); | 
|  |  | 
|  | /** | 
|  | * High Surrogates. | 
|  | * 0xD800 - 0xDB7F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock HIGH_SURROGATES | 
|  | = new UnicodeBlock(0xD800, 0xDB7F, | 
|  | "HIGH_SURROGATES", | 
|  | "High Surrogates"); | 
|  |  | 
|  | /** | 
|  | * High Private Use Surrogates. | 
|  | * 0xDB80 - 0xDBFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES | 
|  | = new UnicodeBlock(0xDB80, 0xDBFF, | 
|  | "HIGH_PRIVATE_USE_SURROGATES", | 
|  | "High Private Use Surrogates"); | 
|  |  | 
|  | /** | 
|  | * Low Surrogates. | 
|  | * 0xDC00 - 0xDFFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock LOW_SURROGATES | 
|  | = new UnicodeBlock(0xDC00, 0xDFFF, | 
|  | "LOW_SURROGATES", | 
|  | "Low Surrogates"); | 
|  |  | 
|  | /** | 
|  | * Private Use Area. | 
|  | * 0xE000 - 0xF8FF. | 
|  | */ | 
|  | public static final UnicodeBlock PRIVATE_USE_AREA | 
|  | = new UnicodeBlock(0xE000, 0xF8FF, | 
|  | "PRIVATE_USE_AREA", | 
|  | "Private Use Area"); | 
|  |  | 
|  | /** | 
|  | * CJK Compatibility Ideographs. | 
|  | * 0xF900 - 0xFAFF. | 
|  | */ | 
|  | public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS | 
|  | = new UnicodeBlock(0xF900, 0xFAFF, | 
|  | "CJK_COMPATIBILITY_IDEOGRAPHS", | 
|  | "CJK Compatibility Ideographs"); | 
|  |  | 
|  | /** | 
|  | * Alphabetic Presentation Forms. | 
|  | * 0xFB00 - 0xFB4F. | 
|  | */ | 
|  | public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS | 
|  | = new UnicodeBlock(0xFB00, 0xFB4F, | 
|  | "ALPHABETIC_PRESENTATION_FORMS", | 
|  | "Alphabetic Presentation Forms"); | 
|  |  | 
|  | /** | 
|  | * Arabic Presentation Forms-A. | 
|  | * 0xFB50 - 0xFDFF. | 
|  | */ | 
|  | public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A | 
|  | = new UnicodeBlock(0xFB50, 0xFDFF, | 
|  | "ARABIC_PRESENTATION_FORMS_A", | 
|  | "Arabic Presentation Forms-A"); | 
|  |  | 
|  | /** | 
|  | * Variation Selectors. | 
|  | * 0xFE00 - 0xFE0F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock VARIATION_SELECTORS | 
|  | = new UnicodeBlock(0xFE00, 0xFE0F, | 
|  | "VARIATION_SELECTORS", | 
|  | "Variation Selectors"); | 
|  |  | 
|  | /** | 
|  | * Combining Half Marks. | 
|  | * 0xFE20 - 0xFE2F. | 
|  | */ | 
|  | public static final UnicodeBlock COMBINING_HALF_MARKS | 
|  | = new UnicodeBlock(0xFE20, 0xFE2F, | 
|  | "COMBINING_HALF_MARKS", | 
|  | "Combining Half Marks"); | 
|  |  | 
|  | /** | 
|  | * CJK Compatibility Forms. | 
|  | * 0xFE30 - 0xFE4F. | 
|  | */ | 
|  | public static final UnicodeBlock CJK_COMPATIBILITY_FORMS | 
|  | = new UnicodeBlock(0xFE30, 0xFE4F, | 
|  | "CJK_COMPATIBILITY_FORMS", | 
|  | "CJK Compatibility Forms"); | 
|  |  | 
|  | /** | 
|  | * Small Form Variants. | 
|  | * 0xFE50 - 0xFE6F. | 
|  | */ | 
|  | public static final UnicodeBlock SMALL_FORM_VARIANTS | 
|  | = new UnicodeBlock(0xFE50, 0xFE6F, | 
|  | "SMALL_FORM_VARIANTS", | 
|  | "Small Form Variants"); | 
|  |  | 
|  | /** | 
|  | * Arabic Presentation Forms-B. | 
|  | * 0xFE70 - 0xFEFF. | 
|  | */ | 
|  | public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B | 
|  | = new UnicodeBlock(0xFE70, 0xFEFF, | 
|  | "ARABIC_PRESENTATION_FORMS_B", | 
|  | "Arabic Presentation Forms-B"); | 
|  |  | 
|  | /** | 
|  | * Halfwidth and Fullwidth Forms. | 
|  | * 0xFF00 - 0xFFEF. | 
|  | */ | 
|  | public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS | 
|  | = new UnicodeBlock(0xFF00, 0xFFEF, | 
|  | "HALFWIDTH_AND_FULLWIDTH_FORMS", | 
|  | "Halfwidth and Fullwidth Forms"); | 
|  |  | 
|  | /** | 
|  | * Specials. | 
|  | * 0xFFF0 - 0xFFFF. | 
|  | */ | 
|  | public static final UnicodeBlock SPECIALS | 
|  | = new UnicodeBlock(0xFFF0, 0xFFFF, | 
|  | "SPECIALS", | 
|  | "Specials"); | 
|  |  | 
|  | /** | 
|  | * Linear B Syllabary. | 
|  | * 0x10000 - 0x1007F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock LINEAR_B_SYLLABARY | 
|  | = new UnicodeBlock(0x10000, 0x1007F, | 
|  | "LINEAR_B_SYLLABARY", | 
|  | "Linear B Syllabary"); | 
|  |  | 
|  | /** | 
|  | * Linear B Ideograms. | 
|  | * 0x10080 - 0x100FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock LINEAR_B_IDEOGRAMS | 
|  | = new UnicodeBlock(0x10080, 0x100FF, | 
|  | "LINEAR_B_IDEOGRAMS", | 
|  | "Linear B Ideograms"); | 
|  |  | 
|  | /** | 
|  | * Aegean Numbers. | 
|  | * 0x10100 - 0x1013F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock AEGEAN_NUMBERS | 
|  | = new UnicodeBlock(0x10100, 0x1013F, | 
|  | "AEGEAN_NUMBERS", | 
|  | "Aegean Numbers"); | 
|  |  | 
|  | /** | 
|  | * Old Italic. | 
|  | * 0x10300 - 0x1032F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock OLD_ITALIC | 
|  | = new UnicodeBlock(0x10300, 0x1032F, | 
|  | "OLD_ITALIC", | 
|  | "Old Italic"); | 
|  |  | 
|  | /** | 
|  | * Gothic. | 
|  | * 0x10330 - 0x1034F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock GOTHIC | 
|  | = new UnicodeBlock(0x10330, 0x1034F, | 
|  | "GOTHIC", | 
|  | "Gothic"); | 
|  |  | 
|  | /** | 
|  | * Ugaritic. | 
|  | * 0x10380 - 0x1039F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock UGARITIC | 
|  | = new UnicodeBlock(0x10380, 0x1039F, | 
|  | "UGARITIC", | 
|  | "Ugaritic"); | 
|  |  | 
|  | /** | 
|  | * Deseret. | 
|  | * 0x10400 - 0x1044F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock DESERET | 
|  | = new UnicodeBlock(0x10400, 0x1044F, | 
|  | "DESERET", | 
|  | "Deseret"); | 
|  |  | 
|  | /** | 
|  | * Shavian. | 
|  | * 0x10450 - 0x1047F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SHAVIAN | 
|  | = new UnicodeBlock(0x10450, 0x1047F, | 
|  | "SHAVIAN", | 
|  | "Shavian"); | 
|  |  | 
|  | /** | 
|  | * Osmanya. | 
|  | * 0x10480 - 0x104AF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock OSMANYA | 
|  | = new UnicodeBlock(0x10480, 0x104AF, | 
|  | "OSMANYA", | 
|  | "Osmanya"); | 
|  |  | 
|  | /** | 
|  | * Cypriot Syllabary. | 
|  | * 0x10800 - 0x1083F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock CYPRIOT_SYLLABARY | 
|  | = new UnicodeBlock(0x10800, 0x1083F, | 
|  | "CYPRIOT_SYLLABARY", | 
|  | "Cypriot Syllabary"); | 
|  |  | 
|  | /** | 
|  | * Byzantine Musical Symbols. | 
|  | * 0x1D000 - 0x1D0FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS | 
|  | = new UnicodeBlock(0x1D000, 0x1D0FF, | 
|  | "BYZANTINE_MUSICAL_SYMBOLS", | 
|  | "Byzantine Musical Symbols"); | 
|  |  | 
|  | /** | 
|  | * Musical Symbols. | 
|  | * 0x1D100 - 0x1D1FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock MUSICAL_SYMBOLS | 
|  | = new UnicodeBlock(0x1D100, 0x1D1FF, | 
|  | "MUSICAL_SYMBOLS", | 
|  | "Musical Symbols"); | 
|  |  | 
|  | /** | 
|  | * Tai Xuan Jing Symbols. | 
|  | * 0x1D300 - 0x1D35F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS | 
|  | = new UnicodeBlock(0x1D300, 0x1D35F, | 
|  | "TAI_XUAN_JING_SYMBOLS", | 
|  | "Tai Xuan Jing Symbols"); | 
|  |  | 
|  | /** | 
|  | * Mathematical Alphanumeric Symbols. | 
|  | * 0x1D400 - 0x1D7FF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS | 
|  | = new UnicodeBlock(0x1D400, 0x1D7FF, | 
|  | "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", | 
|  | "Mathematical Alphanumeric Symbols"); | 
|  |  | 
|  | /** | 
|  | * CJK Unified Ideographs Extension B. | 
|  | * 0x20000 - 0x2A6DF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B | 
|  | = new UnicodeBlock(0x20000, 0x2A6DF, | 
|  | "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", | 
|  | "CJK Unified Ideographs Extension B"); | 
|  |  | 
|  | /** | 
|  | * CJK Compatibility Ideographs Supplement. | 
|  | * 0x2F800 - 0x2FA1F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT | 
|  | = new UnicodeBlock(0x2F800, 0x2FA1F, | 
|  | "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", | 
|  | "CJK Compatibility Ideographs Supplement"); | 
|  |  | 
|  | /** | 
|  | * Tags. | 
|  | * 0xE0000 - 0xE007F. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock TAGS | 
|  | = new UnicodeBlock(0xE0000, 0xE007F, | 
|  | "TAGS", | 
|  | "Tags"); | 
|  |  | 
|  | /** | 
|  | * Variation Selectors Supplement. | 
|  | * 0xE0100 - 0xE01EF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT | 
|  | = new UnicodeBlock(0xE0100, 0xE01EF, | 
|  | "VARIATION_SELECTORS_SUPPLEMENT", | 
|  | "Variation Selectors Supplement"); | 
|  |  | 
|  | /** | 
|  | * Supplementary Private Use Area-A. | 
|  | * 0xF0000 - 0xFFFFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A | 
|  | = new UnicodeBlock(0xF0000, 0xFFFFF, | 
|  | "SUPPLEMENTARY_PRIVATE_USE_AREA_A", | 
|  | "Supplementary Private Use Area-A"); | 
|  |  | 
|  | /** | 
|  | * Supplementary Private Use Area-B. | 
|  | * 0x100000 - 0x10FFFF. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B | 
|  | = new UnicodeBlock(0x100000, 0x10FFFF, | 
|  | "SUPPLEMENTARY_PRIVATE_USE_AREA_B", | 
|  | "Supplementary Private Use Area-B"); | 
|  |  | 
|  | /** | 
|  | * Surrogates Area. | 
|  | * 'D800' - 'DFFF'. | 
|  | * @deprecated As of 1.5, the three areas, | 
|  | * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>, | 
|  | * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a> | 
|  | * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined | 
|  | * by the Unicode standard, should be used in preference to | 
|  | * this.  These are also returned from calls to <code>of(int)</code> | 
|  | * and <code>of(char)</code>. | 
|  | */ | 
|  | @Deprecated | 
|  | public static final UnicodeBlock SURROGATES_AREA | 
|  | = new UnicodeBlock(0xD800, 0xDFFF, | 
|  | "SURROGATES_AREA", | 
|  | "Surrogates Area"); | 
|  |  | 
|  | /** | 
|  | * The defined subsets. | 
|  | */ | 
|  | private static final UnicodeBlock sets[] = { | 
|  | BASIC_LATIN, | 
|  | LATIN_1_SUPPLEMENT, | 
|  | LATIN_EXTENDED_A, | 
|  | LATIN_EXTENDED_B, | 
|  | IPA_EXTENSIONS, | 
|  | SPACING_MODIFIER_LETTERS, | 
|  | COMBINING_DIACRITICAL_MARKS, | 
|  | GREEK, | 
|  | CYRILLIC, | 
|  | CYRILLIC_SUPPLEMENTARY, | 
|  | ARMENIAN, | 
|  | HEBREW, | 
|  | ARABIC, | 
|  | SYRIAC, | 
|  | THAANA, | 
|  | DEVANAGARI, | 
|  | BENGALI, | 
|  | GURMUKHI, | 
|  | GUJARATI, | 
|  | ORIYA, | 
|  | TAMIL, | 
|  | TELUGU, | 
|  | KANNADA, | 
|  | MALAYALAM, | 
|  | SINHALA, | 
|  | THAI, | 
|  | LAO, | 
|  | TIBETAN, | 
|  | MYANMAR, | 
|  | GEORGIAN, | 
|  | HANGUL_JAMO, | 
|  | ETHIOPIC, | 
|  | CHEROKEE, | 
|  | UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, | 
|  | OGHAM, | 
|  | RUNIC, | 
|  | TAGALOG, | 
|  | HANUNOO, | 
|  | BUHID, | 
|  | TAGBANWA, | 
|  | KHMER, | 
|  | MONGOLIAN, | 
|  | LIMBU, | 
|  | TAI_LE, | 
|  | KHMER_SYMBOLS, | 
|  | PHONETIC_EXTENSIONS, | 
|  | LATIN_EXTENDED_ADDITIONAL, | 
|  | GREEK_EXTENDED, | 
|  | GENERAL_PUNCTUATION, | 
|  | SUPERSCRIPTS_AND_SUBSCRIPTS, | 
|  | CURRENCY_SYMBOLS, | 
|  | COMBINING_MARKS_FOR_SYMBOLS, | 
|  | LETTERLIKE_SYMBOLS, | 
|  | NUMBER_FORMS, | 
|  | ARROWS, | 
|  | MATHEMATICAL_OPERATORS, | 
|  | MISCELLANEOUS_TECHNICAL, | 
|  | CONTROL_PICTURES, | 
|  | OPTICAL_CHARACTER_RECOGNITION, | 
|  | ENCLOSED_ALPHANUMERICS, | 
|  | BOX_DRAWING, | 
|  | BLOCK_ELEMENTS, | 
|  | GEOMETRIC_SHAPES, | 
|  | MISCELLANEOUS_SYMBOLS, | 
|  | DINGBATS, | 
|  | MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, | 
|  | SUPPLEMENTAL_ARROWS_A, | 
|  | BRAILLE_PATTERNS, | 
|  | SUPPLEMENTAL_ARROWS_B, | 
|  | MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, | 
|  | SUPPLEMENTAL_MATHEMATICAL_OPERATORS, | 
|  | MISCELLANEOUS_SYMBOLS_AND_ARROWS, | 
|  | CJK_RADICALS_SUPPLEMENT, | 
|  | KANGXI_RADICALS, | 
|  | IDEOGRAPHIC_DESCRIPTION_CHARACTERS, | 
|  | CJK_SYMBOLS_AND_PUNCTUATION, | 
|  | HIRAGANA, | 
|  | KATAKANA, | 
|  | BOPOMOFO, | 
|  | HANGUL_COMPATIBILITY_JAMO, | 
|  | KANBUN, | 
|  | BOPOMOFO_EXTENDED, | 
|  | KATAKANA_PHONETIC_EXTENSIONS, | 
|  | ENCLOSED_CJK_LETTERS_AND_MONTHS, | 
|  | CJK_COMPATIBILITY, | 
|  | CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, | 
|  | YIJING_HEXAGRAM_SYMBOLS, | 
|  | CJK_UNIFIED_IDEOGRAPHS, | 
|  | YI_SYLLABLES, | 
|  | YI_RADICALS, | 
|  | HANGUL_SYLLABLES, | 
|  | HIGH_SURROGATES, | 
|  | HIGH_PRIVATE_USE_SURROGATES, | 
|  | LOW_SURROGATES, | 
|  | PRIVATE_USE_AREA, | 
|  | CJK_COMPATIBILITY_IDEOGRAPHS, | 
|  | ALPHABETIC_PRESENTATION_FORMS, | 
|  | ARABIC_PRESENTATION_FORMS_A, | 
|  | VARIATION_SELECTORS, | 
|  | COMBINING_HALF_MARKS, | 
|  | CJK_COMPATIBILITY_FORMS, | 
|  | SMALL_FORM_VARIANTS, | 
|  | ARABIC_PRESENTATION_FORMS_B, | 
|  | HALFWIDTH_AND_FULLWIDTH_FORMS, | 
|  | SPECIALS, | 
|  | LINEAR_B_SYLLABARY, | 
|  | LINEAR_B_IDEOGRAMS, | 
|  | AEGEAN_NUMBERS, | 
|  | OLD_ITALIC, | 
|  | GOTHIC, | 
|  | UGARITIC, | 
|  | DESERET, | 
|  | SHAVIAN, | 
|  | OSMANYA, | 
|  | CYPRIOT_SYLLABARY, | 
|  | BYZANTINE_MUSICAL_SYMBOLS, | 
|  | MUSICAL_SYMBOLS, | 
|  | TAI_XUAN_JING_SYMBOLS, | 
|  | MATHEMATICAL_ALPHANUMERIC_SYMBOLS, | 
|  | CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, | 
|  | CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, | 
|  | TAGS, | 
|  | VARIATION_SELECTORS_SUPPLEMENT, | 
|  | SUPPLEMENTARY_PRIVATE_USE_AREA_A, | 
|  | SUPPLEMENTARY_PRIVATE_USE_AREA_B, | 
|  | }; | 
|  | } // class UnicodeBlock | 
|  |  | 
|  | /** | 
|  | * A class to encompass all the properties of characters in the | 
|  | * private use blocks in the Unicode standard.  This class extends | 
|  | * UnassignedCharacters because the return type from getType() is | 
|  | * different. | 
|  | * @author Anthony Balkissoon abalkiss at redhat dot com | 
|  | * | 
|  | */ | 
|  | private static class PrivateUseCharacters extends UnassignedCharacters | 
|  | { | 
|  | /** | 
|  | * Returns the type of the character cp. | 
|  | */ | 
|  | static int getType(int cp) | 
|  | { | 
|  | // The upper 2 code points in any plane are considered unassigned, | 
|  | // even in the private-use planes. | 
|  | if ((cp & 0xffff) >= 0xfffe) | 
|  | return UnassignedCharacters.getType(cp); | 
|  | return PRIVATE_USE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns true if the character cp is defined. | 
|  | */ | 
|  | static boolean isDefined(int cp) | 
|  | { | 
|  | // The upper 2 code points in any plane are considered unassigned, | 
|  | // even in the private-use planes. | 
|  | if ((cp & 0xffff) >= 0xfffe) | 
|  | return UnassignedCharacters.isDefined(cp); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Gets the directionality for the character cp. | 
|  | */ | 
|  | static byte getDirectionality(int cp) | 
|  | { | 
|  | if ((cp & 0xffff) >= 0xfffe) | 
|  | return UnassignedCharacters.getDirectionality(cp); | 
|  | return DIRECTIONALITY_LEFT_TO_RIGHT; | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * A class to encompass all the properties of code points that are | 
|  | * currently undefined in the Unicode standard. | 
|  | * @author Anthony Balkissoon abalkiss at redhat dot com | 
|  | * | 
|  | */ | 
|  | private static class UnassignedCharacters | 
|  | { | 
|  | /** | 
|  | * Returns the numeric value for the unassigned characters. | 
|  | * @param cp the character | 
|  | * @param radix the radix (not used) | 
|  | * @return the numeric value of this character in this radix | 
|  | */ | 
|  | static int digit(int cp, int radix) | 
|  | { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode directionality property for unassigned | 
|  | * characters. | 
|  | * @param cp the character | 
|  | * @return DIRECTIONALITY_UNDEFINED | 
|  | */ | 
|  | static byte getDirectionality(int cp) | 
|  | { | 
|  | return DIRECTIONALITY_UNDEFINED; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns -1, the numeric value for unassigned Unicode characters. | 
|  | * @param cp the character | 
|  | * @return -1 | 
|  | */ | 
|  | static int getNumericValue(int cp) | 
|  | { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns UNASSIGNED, the type of unassigned Unicode characters. | 
|  | * @param cp the character | 
|  | * @return UNASSIGNED | 
|  | */ | 
|  | static int getType(int cp) | 
|  | { | 
|  | return UNASSIGNED; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indiciate that the character is not defined in the | 
|  | * Unicode standard. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isDefined(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not a digit. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isDigit(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot be ignored | 
|  | * within an identifier | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isIdentifierIgnorable(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot be part of a | 
|  | * Java identifier. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isJavaIdentifierPart(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot be start a | 
|  | * Java identifier. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isJavaIdentiferStart(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not a letter. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isLetter(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot is neither a letter | 
|  | * nor a digit. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isLetterOrDigit(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not a lowercase letter. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isLowerCase(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot is not mirrored. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isMirrored(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not a space character. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isSpaceChar(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character it not a titlecase letter. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isTitleCase(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot be part of a | 
|  | * Unicode identifier. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isUnicodeIdentifierPart(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character cannot start a | 
|  | * Unicode identifier. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isUnicodeIdentifierStart(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not an uppercase letter. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isUpperCase(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns false to indicate that the character is not a whitespace | 
|  | * character. | 
|  | * @param cp the character | 
|  | * @return false | 
|  | */ | 
|  | static boolean isWhiteSpace(int cp) | 
|  | { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns cp to indicate this character has no lowercase conversion. | 
|  | * @param cp the character | 
|  | * @return cp | 
|  | */ | 
|  | static int toLowerCase(int cp) | 
|  | { | 
|  | return cp; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns cp to indicate this character has no titlecase conversion. | 
|  | * @param cp the character | 
|  | * @return cp | 
|  | */ | 
|  | static int toTitleCase(int cp) | 
|  | { | 
|  | return cp; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns cp to indicate this character has no uppercase conversion. | 
|  | * @param cp the character | 
|  | * @return cp | 
|  | */ | 
|  | static int toUpperCase(int cp) | 
|  | { | 
|  | return cp; | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * The immutable value of this Character. | 
|  | * | 
|  | * @serial the value of this Character | 
|  | */ | 
|  | private final char value; | 
|  |  | 
|  | /** | 
|  | * Compatible with JDK 1.0+. | 
|  | */ | 
|  | private static final long serialVersionUID = 3786198910865385080L; | 
|  |  | 
|  | /** | 
|  | * Smallest value allowed for radix arguments in Java. This value is 2. | 
|  | * | 
|  | * @see #digit(char, int) | 
|  | * @see #forDigit(int, int) | 
|  | * @see Integer#toString(int, int) | 
|  | * @see Integer#valueOf(String) | 
|  | */ | 
|  | public static final int MIN_RADIX = 2; | 
|  |  | 
|  | /** | 
|  | * Largest value allowed for radix arguments in Java. This value is 36. | 
|  | * | 
|  | * @see #digit(char, int) | 
|  | * @see #forDigit(int, int) | 
|  | * @see Integer#toString(int, int) | 
|  | * @see Integer#valueOf(String) | 
|  | */ | 
|  | public static final int MAX_RADIX = 36; | 
|  |  | 
|  | /** | 
|  | * The minimum value the char data type can hold. | 
|  | * This value is <code>'\\u0000'</code>. | 
|  | */ | 
|  | public static final char MIN_VALUE = '\u0000'; | 
|  |  | 
|  | /** | 
|  | * The maximum value the char data type can hold. | 
|  | * This value is <code>'\\uFFFF'</code>. | 
|  | */ | 
|  | public static final char MAX_VALUE = '\uFFFF'; | 
|  |  | 
|  | /** | 
|  | * The minimum Unicode 4.0 code point.  This value is <code>0</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final int MIN_CODE_POINT = 0; | 
|  |  | 
|  | /** | 
|  | * The maximum Unicode 4.0 code point, which is greater than the range | 
|  | * of the char data type. | 
|  | * This value is <code>0x10FFFF</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final int MAX_CODE_POINT = 0x10FFFF; | 
|  |  | 
|  | /** | 
|  | * The minimum Unicode high surrogate code unit, or | 
|  | * <emph>leading-surrogate</emph>, in the UTF-16 character encoding. | 
|  | * This value is <code>'\uD800'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MIN_HIGH_SURROGATE = '\uD800'; | 
|  |  | 
|  | /** | 
|  | * The maximum Unicode high surrogate code unit, or | 
|  | * <emph>leading-surrogate</emph>, in the UTF-16 character encoding. | 
|  | * This value is <code>'\uDBFF'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MAX_HIGH_SURROGATE = '\uDBFF'; | 
|  |  | 
|  | /** | 
|  | * The minimum Unicode low surrogate code unit, or | 
|  | * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding. | 
|  | * This value is <code>'\uDC00'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MIN_LOW_SURROGATE = '\uDC00'; | 
|  |  | 
|  | /** | 
|  | * The maximum Unicode low surrogate code unit, or | 
|  | * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding. | 
|  | * This value is <code>'\uDFFF'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MAX_LOW_SURROGATE = '\uDFFF'; | 
|  |  | 
|  | /** | 
|  | * The minimum Unicode surrogate code unit in the UTF-16 character encoding. | 
|  | * This value is <code>'\uD800'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; | 
|  |  | 
|  | /** | 
|  | * The maximum Unicode surrogate code unit in the UTF-16 character encoding. | 
|  | * This value is <code>'\uDFFF'</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; | 
|  |  | 
|  | /** | 
|  | * The lowest possible supplementary Unicode code point (the first code | 
|  | * point outside the basic multilingual plane (BMP)). | 
|  | * This value is <code>0x10000</code>. | 
|  | */ | 
|  | public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; | 
|  |  | 
|  | /** | 
|  | * Class object representing the primitive char data type. | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C'); | 
|  |  | 
|  | /** | 
|  | * The number of bits needed to represent a <code>char</code>. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static final int SIZE = 16; | 
|  |  | 
|  | // This caches some Character values, and is used by boxing | 
|  | // conversions via valueOf().  We must cache at least 0..127; | 
|  | // this constant controls how much we actually cache. | 
|  | private static final int MAX_CACHE = 127; | 
|  | private static Character[] charCache = new Character[MAX_CACHE + 1]; | 
|  | static | 
|  | { | 
|  | for (char i=0; i <= MAX_CACHE; i++) | 
|  | charCache[i] = new Character(i); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Lu = Letter, Uppercase (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte UPPERCASE_LETTER = 1; | 
|  |  | 
|  | /** | 
|  | * Ll = Letter, Lowercase (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte LOWERCASE_LETTER = 2; | 
|  |  | 
|  | /** | 
|  | * Lt = Letter, Titlecase (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte TITLECASE_LETTER = 3; | 
|  |  | 
|  | /** | 
|  | * Mn = Mark, Non-Spacing (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte NON_SPACING_MARK = 6; | 
|  |  | 
|  | /** | 
|  | * Mc = Mark, Spacing Combining (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte COMBINING_SPACING_MARK = 8; | 
|  |  | 
|  | /** | 
|  | * Me = Mark, Enclosing (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte ENCLOSING_MARK = 7; | 
|  |  | 
|  | /** | 
|  | * Nd = Number, Decimal Digit (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte DECIMAL_DIGIT_NUMBER = 9; | 
|  |  | 
|  | /** | 
|  | * Nl = Number, Letter (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte LETTER_NUMBER = 10; | 
|  |  | 
|  | /** | 
|  | * No = Number, Other (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte OTHER_NUMBER = 11; | 
|  |  | 
|  | /** | 
|  | * Zs = Separator, Space (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte SPACE_SEPARATOR = 12; | 
|  |  | 
|  | /** | 
|  | * Zl = Separator, Line (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte LINE_SEPARATOR = 13; | 
|  |  | 
|  | /** | 
|  | * Zp = Separator, Paragraph (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte PARAGRAPH_SEPARATOR = 14; | 
|  |  | 
|  | /** | 
|  | * Cc = Other, Control (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte CONTROL = 15; | 
|  |  | 
|  | /** | 
|  | * Cf = Other, Format (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte FORMAT = 16; | 
|  |  | 
|  | /** | 
|  | * Cs = Other, Surrogate (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte SURROGATE = 19; | 
|  |  | 
|  | /** | 
|  | * Co = Other, Private Use (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte PRIVATE_USE = 18; | 
|  |  | 
|  | /** | 
|  | * Cn = Other, Not Assigned (Normative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte UNASSIGNED = 0; | 
|  |  | 
|  | /** | 
|  | * Lm = Letter, Modifier (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte MODIFIER_LETTER = 4; | 
|  |  | 
|  | /** | 
|  | * Lo = Letter, Other (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte OTHER_LETTER = 5; | 
|  |  | 
|  | /** | 
|  | * Pc = Punctuation, Connector (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte CONNECTOR_PUNCTUATION = 23; | 
|  |  | 
|  | /** | 
|  | * Pd = Punctuation, Dash (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte DASH_PUNCTUATION = 20; | 
|  |  | 
|  | /** | 
|  | * Ps = Punctuation, Open (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte START_PUNCTUATION = 21; | 
|  |  | 
|  | /** | 
|  | * Pe = Punctuation, Close (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte END_PUNCTUATION = 22; | 
|  |  | 
|  | /** | 
|  | * Pi = Punctuation, Initial Quote (Informative). | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte INITIAL_QUOTE_PUNCTUATION = 29; | 
|  |  | 
|  | /** | 
|  | * Pf = Punctuation, Final Quote (Informative). | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte FINAL_QUOTE_PUNCTUATION = 30; | 
|  |  | 
|  | /** | 
|  | * Po = Punctuation, Other (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte OTHER_PUNCTUATION = 24; | 
|  |  | 
|  | /** | 
|  | * Sm = Symbol, Math (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte MATH_SYMBOL = 25; | 
|  |  | 
|  | /** | 
|  | * Sc = Symbol, Currency (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte CURRENCY_SYMBOL = 26; | 
|  |  | 
|  | /** | 
|  | * Sk = Symbol, Modifier (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte MODIFIER_SYMBOL = 27; | 
|  |  | 
|  | /** | 
|  | * So = Symbol, Other (Informative). | 
|  | * | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static final byte OTHER_SYMBOL = 28; | 
|  |  | 
|  | /** | 
|  | * Undefined bidirectional character type. Undefined char values have | 
|  | * undefined directionality in the Unicode specification. | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_UNDEFINED = -1; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "L". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "R". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "AL". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "EN". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "ES". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "ET". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "AN". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "CS". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "NSM". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "BN". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; | 
|  |  | 
|  | /** | 
|  | * Neutral bidirectional character type "B". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; | 
|  |  | 
|  | /** | 
|  | * Neutral bidirectional character type "S". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "WS". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_WHITESPACE = 12; | 
|  |  | 
|  | /** | 
|  | * Neutral bidirectional character type "ON". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "LRE". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "LRO". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "RLE". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; | 
|  |  | 
|  | /** | 
|  | * Strong bidirectional character type "RLO". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; | 
|  |  | 
|  | /** | 
|  | * Weak bidirectional character type "PDF". | 
|  | * | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; | 
|  |  | 
|  | /** | 
|  | * Stores unicode block offset lookup table. Exploit package visibility of | 
|  | * String.value to avoid copying the array. | 
|  | * @see #readCodePoint(int) | 
|  | * @see CharData#BLOCKS | 
|  | */ | 
|  | private static final char[][] blocks = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[0]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[1]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[2]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[3]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[4]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[5]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[6]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[7]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[8]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[9]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[10]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[11]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[12]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[13]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[14]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[15]), | 
|  | String.zeroBasedStringValue(CharData.BLOCKS[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode attribute offset lookup table. Exploit package visibility | 
|  | * of String.value to avoid copying the array. | 
|  | * @see CharData#DATA | 
|  | */ | 
|  | private static final char[][] data = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.DATA[0]), | 
|  | String.zeroBasedStringValue(CharData.DATA[1]), | 
|  | String.zeroBasedStringValue(CharData.DATA[2]), | 
|  | String.zeroBasedStringValue(CharData.DATA[3]), | 
|  | String.zeroBasedStringValue(CharData.DATA[4]), | 
|  | String.zeroBasedStringValue(CharData.DATA[5]), | 
|  | String.zeroBasedStringValue(CharData.DATA[6]), | 
|  | String.zeroBasedStringValue(CharData.DATA[7]), | 
|  | String.zeroBasedStringValue(CharData.DATA[8]), | 
|  | String.zeroBasedStringValue(CharData.DATA[9]), | 
|  | String.zeroBasedStringValue(CharData.DATA[10]), | 
|  | String.zeroBasedStringValue(CharData.DATA[11]), | 
|  | String.zeroBasedStringValue(CharData.DATA[12]), | 
|  | String.zeroBasedStringValue(CharData.DATA[13]), | 
|  | String.zeroBasedStringValue(CharData.DATA[14]), | 
|  | String.zeroBasedStringValue(CharData.DATA[15]), | 
|  | String.zeroBasedStringValue(CharData.DATA[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode numeric value attribute table. Exploit package visibility | 
|  | * of String.value to avoid copying the array. | 
|  | * @see CharData#NUM_VALUE | 
|  | */ | 
|  | private static final char[][] numValue = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[0]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[1]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[2]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[3]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[4]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[5]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[6]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[7]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[8]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[9]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[10]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[11]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[12]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[13]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[14]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[15]), | 
|  | String.zeroBasedStringValue(CharData.NUM_VALUE[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode uppercase attribute table. Exploit package visibility | 
|  | * of String.value to avoid copying the array. | 
|  | * @see CharData#UPPER | 
|  | */ | 
|  | private static final char[][] upper = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.UPPER[0]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[1]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[2]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[3]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[4]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[5]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[6]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[7]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[8]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[9]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[10]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[11]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[12]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[13]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[14]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[15]), | 
|  | String.zeroBasedStringValue(CharData.UPPER[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode lowercase attribute table. Exploit package visibility | 
|  | * of String.value to avoid copying the array. | 
|  | * @see CharData#LOWER | 
|  | */ | 
|  | private static final char[][] lower = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.LOWER[0]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[1]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[2]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[3]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[4]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[5]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[6]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[7]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[8]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[9]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[10]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[11]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[12]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[13]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[14]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[15]), | 
|  | String.zeroBasedStringValue(CharData.LOWER[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode direction attribute table. Exploit package visibility | 
|  | * of String.value to avoid copying the array. | 
|  | * @see CharData#DIRECTION | 
|  | */ | 
|  | // Package visible for use by String. | 
|  | static final char[][] direction = | 
|  | new char[][]{ | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[0]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[1]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[2]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[3]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[4]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[5]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[6]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[7]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[8]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[9]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[10]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[11]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[12]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[13]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[14]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[15]), | 
|  | String.zeroBasedStringValue(CharData.DIRECTION[16])}; | 
|  |  | 
|  | /** | 
|  | * Stores unicode titlecase table. Exploit package visibility of | 
|  | * String.value to avoid copying the array. | 
|  | * @see CharData#TITLE | 
|  | */ | 
|  | private static final char[] title = String.zeroBasedStringValue(CharData.TITLE); | 
|  |  | 
|  | /** | 
|  | * Mask for grabbing the type out of the contents of data. | 
|  | * @see CharData#DATA | 
|  | */ | 
|  | private static final int TYPE_MASK = 0x1F; | 
|  |  | 
|  | /** | 
|  | * Mask for grabbing the non-breaking space flag out of the contents of | 
|  | * data. | 
|  | * @see CharData#DATA | 
|  | */ | 
|  | private static final int NO_BREAK_MASK = 0x20; | 
|  |  | 
|  | /** | 
|  | * Mask for grabbing the mirrored directionality flag out of the contents | 
|  | * of data. | 
|  | * @see CharData#DATA | 
|  | */ | 
|  | private static final int MIRROR_MASK = 0x40; | 
|  |  | 
|  | /** | 
|  | * Grabs an attribute offset from the Unicode attribute database. The lower | 
|  | * 5 bits are the character type, the next 2 bits are flags, and the top | 
|  | * 9 bits are the offset into the attribute tables. | 
|  | * | 
|  | * @param codePoint the character to look up | 
|  | * @return the character's attribute offset and type | 
|  | * @see #TYPE_MASK | 
|  | * @see #NO_BREAK_MASK | 
|  | * @see #MIRROR_MASK | 
|  | * @see CharData#DATA | 
|  | * @see CharData#SHIFT | 
|  | */ | 
|  | // Package visible for use in String. | 
|  | static char readCodePoint(int codePoint) | 
|  | { | 
|  | int plane = codePoint >>> 16; | 
|  | char offset = (char) (codePoint & 0xffff); | 
|  | return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)]; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Wraps up a character. | 
|  | * | 
|  | * @param value the character to wrap | 
|  | */ | 
|  | public Character(char value) | 
|  | { | 
|  | this.value = value; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the character which has been wrapped by this class. | 
|  | * | 
|  | * @return the character wrapped | 
|  | */ | 
|  | public char charValue() | 
|  | { | 
|  | return value; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the numerical value (unsigned) of the wrapped character. | 
|  | * Range of returned values: 0x0000-0xFFFF. | 
|  | * | 
|  | * @return the value of the wrapped character | 
|  | */ | 
|  | public int hashCode() | 
|  | { | 
|  | return value; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if an object is equal to this object. This is only true for | 
|  | * another Character object wrapping the same value. | 
|  | * | 
|  | * @param o object to compare | 
|  | * @return true if o is a Character with the same value | 
|  | */ | 
|  | public boolean equals(Object o) | 
|  | { | 
|  | return o instanceof Character && value == ((Character) o).value; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts the wrapped character into a String. | 
|  | * | 
|  | * @return a String containing one character -- the wrapped character | 
|  | *         of this instance | 
|  | */ | 
|  | public String toString() | 
|  | { | 
|  | // Package constructor avoids an array copy. | 
|  | return new String(new char[] { value }, 0, 1, true); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns a String of length 1 representing the specified character. | 
|  | * | 
|  | * @param ch the character to convert | 
|  | * @return a String containing the character | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static String toString(char ch) | 
|  | { | 
|  | // Package constructor avoids an array copy. | 
|  | return new String(new char[] { ch }, 0, 1, true); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode lowercase letter. For example, | 
|  | * <code>'a'</code> is lowercase.  Returns true if getType() returns | 
|  | * LOWERCASE_LETTER. | 
|  | * <br> | 
|  | * lowercase = [Ll] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode lowercase letter, else false | 
|  | * @see #isUpperCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #getType(char) | 
|  | */ | 
|  | public static boolean isLowerCase(char ch) | 
|  | { | 
|  | return isLowerCase((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode lowercase letter. For example, | 
|  | * <code>'a'</code> is lowercase.  Returns true if getType() returns | 
|  | * LOWERCASE_LETTER. | 
|  | * <br> | 
|  | * lowercase = [Ll] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode lowercase letter, else false | 
|  | * @see #isUpperCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #getType(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isLowerCase(int codePoint) | 
|  | { | 
|  | return getType(codePoint) == LOWERCASE_LETTER; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode uppercase letter. For example, | 
|  | * <code>'A'</code> is uppercase.  Returns true if getType() returns | 
|  | * UPPERCASE_LETTER. | 
|  | * <br> | 
|  | * uppercase = [Lu] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode uppercase letter, else false | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | * @see #getType(char) | 
|  | */ | 
|  | public static boolean isUpperCase(char ch) | 
|  | { | 
|  | return isUpperCase((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode uppercase letter. For example, | 
|  | * <code>'A'</code> is uppercase.  Returns true if getType() returns | 
|  | * UPPERCASE_LETTER. | 
|  | * <br> | 
|  | * uppercase = [Lu] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode uppercase letter, else false | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | * @see #getType(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isUpperCase(int codePoint) | 
|  | { | 
|  | return getType(codePoint) == UPPERCASE_LETTER; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode titlecase letter. For example, | 
|  | * the character "Lj" (Latin capital L with small letter j) is titlecase. | 
|  | * True if getType() returns TITLECASE_LETTER. | 
|  | * <br> | 
|  | * titlecase = [Lt] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode titlecase letter, else false | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | * @see #getType(char) | 
|  | */ | 
|  | public static boolean isTitleCase(char ch) | 
|  | { | 
|  | return isTitleCase((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode titlecase letter. For example, | 
|  | * the character "Lj" (Latin capital L with small letter j) is titlecase. | 
|  | * True if getType() returns TITLECASE_LETTER. | 
|  | * <br> | 
|  | * titlecase = [Lt] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode titlecase letter, else false | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | * @see #getType(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isTitleCase(int codePoint) | 
|  | { | 
|  | return getType(codePoint) == TITLECASE_LETTER; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode decimal digit. For example, | 
|  | * <code>'0'</code> is a digit.  A character is a Unicode digit if | 
|  | * getType() returns DECIMAL_DIGIT_NUMBER. | 
|  | * <br> | 
|  | * Unicode decimal digit = [Nd] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode decimal digit, else false | 
|  | * @see #digit(char, int) | 
|  | * @see #forDigit(int, int) | 
|  | * @see #getType(char) | 
|  | */ | 
|  | public static boolean isDigit(char ch) | 
|  | { | 
|  | return isDigit((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode decimal digit. For example, | 
|  | * <code>'0'</code> is a digit. A character is a Unicode digit if | 
|  | * getType() returns DECIMAL_DIGIT_NUMBER. | 
|  | * <br> | 
|  | * Unicode decimal digit = [Nd] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode decimal digit, else false | 
|  | * @see #digit(char, int) | 
|  | * @see #forDigit(int, int) | 
|  | * @see #getType(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  |  | 
|  | public static boolean isDigit(int codePoint) | 
|  | { | 
|  | return getType(codePoint) == DECIMAL_DIGIT_NUMBER; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is part of the Unicode Standard. This is an | 
|  | * evolving standard, but covers every character in the data file. | 
|  | * <br> | 
|  | * defined = not [Cn] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode character, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | */ | 
|  | public static boolean isDefined(char ch) | 
|  | { | 
|  | return isDefined((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is part of the Unicode Standard. This is an | 
|  | * evolving standard, but covers every character in the data file. | 
|  | * <br> | 
|  | * defined = not [Cn] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode character, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isDefined(int codePoint) | 
|  | { | 
|  | return getType(codePoint) != UNASSIGNED; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode letter. Not all letters have case, | 
|  | * so this may return true when isLowerCase and isUpperCase return false. | 
|  | * A character is a Unicode letter if getType() returns one of | 
|  | * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, | 
|  | * or OTHER_LETTER. | 
|  | * <br> | 
|  | * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode letter, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isJavaLetter(char) | 
|  | * @see #isJavaLetterOrDigit(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @see #isUpperCase(char) | 
|  | */ | 
|  | public static boolean isLetter(char ch) | 
|  | { | 
|  | return isLetter((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode letter. Not all letters have case, | 
|  | * so this may return true when isLowerCase and isUpperCase return false. | 
|  | * A character is a Unicode letter if getType() returns one of | 
|  | * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, | 
|  | * or OTHER_LETTER. | 
|  | * <br> | 
|  | * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode letter, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isJavaLetter(char) | 
|  | * @see #isJavaLetterOrDigit(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isTitleCase(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @see #isUpperCase(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isLetter(int codePoint) | 
|  | { | 
|  | return ((1 << getType(codePoint)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER))) != 0; | 
|  | } | 
|  | /** | 
|  | * Returns the index into the given CharSequence that is offset | 
|  | * <code>codePointOffset</code> code points from <code>index</code>. | 
|  | * @param seq the CharSequence | 
|  | * @param index the start position in the CharSequence | 
|  | * @param codePointOffset the number of code points offset from the start | 
|  | * position | 
|  | * @return the index into the CharSequence that is codePointOffset code | 
|  | * points offset from index | 
|  | * | 
|  | * @throws NullPointerException if seq is null | 
|  | * @throws IndexOutOfBoundsException if index is negative or greater than the | 
|  | * length of the sequence. | 
|  | * @throws IndexOutOfBoundsException if codePointOffset is positive and the | 
|  | * subsequence from index to the end of seq has fewer than codePointOffset | 
|  | * code points | 
|  | * @throws IndexOutOfBoundsException if codePointOffset is negative and the | 
|  | * subsequence from the start of seq to index has fewer than | 
|  | * (-codePointOffset) code points | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int offsetByCodePoints(CharSequence seq, | 
|  | int index, | 
|  | int codePointOffset) | 
|  | { | 
|  | int len = seq.length(); | 
|  | if (index < 0 || index > len) | 
|  | throw new IndexOutOfBoundsException(); | 
|  |  | 
|  | int numToGo = codePointOffset; | 
|  | int offset = index; | 
|  | int adjust = 1; | 
|  | if (numToGo >= 0) | 
|  | { | 
|  | for (; numToGo > 0; offset++) | 
|  | { | 
|  | numToGo--; | 
|  | if (Character.isHighSurrogate(seq.charAt(offset)) | 
|  | && (offset + 1) < len | 
|  | && Character.isLowSurrogate(seq.charAt(offset + 1))) | 
|  | offset++; | 
|  | } | 
|  | return offset; | 
|  | } | 
|  | else | 
|  | { | 
|  | numToGo *= -1; | 
|  | for (; numToGo > 0;) | 
|  | { | 
|  | numToGo--; | 
|  | offset--; | 
|  | if (Character.isLowSurrogate(seq.charAt(offset)) | 
|  | && (offset - 1) >= 0 | 
|  | && Character.isHighSurrogate(seq.charAt(offset - 1))) | 
|  | offset--; | 
|  | } | 
|  | return offset; | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the index into the given char subarray that is offset | 
|  | * <code>codePointOffset</code> code points from <code>index</code>. | 
|  | * @param a the char array | 
|  | * @param start the start index of the subarray | 
|  | * @param count the length of the subarray | 
|  | * @param index the index to be offset | 
|  | * @param codePointOffset the number of code points offset from <code>index | 
|  | * </code> | 
|  | * @return the index into the char array | 
|  | * | 
|  | * @throws NullPointerException if a is null | 
|  | * @throws IndexOutOfBoundsException if start or count is negative or if | 
|  | * start + count is greater than the length of the array | 
|  | * @throws IndexOutOfBoundsException if index is less than start or larger | 
|  | * than start + count | 
|  | * @throws IndexOutOfBoundsException if codePointOffset is positive and the | 
|  | * subarray from index to start + count - 1 has fewer than codePointOffset | 
|  | * code points. | 
|  | * @throws IndexOutOfBoundsException if codePointOffset is negative and the | 
|  | * subarray from start to index - 1 has fewer than (-codePointOffset) code | 
|  | * points | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int offsetByCodePoints(char[] a, | 
|  | int start, | 
|  | int count, | 
|  | int index, | 
|  | int codePointOffset) | 
|  | { | 
|  | int len = a.length; | 
|  | int end = start + count; | 
|  | if (start < 0 || count < 0 || end > len || index < start || index > end) | 
|  | throw new IndexOutOfBoundsException(); | 
|  |  | 
|  | int numToGo = codePointOffset; | 
|  | int offset = index; | 
|  | int adjust = 1; | 
|  | if (numToGo >= 0) | 
|  | { | 
|  | for (; numToGo > 0; offset++) | 
|  | { | 
|  | numToGo--; | 
|  | if (Character.isHighSurrogate(a[offset]) | 
|  | && (offset + 1) < len | 
|  | && Character.isLowSurrogate(a[offset + 1])) | 
|  | offset++; | 
|  | } | 
|  | return offset; | 
|  | } | 
|  | else | 
|  | { | 
|  | numToGo *= -1; | 
|  | for (; numToGo > 0;) | 
|  | { | 
|  | numToGo--; | 
|  | offset--; | 
|  | if (Character.isLowSurrogate(a[offset]) | 
|  | && (offset - 1) >= 0 | 
|  | && Character.isHighSurrogate(a[offset - 1])) | 
|  | offset--; | 
|  | if (offset < start) | 
|  | throw new IndexOutOfBoundsException(); | 
|  | } | 
|  | return offset; | 
|  | } | 
|  |  | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the number of Unicode code points in the specified range of the | 
|  | * given CharSequence.  The first char in the range is at position | 
|  | * beginIndex and the last one is at position endIndex - 1.  Paired | 
|  | * surrogates (supplementary characters are represented by a pair of chars - | 
|  | * one from the high surrogates and one from the low surrogates) | 
|  | * count as just one code point. | 
|  | * @param seq the CharSequence to inspect | 
|  | * @param beginIndex the beginning of the range | 
|  | * @param endIndex the end of the range | 
|  | * @return the number of Unicode code points in the given range of the | 
|  | * sequence | 
|  | * @throws NullPointerException if seq is null | 
|  | * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is | 
|  | * larger than the length of seq, or if beginIndex is greater than endIndex. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointCount(CharSequence seq, int beginIndex, | 
|  | int endIndex) | 
|  | { | 
|  | int len = seq.length(); | 
|  | if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) | 
|  | throw new IndexOutOfBoundsException(); | 
|  |  | 
|  | int count = 0; | 
|  | for (int i = beginIndex; i < endIndex; i++) | 
|  | { | 
|  | count++; | 
|  | // If there is a pairing, count it only once. | 
|  | if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex | 
|  | && isLowSurrogate(seq.charAt(i + 1))) | 
|  | i ++; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the number of Unicode code points in the specified range of the | 
|  | * given char array.  The first char in the range is at position | 
|  | * offset and the length of the range is count.  Paired surrogates | 
|  | * (supplementary characters are represented by a pair of chars - | 
|  | * one from the high surrogates and one from the low surrogates) | 
|  | * count as just one code point. | 
|  | * @param a the char array to inspect | 
|  | * @param offset the beginning of the range | 
|  | * @param count the length of the range | 
|  | * @return the number of Unicode code points in the given range of the | 
|  | * array | 
|  | * @throws NullPointerException if a is null | 
|  | * @throws IndexOutOfBoundsException if offset or count is negative or if | 
|  | * offset + countendIndex is larger than the length of a. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointCount(char[] a, int offset, | 
|  | int count) | 
|  | { | 
|  | int len = a.length; | 
|  | int end = offset + count; | 
|  | if (offset < 0 || count < 0 || end > len) | 
|  | throw new IndexOutOfBoundsException(); | 
|  |  | 
|  | int counter = 0; | 
|  | for (int i = offset; i < end; i++) | 
|  | { | 
|  | counter++; | 
|  | // If there is a pairing, count it only once. | 
|  | if (isHighSurrogate(a[i]) && (i + 1) < end | 
|  | && isLowSurrogate(a[i + 1])) | 
|  | i ++; | 
|  | } | 
|  | return counter; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode letter or a Unicode digit. This | 
|  | * is the combination of isLetter and isDigit. | 
|  | * <br> | 
|  | * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode letter or a Unicode digit, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isJavaLetter(char) | 
|  | * @see #isJavaLetterOrDigit(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | */ | 
|  | public static boolean isLetterOrDigit(char ch) | 
|  | { | 
|  | return isLetterOrDigit((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode letter or a Unicode digit. This | 
|  | * is the combination of isLetter and isDigit. | 
|  | * <br> | 
|  | * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode letter or a Unicode digit, else false | 
|  | * @see #isDigit(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isJavaLetter(char) | 
|  | * @see #isJavaLetterOrDigit(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isLetterOrDigit(int codePoint) | 
|  | { | 
|  | return ((1 << getType(codePoint)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER) | 
|  | | (1 << DECIMAL_DIGIT_NUMBER))) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can start a Java identifier. This is the | 
|  | * combination of isLetter, any character where getType returns | 
|  | * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation | 
|  | * (like '_'). | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can start a Java identifier, else false | 
|  | * @deprecated Replaced by {@link #isJavaIdentifierStart(char)} | 
|  | * @see #isJavaLetterOrDigit(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | */ | 
|  | public static boolean isJavaLetter(char ch) | 
|  | { | 
|  | return isJavaIdentifierStart(ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can follow the first letter in | 
|  | * a Java identifier.  This is the combination of isJavaLetter (isLetter, | 
|  | * type of LETTER_NUMBER, currency, connecting punctuation) and digit, | 
|  | * numeric letter (like Roman numerals), combining marks, non-spacing marks, | 
|  | * or isIdentifierIgnorable. | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can follow the first letter in a Java identifier | 
|  | * @deprecated Replaced by {@link #isJavaIdentifierPart(char)} | 
|  | * @see #isJavaLetter(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @see #isIdentifierIgnorable(char) | 
|  | */ | 
|  | public static boolean isJavaLetterOrDigit(char ch) | 
|  | { | 
|  | return isJavaIdentifierPart(ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can start a Java identifier. This is the | 
|  | * combination of isLetter, any character where getType returns | 
|  | * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation | 
|  | * (like '_'). | 
|  | * <br> | 
|  | * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can start a Java identifier, else false | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isJavaIdentifierStart(char ch) | 
|  | { | 
|  | return isJavaIdentifierStart((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can start a Java identifier. This is the | 
|  | * combination of isLetter, any character where getType returns | 
|  | * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation | 
|  | * (like '_'). | 
|  | * <br> | 
|  | * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch can start a Java identifier, else false | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isJavaIdentifierStart(int codePoint) | 
|  | { | 
|  | return ((1 << getType(codePoint)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER) | 
|  | | (1 << LETTER_NUMBER) | 
|  | | (1 << CURRENCY_SYMBOL) | 
|  | | (1 << CONNECTOR_PUNCTUATION))) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can follow the first letter in | 
|  | * a Java identifier.  This is the combination of isJavaLetter (isLetter, | 
|  | * type of LETTER_NUMBER, currency, connecting punctuation) and digit, | 
|  | * numeric letter (like Roman numerals), combining marks, non-spacing marks, | 
|  | * or isIdentifierIgnorable. | 
|  | * <br> | 
|  | * Java identifier extender = | 
|  | *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf] | 
|  | *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can follow the first letter in a Java identifier | 
|  | * @see #isIdentifierIgnorable(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isJavaIdentifierPart(char ch) | 
|  | { | 
|  | return isJavaIdentifierPart((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can follow the first letter in | 
|  | * a Java identifier.  This is the combination of isJavaLetter (isLetter, | 
|  | * type of LETTER_NUMBER, currency, connecting punctuation) and digit, | 
|  | * numeric letter (like Roman numerals), combining marks, non-spacing marks, | 
|  | * or isIdentifierIgnorable. | 
|  | * <br> | 
|  | * Java identifier extender = | 
|  | *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf] | 
|  | *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch can follow the first letter in a Java identifier | 
|  | * @see #isIdentifierIgnorable(char) | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isJavaIdentifierPart(int codePoint) | 
|  | { | 
|  | int category = getType(codePoint); | 
|  | return ((1 << category) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER) | 
|  | | (1 << NON_SPACING_MARK) | 
|  | | (1 << COMBINING_SPACING_MARK) | 
|  | | (1 << DECIMAL_DIGIT_NUMBER) | 
|  | | (1 << LETTER_NUMBER) | 
|  | | (1 << CURRENCY_SYMBOL) | 
|  | | (1 << CONNECTOR_PUNCTUATION) | 
|  | | (1 << FORMAT))) != 0 | 
|  | || (category == CONTROL && isIdentifierIgnorable(codePoint)); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can start a Unicode identifier.  Only | 
|  | * letters can start a Unicode identifier, but this includes characters | 
|  | * in LETTER_NUMBER. | 
|  | * <br> | 
|  | * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can start a Unicode identifier, else false | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isUnicodeIdentifierStart(char ch) | 
|  | { | 
|  | return isUnicodeIdentifierStart((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can start a Unicode identifier.  Only | 
|  | * letters can start a Unicode identifier, but this includes characters | 
|  | * in LETTER_NUMBER. | 
|  | * <br> | 
|  | * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch can start a Unicode identifier, else false | 
|  | * @see #isJavaIdentifierStart(char) | 
|  | * @see #isLetter(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isUnicodeIdentifierStart(int codePoint) | 
|  | { | 
|  | return ((1 << getType(codePoint)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER) | 
|  | | (1 << LETTER_NUMBER))) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can follow the first letter in | 
|  | * a Unicode identifier. This includes letters, connecting punctuation, | 
|  | * digits, numeric letters, combining marks, non-spacing marks, and | 
|  | * isIdentifierIgnorable. | 
|  | * <br> | 
|  | * Unicode identifier extender = | 
|  | *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]| | 
|  | *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch can follow the first letter in a Unicode identifier | 
|  | * @see #isIdentifierIgnorable(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isUnicodeIdentifierPart(char ch) | 
|  | { | 
|  | return isUnicodeIdentifierPart((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character can follow the first letter in | 
|  | * a Unicode identifier. This includes letters, connecting punctuation, | 
|  | * digits, numeric letters, combining marks, non-spacing marks, and | 
|  | * isIdentifierIgnorable. | 
|  | * <br> | 
|  | * Unicode identifier extender = | 
|  | *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]| | 
|  | *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch can follow the first letter in a Unicode identifier | 
|  | * @see #isIdentifierIgnorable(char) | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isLetterOrDigit(char) | 
|  | * @see #isUnicodeIdentifierStart(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isUnicodeIdentifierPart(int codePoint) | 
|  | { | 
|  | int category = getType(codePoint); | 
|  | return ((1 << category) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << TITLECASE_LETTER) | 
|  | | (1 << MODIFIER_LETTER) | 
|  | | (1 << OTHER_LETTER) | 
|  | | (1 << NON_SPACING_MARK) | 
|  | | (1 << COMBINING_SPACING_MARK) | 
|  | | (1 << DECIMAL_DIGIT_NUMBER) | 
|  | | (1 << LETTER_NUMBER) | 
|  | | (1 << CONNECTOR_PUNCTUATION) | 
|  | | (1 << FORMAT))) != 0 | 
|  | || (category == CONTROL && isIdentifierIgnorable(codePoint)); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is ignorable in a Unicode identifier. This | 
|  | * includes the non-whitespace ISO control characters (<code>'\u0000'</code> | 
|  | * through <code>'\u0008'</code>, <code>'\u000E'</code> through | 
|  | * <code>'\u001B'</code>, and <code>'\u007F'</code> through | 
|  | * <code>'\u009F'</code>), and FORMAT characters. | 
|  | * <br> | 
|  | * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B | 
|  | *    |U+007F-U+009F | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is ignorable in a Unicode or Java identifier | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isIdentifierIgnorable(char ch) | 
|  | { | 
|  | return isIdentifierIgnorable((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is ignorable in a Unicode identifier. This | 
|  | * includes the non-whitespace ISO control characters (<code>'\u0000'</code> | 
|  | * through <code>'\u0008'</code>, <code>'\u000E'</code> through | 
|  | * <code>'\u001B'</code>, and <code>'\u007F'</code> through | 
|  | * <code>'\u009F'</code>), and FORMAT characters. | 
|  | * <br> | 
|  | * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B | 
|  | *    |U+007F-U+009F | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is ignorable in a Unicode or Java identifier | 
|  | * @see #isJavaIdentifierPart(char) | 
|  | * @see #isUnicodeIdentifierPart(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isIdentifierIgnorable(int codePoint) | 
|  | { | 
|  | if ((codePoint >= 0 && codePoint <= 0x0008) | 
|  | || (codePoint >= 0x000E && codePoint <= 0x001B) | 
|  | || (codePoint >= 0x007F && codePoint <= 0x009F) | 
|  | || getType(codePoint) == FORMAT) | 
|  | return true; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its lowercase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isLowerCase(toLowerCase(ch)) does not always return true. | 
|  | * | 
|  | * @param ch character to convert to lowercase | 
|  | * @return lowercase mapping of ch, or ch if lowercase mapping does | 
|  | *         not exist | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | */ | 
|  | public static char toLowerCase(char ch) | 
|  | { | 
|  | return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its lowercase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isLowerCase(toLowerCase(ch)) does not always return true. | 
|  | * | 
|  | * @param codePoint character to convert to lowercase | 
|  | * @return lowercase mapping of ch, or ch if lowercase mapping does | 
|  | *         not exist | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int toLowerCase(int codePoint) | 
|  | { | 
|  | // If the code point is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.toLowerCase(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.toLowerCase(codePoint); | 
|  |  | 
|  | // The short value stored in lower[plane] is the signed difference between | 
|  | // codePoint and its lowercase conversion. | 
|  | return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its uppercase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isUpperCase(toUpperCase(ch)) does not always return true. | 
|  | * | 
|  | * @param ch character to convert to uppercase | 
|  | * @return uppercase mapping of ch, or ch if uppercase mapping does | 
|  | *         not exist | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | */ | 
|  | public static char toUpperCase(char ch) | 
|  | { | 
|  | return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its uppercase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isUpperCase(toUpperCase(ch)) does not always return true. | 
|  | * | 
|  | * @param codePoint character to convert to uppercase | 
|  | * @return uppercase mapping of ch, or ch if uppercase mapping does | 
|  | *         not exist | 
|  | * @see #isLowerCase(char) | 
|  | * @see #isUpperCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #toTitleCase(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int toUpperCase(int codePoint) | 
|  | { | 
|  | // If the code point is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.toUpperCase(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.toUpperCase(codePoint); | 
|  |  | 
|  | // The short value stored in upper[plane] is the signed difference between | 
|  | // codePoint and its uppercase conversion. | 
|  | return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its titlecase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isTitleCase(toTitleCase(ch)) does not always return true. | 
|  | * | 
|  | * @param ch character to convert to titlecase | 
|  | * @return titlecase mapping of ch, or ch if titlecase mapping does | 
|  | *         not exist | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | */ | 
|  | public static char toTitleCase(char ch) | 
|  | { | 
|  | // As title is short, it doesn't hurt to exhaustively iterate over it. | 
|  | for (int i = title.length - 2; i >= 0; i -= 2) | 
|  | if (title[i] == ch) | 
|  | return title[i + 1]; | 
|  | return toUpperCase(ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a Unicode character into its titlecase equivalent mapping. | 
|  | * If a mapping does not exist, then the character passed is returned. | 
|  | * Note that isTitleCase(toTitleCase(ch)) does not always return true. | 
|  | * | 
|  | * @param codePoint character to convert to titlecase | 
|  | * @return titlecase mapping of ch, or ch if titlecase mapping does | 
|  | *         not exist | 
|  | * @see #isTitleCase(char) | 
|  | * @see #toLowerCase(char) | 
|  | * @see #toUpperCase(char) | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int toTitleCase(int codePoint) | 
|  | { | 
|  | // As of Unicode 4.0.0 no characters outside of plane 0 have | 
|  | // titlecase mappings that are different from their uppercase | 
|  | // mapping. | 
|  | if (codePoint < 0x10000) | 
|  | return (int) toTitleCase((char)codePoint); | 
|  | return toUpperCase(codePoint); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a character into a digit of the specified radix. If the radix | 
|  | * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch) | 
|  | * exceeds the radix, or if ch is not a decimal digit or in the case | 
|  | * insensitive set of 'a'-'z', the result is -1. | 
|  | * <br> | 
|  | * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A | 
|  | *    |U+FF21-U+FF3A|U+FF41-U+FF5A | 
|  | * | 
|  | * @param ch character to convert into a digit | 
|  | * @param radix radix in which ch is a digit | 
|  | * @return digit which ch represents in radix, or -1 not a valid digit | 
|  | * @see #MIN_RADIX | 
|  | * @see #MAX_RADIX | 
|  | * @see #forDigit(int, int) | 
|  | * @see #isDigit(char) | 
|  | * @see #getNumericValue(char) | 
|  | */ | 
|  | public static int digit(char ch, int radix) | 
|  | { | 
|  | if (radix < MIN_RADIX || radix > MAX_RADIX) | 
|  | return -1; | 
|  | char attr = readCodePoint((int)ch); | 
|  | if (((1 << (attr & TYPE_MASK)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << DECIMAL_DIGIT_NUMBER))) != 0) | 
|  | { | 
|  | // Signedness doesn't matter; 0xffff vs. -1 are both rejected. | 
|  | int digit = numValue[0][attr >> 7]; | 
|  | return (digit < radix) ? digit : -1; | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a character into a digit of the specified radix. If the radix | 
|  | * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch) | 
|  | * exceeds the radix, or if ch is not a decimal digit or in the case | 
|  | * insensitive set of 'a'-'z', the result is -1. | 
|  | * <br> | 
|  | * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A | 
|  | *    |U+FF21-U+FF3A|U+FF41-U+FF5A | 
|  | * | 
|  | * @param codePoint character to convert into a digit | 
|  | * @param radix radix in which ch is a digit | 
|  | * @return digit which ch represents in radix, or -1 not a valid digit | 
|  | * @see #MIN_RADIX | 
|  | * @see #MAX_RADIX | 
|  | * @see #forDigit(int, int) | 
|  | * @see #isDigit(char) | 
|  | * @see #getNumericValue(char) | 
|  | */ | 
|  | public static int digit(int codePoint, int radix) | 
|  | { | 
|  | if (radix < MIN_RADIX || radix > MAX_RADIX) | 
|  | return -1; | 
|  |  | 
|  | // If the code point is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.digit(codePoint, radix); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.digit(codePoint, radix); | 
|  | char attr = readCodePoint(codePoint); | 
|  | if (((1 << (attr & TYPE_MASK)) | 
|  | & ((1 << UPPERCASE_LETTER) | 
|  | | (1 << LOWERCASE_LETTER) | 
|  | | (1 << DECIMAL_DIGIT_NUMBER))) != 0) | 
|  | { | 
|  | // Signedness doesn't matter; 0xffff vs. -1 are both rejected. | 
|  | int digit = numValue[plane][attr >> 7]; | 
|  |  | 
|  | // If digit is less than or equal to -3 then the numerical value was | 
|  | // too large to fit into numValue and is stored in CharData.LARGENUMS. | 
|  | if (digit <= -3) | 
|  | digit = CharData.LARGENUMS[-digit - 3]; | 
|  | return (digit < radix) ? digit : -1; | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode numeric value property of a character. For example, | 
|  | * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. | 
|  | * | 
|  | * <p>This method also returns values for the letters A through Z, (not | 
|  | * specified by Unicode), in these ranges: <code>'\u0041'</code> | 
|  | * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code> | 
|  | * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code> | 
|  | * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through | 
|  | * <code>'\uFF5A'</code> (full width variants). | 
|  | * | 
|  | * <p>If the character lacks a numeric value property, -1 is returned. | 
|  | * If the character has a numeric value property which is not representable | 
|  | * as a nonnegative integer, such as a fraction, -2 is returned. | 
|  | * | 
|  | * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A | 
|  | *    |U+FF21-U+FF3A|U+FF41-U+FF5A | 
|  | * | 
|  | * @param ch character from which the numeric value property will | 
|  | *        be retrieved | 
|  | * @return the numeric value property of ch, or -1 if it does not exist, or | 
|  | *         -2 if it is not representable as a nonnegative integer | 
|  | * @see #forDigit(int, int) | 
|  | * @see #digit(char, int) | 
|  | * @see #isDigit(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static int getNumericValue(char ch) | 
|  | { | 
|  | // Treat numValue as signed. | 
|  | return (short) numValue[0][readCodePoint((int)ch) >> 7]; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode numeric value property of a character. For example, | 
|  | * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. | 
|  | * | 
|  | * <p>This method also returns values for the letters A through Z, (not | 
|  | * specified by Unicode), in these ranges: <code>'\u0041'</code> | 
|  | * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code> | 
|  | * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code> | 
|  | * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through | 
|  | * <code>'\uFF5A'</code> (full width variants). | 
|  | * | 
|  | * <p>If the character lacks a numeric value property, -1 is returned. | 
|  | * If the character has a numeric value property which is not representable | 
|  | * as a nonnegative integer, such as a fraction, -2 is returned. | 
|  | * | 
|  | * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A | 
|  | *    |U+FF21-U+FF3A|U+FF41-U+FF5A | 
|  | * | 
|  | * @param codePoint character from which the numeric value property will | 
|  | *        be retrieved | 
|  | * @return the numeric value property of ch, or -1 if it does not exist, or | 
|  | *         -2 if it is not representable as a nonnegative integer | 
|  | * @see #forDigit(int, int) | 
|  | * @see #digit(char, int) | 
|  | * @see #isDigit(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int getNumericValue(int codePoint) | 
|  | { | 
|  | // If the code point is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.getNumericValue(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.getNumericValue(codePoint); | 
|  |  | 
|  | // If the value N found in numValue[plane] is less than or equal to -3 | 
|  | // then the numeric value was too big to fit into 16 bits and is | 
|  | // stored in CharData.LARGENUMS at offset (-N - 3). | 
|  | short num = (short)numValue[plane][readCodePoint(codePoint) >> 7]; | 
|  | if (num <= -3) | 
|  | return CharData.LARGENUMS[-num - 3]; | 
|  | return num; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a ISO-LATIN-1 space. This is only the five | 
|  | * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>, | 
|  | * <code>'\r'</code>, and <code>' '</code>. | 
|  | * <br> | 
|  | * Java space = U+0020|U+0009|U+000A|U+000C|U+000D | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a space, else false | 
|  | * @deprecated Replaced by {@link #isWhitespace(char)} | 
|  | * @see #isSpaceChar(char) | 
|  | * @see #isWhitespace(char) | 
|  | */ | 
|  | public static boolean isSpace(char ch) | 
|  | { | 
|  | // Performing the subtraction up front alleviates need to compare longs. | 
|  | return ch-- <= ' ' && ((1 << ch) | 
|  | & ((1 << (' ' - 1)) | 
|  | | (1 << ('\t' - 1)) | 
|  | | (1 << ('\n' - 1)) | 
|  | | (1 << ('\r' - 1)) | 
|  | | (1 << ('\f' - 1)))) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode space character. This includes | 
|  | * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR. | 
|  | * <br> | 
|  | * Unicode space = [Zs]|[Zp]|[Zl] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is a Unicode space, else false | 
|  | * @see #isWhitespace(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isSpaceChar(char ch) | 
|  | { | 
|  | return isSpaceChar((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is a Unicode space character. This includes | 
|  | * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR. | 
|  | * <br> | 
|  | * Unicode space = [Zs]|[Zp]|[Zl] | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is a Unicode space, else false | 
|  | * @see #isWhitespace(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isSpaceChar(int codePoint) | 
|  | { | 
|  | return ((1 << getType(codePoint)) | 
|  | & ((1 << SPACE_SEPARATOR) | 
|  | | (1 << LINE_SEPARATOR) | 
|  | | (1 << PARAGRAPH_SEPARATOR))) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is Java whitespace. This includes Unicode | 
|  | * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and | 
|  | * PARAGRAPH_SEPARATOR) except the non-breaking spaces | 
|  | * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>); | 
|  | * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>, | 
|  | * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>, | 
|  | * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>, | 
|  | * and <code>'\u001F'</code>. | 
|  | * <br> | 
|  | * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is Java whitespace, else false | 
|  | * @see #isSpaceChar(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isWhitespace(char ch) | 
|  | { | 
|  | return isWhitespace((int) ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character is Java whitespace. This includes Unicode | 
|  | * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and | 
|  | * PARAGRAPH_SEPARATOR) except the non-breaking spaces | 
|  | * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>); | 
|  | * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>, | 
|  | * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>, | 
|  | * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>, | 
|  | * and <code>'\u001F'</code>. | 
|  | * <br> | 
|  | * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F | 
|  | * | 
|  | * @param codePoint character to test | 
|  | * @return true if ch is Java whitespace, else false | 
|  | * @see #isSpaceChar(char) | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isWhitespace(int codePoint) | 
|  | { | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.isWhiteSpace(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.isWhiteSpace(codePoint); | 
|  |  | 
|  | int attr = readCodePoint(codePoint); | 
|  | return ((((1 << (attr & TYPE_MASK)) | 
|  | & ((1 << SPACE_SEPARATOR) | 
|  | | (1 << LINE_SEPARATOR) | 
|  | | (1 << PARAGRAPH_SEPARATOR))) != 0) | 
|  | && (attr & NO_BREAK_MASK) == 0) | 
|  | || (codePoint <= '\u001F' && ((1 << codePoint) | 
|  | & ((1 << '\t') | 
|  | | (1 << '\n') | 
|  | | (1 << '\u000B') | 
|  | | (1 << '\u000C') | 
|  | | (1 << '\r') | 
|  | | (1 << '\u001C') | 
|  | | (1 << '\u001D') | 
|  | | (1 << '\u001E') | 
|  | | (1 << '\u001F'))) != 0); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if a character has the ISO Control property. | 
|  | * <br> | 
|  | * ISO Control = [Cc] | 
|  | * | 
|  | * @param ch character to test | 
|  | * @return true if ch is an ISO Control character, else false | 
|  | * @see #isSpaceChar(char) | 
|  | * @see #isWhitespace(char) | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static boolean isISOControl(char ch) | 
|  | { | 
|  | return isISOControl((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines if the character is an ISO Control character.  This is true | 
|  | * if the code point is in the range [0, 0x001F] or if it is in the range | 
|  | * [0x007F, 0x009F]. | 
|  | * @param codePoint the character to check | 
|  | * @return true if the character is in one of the above ranges | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isISOControl(int codePoint) | 
|  | { | 
|  | if ((codePoint >= 0 && codePoint <= 0x001F) | 
|  | || (codePoint >= 0x007F && codePoint <= 0x009F)) | 
|  | return true; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode general category property of a character. | 
|  | * | 
|  | * @param ch character from which the general category property will | 
|  | *        be retrieved | 
|  | * @return the character category property of ch as an integer | 
|  | * @see #UNASSIGNED | 
|  | * @see #UPPERCASE_LETTER | 
|  | * @see #LOWERCASE_LETTER | 
|  | * @see #TITLECASE_LETTER | 
|  | * @see #MODIFIER_LETTER | 
|  | * @see #OTHER_LETTER | 
|  | * @see #NON_SPACING_MARK | 
|  | * @see #ENCLOSING_MARK | 
|  | * @see #COMBINING_SPACING_MARK | 
|  | * @see #DECIMAL_DIGIT_NUMBER | 
|  | * @see #LETTER_NUMBER | 
|  | * @see #OTHER_NUMBER | 
|  | * @see #SPACE_SEPARATOR | 
|  | * @see #LINE_SEPARATOR | 
|  | * @see #PARAGRAPH_SEPARATOR | 
|  | * @see #CONTROL | 
|  | * @see #FORMAT | 
|  | * @see #PRIVATE_USE | 
|  | * @see #SURROGATE | 
|  | * @see #DASH_PUNCTUATION | 
|  | * @see #START_PUNCTUATION | 
|  | * @see #END_PUNCTUATION | 
|  | * @see #CONNECTOR_PUNCTUATION | 
|  | * @see #OTHER_PUNCTUATION | 
|  | * @see #MATH_SYMBOL | 
|  | * @see #CURRENCY_SYMBOL | 
|  | * @see #MODIFIER_SYMBOL | 
|  | * @see #INITIAL_QUOTE_PUNCTUATION | 
|  | * @see #FINAL_QUOTE_PUNCTUATION | 
|  | * @since 1.1 | 
|  | */ | 
|  | public static int getType(char ch) | 
|  | { | 
|  | return getType((int)ch); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode general category property of a character. | 
|  | * | 
|  | * @param codePoint character from which the general category property will | 
|  | *        be retrieved | 
|  | * @return the character category property of ch as an integer | 
|  | * @see #UNASSIGNED | 
|  | * @see #UPPERCASE_LETTER | 
|  | * @see #LOWERCASE_LETTER | 
|  | * @see #TITLECASE_LETTER | 
|  | * @see #MODIFIER_LETTER | 
|  | * @see #OTHER_LETTER | 
|  | * @see #NON_SPACING_MARK | 
|  | * @see #ENCLOSING_MARK | 
|  | * @see #COMBINING_SPACING_MARK | 
|  | * @see #DECIMAL_DIGIT_NUMBER | 
|  | * @see #LETTER_NUMBER | 
|  | * @see #OTHER_NUMBER | 
|  | * @see #SPACE_SEPARATOR | 
|  | * @see #LINE_SEPARATOR | 
|  | * @see #PARAGRAPH_SEPARATOR | 
|  | * @see #CONTROL | 
|  | * @see #FORMAT | 
|  | * @see #PRIVATE_USE | 
|  | * @see #SURROGATE | 
|  | * @see #DASH_PUNCTUATION | 
|  | * @see #START_PUNCTUATION | 
|  | * @see #END_PUNCTUATION | 
|  | * @see #CONNECTOR_PUNCTUATION | 
|  | * @see #OTHER_PUNCTUATION | 
|  | * @see #MATH_SYMBOL | 
|  | * @see #CURRENCY_SYMBOL | 
|  | * @see #MODIFIER_SYMBOL | 
|  | * @see #INITIAL_QUOTE_PUNCTUATION | 
|  | * @see #FINAL_QUOTE_PUNCTUATION | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int getType(int codePoint) | 
|  | { | 
|  | // If the codePoint is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.getType(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.getType(codePoint); | 
|  |  | 
|  | return readCodePoint(codePoint) & TYPE_MASK; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a digit into a character which represents that digit | 
|  | * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX, | 
|  | * or the digit exceeds the radix, then the null character <code>'\0'</code> | 
|  | * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'. | 
|  | * <br> | 
|  | * return value boundary = U+0030-U+0039|U+0061-U+007A | 
|  | * | 
|  | * @param digit digit to be converted into a character | 
|  | * @param radix radix of digit | 
|  | * @return character representing digit in radix, or '\0' | 
|  | * @see #MIN_RADIX | 
|  | * @see #MAX_RADIX | 
|  | * @see #digit(char, int) | 
|  | */ | 
|  | public static char forDigit(int digit, int radix) | 
|  | { | 
|  | if (radix < MIN_RADIX || radix > MAX_RADIX | 
|  | || digit < 0 || digit >= radix) | 
|  | return '\0'; | 
|  | return Number.digits[digit]; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode directionality property of the character. This | 
|  | * is used in the visual ordering of text. | 
|  | * | 
|  | * @param ch the character to look up | 
|  | * @return the directionality constant, or DIRECTIONALITY_UNDEFINED | 
|  | * @see #DIRECTIONALITY_UNDEFINED | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR | 
|  | * @see #DIRECTIONALITY_ARABIC_NUMBER | 
|  | * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR | 
|  | * @see #DIRECTIONALITY_NONSPACING_MARK | 
|  | * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL | 
|  | * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR | 
|  | * @see #DIRECTIONALITY_SEGMENT_SEPARATOR | 
|  | * @see #DIRECTIONALITY_WHITESPACE | 
|  | * @see #DIRECTIONALITY_OTHER_NEUTRALS | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE | 
|  | * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static byte getDirectionality(char ch) | 
|  | { | 
|  | // The result will correctly be signed. | 
|  | return getDirectionality((int)ch); | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Returns the Unicode directionality property of the character. This | 
|  | * is used in the visual ordering of text. | 
|  | * | 
|  | * @param codePoint the character to look up | 
|  | * @return the directionality constant, or DIRECTIONALITY_UNDEFINED | 
|  | * @see #DIRECTIONALITY_UNDEFINED | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR | 
|  | * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR | 
|  | * @see #DIRECTIONALITY_ARABIC_NUMBER | 
|  | * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR | 
|  | * @see #DIRECTIONALITY_NONSPACING_MARK | 
|  | * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL | 
|  | * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR | 
|  | * @see #DIRECTIONALITY_SEGMENT_SEPARATOR | 
|  | * @see #DIRECTIONALITY_WHITESPACE | 
|  | * @see #DIRECTIONALITY_OTHER_NEUTRALS | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING | 
|  | * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING | 
|  | * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE | 
|  | * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static byte getDirectionality(int codePoint) | 
|  | { | 
|  | // If the code point is unassigned or in one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.getDirectionality(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.getDirectionality(codePoint); | 
|  |  | 
|  | // The result will correctly be signed. | 
|  | return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines whether the character is mirrored according to Unicode. For | 
|  | * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in | 
|  | * left-to-right text, but ')' in right-to-left text. | 
|  | * | 
|  | * @param ch the character to look up | 
|  | * @return true if the character is mirrored | 
|  | * @since 1.4 | 
|  | */ | 
|  | public static boolean isMirrored(char ch) | 
|  | { | 
|  | return (readCodePoint((int)ch) & MIRROR_MASK) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines whether the character is mirrored according to Unicode. For | 
|  | * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in | 
|  | * left-to-right text, but ')' in right-to-left text. | 
|  | * | 
|  | * @param codePoint the character to look up | 
|  | * @return true if the character is mirrored | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isMirrored(int codePoint) | 
|  | { | 
|  | // If the code point is unassigned or part of one of the private use areas | 
|  | // then we delegate the call to the appropriate private static inner class. | 
|  | int plane = codePoint >>> 16; | 
|  | if (plane > 2 && plane < 14) | 
|  | return UnassignedCharacters.isMirrored(codePoint); | 
|  | if (plane > 14) | 
|  | return PrivateUseCharacters.isMirrored(codePoint); | 
|  |  | 
|  | return (readCodePoint(codePoint) & MIRROR_MASK) != 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Compares another Character to this Character, numerically. | 
|  | * | 
|  | * @param anotherCharacter Character to compare with this Character | 
|  | * @return a negative integer if this Character is less than | 
|  | *         anotherCharacter, zero if this Character is equal, and | 
|  | *         a positive integer if this Character is greater | 
|  | * @throws NullPointerException if anotherCharacter is null | 
|  | * @since 1.2 | 
|  | */ | 
|  | public int compareTo(Character anotherCharacter) | 
|  | { | 
|  | return value - anotherCharacter.value; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Compares two unboxed char values. | 
|  | * The result is positive if the first is greater, negative if the second | 
|  | * is greater, and 0 if the two are equal. | 
|  | * | 
|  | * @param x First value to compare. | 
|  | * @param y Second value to compare. | 
|  | * | 
|  | * @return positive int if the first value is greater, negative if the second | 
|  | * is greater, and 0 if the two are equal. | 
|  | * @since 1.7 | 
|  | */ | 
|  | public static int compare(char x, char y) | 
|  | { | 
|  | return Character.valueOf(x).compareTo(Character.valueOf(y)); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns an <code>Character</code> object wrapping the value. | 
|  | * In contrast to the <code>Character</code> constructor, this method | 
|  | * will cache some values.  It is used by boxing conversion. | 
|  | * | 
|  | * @param val the value to wrap | 
|  | * @return the <code>Character</code> | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static Character valueOf(char val) | 
|  | { | 
|  | if (val > MAX_CACHE) | 
|  | return new Character(val); | 
|  | else | 
|  | return charCache[val - MIN_VALUE]; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Reverse the bytes in val. | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static char reverseBytes(char val) | 
|  | { | 
|  | return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00)); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a unicode code point to a UTF-16 representation of that | 
|  | * code point. | 
|  | * | 
|  | * @param codePoint the unicode code point | 
|  | * | 
|  | * @return the UTF-16 representation of that code point | 
|  | * | 
|  | * @throws IllegalArgumentException if the code point is not a valid | 
|  | *         unicode code point | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static char[] toChars(int codePoint) | 
|  | { | 
|  | if (!isValidCodePoint(codePoint)) | 
|  | throw new IllegalArgumentException("Illegal Unicode code point : " | 
|  | + codePoint); | 
|  | char[] result = new char[charCount(codePoint)]; | 
|  | int ignore = toChars(codePoint, result, 0); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts a unicode code point to its UTF-16 representation. | 
|  | * | 
|  | * @param codePoint the unicode code point | 
|  | * @param dst the target char array | 
|  | * @param dstIndex the start index for the target | 
|  | * | 
|  | * @return number of characters written to <code>dst</code> | 
|  | * | 
|  | * @throws IllegalArgumentException if <code>codePoint</code> is not a | 
|  | *         valid unicode code point | 
|  | * @throws NullPointerException if <code>dst</code> is <code>null</code> | 
|  | * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid | 
|  | *         in <code>dst</code> or if the UTF-16 representation does not | 
|  | *         fit into <code>dst</code> | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int toChars(int codePoint, char[] dst, int dstIndex) | 
|  | { | 
|  | if (!isValidCodePoint(codePoint)) | 
|  | { | 
|  | throw new IllegalArgumentException("not a valid code point: " | 
|  | + codePoint); | 
|  | } | 
|  |  | 
|  | int result; | 
|  | if (isSupplementaryCodePoint(codePoint)) | 
|  | { | 
|  | // Write second char first to cause IndexOutOfBoundsException | 
|  | // immediately. | 
|  | final int cp2 = codePoint - 0x10000; | 
|  | dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE); | 
|  | dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE); | 
|  | result = 2; | 
|  | } | 
|  | else | 
|  | { | 
|  | dst[dstIndex] = (char) codePoint; | 
|  | result = 1; | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Return number of 16-bit characters required to represent the given | 
|  | * code point. | 
|  | * | 
|  | * @param codePoint a unicode code point | 
|  | * | 
|  | * @return 2 if codePoint >= 0x10000, 1 otherwise. | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int charCount(int codePoint) | 
|  | { | 
|  | return | 
|  | (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) | 
|  | ? 2 | 
|  | : 1; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines whether the specified code point is | 
|  | * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode | 
|  | * supplementary character range. | 
|  | * | 
|  | * @param codePoint a Unicode code point | 
|  | * | 
|  | * @return <code>true</code> if code point is in supplementary range | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isSupplementaryCodePoint(int codePoint) | 
|  | { | 
|  | return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT | 
|  | && codePoint <= MAX_CODE_POINT; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Determines whether the specified code point is | 
|  | * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point. | 
|  | * | 
|  | * @param codePoint a Unicode code point | 
|  | * | 
|  | * @return <code>true</code> if code point is valid | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isValidCodePoint(int codePoint) | 
|  | { | 
|  | return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Return true if the given character is a high surrogate. | 
|  | * @param ch the character | 
|  | * @return true if the character is a high surrogate character | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isHighSurrogate(char ch) | 
|  | { | 
|  | return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Return true if the given character is a low surrogate. | 
|  | * @param ch the character | 
|  | * @return true if the character is a low surrogate character | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isLowSurrogate(char ch) | 
|  | { | 
|  | return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Return true if the given characters compose a surrogate pair. | 
|  | * This is true if the first character is a high surrogate and the | 
|  | * second character is a low surrogate. | 
|  | * @param ch1 the first character | 
|  | * @param ch2 the first character | 
|  | * @return true if the characters compose a surrogate pair | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static boolean isSurrogatePair(char ch1, char ch2) | 
|  | { | 
|  | return isHighSurrogate(ch1) && isLowSurrogate(ch2); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Given a valid surrogate pair, this returns the corresponding | 
|  | * code point. | 
|  | * @param high the high character of the pair | 
|  | * @param low the low character of the pair | 
|  | * @return the corresponding code point | 
|  | * | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int toCodePoint(char high, char low) | 
|  | { | 
|  | return ((high - MIN_HIGH_SURROGATE) * 0x400) + | 
|  | (low - MIN_LOW_SURROGATE) + 0x10000; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point at the specified index in the CharSequence. | 
|  | * This is like CharSequence#charAt(int), but if the character is | 
|  | * the start of a surrogate pair, and there is a following | 
|  | * character, and this character completes the pair, then the | 
|  | * corresponding supplementary code point is returned.  Otherwise, | 
|  | * the character at the index is returned. | 
|  | * | 
|  | * @param sequence the CharSequence | 
|  | * @param index the index of the codepoint to get, starting at 0 | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is negative or >= length() | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointAt(CharSequence sequence, int index) | 
|  | { | 
|  | int len = sequence.length(); | 
|  | if (index < 0 || index >= len) | 
|  | throw new IndexOutOfBoundsException(); | 
|  | char high = sequence.charAt(index); | 
|  | if (! isHighSurrogate(high) || ++index >= len) | 
|  | return high; | 
|  | char low = sequence.charAt(index); | 
|  | if (! isLowSurrogate(low)) | 
|  | return high; | 
|  | return toCodePoint(high, low); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point at the specified index in the CharSequence. | 
|  | * If the character is the start of a surrogate pair, and there is a | 
|  | * following character, and this character completes the pair, then | 
|  | * the corresponding supplementary code point is returned. | 
|  | * Otherwise, the character at the index is returned. | 
|  | * | 
|  | * @param chars the character array in which to look | 
|  | * @param index the index of the codepoint to get, starting at 0 | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is negative or >= length() | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointAt(char[] chars, int index) | 
|  | { | 
|  | return codePointAt(chars, index, chars.length); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point at the specified index in the CharSequence. | 
|  | * If the character is the start of a surrogate pair, and there is a | 
|  | * following character within the specified range, and this | 
|  | * character completes the pair, then the corresponding | 
|  | * supplementary code point is returned.  Otherwise, the character | 
|  | * at the index is returned. | 
|  | * | 
|  | * @param chars the character array in which to look | 
|  | * @param index the index of the codepoint to get, starting at 0 | 
|  | * @param limit the limit past which characters should not be examined | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is negative or >= | 
|  | * limit, or if limit is negative or >= the length of the array | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointAt(char[] chars, int index, int limit) | 
|  | { | 
|  | if (index < 0 || index >= limit || limit < 0 || limit > chars.length) | 
|  | throw new IndexOutOfBoundsException(); | 
|  | char high = chars[index]; | 
|  | if (! isHighSurrogate(high) || ++index >= limit) | 
|  | return high; | 
|  | char low = chars[index]; | 
|  | if (! isLowSurrogate(low)) | 
|  | return high; | 
|  | return toCodePoint(high, low); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point before the specified index.  This is like | 
|  | * #codePointAt(char[], int), but checks the characters at | 
|  | * <code>index-1</code> and <code>index-2</code> to see if they form | 
|  | * a supplementary code point.  If they do not, the character at | 
|  | * <code>index-1</code> is returned. | 
|  | * | 
|  | * @param chars the character array | 
|  | * @param index the index just past the codepoint to get, starting at 0 | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is negative or >= length() | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointBefore(char[] chars, int index) | 
|  | { | 
|  | return codePointBefore(chars, index, 1); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point before the specified index.  This is like | 
|  | * #codePointAt(char[], int), but checks the characters at | 
|  | * <code>index-1</code> and <code>index-2</code> to see if they form | 
|  | * a supplementary code point.  If they do not, the character at | 
|  | * <code>index-1</code> is returned.  The start parameter is used to | 
|  | * limit the range of the array which may be examined. | 
|  | * | 
|  | * @param chars the character array | 
|  | * @param index the index just past the codepoint to get, starting at 0 | 
|  | * @param start the index before which characters should not be examined | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is > start or > | 
|  | * the length of the array, or if limit is negative or >= the | 
|  | * length of the array | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointBefore(char[] chars, int index, int start) | 
|  | { | 
|  | if (index < start || index > chars.length | 
|  | || start < 0 || start >= chars.length) | 
|  | throw new IndexOutOfBoundsException(); | 
|  | --index; | 
|  | char low = chars[index]; | 
|  | if (! isLowSurrogate(low) || --index < start) | 
|  | return low; | 
|  | char high = chars[index]; | 
|  | if (! isHighSurrogate(high)) | 
|  | return low; | 
|  | return toCodePoint(high, low); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Get the code point before the specified index.  This is like | 
|  | * #codePointAt(CharSequence, int), but checks the characters at | 
|  | * <code>index-1</code> and <code>index-2</code> to see if they form | 
|  | * a supplementary code point.  If they do not, the character at | 
|  | * <code>index-1</code> is returned. | 
|  | * | 
|  | * @param sequence the CharSequence | 
|  | * @param index the index just past the codepoint to get, starting at 0 | 
|  | * @return the codepoint at the specified index | 
|  | * @throws IndexOutOfBoundsException if index is negative or >= length() | 
|  | * @since 1.5 | 
|  | */ | 
|  | public static int codePointBefore(CharSequence sequence, int index) | 
|  | { | 
|  | int len = sequence.length(); | 
|  | if (index < 1 || index > len) | 
|  | throw new IndexOutOfBoundsException(); | 
|  | --index; | 
|  | char low = sequence.charAt(index); | 
|  | if (! isLowSurrogate(low) || --index < 0) | 
|  | return low; | 
|  | char high = sequence.charAt(index); | 
|  | if (! isHighSurrogate(high)) | 
|  | return low; | 
|  | return toCodePoint(high, low); | 
|  | } | 
|  | } // class Character |