View Javadoc
1   /*
2    * Copyright (C) 2008 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.base;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  import static com.google.common.base.Preconditions.checkPositionIndex;
20  
21  import com.google.common.annotations.GwtCompatible;
22  import com.google.common.annotations.GwtIncompatible;
23  import com.google.common.annotations.VisibleForTesting;
24  import java.util.Arrays;
25  import java.util.BitSet;
26  
27  /**
28   * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
29   * for any {@link Object}. Also offers basic text processing methods based on this function.
30   * Implementations are strongly encouraged to be side-effect-free and immutable.
31   *
32   * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
33   * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
34   *
35   * <p><b>Warning:</b> This class deals only with {@code char} values; it does not understand
36   * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
37   * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
38   * treats these just as two separate characters.
39   *
40   * <p>Example usages:
41   *
42   * <pre>
43   *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
44   *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
45   *
46   * <p>See the Guava User Guide article on <a
47   * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
48   * </a>.
49   *
50   * @author Kevin Bourrillion
51   * @since 1.0
52   */
53  @GwtCompatible(emulated = true)
54  public abstract class CharMatcher implements Predicate<Character> {
55    /*
56     *           N777777777NO
57     *         N7777777777777N
58     *        M777777777777777N
59     *        $N877777777D77777M
60     *       N M77777777ONND777M
61     *       MN777777777NN  D777
62     *     N7ZN777777777NN ~M7778
63     *    N777777777777MMNN88777N
64     *    N777777777777MNZZZ7777O
65     *    DZN7777O77777777777777
66     *     N7OONND7777777D77777N
67     *      8$M++++?N???$77777$
68     *       M7++++N+M77777777N
69     *        N77O777777777777$                              M
70     *          DNNM$$$$777777N                              D
71     *         N$N:=N$777N7777M                             NZ
72     *        77Z::::N777777777                          ODZZZ
73     *       77N::::::N77777777M                         NNZZZ$
74     *     $777:::::::77777777MN                        ZM8ZZZZZ
75     *     777M::::::Z7777777Z77                        N++ZZZZNN
76     *    7777M:::::M7777777$777M                       $++IZZZZM
77     *   M777$:::::N777777$M7777M                       +++++ZZZDN
78     *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
79     *       N::::::N:7$O:77777777                      N++++ZZZZN
80     *       M::::::::::::N77777777+                   +?+++++ZZZM
81     *       8::::::::::::D77777777M                    O+++++ZZ
82     *        ::::::::::::M777777777N                      O+?D
83     *        M:::::::::::M77777777778                     77=
84     *        D=::::::::::N7777777777N                    777
85     *       INN===::::::=77777777777N                  I777N
86     *      ?777N========N7777777777787M               N7777
87     *      77777$D======N77777777777N777N?         N777777
88     *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
89     *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
90     *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
91     *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
92     *   7 :N MNN$$$$M$      $$$777$8      8D8I
93     *     NMM.:7O           777777778
94     *                       7777777MN
95     *                       M NO .7:
96     *                       M   :   M
97     *                            8
98     */
99  
100   // Constant matcher factory methods
101 
102   /**
103    * Returns a {@code char} matcher that matches any character.
104    *
105    * @since 19.0 (since 1.0 as constant {@code ANY})
106    */
107   public static CharMatcher any() {
108     return Any.INSTANCE;
109   }
110 
111   /**
112    * Returns a {@code char} matcher that matches no characters.
113    *
114    * @since 19.0 (since 1.0 as constant {@code NONE})
115    */
116   public static CharMatcher none() {
117     return None.INSTANCE;
118   }
119 
120   /**
121    * Returns a matcher that determines whether a character is whitespace according to the latest
122    * Unicode standard, as illustrated
123    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
124    * This is not the same definition used by other Java APIs. (See a
125    * <a href="https://goo.gl/Y6SLWx">comparison of several definitions of
126    * "whitespace"</a>.)
127    *
128    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
129    * date.
130    *
131    * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
132    */
133   public static CharMatcher whitespace() {
134     return Whitespace.INSTANCE;
135   }
136 
137   /**
138    * Returns a matcher that determines whether a character is a breaking whitespace (that is, a
139    * whitespace which can be interpreted as a break between words for formatting purposes). See
140    * {@link #whitespace()} for a discussion of that term.
141    *
142    * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
143    */
144   public static CharMatcher breakingWhitespace() {
145     return BreakingWhitespace.INSTANCE;
146   }
147 
148   /**
149    * Returns a matcher for ASCII characters, meaning those whose code point is less than 128.
150    *
151    * @since 19.0 (since 1.0 as constant {@code ASCII})
152    */
153   public static CharMatcher ascii() {
154     return Ascii.INSTANCE;
155   }
156 
157   /**
158    * Returns a matcher that determines whether a character is a digit according to
159    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
160    * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
161    *
162    * @since 19.0 (since 1.0 as constant {@code DIGIT})
163    */
164   public static CharMatcher digit() {
165     return Digit.INSTANCE;
166   }
167 
168   /**
169    * Returns a matcher that determines whether a character is a digit according to {@linkplain
170    * Character#isDigit(char) Java's definition}. If you only care to match ASCII digits, you can use
171    * {@code inRange('0', '9')}.
172    *
173    * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
174    */
175   public static CharMatcher javaDigit() {
176     return JavaDigit.INSTANCE;
177   }
178 
179   /**
180    * Returns a matcher that determines whether a character is a letter according to {@linkplain
181    * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
182    * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
183    *
184    * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
185    */
186   public static CharMatcher javaLetter() {
187     return JavaLetter.INSTANCE;
188   }
189 
190   /**
191    * Returns a matcher that determines whether a character is a letter or digit according to
192    * {@linkplain Character#isLetterOrDigit(char) Java's definition}.
193    *
194    * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT})
195    */
196   public static CharMatcher javaLetterOrDigit() {
197     return JavaLetterOrDigit.INSTANCE;
198   }
199 
200   /**
201    * Returns a matcher that determines whether a character is upper case according to
202    * {@linkplain Character#isUpperCase(char) Java's definition}.
203    *
204    * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
205    */
206   public static CharMatcher javaUpperCase() {
207     return JavaUpperCase.INSTANCE;
208   }
209 
210   /**
211    * Returns a matcher that determines whether a character is lower case according to
212    * {@linkplain Character#isLowerCase(char) Java's definition}.
213    *
214    * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
215    */
216   public static CharMatcher javaLowerCase() {
217     return JavaLowerCase.INSTANCE;
218   }
219 
220   /**
221    * Returns a matcher that determines whether a character is an ISO control character as specified
222    * by {@link Character#isISOControl(char)}.
223    *
224    * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
225    */
226   public static CharMatcher javaIsoControl() {
227     return JavaIsoControl.INSTANCE;
228   }
229 
230   /**
231    * Returns a matcher that determines whether a character is invisible; that is, if its Unicode
232    * category is any of SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT,
233    * SURROGATE, and PRIVATE_USE according to ICU4J.
234    *
235    * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
236    */
237   public static CharMatcher invisible() {
238     return Invisible.INSTANCE;
239   }
240 
241   /**
242    * Returns a matcher that determines whether a character is single-width (not double-width). When
243    * in doubt, this matcher errs on the side of returning {@code false} (that is, it tends to assume
244    * a character is double-width).
245    *
246    * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
247    * date.
248    *
249    * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
250    */
251   public static CharMatcher singleWidth() {
252     return SingleWidth.INSTANCE;
253   }
254 
255   // Legacy constants
256 
257   /**
258    * The matcher returned by {@link #whitespace()}: determines whether a character is whitespace
259    * according to the latest Unicode standard, as illustrated
260    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
261    * This is not the same definition used by other Java APIs. (See a
262    * <a href="https://goo.gl/Y6SLWx">comparison of several definitions of
263    * "whitespace"</a>.)
264    *
265    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant
266    * to keep it up to date.
267    *
268    * @deprecated Use {@link #whitespace()} instead. This constant is scheduled to be
269    *     removed in June 2018.
270    */
271   @com.google.common.annotations.Beta
272   @Deprecated
273   public static final CharMatcher WHITESPACE = whitespace();
274 
275   /**
276    * The matcher returned by {@link #breakingWhitespace()}: determines whether a character is a
277    * breaking whitespace (that is, a whitespace which can be interpreted as a break between words
278    * for formatting purposes). See {@link #whitespace()} for a discussion of that term.
279    *
280    * @since 2.0
281    * @deprecated Use {@link #breakingWhitespace()} instead. This constant is scheduled
282    *     to be removed in June 2018.
283    */
284   @com.google.common.annotations.Beta
285   @Deprecated
286   public static final CharMatcher BREAKING_WHITESPACE = breakingWhitespace();
287 
288   /**
289    * The matcher returned by {@link #ascii()}: determines whether a character is ASCII, meaning
290    * that its code point is less than 128.
291    *
292    * @deprecated Use {@link #ascii()} instead. This constant is scheduled to be
293    *     removed in June 2018.
294    */
295   @com.google.common.annotations.Beta
296   @Deprecated
297   public static final CharMatcher ASCII = ascii();
298 
299   /**
300    * The matcher returned by {@link #digit()}: determines whether a character is a digit according
301    * to <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">
302    * Unicode</a>. If you only care to match ASCII digits, you can use
303    * {@code inRange('0', '9')}.
304    *
305    * @deprecated Use {@link #digit()} instead. This constant is scheduled to be
306    *     removed in June 2018.
307    */
308   @com.google.common.annotations.Beta
309   @Deprecated
310   public static final CharMatcher DIGIT = digit();
311 
312   /**
313    * The matcher returned by {@link #javaDigit()}: determines whether a character is a digit
314    * according to {@linkplain Character#isDigit(char) Java's definition}. If you only care to match
315    * ASCII digits, you can use {@code inRange('0', '9')}.
316    *
317    * @deprecated Use {@link #javaDigit()} instead. This constant is scheduled to be
318    *     removed in June 2018.
319    */
320   @com.google.common.annotations.Beta
321   @Deprecated
322   public static final CharMatcher JAVA_DIGIT = javaDigit();
323 
324   /**
325    * The matcher returned by {@link #javaLetter()}: determines whether a character is a letter
326    * according to {@linkplain Character#isLetter(char) Java's definition}. If you only care to
327    * match letters of the Latin alphabet, you can use
328    * {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
329    *
330    * @deprecated Use {@link #javaLetter()} instead. This constant is scheduled to be
331    *     removed in June 2018.
332    */
333   @com.google.common.annotations.Beta
334   @Deprecated
335   public static final CharMatcher JAVA_LETTER = javaLetter();
336 
337   /**
338    * The matcher returned by {@link #javaLetterOrDigit()}: determines whether a character is a
339    * letter or digit according to {@linkplain Character#isLetterOrDigit(char) Java's definition}.
340    *
341    * @deprecated Use {@link #javaLetterOrDigit()} instead. This constant is scheduled
342    *     to be removed in June 2018.
343    */
344   @com.google.common.annotations.Beta
345   @Deprecated
346   public static final CharMatcher JAVA_LETTER_OR_DIGIT = javaLetterOrDigit();
347 
348   /**
349    * The matcher returned by {@link #javaUpperCase()}: determines whether a character is upper
350    * case according to {@linkplain Character#isUpperCase(char) Java's definition}.
351    *
352    * @deprecated Use {@link #javaUpperCase()} instead. This constant is scheduled to
353    *     be removed in June 2018.
354    */
355   @com.google.common.annotations.Beta
356   @Deprecated
357   public static final CharMatcher JAVA_UPPER_CASE = javaUpperCase();
358 
359   /**
360    * The matcher returned by {@link #javaLowerCase()}: determines whether a character is lower
361    * case according to {@linkplain Character#isLowerCase(char) Java's definition}.
362    *
363    * @deprecated Use {@link #javaLowerCase()} instead. This constant is scheduled to
364    *     be removed in June 2018.
365    */
366   @com.google.common.annotations.Beta
367   @Deprecated
368   public static final CharMatcher JAVA_LOWER_CASE = javaLowerCase();
369 
370   /**
371    * The matcher returned by {@link #javaIsoControl()}: determines whether a character is an ISO
372    * control character as specified by {@link Character#isISOControl(char)}.
373    *
374    * @deprecated Use {@link #javaIsoControl()} instead. This constant is scheduled to
375    *     be removed in June 2018.
376    */
377   @com.google.common.annotations.Beta
378   @Deprecated
379   public static final CharMatcher JAVA_ISO_CONTROL = javaIsoControl();
380 
381   /**
382    * The matcher returned by {@link #invisible()}: determines whether a character is invisible;
383    * that is, if its Unicode category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
384    * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according to ICU4J.
385    *
386    * @deprecated Use {@link #invisible()} instead. This constant is scheduled to be
387    *     removed in June 2018.
388    */
389   @com.google.common.annotations.Beta
390   @Deprecated
391   public static final CharMatcher INVISIBLE = invisible();
392 
393   /**
394    * The matcher returned by {@link #singleWidth()}: determines whether a character is
395    * single-width (not double-width). When in doubt, this matcher errs on the side of returning
396    * {@code false} (that is, it tends to assume a character is double-width).
397    *
398    * <p><b>Note:</b> as the reference file evolves, we will modify this constant to
399    * keep it up to date.
400    *
401    * @deprecated Use {@link #singleWidth()} instead. This constant is scheduled to be
402    *     removed in June 2018.
403    */
404   @com.google.common.annotations.Beta
405   @Deprecated
406   public static final CharMatcher SINGLE_WIDTH = singleWidth();
407 
408   /**
409    * The matcher returned by {@link #any()}: matches any character.
410    *
411    * @deprecated Use {@link #any()} instead. This constant is scheduled to be
412    *     removed in June 2018.
413    */
414   @com.google.common.annotations.Beta
415   @Deprecated
416   public static final CharMatcher ANY = any();
417 
418   /**
419    * The matcher returned by {@link #none()}: matches no characters.
420    *
421    * @deprecated Use {@link #none()} instead. This constant is scheduled to be
422    *     removed in June 2018.
423    */
424   @com.google.common.annotations.Beta
425   @Deprecated
426   public static final CharMatcher NONE = none();
427 
428   // Static factories
429 
430   /**
431    * Returns a {@code char} matcher that matches {@code match} and no other character.
432    */
433   public static CharMatcher is(final char match) {
434     return new Is(match);
435   }
436 
437   /**
438    * Returns a {@code char} matcher that matches every character other than {@code match}.
439    *
440    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
441    */
442   public static CharMatcher isNot(final char match) {
443     return new IsNot(match);
444   }
445 
446   /**
447    * Returns a {@code char} matcher that matches every character occurring in {@code sequence}.
448    * Sequences of length 0, 1 and 2 are handled by {@link #none()}, {@link #is} and {@code isEither}.
449    */
450   public static CharMatcher anyOf(final CharSequence sequence) {
451     final int size = sequence.length();
452     if (size == 0) {
453       return none();
454     }
455     if (size == 1) {
456       return is(sequence.charAt(0));
457     }
458     if (size == 2) {
459       return isEither(sequence.charAt(0), sequence.charAt(1));
460     }
461     // TODO(lowasser): is it potentially worth just going ahead and building a precomputed matcher?
462     return new AnyOf(sequence);
463   }
464 
465   /**
466    * Returns a {@code char} matcher that matches every character absent from {@code sequence}.
467    */
468   public static CharMatcher noneOf(CharSequence sequence) {
469     CharMatcher present = anyOf(sequence);
470     return present.negate();
471   }
472 
473   /**
474    * Returns a {@code char} matcher that matches any character in the closed range {@code
475    * [startInclusive, endInclusive]}. For example, {@code CharMatcher.inRange('a', 'z')} matches any
476    * lowercase letter of the English alphabet.
477    *
478    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
479    */
480   public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
481     return new InRange(startInclusive, endInclusive);
482   }
483 
484   /**
485    * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
486    * operating on primitive {@code char} values. A {@code CharMatcher} argument is returned as is.
487    */
488   public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
489     return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
490   }
491 
492   // Constructors
493 
494   /**
495    * Constructor for use by subclasses. When subclassing, you may want to override {@code
496    * toString()} to provide a useful description; precomputation may pass it on as a description.
497    */
498   protected CharMatcher() {}
499 
500   // Abstract methods
501 
502   /** Returns {@code true} if this matcher matches the character {@code c}. */
503   public abstract boolean matches(char c);
504 
505   // Non-static factories
506 
507   /**
508    * Returns a matcher that matches exactly those characters that this matcher does not match.
509    */
510   public CharMatcher negate() {
511     return new Negated(this);
512   }
513 
514   /**
515    * Returns a matcher that matches a character only if both this matcher and {@code other} do.
516    */
517   public CharMatcher and(CharMatcher other) {
518     return new And(this, other);
519   }
520 
521   /**
522    * Returns a matcher that matches a character if this matcher, {@code other}, or both match it.
523    */
524   public CharMatcher or(CharMatcher other) {
525     return new Or(this, other);
526   }
527 
528   /**
529    * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
530    * query than the original; your mileage may vary. Precomputation takes time and is likely to be
531    * worthwhile only if the precomputed matcher is queried many thousands of times.
532    *
533    * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
534    * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
535    * worthwhile tradeoff in a browser.
536    */
537   public CharMatcher precomputed() {
538     return Platform.precomputeCharMatcher(this); // indirection lets GWT behave differently
539   }
540 
541   private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; // 65536
542 
543   /**
544    * This is the actual implementation of {@link #precomputed}; calls are bounced through a method
545    * on {@link Platform} so that GWT can behave differently.
546    *
547    * <p>The matched characters are first collected into a bit set. If at most half of all {@code
548    * char} values match, that set is precomputed directly; otherwise the cheaper complement is
549    * precomputed and wrapped in a negating matcher that keeps this matcher's description. Small
550    * hash tables are tried for matchers matching only a few characters; in the worst case an
551    * eight-kilobyte bit array is built and queried.
552    */
553   @GwtIncompatible // SmallCharMatcher
554   CharMatcher precomputedInternal() {
555     final BitSet bits = new BitSet();
556     setBits(bits);
557     final int matchCount = bits.cardinality();
558     if (matchCount * 2 <= DISTINCT_CHARS) {
559       return precomputedPositive(matchCount, bits, toString());
560     }
561     // TODO(lowasser): is it worth it to worry about the last character of large matchers?
562     bits.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
563     final int complementCount = DISTINCT_CHARS - matchCount;
564     final String negateSuffix = ".negate()";
565     final String description = toString();
566     final String complementDescription =
567         description.endsWith(negateSuffix)
568             ? description.substring(0, description.length() - negateSuffix.length())
569             : description + negateSuffix;
570     final CharMatcher complement =
571         precomputedPositive(complementCount, bits, complementDescription);
572     return new NegatedFastMatcher(complement) {
573       @Override
574       public String toString() {
575         return description;
576       }
577     };
578   }
579 
580   /**
581    * Helper for {@link #precomputedInternal}: precomputes {@code table} without trying its negation.
582    */
583   @GwtIncompatible // SmallCharMatcher
584   private static CharMatcher precomputedPositive(
585       int totalCharacters, BitSet table, String description) {
586     if (totalCharacters == 0) {
587       return none();
588     } else if (totalCharacters == 1) {
589       return is((char) table.nextSetBit(0));
590     } else if (totalCharacters == 2) {
591       char first = (char) table.nextSetBit(0);
592       char second = (char) table.nextSetBit(first + 1);
593       return isEither(first, second);
594     }
595     if (isSmall(totalCharacters, table.length())) {
596       return SmallCharMatcher.from(table, description);
597     }
598     // Not "small" according to isSmall(): query the bit set itself.
599     return new BitSetMatcher(table, description);
600   }
601 
602   @GwtIncompatible // SmallCharMatcher
603   private static boolean isSmall(int totalCharacters, int tableLength) {
604     // Err on the side of BitSetMatcher: both conditions must hold for the small matcher.
605     boolean fewEnough = totalCharacters <= SmallCharMatcher.MAX_SIZE;
606     return fewEnough && tableLength > (totalCharacters * 4 * Character.SIZE);
607   }
608 
609   /** Sets the bit in {@code table} for every {@code char} value matched by this matcher. */
610   @GwtIncompatible // used only from other GwtIncompatible code
611   void setBits(BitSet table) {
612     int code = Character.MAX_VALUE;
613     while (code >= Character.MIN_VALUE) {
614       if (matches((char) code)) {
615         table.set(code);
616       }
617       code--;
618     }
619   }
620 
621   // Text processing routines
622 
623   /**
624    * Returns {@code true} if a character sequence contains at least one matching character.
625    * Equivalent to {@code !matchesNoneOf(sequence)}.
626    *
627    * <p>The default implementation negates {@link #matchesNoneOf}, so {@link #matches} is invoked
628    * for each character until it returns {@code true} or the end is reached.
629    *
630    * @param sequence the character sequence to examine, possibly empty
631    * @return {@code true} if this matcher matches at least one character in the sequence
632    * @since 8.0
633    */
634   public boolean matchesAnyOf(CharSequence sequence) {
635     return !matchesNoneOf(sequence);
636   }
637 
638   /**
639    * Returns {@code true} if every character of a character sequence is a matching character.
640    *
641    * <p>The default implementation walks the sequence from its end towards its start, invoking
642    * {@link #matches} on each character until one fails to match or the start is passed.
643    *
644    * @param sequence the character sequence to examine, possibly empty
645    * @return {@code true} if no character fails to match, which includes the empty sequence
646    */
647   public boolean matchesAllOf(CharSequence sequence) {
648     int index = sequence.length();
649     while (--index >= 0) {
650       if (!matches(sequence.charAt(index))) {
651         return false;
652       }
653     }
654     return true;
655   }
656 
657   /**
658    * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
659    * {@code !matchesAnyOf(sequence)}.
660    *
661    * <p>The default implementation checks whether {@link #indexIn(CharSequence)} returns {@code -1},
662    * so {@link #matches} runs front to back until it returns {@code true} or the end is reached.
663    *
664    * @param sequence the character sequence to examine, possibly empty
665    * @return {@code true} if this matcher matches no characters in the sequence, including when
666    *     the sequence is empty
667    */
668   public boolean matchesNoneOf(CharSequence sequence) {
669     return indexIn(sequence) == -1;
670   }
671 
672   /**
673    * Returns the index of the first matching character in a character sequence, or {@code -1} if no
674    * matching character is present.
675    *
676    * <p>The default implementation delegates to {@link #indexIn(CharSequence, int)} with a start
677    * index of {@code 0}.
678    *
679    * @param sequence the character sequence to examine from the beginning
680    * @return an index, or {@code -1} if no character matches
681    */
682   public int indexIn(CharSequence sequence) {
683     return indexIn(sequence, 0);
684   }
685 
686   /**
687    * Returns the index of the first matching character at or after a given position in a character
688    * sequence, or {@code -1} if no character matches from that position on.
689    *
690    * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
691    * start}, calling {@link #matches} for each character.
692    *
693    * @param sequence the character sequence to examine
694    * @param start the first index to examine; must be nonnegative and no greater than {@code
695    *     sequence.length()}
696    * @return the index of the first matching character, guaranteed to be no less than {@code start},
697    *     or {@code -1} if no character matches
698    * @throws IndexOutOfBoundsException if start is negative or greater than {@code
699    *     sequence.length()}
700    */
701   public int indexIn(CharSequence sequence, int start) {
702     final int end = sequence.length();
703     checkPositionIndex(start, end);
704     int index = start;
705     // Advance until a matching character is found or the sequence is exhausted.
706     while (index < end && !matches(sequence.charAt(index))) {
707       index++;
708     }
709     return index < end ? index : -1;
710   }
711 
712   /**
713    * Returns the index of the last matching character in a character sequence, or {@code -1} if the
714    * sequence contains no matching character.
715    *
716    * <p>The default implementation walks the sequence from its last character backwards, calling
717    * {@link #matches} for each character.
718    *
719    * @param sequence the character sequence to examine from the end
720    * @return an index, or {@code -1} if no character matches
721    */
722   public int lastIndexIn(CharSequence sequence) {
723     int index = sequence.length() - 1;
724     while (index >= 0 && !matches(sequence.charAt(index))) {
725       index--;
726     }
727     // Falling off the front leaves index at -1, which is exactly the "not found" result.
728     return index;
729   }
730 
731   /**
732    * Returns the number of matching characters found in a character sequence.
733    *
734    * @param sequence the character sequence to examine, possibly empty
735    */
736   public int countIn(CharSequence sequence) {
737     int matched = 0;
738     for (int index = 0; index < sequence.length(); index++) {
739       matched += matches(sequence.charAt(index)) ? 1 : 0;
740     }
741     return matched;
742   }
743 
744   /**
745    * Returns a string containing all non-matching characters of a character sequence, in order. For
746    * example: <pre>   {@code
747    *
748    *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
749    *
750    * ... returns {@code "bzr"}.
751    */
752   public String removeFrom(CharSequence sequence) {
753     String string = sequence.toString();
754     int pos = indexIn(string);
755     if (pos == -1) {
756       return string; // nothing matches, so nothing has to be copied
757     }
758 
759     char[] chars = string.toCharArray();
760     int spread = 1; // matching characters dropped so far, i.e. the gap between read and write
761 
762     // This unusual loop comes from extensive benchmarking
763     OUT:
764     while (true) {
765       pos++; // step past the matching character just found
766       while (true) {
767         if (pos == chars.length) {
768           break OUT;
769         }
770         if (matches(chars[pos])) {
771           break;
772         }
773         chars[pos - spread] = chars[pos]; // shift the kept character left over the gap
774         pos++;
775       }
776       spread++;
777     }
778     return new String(chars, 0, pos - spread);
779   }
780 
781   /**
782    * Returns a string containing all matching characters of a character sequence, in order. For
783    * example: <pre>   {@code
784    *
785    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
786    *
787    * ... returns {@code "aaa"}.
788    */
789   public String retainFrom(CharSequence sequence) {
790     return negate().removeFrom(sequence); // retaining matches == removing non-matches
791   }
792 
793   /**
794    * Returns a string copy of the input character sequence, with each character that matches this
795    * matcher replaced by a given replacement character. For example: <pre>   {@code
796    *
797    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
798    *
799    * ... returns {@code "rodor"}.
800    *
801    * <p>The default implementation locates the first matching character with {@link
802    * #indexIn(CharSequence)}, then tests every later character with {@link #matches(char)}; if
803    * nothing matches, {@code sequence.toString()} is returned as is.
804    *
805    * @param sequence the character sequence to replace matching characters in
806    * @param replacement the character that takes the place of each matching character
807    * @return the resulting string
808    */
809   public String replaceFrom(CharSequence sequence, char replacement) {
810     final String original = sequence.toString();
811     final int firstMatch = indexIn(original);
812     if (firstMatch == -1) {
813       return original;
814     }
815     final char[] result = original.toCharArray();
816     result[firstMatch] = replacement;
817     int index = firstMatch;
818     while (++index < result.length) {
819       if (matches(result[index])) {
820         result[index] = replacement;
821       }
822     }
823     return new String(result);
824   }
825 
826   /**
827    * Returns a string copy of the input character sequence, with each character that matches this
828    * matcher replaced by a given replacement sequence. For example: <pre>   {@code
829    *
830    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
831    *
832    * ... returns {@code "yoohoo"}.
833    *
834    * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
835    * off calling {@link #replaceFrom(CharSequence, char)} directly.
836    *
837    * @param sequence the character sequence to replace matching characters in
838    * @param replacement the characters to append to the result string in place of each matching
839    *     character in {@code sequence}
840    * @return the new string
841    */
842   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
843     int replacementLen = replacement.length();
844     if (replacementLen == 0) {
845       return removeFrom(sequence);
846     }
847     if (replacementLen == 1) {
848       return replaceFrom(sequence, replacement.charAt(0));
849     }
850 
851     String string = sequence.toString();
852     int pos = indexIn(string);
853     if (pos == -1) {
854       return string;
855     }
856 
857     int len = string.length();
858     StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
859 
860     int oldpos = 0;
861     do {
862       buf.append(string, oldpos, pos);
863       buf.append(replacement);
864       oldpos = pos + 1;
865       pos = indexIn(string, oldpos);
866     } while (pos != -1);
867 
868     buf.append(string, oldpos, len);
869     return buf.toString();
870   }
871 
872   /**
873    * Returns a substring of the input character sequence that omits all characters this matcher
874    * matches from the beginning and from the end of the string. For example: <pre>   {@code
875    *
876    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
877    *
878    * ... returns {@code "cat"}.
879    *
880    * <p>Note that: <pre>   {@code
881    *
882    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
883    *
884    * ... is equivalent to {@link String#trim()}.
885    */
886   public String trimFrom(CharSequence sequence) {
887     int len = sequence.length();
888     int first;
889     int last;
890 
891     for (first = 0; first < len; first++) {
892       if (!matches(sequence.charAt(first))) {
893         break;
894       }
895     }
896     for (last = len - 1; last > first; last--) {
897       if (!matches(sequence.charAt(last))) {
898         break;
899       }
900     }
901 
902     return sequence.subSequence(first, last + 1).toString();
903   }
904 
905   /**
906    * Returns a substring of the input character sequence that omits all characters this matcher
907    * matches from the beginning of the string. For example: <pre> {@code
908    *
909    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
910    *
911    * ... returns {@code "catbab"}.
912    */
913   public String trimLeadingFrom(CharSequence sequence) {
914     int len = sequence.length();
915     for (int first = 0; first < len; first++) {
916       if (!matches(sequence.charAt(first))) {
917         return sequence.subSequence(first, len).toString();
918       }
919     }
920     return "";
921   }
922 
923   /**
924    * Returns a substring of the input character sequence that omits all characters this matcher
925    * matches from the end of the string. For example: <pre> {@code
926    *
927    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
928    *
929    * ... returns {@code "abacat"}.
930    */
931   public String trimTrailingFrom(CharSequence sequence) {
932     int len = sequence.length();
933     for (int last = len - 1; last >= 0; last--) {
934       if (!matches(sequence.charAt(last))) {
935         return sequence.subSequence(0, last + 1).toString();
936       }
937     }
938     return "";
939   }
940 
941   /**
942    * Returns a string copy of the input character sequence, with each group of consecutive
943    * characters that match this matcher replaced by a single replacement character. For example:
944    * <pre>   {@code
945    *
946    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
947    *
948    * ... returns {@code "b-p-r"}.
949    *
950    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
951    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
952    * character.
953    *
954    * @param sequence the character sequence to replace matching groups of characters in
955    * @param replacement the character to append to the result string in place of each group of
956    *     matching characters in {@code sequence}
957    * @return the new string
958    */
959   public String collapseFrom(CharSequence sequence, char replacement) {
960     // This implementation avoids unnecessary allocation.
961     int len = sequence.length();
962     for (int i = 0; i < len; i++) {
963       char c = sequence.charAt(i);
964       if (matches(c)) {
965         if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
966           // a no-op replacement
967           i++;
968         } else {
969           StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
970           return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
971         }
972       }
973     }
974     // no replacement needed
975     return sequence.toString();
976   }
977 
978   /**
979    * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
980    * groups of matching characters at the start or end of the sequence are removed without
981    * replacement.
982    */
983   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
984     // This implementation avoids unnecessary allocation.
985     int len = sequence.length();
986     int first = 0;
987     int last = len - 1;
988 
989     while (first < len && matches(sequence.charAt(first))) {
990       first++;
991     }
992 
993     while (last > first && matches(sequence.charAt(last))) {
994       last--;
995     }
996 
997     return (first == 0 && last == len - 1)
998         ? collapseFrom(sequence, replacement)
999         : finishCollapseFrom(
1000             sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
1001   }
1002 
1003   private String finishCollapseFrom(
1004       CharSequence sequence,
1005       int start,
1006       int end,
1007       char replacement,
1008       StringBuilder builder,
1009       boolean inMatchingGroup) {
1010     for (int i = start; i < end; i++) {
1011       char c = sequence.charAt(i);
1012       if (matches(c)) {
1013         if (!inMatchingGroup) {
1014           builder.append(replacement);
1015           inMatchingGroup = true;
1016         }
1017       } else {
1018         builder.append(c);
1019         inMatchingGroup = false;
1020       }
1021     }
1022     return builder.toString();
1023   }
1024 
1025   /**
1026    * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
1027    *     instead.
1028    */
1029   @Deprecated
1030   @Override
1031   public boolean apply(Character character) {
1032     return matches(character);
1033   }
1034 
1035   /**
1036    * Returns a string representation of this {@code CharMatcher}, such as
1037    * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
1038    */
1039   @Override
1040   public String toString() {
1041     return super.toString();
1042   }
1043 
1044   /**
1045    * Returns the Java Unicode escape sequence for the given character, in the form "\u12AB" where
1046    * "12AB" is the four hexadecimal digits representing the 16 bits of the UTF-16 character.
1047    */
1048   private static String showCharacter(char c) {
1049     String hex = "0123456789ABCDEF";
1050     char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
1051     for (int i = 0; i < 4; i++) {
1052       tmp[5 - i] = hex.charAt(c & 0xF);
1053       c = (char) (c >> 4);
1054     }
1055     return String.copyValueOf(tmp);
1056   }
1057 
1058   // Fast matchers
1059 
1060   /** A matcher for which precomputation will not yield any significant benefit. */
1061   abstract static class FastMatcher extends CharMatcher {
1062 
1063     @Override
1064     public final CharMatcher precomputed() {
1065       return this;
1066     }
1067 
1068     @Override
1069     public CharMatcher negate() {
1070       return new NegatedFastMatcher(this);
1071     }
1072   }
1073 
1074   /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
1075   abstract static class NamedFastMatcher extends FastMatcher {
1076 
1077     private final String description;
1078 
1079     NamedFastMatcher(String description) {
1080       this.description = checkNotNull(description);
1081     }
1082 
1083     @Override
1084     public final String toString() {
1085       return description;
1086     }
1087   }
1088 
1089   /** Negation of a {@link FastMatcher}. */
1090   static class NegatedFastMatcher extends Negated {
1091 
1092     NegatedFastMatcher(CharMatcher original) {
1093       super(original);
1094     }
1095 
1096     @Override
1097     public final CharMatcher precomputed() {
1098       return this;
1099     }
1100   }
1101 
1102   /** Fast matcher using a {@link BitSet} table of matching characters. */
1103   @GwtIncompatible // used only from other GwtIncompatible code
1104   private static final class BitSetMatcher extends NamedFastMatcher {
1105 
1106     private final BitSet table;
1107 
1108     private BitSetMatcher(BitSet table, String description) {
1109       super(description);
1110       if (table.length() + Long.SIZE < table.size()) {
1111         table = (BitSet) table.clone();
1112         // If only we could actually call BitSet.trimToSize() ourselves...
1113       }
1114       this.table = table;
1115     }
1116 
1117     @Override
1118     public boolean matches(char c) {
1119       return table.get(c);
1120     }
1121 
1122     @Override
1123     void setBits(BitSet bitSet) {
1124       bitSet.or(table);
1125     }
1126   }
1127 
1128   // Static constant implementation classes
1129 
1130   /** Implementation of {@link #any()}. */
1131   private static final class Any extends NamedFastMatcher {
1132 
1133     static final Any INSTANCE = new Any();
1134 
1135     private Any() {
1136       super("CharMatcher.any()");
1137     }
1138 
1139     @Override
1140     public boolean matches(char c) {
1141       return true;
1142     }
1143 
1144     @Override
1145     public int indexIn(CharSequence sequence) {
1146       return (sequence.length() == 0) ? -1 : 0;
1147     }
1148 
1149     @Override
1150     public int indexIn(CharSequence sequence, int start) {
1151       int length = sequence.length();
1152       checkPositionIndex(start, length);
1153       return (start == length) ? -1 : start;
1154     }
1155 
1156     @Override
1157     public int lastIndexIn(CharSequence sequence) {
1158       return sequence.length() - 1;
1159     }
1160 
1161     @Override
1162     public boolean matchesAllOf(CharSequence sequence) {
1163       checkNotNull(sequence);
1164       return true;
1165     }
1166 
1167     @Override
1168     public boolean matchesNoneOf(CharSequence sequence) {
1169       return sequence.length() == 0;
1170     }
1171 
1172     @Override
1173     public String removeFrom(CharSequence sequence) {
1174       checkNotNull(sequence);
1175       return "";
1176     }
1177 
1178     @Override
1179     public String replaceFrom(CharSequence sequence, char replacement) {
1180       char[] array = new char[sequence.length()];
1181       Arrays.fill(array, replacement);
1182       return new String(array);
1183     }
1184 
1185     @Override
1186     public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1187       StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1188       for (int i = 0; i < sequence.length(); i++) {
1189         result.append(replacement);
1190       }
1191       return result.toString();
1192     }
1193 
1194     @Override
1195     public String collapseFrom(CharSequence sequence, char replacement) {
1196       return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1197     }
1198 
1199     @Override
1200     public String trimFrom(CharSequence sequence) {
1201       checkNotNull(sequence);
1202       return "";
1203     }
1204 
1205     @Override
1206     public int countIn(CharSequence sequence) {
1207       return sequence.length();
1208     }
1209 
1210     @Override
1211     public CharMatcher and(CharMatcher other) {
1212       return checkNotNull(other);
1213     }
1214 
1215     @Override
1216     public CharMatcher or(CharMatcher other) {
1217       checkNotNull(other);
1218       return this;
1219     }
1220 
1221     @Override
1222     public CharMatcher negate() {
1223       return none();
1224     }
1225   }
1226 
1227   /** Implementation of {@link #none()}. */
1228   private static final class None extends NamedFastMatcher {
1229 
1230     static final None INSTANCE = new None();
1231 
1232     private None() {
1233       super("CharMatcher.none()");
1234     }
1235 
1236     @Override
1237     public boolean matches(char c) {
1238       return false;
1239     }
1240 
1241     @Override
1242     public int indexIn(CharSequence sequence) {
1243       checkNotNull(sequence);
1244       return -1;
1245     }
1246 
1247     @Override
1248     public int indexIn(CharSequence sequence, int start) {
1249       int length = sequence.length();
1250       checkPositionIndex(start, length);
1251       return -1;
1252     }
1253 
1254     @Override
1255     public int lastIndexIn(CharSequence sequence) {
1256       checkNotNull(sequence);
1257       return -1;
1258     }
1259 
1260     @Override
1261     public boolean matchesAllOf(CharSequence sequence) {
1262       return sequence.length() == 0;
1263     }
1264 
1265     @Override
1266     public boolean matchesNoneOf(CharSequence sequence) {
1267       checkNotNull(sequence);
1268       return true;
1269     }
1270 
1271     @Override
1272     public String removeFrom(CharSequence sequence) {
1273       return sequence.toString();
1274     }
1275 
1276     @Override
1277     public String replaceFrom(CharSequence sequence, char replacement) {
1278       return sequence.toString();
1279     }
1280 
1281     @Override
1282     public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1283       checkNotNull(replacement);
1284       return sequence.toString();
1285     }
1286 
1287     @Override
1288     public String collapseFrom(CharSequence sequence, char replacement) {
1289       return sequence.toString();
1290     }
1291 
1292     @Override
1293     public String trimFrom(CharSequence sequence) {
1294       return sequence.toString();
1295     }
1296 
1297     @Override
1298     public String trimLeadingFrom(CharSequence sequence) {
1299       return sequence.toString();
1300     }
1301 
1302     @Override
1303     public String trimTrailingFrom(CharSequence sequence) {
1304       return sequence.toString();
1305     }
1306 
1307     @Override
1308     public int countIn(CharSequence sequence) {
1309       checkNotNull(sequence);
1310       return 0;
1311     }
1312 
1313     @Override
1314     public CharMatcher and(CharMatcher other) {
1315       checkNotNull(other);
1316       return this;
1317     }
1318 
1319     @Override
1320     public CharMatcher or(CharMatcher other) {
1321       return checkNotNull(other);
1322     }
1323 
1324     @Override
1325     public CharMatcher negate() {
1326       return any();
1327     }
1328   }
1329 
1330   /** Implementation of {@link #whitespace()}. */
1331   @VisibleForTesting
1332   static final class Whitespace extends NamedFastMatcher {
1333 
1334     static final String TABLE =
1335         "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1336             + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1337             + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1338             + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1339     static final int MULTIPLIER = 1682554634;
1340     static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1341 
1342     static final Whitespace INSTANCE = new Whitespace();
1343 
1344     Whitespace() {
1345       super("CharMatcher.whitespace()");
1346     }
1347 
1348     @Override
1349     public boolean matches(char c) {
1350       return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1351     }
1352 
1353     @GwtIncompatible // used only from other GwtIncompatible code
1354     @Override
1355     void setBits(BitSet table) {
1356       for (int i = 0; i < TABLE.length(); i++) {
1357         table.set(TABLE.charAt(i));
1358       }
1359     }
1360   }
1361 
1362   /** Implementation of {@link #breakingWhitespace()}. */
1363   private static final class BreakingWhitespace extends CharMatcher {
1364 
1365     static final CharMatcher INSTANCE = new BreakingWhitespace();
1366 
1367     @Override
1368     public boolean matches(char c) {
1369       switch (c) {
1370         case '\t':
1371         case '\n':
1372         case '\013':
1373         case '\f':
1374         case '\r':
1375         case ' ':
1376         case '\u0085':
1377         case '\u1680':
1378         case '\u2028':
1379         case '\u2029':
1380         case '\u205f':
1381         case '\u3000':
1382           return true;
1383         case '\u2007':
1384           return false;
1385         default:
1386           return c >= '\u2000' && c <= '\u200a';
1387       }
1388     }
1389 
1390     @Override
1391     public String toString() {
1392       return "CharMatcher.breakingWhitespace()";
1393     }
1394   }
1395 
1396   /** Implementation of {@link #ascii()}. */
1397   private static final class Ascii extends NamedFastMatcher {
1398 
1399     static final Ascii INSTANCE = new Ascii();
1400 
1401     Ascii() {
1402       super("CharMatcher.ascii()");
1403     }
1404 
1405     @Override
1406     public boolean matches(char c) {
1407       return c <= '\u007f';
1408     }
1409   }
1410 
1411   /** Implementation that matches characters that fall within multiple ranges. */
1412   private static class RangesMatcher extends CharMatcher {
1413 
1414     private final String description;
1415     private final char[] rangeStarts;
1416     private final char[] rangeEnds;
1417 
1418     RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1419       this.description = description;
1420       this.rangeStarts = rangeStarts;
1421       this.rangeEnds = rangeEnds;
1422       checkArgument(rangeStarts.length == rangeEnds.length);
1423       for (int i = 0; i < rangeStarts.length; i++) {
1424         checkArgument(rangeStarts[i] <= rangeEnds[i]);
1425         if (i + 1 < rangeStarts.length) {
1426           checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1427         }
1428       }
1429     }
1430 
1431     @Override
1432     public boolean matches(char c) {
1433       int index = Arrays.binarySearch(rangeStarts, c);
1434       if (index >= 0) {
1435         return true;
1436       } else {
1437         index = ~index - 1;
1438         return index >= 0 && c <= rangeEnds[index];
1439       }
1440     }
1441 
1442     @Override
1443     public String toString() {
1444       return description;
1445     }
1446   }
1447 
1448   /** Implementation of {@link #digit()}. */
1449   private static final class Digit extends RangesMatcher {
1450 
1451     // Must be in ascending order.
1452     private static final String ZEROES =
1453         "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66"
1454             + "\u0be6\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810"
1455             + "\u1946\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
1456 
1457     private static char[] zeroes() {
1458       return ZEROES.toCharArray();
1459     }
1460 
1461     private static char[] nines() {
1462       char[] nines = new char[ZEROES.length()];
1463       for (int i = 0; i < ZEROES.length(); i++) {
1464         nines[i] = (char) (ZEROES.charAt(i) + 9);
1465       }
1466       return nines;
1467     }
1468 
1469     static final Digit INSTANCE = new Digit();
1470 
1471     private Digit() {
1472       super("CharMatcher.digit()", zeroes(), nines());
1473     }
1474   }
1475 
1476   /** Implementation of {@link #javaDigit()}. */
1477   private static final class JavaDigit extends CharMatcher {
1478 
1479     static final JavaDigit INSTANCE = new JavaDigit();
1480 
1481     @Override
1482     public boolean matches(char c) {
1483       return Character.isDigit(c);
1484     }
1485 
1486     @Override
1487     public String toString() {
1488       return "CharMatcher.javaDigit()";
1489     }
1490   }
1491 
1492   /** Implementation of {@link #javaLetter()}. */
1493   private static final class JavaLetter extends CharMatcher {
1494 
1495     static final JavaLetter INSTANCE = new JavaLetter();
1496 
1497     @Override
1498     public boolean matches(char c) {
1499       return Character.isLetter(c);
1500     }
1501 
1502     @Override
1503     public String toString() {
1504       return "CharMatcher.javaLetter()";
1505     }
1506   }
1507 
1508   /** Implementation of {@link #javaLetterOrDigit()}. */
1509   private static final class JavaLetterOrDigit extends CharMatcher {
1510 
1511     static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1512 
1513     @Override
1514     public boolean matches(char c) {
1515       return Character.isLetterOrDigit(c);
1516     }
1517 
1518     @Override
1519     public String toString() {
1520       return "CharMatcher.javaLetterOrDigit()";
1521     }
1522   }
1523 
1524   /** Implementation of {@link #javaUpperCase()}. */
1525   private static final class JavaUpperCase extends CharMatcher {
1526 
1527     static final JavaUpperCase INSTANCE = new JavaUpperCase();
1528 
1529     @Override
1530     public boolean matches(char c) {
1531       return Character.isUpperCase(c);
1532     }
1533 
1534     @Override
1535     public String toString() {
1536       return "CharMatcher.javaUpperCase()";
1537     }
1538   }
1539 
1540   /** Implementation of {@link #javaLowerCase()}. */
1541   private static final class JavaLowerCase extends CharMatcher {
1542 
1543     static final JavaLowerCase INSTANCE = new JavaLowerCase();
1544 
1545     @Override
1546     public boolean matches(char c) {
1547       return Character.isLowerCase(c);
1548     }
1549 
1550     @Override
1551     public String toString() {
1552       return "CharMatcher.javaLowerCase()";
1553     }
1554   }
1555 
1556   /** Implementation of {@link #javaIsoControl()}. */
1557   private static final class JavaIsoControl extends NamedFastMatcher {
1558 
1559     static final JavaIsoControl INSTANCE = new JavaIsoControl();
1560 
1561     private JavaIsoControl() {
1562       super("CharMatcher.javaIsoControl()");
1563     }
1564 
1565     @Override
1566     public boolean matches(char c) {
1567       return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1568     }
1569   }
1570 
1571   /** Implementation of {@link #invisible()}. */
1572   private static final class Invisible extends RangesMatcher {
1573 
1574     private static final String RANGE_STARTS =
1575         "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067"
1576             + "\u2068\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa";
1577     private static final String RANGE_ENDS =
1578         "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067"
1579             + "\u2068\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb";
1580 
1581     static final Invisible INSTANCE = new Invisible();
1582 
1583     private Invisible() {
1584       super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1585     }
1586   }
1587 
1588   /** Implementation of {@link #singleWidth()}. */
1589   private static final class SingleWidth extends RangesMatcher {
1590 
1591     static final SingleWidth INSTANCE = new SingleWidth();
1592 
1593     private SingleWidth() {
1594       super(
1595           "CharMatcher.singleWidth()",
1596           "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1597           "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1598     }
1599   }
1600 
1601   // Non-static factory implementation classes
1602 
1603   /** Implementation of {@link #negate()}. */
1604   private static class Negated extends CharMatcher {
1605 
1606     final CharMatcher original;
1607 
1608     Negated(CharMatcher original) {
1609       this.original = checkNotNull(original);
1610     }
1611 
1612     @Override
1613     public boolean matches(char c) {
1614       return !original.matches(c);
1615     }
1616 
1617     @Override
1618     public boolean matchesAllOf(CharSequence sequence) {
1619       return original.matchesNoneOf(sequence);
1620     }
1621 
1622     @Override
1623     public boolean matchesNoneOf(CharSequence sequence) {
1624       return original.matchesAllOf(sequence);
1625     }
1626 
1627     @Override
1628     public int countIn(CharSequence sequence) {
1629       return sequence.length() - original.countIn(sequence);
1630     }
1631 
1632     @GwtIncompatible // used only from other GwtIncompatible code
1633     @Override
1634     void setBits(BitSet table) {
1635       BitSet tmp = new BitSet();
1636       original.setBits(tmp);
1637       tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1638       table.or(tmp);
1639     }
1640 
1641     @Override
1642     public CharMatcher negate() {
1643       return original;
1644     }
1645 
1646     @Override
1647     public String toString() {
1648       return original + ".negate()";
1649     }
1650   }
1651 
1652   /** Implementation of {@link #and(CharMatcher)}. */
1653   private static final class And extends CharMatcher {
1654 
1655     final CharMatcher first;
1656     final CharMatcher second;
1657 
1658     And(CharMatcher a, CharMatcher b) {
1659       first = checkNotNull(a);
1660       second = checkNotNull(b);
1661     }
1662 
1663     @Override
1664     public boolean matches(char c) {
1665       return first.matches(c) && second.matches(c);
1666     }
1667 
1668     @GwtIncompatible // used only from other GwtIncompatible code
1669     @Override
1670     void setBits(BitSet table) {
1671       BitSet tmp1 = new BitSet();
1672       first.setBits(tmp1);
1673       BitSet tmp2 = new BitSet();
1674       second.setBits(tmp2);
1675       tmp1.and(tmp2);
1676       table.or(tmp1);
1677     }
1678 
1679     @Override
1680     public String toString() {
1681       return "CharMatcher.and(" + first + ", " + second + ")";
1682     }
1683   }
1684 
1685   /** Implementation of {@link #or(CharMatcher)}. */
1686   private static final class Or extends CharMatcher {
1687 
1688     final CharMatcher first;
1689     final CharMatcher second;
1690 
1691     Or(CharMatcher a, CharMatcher b) {
1692       first = checkNotNull(a);
1693       second = checkNotNull(b);
1694     }
1695 
1696     @GwtIncompatible // used only from other GwtIncompatible code
1697     @Override
1698     void setBits(BitSet table) {
1699       first.setBits(table);
1700       second.setBits(table);
1701     }
1702 
1703     @Override
1704     public boolean matches(char c) {
1705       return first.matches(c) || second.matches(c);
1706     }
1707 
1708     @Override
1709     public String toString() {
1710       return "CharMatcher.or(" + first + ", " + second + ")";
1711     }
1712   }
1713 
1714   // Static factory implementations
1715 
1716   /** Implementation of {@link #is(char)}. */
1717   private static final class Is extends FastMatcher {
1718 
1719     private final char match;
1720 
1721     Is(char match) {
1722       this.match = match;
1723     }
1724 
1725     @Override
1726     public boolean matches(char c) {
1727       return c == match;
1728     }
1729 
1730     @Override
1731     public String replaceFrom(CharSequence sequence, char replacement) {
1732       return sequence.toString().replace(match, replacement);
1733     }
1734 
1735     @Override
1736     public CharMatcher and(CharMatcher other) {
1737       return other.matches(match) ? this : none();
1738     }
1739 
1740     @Override
1741     public CharMatcher or(CharMatcher other) {
1742       return other.matches(match) ? other : super.or(other);
1743     }
1744 
1745     @Override
1746     public CharMatcher negate() {
1747       return isNot(match);
1748     }
1749 
1750     @GwtIncompatible // used only from other GwtIncompatible code
1751     @Override
1752     void setBits(BitSet table) {
1753       table.set(match);
1754     }
1755 
1756     @Override
1757     public String toString() {
1758       return "CharMatcher.is('" + showCharacter(match) + "')";
1759     }
1760   }
1761 
1762   /** Implementation of {@link #isNot(char)}. */
1763   private static final class IsNot extends FastMatcher {
1764 
1765     private final char match;
1766 
1767     IsNot(char match) {
1768       this.match = match;
1769     }
1770 
1771     @Override
1772     public boolean matches(char c) {
1773       return c != match;
1774     }
1775 
1776     @Override
1777     public CharMatcher and(CharMatcher other) {
1778       return other.matches(match) ? super.and(other) : other;
1779     }
1780 
1781     @Override
1782     public CharMatcher or(CharMatcher other) {
1783       return other.matches(match) ? any() : this;
1784     }
1785 
1786     @GwtIncompatible // used only from other GwtIncompatible code
1787     @Override
1788     void setBits(BitSet table) {
1789       table.set(0, match);
1790       table.set(match + 1, Character.MAX_VALUE + 1);
1791     }
1792 
1793     @Override
1794     public CharMatcher negate() {
1795       return is(match);
1796     }
1797 
1798     @Override
1799     public String toString() {
1800       return "CharMatcher.isNot('" + showCharacter(match) + "')";
1801     }
1802   }
1803 
1804   private static CharMatcher.IsEither isEither(char c1, char c2) {
1805     return new CharMatcher.IsEither(c1, c2);
1806   }
1807 
1808   /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1809   private static final class IsEither extends FastMatcher {
1810 
1811     private final char match1;
1812     private final char match2;
1813 
1814     IsEither(char match1, char match2) {
1815       this.match1 = match1;
1816       this.match2 = match2;
1817     }
1818 
1819     @Override
1820     public boolean matches(char c) {
1821       return c == match1 || c == match2;
1822     }
1823 
1824     @GwtIncompatible // used only from other GwtIncompatible code
1825     @Override
1826     void setBits(BitSet table) {
1827       table.set(match1);
1828       table.set(match2);
1829     }
1830 
1831     @Override
1832     public String toString() {
1833       return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1834     }
1835   }
1836 
1837   /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1838   private static final class AnyOf extends CharMatcher {
1839 
1840     private final char[] chars;
1841 
1842     public AnyOf(CharSequence chars) {
1843       this.chars = chars.toString().toCharArray();
1844       Arrays.sort(this.chars);
1845     }
1846 
1847     @Override
1848     public boolean matches(char c) {
1849       return Arrays.binarySearch(chars, c) >= 0;
1850     }
1851 
1852     @Override
1853     @GwtIncompatible // used only from other GwtIncompatible code
1854     void setBits(BitSet table) {
1855       for (char c : chars) {
1856         table.set(c);
1857       }
1858     }
1859 
1860     @Override
1861     public String toString() {
1862       StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1863       for (char c : chars) {
1864         description.append(showCharacter(c));
1865       }
1866       description.append("\")");
1867       return description.toString();
1868     }
1869   }
1870 
1871   /** Implementation of {@link #inRange(char, char)}. */
1872   private static final class InRange extends FastMatcher {
1873 
1874     private final char startInclusive;
1875     private final char endInclusive;
1876 
1877     InRange(char startInclusive, char endInclusive) {
1878       checkArgument(endInclusive >= startInclusive);
1879       this.startInclusive = startInclusive;
1880       this.endInclusive = endInclusive;
1881     }
1882 
1883     @Override
1884     public boolean matches(char c) {
1885       return startInclusive <= c && c <= endInclusive;
1886     }
1887 
1888     @GwtIncompatible // used only from other GwtIncompatible code
1889     @Override
1890     void setBits(BitSet table) {
1891       table.set(startInclusive, endInclusive + 1);
1892     }
1893 
1894     @Override
1895     public String toString() {
1896       return "CharMatcher.inRange('"
1897           + showCharacter(startInclusive)
1898           + "', '"
1899           + showCharacter(endInclusive)
1900           + "')";
1901     }
1902   }
1903 
1904   /** Implementation of {@link #forPredicate(Predicate)}. */
1905   private static final class ForPredicate extends CharMatcher {
1906 
1907     private final Predicate<? super Character> predicate;
1908 
1909     ForPredicate(Predicate<? super Character> predicate) {
1910       this.predicate = checkNotNull(predicate);
1911     }
1912 
1913     @Override
1914     public boolean matches(char c) {
1915       return predicate.apply(c);
1916     }
1917 
1918     @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1919     @Override
1920     public boolean apply(Character character) {
1921       return predicate.apply(checkNotNull(character));
1922     }
1923 
1924     @Override
1925     public String toString() {
1926       return "CharMatcher.forPredicate(" + predicate + ")";
1927     }
1928   }
1929 }