/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25  
26  package java.util.regex;
27  
28  import java.util.HashMap;
29  import java.util.Locale;
30  
/**
 * Unicode binary properties and POSIX-style character classes, each exposed
 * as a code point predicate through {@link #is(int)}.
 *
 * <p>Constants are looked up by property name with {@link #forName(String)}
 * and by POSIX class name with {@link #forPOSIXName(String)}. Both lookups
 * ignore case and return {@code null} for a name they do not know.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    WHITE_SPACE {
        // \p{Whitespace}
        @Override
        public boolean is(int ch) {
            final int separators = (1 << Character.SPACE_SEPARATOR) |
                                   (1 << Character.LINE_SEPARATOR) |
                                   (1 << Character.PARAGRAPH_SEPARATOR);
            // Besides the separator categories: U+0009..U+000D and U+0085.
            return hasTypeIn(ch, separators) ||
                   (ch >= 0x9 && ch <= 0xd) ||
                   (ch == 0x85);
        }
    },

    CONTROL {
        // \p{gc=Control}
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    PUNCTUATION {
        // \p{gc=Punctuation}
        @Override
        public boolean is(int ch) {
            final int punctuation = (1 << Character.CONNECTOR_PUNCTUATION) |
                                    (1 << Character.DASH_PUNCTUATION) |
                                    (1 << Character.START_PUNCTUATION) |
                                    (1 << Character.END_PUNCTUATION) |
                                    (1 << Character.OTHER_PUNCTUATION) |
                                    (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
                                    (1 << Character.FINAL_QUOTE_PUNCTUATION);
            return hasTypeIn(ch, punctuation);
        }
    },

    HEX_DIGIT {
        // \p{gc=Decimal_Number}
        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch) ||
                   (ch >= 0x0030 && ch <= 0x0039) ||   // '0'..'9'
                   (ch >= 0x0041 && ch <= 0x0046) ||   // 'A'..'F'
                   (ch >= 0x0061 && ch <= 0x0066) ||   // 'a'..'f'
                   (ch >= 0xFF10 && ch <= 0xFF19) ||   // fullwidth 0..9
                   (ch >= 0xFF21 && ch <= 0xFF26) ||   // fullwidth A..F
                   (ch >= 0xFF41 && ch <= 0xFF46);     // fullwidth a..f
        }
    },

    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    NONCHARACTER_CODE_POINT {
        // PropList.txt:Noncharacter_Code_Point
        @Override
        public boolean is(int ch) {
            // Low 16 bits equal to FFFE or FFFF, or U+FDD0..U+FDEF.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    DIGIT {
        // \p{gc=Decimal_Number}
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    ALNUM {
        // \p{alpha}
        // \p{digit}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    BLANK {
        // \p{Whitespace} --
        // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
        //  \p{gc=Line_Separator}
        //  \p{gc=Paragraph_Separator}]
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR ||
                   ch == 0x9; // \N{HT}
        }
    },

    GRAPH {
        // [^
        //  \p{space}
        //  \p{gc=Control}
        //  \p{gc=Surrogate}
        //  \p{gc=Unassigned}]
        @Override
        public boolean is(int ch) {
            final int excluded = (1 << Character.SPACE_SEPARATOR) |
                                 (1 << Character.LINE_SEPARATOR) |
                                 (1 << Character.PARAGRAPH_SEPARATOR) |
                                 (1 << Character.CONTROL) |
                                 (1 << Character.SURROGATE) |
                                 (1 << Character.UNASSIGNED);
            return !hasTypeIn(ch, excluded);
        }
    },

    PRINT {
        // \p{graph}
        // \p{blank}
        // -- \p{cntrl}
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    WORD {
        //  \p{alpha}
        //  \p{gc=Mark}
        //  \p{digit}
        //  \p{gc=Connector_Punctuation}
        //  \p{Join_Control}    200C..200D
        @Override
        public boolean is(int ch) {
            final int wordTypes = (1 << Character.NON_SPACING_MARK) |
                                  (1 << Character.ENCLOSING_MARK) |
                                  (1 << Character.COMBINING_SPACING_MARK) |
                                  (1 << Character.DECIMAL_DIGIT_NUMBER) |
                                  (1 << Character.CONNECTOR_PUNCTUATION);
            return ALPHABETIC.is(ch) ||
                   hasTypeIn(ch, wordTypes) ||
                   JOIN_CONTROL.is(ch);
        }
    },

    JOIN_CONTROL {
        //  200C..200D    PropList.txt:Join_Control
        @Override
        public boolean is(int ch) {
            return (ch == 0x200C || ch == 0x200D);
        }
    };

    /**
     * Upper-cased property name to constant: every constant's own name plus
     * the underscore-free aliases registered in the static initializer.
     */
    private static final HashMap<String, UnicodeProp> BY_NAME = new HashMap<>();

    /** Upper-cased POSIX class name to constant. */
    private static final HashMap<String, UnicodeProp> BY_POSIX_NAME = new HashMap<>();

    static {
        // Indexing the constants up front lets forName() answer "unknown"
        // with a plain map miss instead of a caught IllegalArgumentException.
        for (UnicodeProp prop : values()) {
            BY_NAME.put(prop.name(), prop);
        }
        BY_NAME.put("WHITESPACE", WHITE_SPACE);
        BY_NAME.put("HEXDIGIT", HEX_DIGIT);
        BY_NAME.put("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
        BY_NAME.put("JOINCONTROL", JOIN_CONTROL);

        BY_POSIX_NAME.put("ALPHA", ALPHABETIC);
        BY_POSIX_NAME.put("LOWER", LOWERCASE);
        BY_POSIX_NAME.put("UPPER", UPPERCASE);
        BY_POSIX_NAME.put("SPACE", WHITE_SPACE);
        BY_POSIX_NAME.put("PUNCT", PUNCTUATION);
        BY_POSIX_NAME.put("XDIGIT", HEX_DIGIT);
        BY_POSIX_NAME.put("ALNUM", ALNUM);
        BY_POSIX_NAME.put("CNTRL", CONTROL);
        BY_POSIX_NAME.put("DIGIT", DIGIT);
        BY_POSIX_NAME.put("BLANK", BLANK);
        BY_POSIX_NAME.put("GRAPH", GRAPH);
        BY_POSIX_NAME.put("PRINT", PRINT);
    }

    /**
     * Tests whether the general category of a code point belongs to a set of
     * categories given as a bit mask.
     *
     * @param ch       the code point to classify
     * @param typeMask mask in which bit {@code n} selects the category whose
     *                 {@code Character.getType} value is {@code n}
     * @return {@code true} if the bit for {@code Character.getType(ch)} is
     *         set in {@code typeMask}
     */
    private static boolean hasTypeIn(int ch, int typeMask) {
        return ((typeMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Looks up a property by constant name or by one of its underscore-free
     * aliases, ignoring case.
     *
     * @param propName the property name, e.g. {@code "Alphabetic"},
     *                 {@code "White_Space"} or {@code "WhiteSpace"}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forName(String propName) {
        return BY_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Looks up a property by POSIX character class name, ignoring case.
     *
     * @param propName the POSIX class name, e.g. {@code "alpha"} or
     *                 {@code "xdigit"}
     * @return the matching constant, or {@code null} if the name is not one
     *         of the registered POSIX class names
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forPOSIXName(String propName) {
        return BY_POSIX_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Tests whether a code point has this property.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} has this property
     */
    public abstract boolean is(int ch);
}