/*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25  
26  package build.tools.generatecharacter;
27  
28  import java.io.*;
29  import java.util.*;
30  import java.lang.*;
31  
/**
 * SpecialCaseMap has the responsibility of storing the
 * 1:M, locale-sensitive, and context-sensitive case mappings
 * that occur when uppercasing Unicode 4.0 characters. This class can
 * read and parse the SpecialCasing.txt file that contains those mappings.
 * <p>
 * A single SpecialCaseMap contains the mapping for one character.
 * <p>
 * Ordering, equality and hashing are all based solely on the source
 * character ({@link #getCharSource()}).
 *
 * @author John O'Conner
 */
public class SpecialCaseMap implements Comparable<SpecialCaseMap> {

    /**
     * Fields that can be found in the SpecialCasing.txt file.
     */
    static final int REQUIRED_FIELDS = 5;
    static final int FIELD_SOURCE = 0;
    static final int FIELD_LOWERCASE = 1;
    static final int FIELD_TITLECASE = 2;
    static final int FIELD_UPPERCASE = 3;
    static final int FIELD_CONDITIONS = 4;

    /**
     * Context values
     */
    static final String CONTEXT_FINAL = "FINAL";
    static final String CONTEXT_NONFINAL = "NON_FINAL";
    static final String CONTEXT_MODERN = "MODERN";
    static final String CONTEXT_NONMODERN = "NON_MODERN";

    /** Message printed to standard output when a data line cannot be parsed. */
    private static final String PARSE_ERROR_MESSAGE = "Error parsing spec line.";

    int chSource;
    Locale locale;
    char[] lowerCaseMap;
    char[] upperCaseMap;
    char[] titleCaseMap;
    String[] context;

    /**
     * Creates an empty mapping whose source character is 0xFFFF and whose
     * case maps, locale and context are all {@code null}.
     */
    SpecialCaseMap() {
        chSource = 0xFFFF;
    }

    /**
     * Read and parse a Unicode special case map file.
     * <p>
     * Every line is trimmed and handed to {@link #parse(String)}; only the
     * entries whose source code point lies in the requested plane are kept.
     * The file is not required to be sorted by code point. If an I/O error
     * occurs while reading, a warning is written to standard error and the
     * entries collected so far are returned.
     *
     * @param file   a file specifying the Unicode special case mappings
     * @param plane  the Unicode plane (source code point shifted right by 16)
     *               whose mappings are wanted
     * @return  an array of SpecialCaseMap objects, sorted by source character,
     *          one for each line of the special case map data file that could
     *          be successfully parsed and that belongs to {@code plane}
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     */
    public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException {
        List<SpecialCaseMap> caseMaps = new ArrayList<>(150);
        // Opened outside the try-with-resources so that FileNotFoundException
        // keeps propagating to the caller instead of hitting the catch below.
        FileReader in = new FileReader(file);
        try (BufferedReader reader = new BufferedReader(in)) {
            String line;
            while ((line = reader.readLine()) != null) {
                SpecialCaseMap item = parse(line.trim());
                if (item != null && (item.getCharSource() >> 16) == plane) {
                    caseMaps.add(item);
                }
            }
        } catch (IOException e) {
            // Best effort: keep what was read, but do not hide the failure.
            System.err.println("Error reading " + file + ": " + e);
        }
        SpecialCaseMap[] result = caseMaps.toArray(new SpecialCaseMap[caseMaps.size()]);
        Arrays.sort(result);
        return result;
    }

    /**
     * Given one line of a Unicode special casing data file as a String, parse the line
     * and return a SpecialCaseMap object that contains the case mapping.
     * <p>
     * Blank lines and lines starting with '#' yield {@code null}. The line is
     * split at its first four semicolons; if the remainder (the conditions
     * field) contains another semicolon the line is skipped and {@code null}
     * is returned. A line with fewer than four semicolons, or with a field
     * that is not valid hexadecimal, prints "Error parsing spec line." to
     * standard output and yields {@code null}.
     *
     * @param s a line of the Unicode special case map data file to be parsed
     * @return a SpecialCaseMap object, or null if the line was skipped or the
     *         parsing process failed for some reason
     */
    public static SpecialCaseMap parse(String s) {
        if (s == null) {
            return null;
        }
        String line = s.trim();
        if (line.isEmpty() || line.charAt(0) == '#') {
            return null;
        }

        String[] tokens = new String[REQUIRED_FIELDS];
        int tokenStart = 0;
        for (int x = 0; x < REQUIRED_FIELDS - 1; x++) {
            int tokenEnd = line.indexOf(';', tokenStart);
            if (tokenEnd < 0) {
                // Fewer separators than required fields.
                System.out.println(PARSE_ERROR_MESSAGE);
                return null;
            }
            tokens[x] = line.substring(tokenStart, tokenEnd);
            tokenStart = tokenEnd + 1;
        }
        tokens[REQUIRED_FIELDS - 1] = line.substring(tokenStart);

        // A further ';' means the fifth field is a condition list rather than
        // just the trailing comment; such lines are not handled here.
        if (tokens[FIELD_CONDITIONS].indexOf(';') != -1) {
            return null;
        }

        try {
            SpecialCaseMap spec = new SpecialCaseMap();
            spec.setCharSource(parseChar(tokens[FIELD_SOURCE]));
            spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE]));
            spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE]));
            spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE]));
            spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS]));
            spec.setContext(parseContext(tokens[FIELD_CONDITIONS]));
            return spec;
        } catch (NumberFormatException e) {
            System.out.println(PARSE_ERROR_MESSAGE);
            return null;
        }
    }

    /**
     * Parses a hexadecimal code point.
     *
     * @param token the hexadecimal digits, without surrounding whitespace
     * @return the parsed value
     * @throws NumberFormatException if {@code token} is not valid hexadecimal
     */
    static int parseChar(String token) throws NumberFormatException {
        return Integer.parseInt(token, 16);
    }

    /**
     * Parses a space-separated list of hexadecimal code points into UTF-16.
     * Code points above 0xFFFF are stored as a surrogate pair. Leading,
     * trailing and repeated space characters are ignored, so an empty or
     * all-space token yields an empty array.
     *
     * @param token the mapping field of a data line
     * @return the UTF-16 code units of the mapping, never {@code null}
     * @throws NumberFormatException if an element is not valid hexadecimal
     */
    static char[] parseCaseMap(String token) throws NumberFormatException {
        StringBuilder buff = new StringBuilder();
        int length = token.length();
        int pos = 0;
        while (pos < length) {
            // Skip separators, taking care not to run past the end.
            while (pos < length && Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            if (pos == length) {
                break;
            }
            int start = pos;
            while (pos < length && !Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            int ch = parseChar(token.substring(start, pos));
            if (ch > 0xFFFF) {
                buff.append(getHighSurrogate(ch));
                buff.append(getLowSurrogate(ch));
            } else {
                buff.append((char) ch);
            }
        }
        char[] map = new char[buff.length()];
        buff.getChars(0, buff.length(), map, 0);
        return map;
    }

    /**
     * Placeholder for locale parsing; not implemented.
     *
     * @param token the conditions field of a data line (ignored)
     * @return always {@code null}
     */
    static Locale parseLocale(String token) {
        return null;
    }

    /**
     * Placeholder for context parsing; not implemented.
     *
     * @param token the conditions field of a data line (ignored)
     * @return always {@code null}
     */
    static String[] parseContext(String token) {
        return null;
    }

    /**
     * Binary-searches an array for the entry with the given source character.
     *
     * @param ch  the source character to look for
     * @param map the entries to search; must be sorted by source character
     * @return the index of a matching entry (the last one if several share
     *         the same source character), or -1 if there is none or
     *         {@code map} is null or empty
     */
    static int find(int ch, SpecialCaseMap[] map) {
        if (map == null || map.length == 0) {
            return -1;
        }
        int bottom = 0;
        int top = map.length;
        // Narrow [bottom, top) until it holds a single candidate at 'bottom'.
        while (top - bottom > 1) {
            int middle = (top + bottom) >>> 1;
            if (ch >= map[middle].getCharSource()) {
                bottom = middle;
            } else {
                top = middle;
            }
        }
        return (ch == map[bottom].getCharSource()) ? bottom : -1;
    }

    /**
     * Extracts and returns the high surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the high surrogate value is extracted.
     *
     * @param codePoint a UTF-32 codePoint
     * @return the high surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getHighSurrogate(int codePoint) {
        if (codePoint > 0xFFFF) {
            return (char) ((codePoint - 0x10000) / 0x0400 + 0xD800);
        }
        return (char) codePoint;
    }

    /**
     * Extracts and returns the low surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the low surrogate value is extracted.
     *
     * @param codePoint a UTF-32 codePoint
     * @return the low surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getLowSurrogate(int codePoint) {
        if (codePoint > 0xFFFF) {
            return (char) ((codePoint - 0x10000) % 0x0400 + 0xDC00);
        }
        return (char) codePoint;
    }

    /**
     * Formats the low 24 bits of a value as six upper-case hexadecimal digits.
     *
     * @param n the value to format
     * @return a six-character, zero-padded hexadecimal string
     */
    static String hex6(int n) {
        // Masking to 24 bits guarantees at most six digits.
        String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase(Locale.ROOT);
        return "000000".substring(str.length()) + str;
    }

    /**
     * Formats each char of a mapping with {@link #hex6(int)}, separated by
     * single spaces.
     *
     * @param map the mapping to format; may be {@code null} or empty
     * @return the formatted mapping, or the empty string if there is nothing
     *         to format
     */
    static String hex6(char[] map) {
        if (map == null || map.length == 0) {
            return "";
        }
        StringBuilder buff = new StringBuilder(map.length * 7);
        for (int x = 0; x < map.length; x++) {
            if (x > 0) {
                buff.append(' ');
            }
            buff.append(hex6(map[x]));
        }
        return buff.toString();
    }

    void setCharSource(int ch) {
        chSource = ch;
    }

    void setLowerCaseMap(char[] map) {
        lowerCaseMap = map;
    }

    void setUpperCaseMap(char[] map) {
        upperCaseMap = map;
    }

    void setTitleCaseMap(char[] map) {
        titleCaseMap = map;
    }

    void setLocale(Locale locale) {
        this.locale = locale;
    }

    void setContext(String[] context) {
        this.context = context;
    }

    /** @return the source code point of this mapping */
    public int getCharSource() {
        return chSource;
    }

    /** @return the lower-case mapping (the internal array, not a copy) */
    public char[] getLowerCaseMap() {
        return lowerCaseMap;
    }

    /** @return the upper-case mapping (the internal array, not a copy) */
    public char[] getUpperCaseMap() {
        return upperCaseMap;
    }

    /** @return the title-case mapping (the internal array, not a copy) */
    public char[] getTitleCaseMap() {
        return titleCaseMap;
    }

    /** @return the locale of this mapping, possibly {@code null} */
    public Locale getLocale() {
        return locale;
    }

    /** @return the context of this mapping, possibly {@code null} */
    public String[] getContext() {
        return context;
    }

    /**
     * Orders mappings by source character only.
     */
    @Override
    public int compareTo(SpecialCaseMap otherObject) {
        return Integer.compare(chSource, otherObject.chSource);
    }

    /**
     * Two mappings are equal when they have the same source character.
     */
    @Override
    public boolean equals(Object o1) {
        if (this == o1) {
            return true;
        }
        if (!(o1 instanceof SpecialCaseMap)) {
            return false;
        }
        return compareTo((SpecialCaseMap) o1) == 0;
    }

    @Override
    public int hashCode() {
        return chSource;
    }

    /**
     * Returns {@code source|lower|upper|title|context}, with the characters
     * rendered by {@code hex6}. Missing mappings are rendered as empty fields.
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder();
        buff.append(hex6(getCharSource()));
        buff.append('|').append(hex6(lowerCaseMap));
        buff.append('|').append(hex6(upperCaseMap));
        buff.append('|').append(hex6(titleCaseMap));
        // Arrays.toString prints "null" for a null array and the elements otherwise.
        buff.append('|').append(Arrays.toString(context));
        return buff.toString();
    }

    /**
     * Prints the mappings of one plane of a special casing file.
     *
     * @param args the data file path followed by the plane number
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: SpecialCaseMap <SpecialCasing.txt> <plane>");
            return;
        }
        try {
            File file = new File(args[0]);
            int plane = Integer.parseInt(args[1]);
            SpecialCaseMap[] spec = SpecialCaseMap.readSpecFile(file, plane);
            System.out.println("SpecialCaseMap[" + spec.length + "]:");
            for (int x = 0; x < spec.length; x++) {
                System.out.println(spec[x].toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
362 }