View Javadoc
1   /*
2    * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4    *
5    * This code is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU General Public License version 2 only, as
7    * published by the Free Software Foundation.  Oracle designates this
8    * particular file as subject to the "Classpath" exception as provided
9    * by Oracle in the LICENSE file that accompanied this code.
10   *
11   * This code is distributed in the hope that it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14   * version 2 for more details (a copy is included in the LICENSE file that
15   * accompanied this code).
16   *
17   * You should have received a copy of the GNU General Public License version
18   * 2 along with this work; if not, write to the Free Software Foundation,
19   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20   *
21   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22   * or visit www.oracle.com if you need additional information or have any
23   * questions.
24   */
25  
26  package build.tools.generatebreakiteratordata;
27  
28  import java.util.Hashtable;
29  import java.util.Vector;
30  
31  /**
32   * The Builder class for DictionaryBasedBreakIterator inherits almost all of
33   * its functionality from RuleBasedBreakIteratorBuilder, but extends it with
34   * extra logic to handle the "<dictionary>" token.
35   */
36  class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder {
37  
38      /**
39       * A list of flags indicating which character categories are contained in
40       * the dictionary file (this is used to determine which ranges of characters
41       * to apply the dictionary to)
42       */
43      private boolean[] categoryFlags;
44  
45      /**
46       * A CharSet that contains all the characters represented in the dictionary
47       */
48      private CharSet dictionaryChars = new CharSet();
49      private String dictionaryExpression = "";
50  
51      public DictionaryBasedBreakIteratorBuilder(String description) {
52          super(description);
53      }
54  
55      /**
56       * We override handleSpecialSubstitution() to add logic to handle
57       * the <dictionary> tag.  If we see a substitution named "<dictionary>",
58       * parse the substitution expression and store the result in
59       * dictionaryChars.
60       */
61      protected void handleSpecialSubstitution(String replace, String replaceWith,
62                                               int startPos, String description) {
63          super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
64  
65          if (replace.equals("<dictionary>")) {
66              if (replaceWith.charAt(0) == '(') {
67                  error("Dictionary group can't be enclosed in (", startPos, description);
68              }
69              dictionaryExpression = replaceWith;
70              dictionaryChars = CharSet.parseString(replaceWith);
71          }
72      }
73  
74      /**
75       * The other half of the logic to handle the dictionary characters happens
76       * here. After the inherited builder has derived the real character
77       * categories, we set up the categoryFlags array in the iterator. This array
78       * contains "true" for every character category that includes a dictionary
79       * character.
80       */
81      protected void buildCharCategories(Vector<String> tempRuleList) {
82          super.buildCharCategories(tempRuleList);
83  
84          categoryFlags = new boolean[categories.size()];
85          for (int i = 0; i < categories.size(); i++) {
86              CharSet cs = categories.elementAt(i);
87              if (!(cs.intersection(dictionaryChars).empty())) {
88                  categoryFlags[i] = true;
89              }
90          }
91      }
92  
93      // This function is actually called by
94      // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by
95      // the function above. This gives us a way to create a separate character
96      // category for the dictionary characters even when
97      // RuleBasedBreakIteratorBuilder isn't making a distinction.
98      protected void mungeExpressionList(Hashtable<String, Object> expressions) {
99          expressions.put(dictionaryExpression, dictionaryChars);
100     }
101 
102     void makeFile(String filename) {
103         super.setAdditionalData(super.toByteArray(categoryFlags));
104         super.makeFile(filename);
105     }
106 }