View Javadoc
1   /*
2    * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4    *
5    * This code is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU General Public License version 2 only, as
7    * published by the Free Software Foundation.  Oracle designates this
8    * particular file as subject to the "Classpath" exception as provided
9    * by Oracle in the LICENSE file that accompanied this code.
10   *
11   * This code is distributed in the hope that it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14   * version 2 for more details (a copy is included in the LICENSE file that
15   * accompanied this code).
16   *
17   * You should have received a copy of the GNU General Public License version
18   * 2 along with this work; if not, write to the Free Software Foundation,
19   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20   *
21   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22   * or visit www.oracle.com if you need additional information or have any
23   * questions.
24   */
25  /*
26   *******************************************************************************
27   * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
28   *                                                                             *
29   * The original version of this source code and documentation is copyrighted   *
30   * and owned by IBM, These materials are provided under terms of a License     *
31   * Agreement between IBM and Sun. This technology is protected by multiple     *
32   * US and International patents. This notice and attribution to IBM may not    *
33   * to removed.                                                                 *
34   *******************************************************************************
35   */
36  
37  package sun.text.normalizer;
38  
39  import java.io.InputStream;
40  import java.io.DataInputStream;
41  import java.io.IOException;
42  
43  /**
44   * Trie implementation which stores data in char, 16 bits.
45   * @author synwee
46   * @see com.ibm.icu.impl.Trie
47   * @since release 2.1, Jan 01 2002
48   */
49  
50   // note that i need to handle the block calculations later, since chartrie
51   // in icu4c uses the same index array.
52  public class CharTrie extends Trie
53  {
54      // public constructors ---------------------------------------------
55  
56      /**
57      * <p>Creates a new Trie with the settings for the trie data.</p>
58      * <p>Unserialize the 32-bit-aligned input stream and use the data for the
59      * trie.</p>
60      * @param inputStream file input stream to a ICU data file, containing
61      *                    the trie
62      * @param dataManipulate object which provides methods to parse the char
63      *                        data
64      * @throws IOException thrown when data reading fails
65      * @draft 2.1
66      */
67      public CharTrie(InputStream inputStream,
68                      DataManipulate dataManipulate) throws IOException
69      {
70          super(inputStream, dataManipulate);
71  
72          if (!isCharTrie()) {
73              throw new IllegalArgumentException(
74                                 "Data given does not belong to a char trie.");
75          }
76          m_friendAgent_ = new FriendAgent();
77      }
78  
79      /**
80       * Make a dummy CharTrie.
81       * A dummy trie is an empty runtime trie, used when a real data trie cannot
82       * be loaded.
83       *
84       * The trie always returns the initialValue,
85       * or the leadUnitValue for lead surrogate code points.
86       * The Latin-1 part is always set up to be linear.
87       *
88       * @param initialValue the initial value that is set for all code points
89       * @param leadUnitValue the value for lead surrogate code _units_ that do not
90       *                      have associated supplementary data
91       * @param dataManipulate object which provides methods to parse the char data
92       */
93      public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
94          super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
95  
96          int dataLength, latin1Length, i, limit;
97          char block;
98  
99          /* calculate the actual size of the dummy trie data */
100 
101         /* max(Latin-1, block 0) */
102         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
103         if(leadUnitValue!=initialValue) {
104             dataLength+=DATA_BLOCK_LENGTH;
105         }
106         m_data_=new char[dataLength];
107         m_dataLength_=dataLength;
108 
109         m_initialValue_=(char)initialValue;
110 
111         /* fill the index and data arrays */
112 
113         /* indexes are preset to 0 (block 0) */
114 
115         /* Latin-1 data */
116         for(i=0; i<latin1Length; ++i) {
117             m_data_[i]=(char)initialValue;
118         }
119 
120         if(leadUnitValue!=initialValue) {
121             /* indexes for lead surrogate code units to the block after Latin-1 */
122             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
123             i=0xd800>>INDEX_STAGE_1_SHIFT_;
124             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
125             for(; i<limit; ++i) {
126                 m_index_[i]=block;
127             }
128 
129             /* data for lead surrogate code units */
130             limit=latin1Length+DATA_BLOCK_LENGTH;
131             for(i=latin1Length; i<limit; ++i) {
132                 m_data_[i]=(char)leadUnitValue;
133             }
134         }
135 
136         m_friendAgent_ = new FriendAgent();
137     }
138 
139     /**
140      * Java friend implementation
141      */
142     public class FriendAgent
143     {
144         /**
145          * Gives out the index array of the trie
146          * @return index array of trie
147          */
148         public char[] getPrivateIndex()
149         {
150             return m_index_;
151         }
152         /**
153          * Gives out the data array of the trie
154          * @return data array of trie
155          */
156         public char[] getPrivateData()
157         {
158             return m_data_;
159         }
160         /**
161          * Gives out the data offset in the trie
162          * @return data offset in the trie
163          */
164         public int getPrivateInitialValue()
165         {
166             return m_initialValue_;
167         }
168     }
169 
170     // public methods --------------------------------------------------
171 
172     /**
173      * Java friend implementation
174      * To store the index and data array into the argument.
175      * @param friend java friend UCharacterProperty object to store the array
176      */
177     public void putIndexData(UCharacterProperty friend)
178     {
179         friend.setIndexData(m_friendAgent_);
180     }
181 
182     /**
183     * Gets the value associated with the codepoint.
184     * If no value is associated with the codepoint, a default value will be
185     * returned.
186     * @param ch codepoint
187     * @return offset to data
188     * @draft 2.1
189     */
190     public final char getCodePointValue(int ch)
191     {
192         int offset;
193 
194         // fastpath for U+0000..U+D7FF
195         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
196             // copy of getRawOffset()
197             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
198                     + (ch & INDEX_STAGE_3_MASK_);
199             return m_data_[offset];
200         }
201 
202         // handle U+D800..U+10FFFF
203         offset = getCodePointOffset(ch);
204 
205         // return -1 if there is an error, in this case we return the default
206         // value: m_initialValue_
207         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
208     }
209 
210     /**
211     * Gets the value to the data which this lead surrogate character points
212     * to.
213     * Returned data may contain folding offset information for the next
214     * trailing surrogate character.
215     * This method does not guarantee correct results for trail surrogates.
216     * @param ch lead surrogate character
217     * @return data value
218     * @draft 2.1
219     */
220     public final char getLeadValue(char ch)
221     {
222        return m_data_[getLeadOffset(ch)];
223     }
224 
225     /**
226     * Get the value associated with a pair of surrogates.
227     * @param lead a lead surrogate
228     * @param trail a trail surrogate
229     * @draft 2.1
230     */
231     public final char getSurrogateValue(char lead, char trail)
232     {
233         int offset = getSurrogateOffset(lead, trail);
234         if (offset > 0) {
235             return m_data_[offset];
236         }
237         return m_initialValue_;
238     }
239 
240     /**
241     * <p>Get a value from a folding offset (from the value of a lead surrogate)
242     * and a trail surrogate.</p>
243     * <p>If the
244     * @param leadvalue value associated with the lead surrogate which contains
245     *        the folding offset
246     * @param trail surrogate
247     * @return trie data value associated with the trail character
248     * @draft 2.1
249     */
250     public final char getTrailValue(int leadvalue, char trail)
251     {
252         if (m_dataManipulate_ == null) {
253             throw new NullPointerException(
254                              "The field DataManipulate in this Trie is null");
255         }
256         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
257         if (offset > 0) {
258             return m_data_[getRawOffset(offset,
259                                         (char)(trail & SURROGATE_MASK_))];
260         }
261         return m_initialValue_;
262     }
263 
264     // protected methods -----------------------------------------------
265 
266     /**
267     * <p>Parses the input stream and stores its trie content into a index and
268     * data array</p>
269     * @param inputStream data input stream containing trie data
270     * @exception IOException thrown when data reading fails
271     */
272     protected final void unserialize(InputStream inputStream)
273                                                 throws IOException
274     {
275         DataInputStream input = new DataInputStream(inputStream);
276         int indexDataLength = m_dataOffset_ + m_dataLength_;
277         m_index_ = new char[indexDataLength];
278         for (int i = 0; i < indexDataLength; i ++) {
279             m_index_[i] = input.readChar();
280         }
281         m_data_           = m_index_;
282         m_initialValue_   = m_data_[m_dataOffset_];
283     }
284 
285     /**
286     * Gets the offset to the data which the surrogate pair points to.
287     * @param lead lead surrogate
288     * @param trail trailing surrogate
289     * @return offset to data
290     * @draft 2.1
291     */
292     protected final int getSurrogateOffset(char lead, char trail)
293     {
294         if (m_dataManipulate_ == null) {
295             throw new NullPointerException(
296                              "The field DataManipulate in this Trie is null");
297         }
298 
299         // get fold position for the next trail surrogate
300         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
301 
302         // get the real data from the folded lead/trail units
303         if (offset > 0) {
304             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
305         }
306 
307         // return -1 if there is an error, in this case we return the default
308         // value: m_initialValue_
309         return -1;
310     }
311 
312     /**
313     * Gets the value at the argument index.
314     * For use internally in TrieIterator.
315     * @param index value at index will be retrieved
316     * @return 32 bit value
317     * @see com.ibm.icu.impl.TrieIterator
318     * @draft 2.1
319     */
320     protected final int getValue(int index)
321     {
322         return m_data_[index];
323     }
324 
325     /**
326     * Gets the default initial value
327     * @return 32 bit value
328     * @draft 2.1
329     */
330     protected final int getInitialValue()
331     {
332         return m_initialValue_;
333     }
334 
335     // private data members --------------------------------------------
336 
337     /**
338     * Default value
339     */
340     private char m_initialValue_;
341     /**
342     * Array of char data
343     */
344     private char m_data_[];
345     /**
346      * Agent for friends
347      */
348     private FriendAgent m_friendAgent_;
349 }