View Javadoc
1   /*
2    * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3    */
4   
5   /*
6    * Copyright 2005 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *      http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package com.sun.org.apache.xerces.internal.util;
22  
23  /**
24   * This class is a symbol table implementation that guarantees that
25   * strings used as identifiers are unique references. Multiple calls
26   * to <code>addSymbol</code> will always return the same string
27   * reference.
28   * <p>
29   * The symbol table performs the same task as <code>String.intern()</code>
30   * with the following differences:
31   * <ul>
32   *  <li>
33   *   A new string object does not need to be created in order to
34   *   retrieve a unique reference. Symbols can be added by using
35   *   a series of characters in a character array.
36   *  </li>
37   *  <li>
38   *   Users of the symbol table can provide their own symbol hashing
39   *   implementation. For example, a simple string hashing algorithm
40   *   may fail to produce a balanced set of hashcodes for symbols
41   *   that are <em>mostly</em> unique. Strings with similar leading
42   *   characters are especially prone to this poor hashing behavior.
43   *  </li>
44   * </ul>
45   *
46   * @see SymbolHash
47   *
48   * @author Andy Clark
49   *
50   */
51  public class SymbolTable {
52  
53      //
54      // Constants
55      //
56  
57      /** Default table size. */
58      protected static final int TABLE_SIZE = 173;
59  
60  
61      /** Buckets. */
62      protected Entry[] fBuckets = null;
63  
64      // actual table size
65      protected int fTableSize;
66  
67      //
68      // Constructors
69      //
70  
71      /** Constructs a symbol table with a default number of buckets. */
72      public SymbolTable() {
73          this(TABLE_SIZE);
74      }
75  
76      /** Constructs a symbol table with a specified number of buckets. */
77      public SymbolTable(int tableSize) {
78          fTableSize = tableSize;
79          fBuckets = new Entry[fTableSize];
80      }
81  
82      //
83      // Public methods
84      //
85  
86      /**
87       * Adds the specified symbol to the symbol table and returns a
88       * reference to the unique symbol. If the symbol already exists,
89       * the previous symbol reference is returned instead, in order
90       * guarantee that symbol references remain unique.
91       *
92       * @param symbol The new symbol.
93       */
94      public String addSymbol(String symbol) {
95  
96          // search for identical symbol
97          final int hash = hash(symbol);
98          final int bucket = hash % fTableSize;
99          final int length = symbol.length();
100         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
101             if (length == entry.characters.length && hash == entry.hashCode) {
102                 if(symbol.regionMatches(0,entry.symbol,0,length)){
103                     return entry.symbol;
104                 }
105                 else{
106                     continue OUTER;
107                 }
108                 /**
109                 for (int i = 0; i < length; i++) {
110                     if (symbol.charAt(i) != entry.characters[i]) {
111                         continue OUTER;
112                     }
113                 }
114                 symbolAsArray = entry.characters;
115                 return entry.symbol;
116                  */
117             }
118         }
119 
120         // create new entry
121         Entry entry = new Entry(symbol, fBuckets[bucket]);
122         entry.hashCode = hash;
123         fBuckets[bucket] = entry;
124         return entry.symbol;
125 
126     } // addSymbol(String):String
127 
128     /**
129      * Adds the specified symbol to the symbol table and returns a
130      * reference to the unique symbol. If the symbol already exists,
131      * the previous symbol reference is returned instead, in order
132      * guarantee that symbol references remain unique.
133      *
134      * @param buffer The buffer containing the new symbol.
135      * @param offset The offset into the buffer of the new symbol.
136      * @param length The length of the new symbol in the buffer.
137      */
138     public String addSymbol(char[] buffer, int offset, int length) {
139         // search for identical symbol
140         int hash = hash(buffer, offset, length);
141         int bucket = hash % fTableSize;
142         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
143             if (length == entry.characters.length && hash ==entry.hashCode) {
144                 for (int i = 0; i < length; i++) {
145                     if (buffer[offset + i] != entry.characters[i]) {
146                         continue OUTER;
147                     }
148                 }
149                 return entry.symbol;
150             }
151         }
152 
153         // add new entry
154         Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
155         fBuckets[bucket] = entry;
156         entry.hashCode = hash;
157         return entry.symbol;
158 
159     } // addSymbol(char[],int,int):String
160 
161     /**
162      * Returns a hashcode value for the specified symbol. The value
163      * returned by this method must be identical to the value returned
164      * by the <code>hash(char[],int,int)</code> method when called
165      * with the character array that comprises the symbol string.
166      *
167      * @param symbol The symbol to hash.
168      */
169     public int hash(String symbol) {
170 
171         int code = 0;
172         int length = symbol.length();
173         for (int i = 0; i < length; i++) {
174             code = code * 37 + symbol.charAt(i);
175         }
176         return code & 0x7FFFFFFF;
177 
178     } // hash(String):int
179 
180     /**
181      * Returns a hashcode value for the specified symbol information.
182      * The value returned by this method must be identical to the value
183      * returned by the <code>hash(String)</code> method when called
184      * with the string object created from the symbol information.
185      *
186      * @param buffer The character buffer containing the symbol.
187      * @param offset The offset into the character buffer of the start
188      *               of the symbol.
189      * @param length The length of the symbol.
190      */
191     public int hash(char[] buffer, int offset, int length) {
192 
193         int code = 0;
194         for (int i = 0; i < length; i++) {
195             code = code * 37 + buffer[offset + i];
196         }
197         return code & 0x7FFFFFFF;
198 
199     } // hash(char[],int,int):int
200 
201     /**
202      * Returns true if the symbol table already contains the specified
203      * symbol.
204      *
205      * @param symbol The symbol to look for.
206      */
207     public boolean containsSymbol(String symbol) {
208 
209         // search for identical symbol
210         int hash = hash(symbol);
211         int bucket = hash % fTableSize;
212         int length = symbol.length();
213         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
214             if (length == entry.characters.length && hash == entry.hashCode) {
215                 if(symbol.regionMatches(0,entry.symbol,0,length)){
216                     return true;
217                 }
218                 else {
219                     continue OUTER;
220                 }
221                 /**
222                 for (int i = 0; i < length; i++) {
223                     if (symbol.charAt(i) != entry.characters[i]) {
224                         continue OUTER;
225                     }
226                 }
227                  return true;
228                  */
229             }
230         }
231 
232         return false;
233 
234     } // containsSymbol(String):boolean
235 
236     /**
237      * Returns true if the symbol table already contains the specified
238      * symbol.
239      *
240      * @param buffer The buffer containing the symbol to look for.
241      * @param offset The offset into the buffer.
242      * @param length The length of the symbol in the buffer.
243      */
244     public boolean containsSymbol(char[] buffer, int offset, int length) {
245 
246         // search for identical symbol
247         int hash = hash(buffer, offset, length) ;
248         int bucket = hash % fTableSize;
249         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
250             if (length == entry.characters.length && hash == entry.hashCode) {
251                 for (int i = 0; i < length; i++) {
252                     if (buffer[offset + i] != entry.characters[i]) {
253                         continue OUTER;
254                     }
255                 }
256                 return true;
257             }
258         }
259 
260         return false;
261 
262     } // containsSymbol(char[],int,int):boolean
263 
264 
265     //
266     // Classes
267     //
268 
269     /**
270      * This class is a symbol table entry. Each entry acts as a node
271      * in a linked list.
272      */
273     protected static final class Entry {
274 
275         //
276         // Data
277         //
278 
279         /** Symbol. */
280         public String symbol;
281         int hashCode = 0;
282 
283         /**
284          * Symbol characters. This information is duplicated here for
285          * comparison performance.
286          */
287         public char[] characters;
288 
289         /** The next entry. */
290         public Entry next;
291 
292         //
293         // Constructors
294         //
295 
296         /**
297          * Constructs a new entry from the specified symbol and next entry
298          * reference.
299          */
300         public Entry(String symbol, Entry next) {
301             this.symbol = symbol.intern();
302             characters = new char[symbol.length()];
303             symbol.getChars(0, characters.length, characters, 0);
304             this.next = next;
305         }
306 
307         /**
308          * Constructs a new entry from the specified symbol information and
309          * next entry reference.
310          */
311         public Entry(char[] ch, int offset, int length, Entry next) {
312             characters = new char[length];
313             System.arraycopy(ch, offset, characters, 0, length);
314             symbol = new String(characters).intern();
315             this.next = next;
316         }
317 
318     } // class Entry
319 
320 } // class SymbolTable