View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *      http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package com.sun.org.apache.xerces.internal.impl.xpath.regex;
22  
23  /**
24   * This class represents a character class such as [a-z] or a period.
25   *
26   * @xerces.internal
27   *
28   */
29  final class RangeToken extends Token implements java.io.Serializable {
30  
31      private static final long serialVersionUID = 3257568399592010545L;
32  
33      int[] ranges;
34      boolean sorted;
35      boolean compacted;
36      RangeToken icaseCache = null;
37      int[] map = null;
38      int nonMapIndex;
39  
40      RangeToken(int type) {
41          super(type);
42          this.setSorted(false);
43      }
44  
45                                                  // for RANGE or NRANGE
46      protected void addRange(int start, int end) {
47          this.icaseCache = null;
48          //System.err.println("Token#addRange(): "+start+" "+end);
49          int r1, r2;
50          if (start <= end) {
51              r1 = start;
52              r2 = end;
53          } else {
54              r1 = end;
55              r2 = start;
56          }
57  
58          int pos = 0;
59          if (this.ranges == null) {
60              this.ranges = new int[2];
61              this.ranges[0] = r1;
62              this.ranges[1] = r2;
63              this.setSorted(true);
64          } else {
65              pos = this.ranges.length;
66              if (this.ranges[pos-1]+1 == r1) {
67                  this.ranges[pos-1] = r2;
68                  return;
69              }
70              int[] temp = new int[pos+2];
71              System.arraycopy(this.ranges, 0, temp, 0, pos);
72              this.ranges = temp;
73              if (this.ranges[pos-1] >= r1)
74                  this.setSorted(false);
75              this.ranges[pos++] = r1;
76              this.ranges[pos] = r2;
77              if (!this.sorted)
78                  this.sortRanges();
79          }
80      }
81  
82      private final boolean isSorted() {
83          return this.sorted;
84      }
85      private final void setSorted(boolean sort) {
86          this.sorted = sort;
87          if (!sort)  this.compacted = false;
88      }
89      private final boolean isCompacted() {
90          return this.compacted;
91      }
92      private final void setCompacted() {
93          this.compacted = true;
94      }
95  
96      protected void sortRanges() {
97          if (this.isSorted())
98              return;
99          if (this.ranges == null)
100             return;
101         //System.err.println("Do sorting: "+this.ranges.length);
102 
103                                                 // Bubble sort
104                                                 // Why? -- In many cases,
105                                                 //         this.ranges has few elements.
106         for (int i = this.ranges.length-4;  i >= 0;  i -= 2) {
107             for (int j = 0;  j <= i;  j += 2) {
108                 if (this.ranges[j] > this.ranges[j+2]
109                     || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
110                     int tmp;
111                     tmp = this.ranges[j+2];
112                     this.ranges[j+2] = this.ranges[j];
113                     this.ranges[j] = tmp;
114                     tmp = this.ranges[j+3];
115                     this.ranges[j+3] = this.ranges[j+1];
116                     this.ranges[j+1] = tmp;
117                 }
118             }
119         }
120         this.setSorted(true);
121     }
122 
123     /**
124      * this.ranges is sorted.
125      */
126     protected void compactRanges() {
127         boolean DEBUG = false;
128         if (this.ranges == null || this.ranges.length <= 2)
129             return;
130         if (this.isCompacted())
131             return;
132         int base = 0;                           // Index of writing point
133         int target = 0;                         // Index of processing point
134 
135         while (target < this.ranges.length) {
136             if (base != target) {
137                 this.ranges[base] = this.ranges[target++];
138                 this.ranges[base+1] = this.ranges[target++];
139             } else
140                 target += 2;
141             int baseend = this.ranges[base+1];
142             while (target < this.ranges.length) {
143                 if (baseend+1 < this.ranges[target])
144                     break;
145                 if (baseend+1 == this.ranges[target]) {
146                     if (DEBUG)
147                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
148                                            +", "+this.ranges[base+1]
149                                            +"], ["+this.ranges[target]
150                                            +", "+this.ranges[target+1]
151                                            +"] -> ["+this.ranges[base]
152                                            +", "+this.ranges[target+1]
153                                            +"]");
154                     this.ranges[base+1] = this.ranges[target+1];
155                     baseend = this.ranges[base+1];
156                     target += 2;
157                 } else if (baseend >= this.ranges[target+1]) {
158                     if (DEBUG)
159                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
160                                            +", "+this.ranges[base+1]
161                                            +"], ["+this.ranges[target]
162                                            +", "+this.ranges[target+1]
163                                            +"] -> ["+this.ranges[base]
164                                            +", "+this.ranges[base+1]
165                                            +"]");
166                     target += 2;
167                 } else if (baseend < this.ranges[target+1]) {
168                     if (DEBUG)
169                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
170                                            +", "+this.ranges[base+1]
171                                            +"], ["+this.ranges[target]
172                                            +", "+this.ranges[target+1]
173                                            +"] -> ["+this.ranges[base]
174                                            +", "+this.ranges[target+1]
175                                            +"]");
176                     this.ranges[base+1] = this.ranges[target+1];
177                     baseend = this.ranges[base+1];
178                     target += 2;
179                 } else {
180                     throw new RuntimeException("Token#compactRanges(): Internel Error: ["
181                                                +this.ranges[base]
182                                                +","+this.ranges[base+1]
183                                                +"] ["+this.ranges[target]
184                                                +","+this.ranges[target+1]+"]");
185                 }
186             } // while
187             base += 2;
188         }
189 
190         if (base != this.ranges.length) {
191             int[] result = new int[base];
192             System.arraycopy(this.ranges, 0, result, 0, base);
193             this.ranges = result;
194         }
195         this.setCompacted();
196     }
197 
198     protected void mergeRanges(Token token) {
199         RangeToken tok = (RangeToken)token;
200         this.sortRanges();
201         tok.sortRanges();
202         if (tok.ranges == null)
203             return;
204         this.icaseCache = null;
205         this.setSorted(true);
206         if (this.ranges == null) {
207             this.ranges = new int[tok.ranges.length];
208             System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
209             return;
210         }
211         int[] result = new int[this.ranges.length+tok.ranges.length];
212         for (int i = 0, j = 0, k = 0;  i < this.ranges.length || j < tok.ranges.length;) {
213             if (i >= this.ranges.length) {
214                 result[k++] = tok.ranges[j++];
215                 result[k++] = tok.ranges[j++];
216             } else if (j >= tok.ranges.length) {
217                 result[k++] = this.ranges[i++];
218                 result[k++] = this.ranges[i++];
219             } else if (tok.ranges[j] < this.ranges[i]
220                        || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
221                 result[k++] = tok.ranges[j++];
222                 result[k++] = tok.ranges[j++];
223             } else {
224                 result[k++] = this.ranges[i++];
225                 result[k++] = this.ranges[i++];
226             }
227         }
228         this.ranges = result;
229     }
230 
231     protected void subtractRanges(Token token) {
232         if (token.type == NRANGE) {
233             this.intersectRanges(token);
234             return;
235         }
236         RangeToken tok = (RangeToken)token;
237         if (tok.ranges == null || this.ranges == null)
238             return;
239         this.icaseCache = null;
240         this.sortRanges();
241         this.compactRanges();
242         tok.sortRanges();
243         tok.compactRanges();
244 
245         //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
246 
247         int[] result = new int[this.ranges.length+tok.ranges.length];
248         int wp = 0, src = 0, sub = 0;
249         while (src < this.ranges.length && sub < tok.ranges.length) {
250             int srcbegin = this.ranges[src];
251             int srcend = this.ranges[src+1];
252             int subbegin = tok.ranges[sub];
253             int subend = tok.ranges[sub+1];
254             if (srcend < subbegin) {            // Not overlapped
255                                                 // src: o-----o
256                                                 // sub:         o-----o
257                                                 // res: o-----o
258                                                 // Reuse sub
259                 result[wp++] = this.ranges[src++];
260                 result[wp++] = this.ranges[src++];
261             } else if (srcend >= subbegin
262                        && srcbegin <= subend) { // Overlapped
263                                                 // src:    o--------o
264                                                 // sub:  o----o
265                                                 // sub:      o----o
266                                                 // sub:          o----o
267                                                 // sub:  o------------o
268                 if (subbegin <= srcbegin && srcend <= subend) {
269                                                 // src:    o--------o
270                                                 // sub:  o------------o
271                                                 // res: empty
272                                                 // Reuse sub
273                     src += 2;
274                 } else if (subbegin <= srcbegin) {
275                                                 // src:    o--------o
276                                                 // sub:  o----o
277                                                 // res:       o-----o
278                                                 // Reuse src(=res)
279                     this.ranges[src] = subend+1;
280                     sub += 2;
281                 } else if (srcend <= subend) {
282                                                 // src:    o--------o
283                                                 // sub:          o----o
284                                                 // res:    o-----o
285                                                 // Reuse sub
286                     result[wp++] = srcbegin;
287                     result[wp++] = subbegin-1;
288                     src += 2;
289                 } else {
290                                                 // src:    o--------o
291                                                 // sub:      o----o
292                                                 // res:    o-o    o-o
293                                                 // Reuse src(=right res)
294                     result[wp++] = srcbegin;
295                     result[wp++] = subbegin-1;
296                     this.ranges[src] = subend+1;
297                     sub += 2;
298                 }
299             } else if (subend < srcbegin) {
300                                                 // Not overlapped
301                                                 // src:          o-----o
302                                                 // sub: o----o
303                 sub += 2;
304             } else {
305                 throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
306                                            +","+this.ranges[src+1]
307                                            +"] - ["+tok.ranges[sub]
308                                            +","+tok.ranges[sub+1]
309                                            +"]");
310             }
311         }
312         while (src < this.ranges.length) {
313             result[wp++] = this.ranges[src++];
314             result[wp++] = this.ranges[src++];
315         }
316         this.ranges = new int[wp];
317         System.arraycopy(result, 0, this.ranges, 0, wp);
318                                                 // this.ranges is sorted and compacted.
319     }
320 
321     /**
322      * @param tok Ignore whether it is NRANGE or not.
323      */
324     protected void intersectRanges(Token token) {
325         RangeToken tok = (RangeToken)token;
326         if (tok.ranges == null || this.ranges == null)
327             return;
328         this.icaseCache = null;
329         this.sortRanges();
330         this.compactRanges();
331         tok.sortRanges();
332         tok.compactRanges();
333 
334         int[] result = new int[this.ranges.length+tok.ranges.length];
335         int wp = 0, src1 = 0, src2 = 0;
336         while (src1 < this.ranges.length && src2 < tok.ranges.length) {
337             int src1begin = this.ranges[src1];
338             int src1end = this.ranges[src1+1];
339             int src2begin = tok.ranges[src2];
340             int src2end = tok.ranges[src2+1];
341             if (src1end < src2begin) {          // Not overlapped
342                                                 // src1: o-----o
343                                                 // src2:         o-----o
344                                                 // res:  empty
345                                                 // Reuse src2
346                 src1 += 2;
347             } else if (src1end >= src2begin
348                        && src1begin <= src2end) { // Overlapped
349                                                 // src1:    o--------o
350                                                 // src2:  o----o
351                                                 // src2:      o----o
352                                                 // src2:          o----o
353                                                 // src2:  o------------o
354                 if (src2begin <= src2begin && src1end <= src2end) {
355                                                 // src1:    o--------o
356                                                 // src2:  o------------o
357                                                 // res:     o--------o
358                                                 // Reuse src2
359                     result[wp++] = src1begin;
360                     result[wp++] = src1end;
361                     src1 += 2;
362                 } else if (src2begin <= src1begin) {
363                                                 // src1:    o--------o
364                                                 // src2:  o----o
365                                                 // res:     o--o
366                                                 // Reuse the rest of src1
367                     result[wp++] = src1begin;
368                     result[wp++] = src2end;
369                     this.ranges[src1] = src2end+1;
370                     src2 += 2;
371                 } else if (src1end <= src2end) {
372                                                 // src1:    o--------o
373                                                 // src2:          o----o
374                                                 // res:           o--o
375                                                 // Reuse src2
376                     result[wp++] = src2begin;
377                     result[wp++] = src1end;
378                     src1 += 2;
379                 } else {
380                                                 // src1:    o--------o
381                                                 // src2:      o----o
382                                                 // res:       o----o
383                                                 // Reuse the rest of src1
384                     result[wp++] = src2begin;
385                     result[wp++] = src2end;
386                     this.ranges[src1] = src2end+1;
387                 }
388             } else if (src2end < src1begin) {
389                                                 // Not overlapped
390                                                 // src1:          o-----o
391                                                 // src2: o----o
392                 src2 += 2;
393             } else {
394                 throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
395                                            +this.ranges[src1]
396                                            +","+this.ranges[src1+1]
397                                            +"] & ["+tok.ranges[src2]
398                                            +","+tok.ranges[src2+1]
399                                            +"]");
400             }
401         }
402         while (src1 < this.ranges.length) {
403             result[wp++] = this.ranges[src1++];
404             result[wp++] = this.ranges[src1++];
405         }
406         this.ranges = new int[wp];
407         System.arraycopy(result, 0, this.ranges, 0, wp);
408                                                 // this.ranges is sorted and compacted.
409     }
410 
411     /**
412      * for RANGE: Creates complement.
413      * for NRANGE: Creates the same meaning RANGE.
414      */
415     static Token complementRanges(Token token) {
416         if (token.type != RANGE && token.type != NRANGE)
417             throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
418         RangeToken tok = (RangeToken)token;
419         tok.sortRanges();
420         tok.compactRanges();
421         int len = tok.ranges.length+2;
422         if (tok.ranges[0] == 0)
423             len -= 2;
424         int last = tok.ranges[tok.ranges.length-1];
425         if (last == UTF16_MAX)
426             len -= 2;
427         RangeToken ret = Token.createRange();
428         ret.ranges = new int[len];
429         int wp = 0;
430         if (tok.ranges[0] > 0) {
431             ret.ranges[wp++] = 0;
432             ret.ranges[wp++] = tok.ranges[0]-1;
433         }
434         for (int i = 1;  i < tok.ranges.length-2;  i += 2) {
435             ret.ranges[wp++] = tok.ranges[i]+1;
436             ret.ranges[wp++] = tok.ranges[i+1]-1;
437         }
438         if (last != UTF16_MAX) {
439             ret.ranges[wp++] = last+1;
440             ret.ranges[wp] = UTF16_MAX;
441         }
442         ret.setCompacted();
443         return ret;
444     }
445 
446     synchronized RangeToken getCaseInsensitiveToken() {
447         if (this.icaseCache != null)
448             return this.icaseCache;
449 
450         RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
451         for (int i = 0;  i < this.ranges.length;  i += 2) {
452             for (int ch = this.ranges[i];  ch <= this.ranges[i+1];  ch ++) {
453                 if (ch > 0xffff)
454                     uppers.addRange(ch, ch);
455                 else {
456                     char uch = Character.toUpperCase((char)ch);
457                     uppers.addRange(uch, uch);
458                 }
459             }
460         }
461         RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
462         for (int i = 0;  i < uppers.ranges.length;  i += 2) {
463             for (int ch = uppers.ranges[i];  ch <= uppers.ranges[i+1];  ch ++) {
464                 if (ch > 0xffff)
465                     lowers.addRange(ch, ch);
466                 else {
467                     char uch = Character.toUpperCase((char)ch);
468                     lowers.addRange(uch, uch);
469                 }
470             }
471         }
472         lowers.mergeRanges(uppers);
473         lowers.mergeRanges(this);
474         lowers.compactRanges();
475 
476         this.icaseCache = lowers;
477         return lowers;
478     }
479 
480     void dumpRanges() {
481         System.err.print("RANGE: ");
482         if (this.ranges == null)
483             System.err.println(" NULL");
484         for (int i = 0;  i < this.ranges.length;  i += 2) {
485             System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
486         }
487         System.err.println("");
488     }
489 
490     boolean match(int ch) {
491         if (this.map == null)  this.createMap();
492         boolean ret;
493         if (this.type == RANGE) {
494             if (ch < MAPSIZE)
495                 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
496             ret = false;
497             for (int i = this.nonMapIndex;  i < this.ranges.length;  i += 2) {
498                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
499                     return true;
500             }
501         } else {
502             if (ch < MAPSIZE)
503                 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
504             ret = true;
505             for (int i = this.nonMapIndex;  i < this.ranges.length;  i += 2) {
506                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
507                     return false;
508             }
509         }
510         return ret;
511     }
512 
513     private static final int MAPSIZE = 256;
514     private void createMap() {
515         int asize = MAPSIZE/32;                 // 32 is the number of bits in `int'.
516         int [] map = new int[asize];
517         int nonMapIndex = this.ranges.length;
518         for (int i = 0; i < asize; ++i) {
519             map[i] = 0;
520         }
521         for (int i = 0; i < this.ranges.length;  i += 2) {
522             int s = this.ranges[i];
523             int e = this.ranges[i+1];
524             if (s < MAPSIZE) {
525                 for (int j = s; j <= e && j < MAPSIZE; j++) {
526                     map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
527                 }
528             }
529             else {
530                 nonMapIndex = i;
531                 break;
532             }
533             if (e >= MAPSIZE) {
534                 nonMapIndex = i;
535                 break;
536             }
537         }
538         this.map = map;
539         this.nonMapIndex = nonMapIndex;
540         //for (int i = 0;  i < asize;  i ++)  System.err.println("Map: "+Integer.toString(this.map[i], 16));
541     }
542 
543     public String toString(int options) {
544         String ret;
545         if (this.type == RANGE) {
546             if (this == Token.token_dot)
547                 ret = ".";
548             else if (this == Token.token_0to9)
549                 ret = "\\d";
550             else if (this == Token.token_wordchars)
551                 ret = "\\w";
552             else if (this == Token.token_spaces)
553                 ret = "\\s";
554             else {
555                 StringBuffer sb = new StringBuffer();
556                 sb.append("[");
557                 for (int i = 0;  i < this.ranges.length;  i += 2) {
558                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0)  sb.append(",");
559                     if (this.ranges[i] == this.ranges[i+1]) {
560                         sb.append(escapeCharInCharClass(this.ranges[i]));
561                     } else {
562                         sb.append(escapeCharInCharClass(this.ranges[i]));
563                         sb.append((char)'-');
564                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
565                     }
566                 }
567                 sb.append("]");
568                 ret = sb.toString();
569             }
570         } else {
571             if (this == Token.token_not_0to9)
572                 ret = "\\D";
573             else if (this == Token.token_not_wordchars)
574                 ret = "\\W";
575             else if (this == Token.token_not_spaces)
576                 ret = "\\S";
577             else {
578                 StringBuffer sb = new StringBuffer();
579                 sb.append("[^");
580                 for (int i = 0;  i < this.ranges.length;  i += 2) {
581                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0)  sb.append(",");
582                     if (this.ranges[i] == this.ranges[i+1]) {
583                         sb.append(escapeCharInCharClass(this.ranges[i]));
584                     } else {
585                         sb.append(escapeCharInCharClass(this.ranges[i]));
586                         sb.append('-');
587                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
588                     }
589                 }
590                 sb.append("]");
591                 ret = sb.toString();
592             }
593         }
594         return ret;
595     }
596 
597     private static String escapeCharInCharClass(int ch) {
598         String ret;
599         switch (ch) {
600           case '[':  case ']':  case '-':  case '^':
601           case ',':  case '\\':
602             ret = "\\"+(char)ch;
603             break;
604           case '\f':  ret = "\\f";  break;
605           case '\n':  ret = "\\n";  break;
606           case '\r':  ret = "\\r";  break;
607           case '\t':  ret = "\\t";  break;
608           case 0x1b:  ret = "\\e";  break;
609           //case 0x0b:  ret = "\\v";  break;
610           default:
611             if (ch < 0x20) {
612                 String pre = "0"+Integer.toHexString(ch);
613                 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
614             } else if (ch >= 0x10000) {
615                 String pre = "0"+Integer.toHexString(ch);
616                 ret = "\\v"+pre.substring(pre.length()-6, pre.length());
617             } else
618                 ret = ""+(char)ch;
619         }
620         return ret;
621     }
622 
623 }