View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 1999-2004 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  /*
21   * $Id: XMLChar.java,v 1.2.4.1 2005/09/15 08:16:01 suresh_emailid Exp $
22   */
23  
24  package com.sun.org.apache.xml.internal.utils;
25  
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * Every character in the Basic Multilingual Plane (0x0000-0xFFFF) has
 * one byte of flags in a private lookup table; each <code>MASK_*</code>
 * constant names one bit of that byte. Because the table is private to
 * this class, callers query character properties through the static
 * convenience methods (<code>isValid</code>, <code>isName</code>, ...).
 * <p>
 * All predicates taking an <code>int</code> return <code>false</code>
 * for values outside the range they know about (including negative
 * values) instead of throwing.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, indexed by UTF-16 code unit (0x0000-0xFFFF). */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //
        // Pairs of (first, last); the supplemental range is handled
        // arithmetically by the predicates, not by the table.
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        // Flag combinations shared by several tables below.
        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        flagRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters (must run after the content bit was set)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        flagChars(spaceChar, MASK_SPACE);

        // set name start characters
        flagChars(nameStartChar, nameStartFlags);
        flagRanges(letterRange, nameStartFlags);
        flagChars(letterChar, nameStartFlags);

        // set name characters
        flagChars(nameChar, nameFlags);
        flagRanges(digitRange, nameFlags);
        flagRanges(combiningCharRange, nameFlags);
        flagChars(combiningCharChar, nameFlags);
        flagRanges(extenderRange, nameFlags);
        flagChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        flagChars(pubidChar, MASK_PUBID);
        flagRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static helpers
    //

    /** Ors <code>mask</code> into the flags of every character listed in <code>chars</code>. */
    private static void flagChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /**
     * Ors <code>mask</code> into the flags of every character covered by
     * <code>ranges</code>, which holds consecutive (first, last) pairs with
     * both ends inclusive.
     */
    private static void flagRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /**
     * Returns true if <code>c</code> lies inside the flag table and has at
     * least one bit of <code>mask</code> set. Values outside 0x0000-0xFFFF
     * (negative ones included) yield false rather than an
     * ArrayIndexOutOfBoundsException.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if <code>c</code> is allowed inside an encoding name:
     * an ASCII letter, an ASCII digit, '.', '_' or '-'.
     */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
               (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character,
     * i.e. lies in the range 0x10000 to 0x10FFFF.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked for actually being
     * surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF, which is not stored in the flag table.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check; <code>null</code> and the empty
     *             string are not valid names
     * @return true if name is a valid Name
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < name.length(); i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check; <code>null</code> and the empty
     *               string are not valid NCNames
     * @return true if name is a valid NCName
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ncName.length(); i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check; <code>null</code> and the empty
     *                string are not valid Nmtokens
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        for (int i = 0; i < nmtoken.length(); i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean





    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name; <code>null</code>
     *                     and the empty string are rejected.
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, digits, '.',
     * '_' or '-'. Unlike an IANA name, a Java name may start with
     * any of those characters (e.g. a digit, as in "8859_1").
     *
     * @param javaEncoding The Java encoding name; <code>null</code>
     *                     and the empty string are rejected.
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0: the first character has no separate check
        // here, so skipping it would let any character through.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. A legal QName is
     * either a single NCName, or a prefix NCName and a local-part
     * NCName separated by one ':'.
     *
     * @param str string to check; <code>null</code> is not a legal QName
     * @return false if <code>str</code> is illegal; true if
     *         <code>str</code> is legal
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves an empty prefix or local part.
        // For the empty string both sides are -1, so it is rejected here too.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            // Any further ':' ends up in localPart and fails the NCName check.
            return isValidNCName(prefix) && isValidNCName(localPart);
        }
        return isValidNCName(str);
    } // isValidQName(String):boolean

} // class XMLChar
669 } // class XMLChar