View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 2001-2005 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *      http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package com.sun.org.apache.xerces.internal.impl.dv.xs;
22  
23  import com.sun.org.apache.xerces.internal.impl.dv.InvalidDatatypeValueException;
24  import com.sun.org.apache.xerces.internal.util.URI;
25  import com.sun.org.apache.xerces.internal.impl.dv.ValidationContext;
26  
27  /**
28   * Represent the schema type "anyURI"
29   *
30   * @xerces.internal
31   *
32   * @author Neeraj Bajaj, Sun Microsystems, inc.
33   * @author Sandy Gao, IBM
34   *
35   */
36  public class AnyURIDV extends TypeValidator {
37  
38      private static final URI BASE_URI;
39      static {
40          URI uri = null;
41          try {
42              uri = new URI("abc://def.ghi.jkl");
43          } catch (URI.MalformedURIException ex) {
44          }
45          BASE_URI = uri;
46      }
47  
48      public short getAllowedFacets(){
49          return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE );
50      }
51  
52      // before we return string we have to make sure it is correct URI as per spec.
53      // for some types (string and derived), they just return the string itself
54      public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException {
55          // check 3.2.17.c0 must: URI (rfc 2396/2723)
56          try {
57              if( content.length() != 0 ) {
58                  // encode special characters using XLink 5.4 algorithm
59                  final String encoded = encode(content);
60                  // Support for relative URLs
61                  // According to Java 1.1: URLs may also be specified with a
62                  // String and the URL object that it is related to.
63                  new URI(BASE_URI, encoded );
64              }
65          } catch (URI.MalformedURIException ex) {
66              throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"});
67          }
68  
69          // REVISIT: do we need to return the new URI object?
70          return content;
71      }
72  
73      // which ASCII characters need to be escaped
74      private static boolean gNeedEscaping[] = new boolean[128];
75      // the first hex character if a character needs to be escaped
76      private static char gAfterEscaping1[] = new char[128];
77      // the second hex character if a character needs to be escaped
78      private static char gAfterEscaping2[] = new char[128];
79      private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
80                                       '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
81      // initialize the above 3 arrays
82      static {
83          for (int i = 0; i <= 0x1f; i++) {
84              gNeedEscaping[i] = true;
85              gAfterEscaping1[i] = gHexChs[i >> 4];
86              gAfterEscaping2[i] = gHexChs[i & 0xf];
87          }
88          gNeedEscaping[0x7f] = true;
89          gAfterEscaping1[0x7f] = '7';
90          gAfterEscaping2[0x7f] = 'F';
91          char[] escChs = {' ', '<', '>', '"', '{', '}',
92                           '|', '\\', '^', '~', '`'};
93          int len = escChs.length;
94          char ch;
95          for (int i = 0; i < len; i++) {
96              ch = escChs[i];
97              gNeedEscaping[ch] = true;
98              gAfterEscaping1[ch] = gHexChs[ch >> 4];
99              gAfterEscaping2[ch] = gHexChs[ch & 0xf];
100         }
101     }
102 
103     // To encode special characters in anyURI, by using %HH to represent
104     // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
105     // and non-ASCII characters (whose value >= 128).
106     private static String encode(String anyURI){
107         int len = anyURI.length(), ch;
108         StringBuffer buffer = new StringBuffer(len*3);
109 
110         // for each character in the anyURI
111         int i = 0;
112         for (; i < len; i++) {
113             ch = anyURI.charAt(i);
114             // if it's not an ASCII character, break here, and use UTF-8 encoding
115             if (ch >= 128)
116                 break;
117             if (gNeedEscaping[ch]) {
118                 buffer.append('%');
119                 buffer.append(gAfterEscaping1[ch]);
120                 buffer.append(gAfterEscaping2[ch]);
121             }
122             else {
123                 buffer.append((char)ch);
124             }
125         }
126 
127         // we saw some non-ascii character
128         if (i < len) {
129             // get UTF-8 bytes for the remaining sub-string
130             byte[] bytes = null;
131             byte b;
132             try {
133                 bytes = anyURI.substring(i).getBytes("UTF-8");
134             } catch (java.io.UnsupportedEncodingException e) {
135                 // should never happen
136                 return anyURI;
137             }
138             len = bytes.length;
139 
140             // for each byte
141             for (i = 0; i < len; i++) {
142                 b = bytes[i];
143                 // for non-ascii character: make it positive, then escape
144                 if (b < 0) {
145                     ch = b + 256;
146                     buffer.append('%');
147                     buffer.append(gHexChs[ch >> 4]);
148                     buffer.append(gHexChs[ch & 0xf]);
149                 }
150                 else if (gNeedEscaping[b]) {
151                     buffer.append('%');
152                     buffer.append(gAfterEscaping1[b]);
153                     buffer.append(gAfterEscaping2[b]);
154                 }
155                 else {
156                     buffer.append((char)b);
157                 }
158             }
159         }
160 
161         // If encoding happened, create a new string;
162         // otherwise, return the orginal one.
163         if (buffer.length() != len) {
164             return buffer.toString();
165         }
166         else {
167             return anyURI;
168         }
169     }
170 
171 } // class AnyURIDV