/*
 * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25  
26  package com.sun.activation.registries;
27  
/**
 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
 * Useful for parsing MIME content types and mailcap entries.
 *
 * <p>Tokens are produced one at a time by {@link #nextToken()}; the kind of
 * the most recent token is available from {@link #getCurrentToken()} and its
 * text from {@link #getCurrentTokenValue()}.
 */
public class MailcapTokenizer {

    public static final int UNKNOWN_TOKEN = 0;
    public static final int START_TOKEN = 1;
    public static final int STRING_TOKEN = 2;
    public static final int EOI_TOKEN = 5;
    public static final int SLASH_TOKEN = '/';
    public static final int SEMICOLON_TOKEN = ';';
    public static final int EQUALS_TOKEN = '=';

    /** Character that introduces an escape sequence inside an auto-quoted token. */
    private static final char ESCAPE_CHAR = '\\';

    private final String data;
    private final int dataLength;
    private final char autoquoteChar;
    private int dataIndex;
    private int currentToken;
    private String currentTokenValue;
    private boolean isAutoquoting;

    /**
     * Creates a tokenizer positioned at the start of the given string.
     * The current token is {@link #START_TOKEN} with an empty value until
     * {@link #nextToken()} is called.
     *
     * @param inputString the string to tokenize; must not be {@code null}
     */
    public MailcapTokenizer(String inputString) {
        data = inputString;
        dataIndex = 0;
        dataLength = inputString.length();

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
        autoquoteChar = ';';
    }

    /**
     * Set whether auto-quoting is on or off.
     *
     * <p>While auto-quoting is on, a leading ';' or '=' is still returned as
     * its own token; otherwise every character from the first
     * non-whitespace character up to (but not including) the first
     * unescaped auto-quote terminator (';') or end of input is returned as
     * a single {@link #STRING_TOKEN}. Backslash escape sequences inside the
     * token are resolved, so "\;" yields a literal ';'. Whitespace
     * preceding the terminator is retained in the token value.
     *
     * <p>This is required for handling command strings in a mailcap entry.
     *
     * @param value {@code true} to enable auto-quoting, {@code false} to disable it
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     * Retrieve current token.
     *
     * @return the kind of the current token (one of the {@code *_TOKEN} constants)
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     * Get a String that describes the given token.
     *
     * @param token a token kind
     * @return a short description, or "really unknown" if the value is not
     *         one of the {@code *_TOKEN} constants
     */
    public static String nameForToken(int token) {
        String name = "really unknown";

        switch (token) {
            case UNKNOWN_TOKEN:
                name = "unknown";
                break;
            case START_TOKEN:
                name = "start";
                break;
            case STRING_TOKEN:
                name = "string";
                break;
            case EOI_TOKEN:
                name = "EOI";
                break;
            case SLASH_TOKEN:
                name = "'/'";
                break;
            case SEMICOLON_TOKEN:
                name = "';'";
                break;
            case EQUALS_TOKEN:
                name = "'='";
                break;
            default:
                break;
        }

        return name;
    }

    /**
     * Retrieve current token value.
     *
     * @return a String containing the current token value, or {@code null}
     *         once {@link #EOI_TOKEN} has been reached
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     * Process the next token.
     *
     * <p>Leading whitespace is skipped. At end of input the token is
     * {@link #EOI_TOKEN} and the token value is {@code null}.
     *
     * @return the next token
     */
    public int nextToken() {
        //  skip white space
        while ((dataIndex < dataLength)
                && isWhiteSpaceChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        //  examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            if (c == ';' || c == '=') {
                processSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if ((c == '/') || (c == ';') || (c == '=')) {
            processSingleCharToken(c, c);
        } else {
            processSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /** Consumes one character and records it as a token of the given kind. */
    private void processSingleCharToken(int token, char c) {
        currentToken = token;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /** Consumes a maximal run of string-token characters as a STRING_TOKEN. */
    private void processStringToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  skip to 1st non string token character
        while ((dataIndex < dataLength)
                && isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     * Consumes everything up to the first unescaped auto-quote terminator
     * (or end of input) as a STRING_TOKEN, resolving escape sequences.
     */
    private void processAutoquoteToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  now skip to the 1st non-escaped autoquote termination character
        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if ((c == ESCAPE_CHAR) && (dataIndex + 1 < dataLength)) {
                //  step over the backslash and the character it escapes so
                //  that an escaped terminator does not end the token; the
                //  backslash itself is removed by fixEscapeSequences below
                dataIndex += 2;
            } else if (c == autoquoteChar) {
                break;
            } else {
                ++dataIndex;
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, dataIndex));
    }

    private static boolean isSpecialChar(char c) {
        switch (c) {
            case '(':
            case ')':
            case '<':
            case '>':
            case '@':
            case ',':
            case ';':
            case ':':
            case '\\':
            case '"':
            case '/':
            case '[':
            case ']':
            case '?':
            case '=':
                return true;
            default:
                return false;
        }
    }

    private static boolean isControlChar(char c) {
        return Character.isISOControl(c);
    }

    private static boolean isWhiteSpaceChar(char c) {
        return Character.isWhitespace(c);
    }

    private static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    }

    /**
     * Replaces each backslash-escaped character with the character itself.
     * A lone backslash at the very end of the input is kept as-is.
     */
    private static String fixEscapeSequences(String inputString) {
        int inputLength = inputString.length();
        StringBuilder buffer = new StringBuilder(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if ((currentChar == ESCAPE_CHAR) && (i < inputLength - 1)) {
                //  emit the escaped character and skip over it
                ++i;
                buffer.append(inputString.charAt(i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}