View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 1999-2004 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package com.sun.org.apache.regexp.internal;
22  
23  import java.io.Serializable;
24  
25  /**
26   * A class that holds compiled regular expressions.  This is exposed mainly
27   * for use by the recompile utility (which helps you produce precompiled
28   * REProgram objects). You should not otherwise need to work directly with
29   * this class.
30  *
31   * @see RE
32   * @see RECompiler
33   *
34   * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
35   */
36  public class REProgram implements Serializable
37  {
38      static final int OPT_HASBACKREFS = 1;
39  
40      char[] instruction;         // The compiled regular expression 'program'
41      int lenInstruction;         // The amount of the instruction buffer in use
42      char[] prefix;              // Prefix string optimization
43      int flags;                  // Optimization flags (REProgram.OPT_*)
44      int maxParens = -1;
45  
46      /**
47       * Constructs a program object from a character array
48       * @param instruction Character array with RE opcode instructions in it
49       */
50      public REProgram(char[] instruction)
51      {
52          this(instruction, instruction.length);
53      }
54  
55      /**
56       * Constructs a program object from a character array
57       * @param parens Count of parens in the program
58       * @param instruction Character array with RE opcode instructions in it
59       */
60      public REProgram(int parens, char[] instruction)
61      {
62          this(instruction, instruction.length);
63          this.maxParens = parens;
64      }
65  
66      /**
67       * Constructs a program object from a character array
68       * @param instruction Character array with RE opcode instructions in it
69       * @param lenInstruction Amount of instruction array in use
70       */
71      public REProgram(char[] instruction, int lenInstruction)
72      {
73          setInstructions(instruction, lenInstruction);
74      }
75  
76      /**
77       * Returns a copy of the current regular expression program in a character
78       * array that is exactly the right length to hold the program.  If there is
79       * no program compiled yet, getInstructions() will return null.
80       * @return A copy of the current compiled RE program
81       */
82      public char[] getInstructions()
83      {
84          // Ensure program has been compiled!
85          if (lenInstruction != 0)
86          {
87              // Return copy of program
88              char[] ret = new char[lenInstruction];
89              System.arraycopy(instruction, 0, ret, 0, lenInstruction);
90              return ret;
91          }
92          return null;
93      }
94  
95      /**
96       * Sets a new regular expression program to run.  It is this method which
97       * performs any special compile-time search optimizations.  Currently only
98       * two optimizations are in place - one which checks for backreferences
99       * (so that they can be lazily allocated) and another which attempts to
100      * find an prefix anchor string so that substantial amounts of input can
101      * potentially be skipped without running the actual program.
102      * @param instruction Program instruction buffer
103      * @param lenInstruction Length of instruction buffer in use
104      */
105     public void setInstructions(char[] instruction, int lenInstruction)
106     {
107         // Save reference to instruction array
108         this.instruction = instruction;
109         this.lenInstruction = lenInstruction;
110 
111         // Initialize other program-related variables
112         flags = 0;
113         prefix = null;
114 
115         // Try various compile-time optimizations if there's a program
116         if (instruction != null && lenInstruction != 0)
117         {
118             // If the first node is a branch
119             if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
120             {
121                 // to the end node
122                 int next = instruction[0 + RE.offsetNext];
123                 if (instruction[next + RE.offsetOpcode] == RE.OP_END)
124                 {
125                     // and the branch starts with an atom
126                     if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
127                     {
128                         // then get that atom as an prefix because there's no other choice
129                         int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
130                         prefix = new char[lenAtom];
131                         System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
132                     }
133                 }
134             }
135 
136             BackrefScanLoop:
137 
138             // Check for backreferences
139             for (int i = 0; i < lenInstruction; i += RE.nodeSize)
140             {
141                 switch (instruction[i + RE.offsetOpcode])
142                 {
143                     case RE.OP_ANYOF:
144                         i += (instruction[i + RE.offsetOpdata] * 2);
145                         break;
146 
147                     case RE.OP_ATOM:
148                         i += instruction[i + RE.offsetOpdata];
149                         break;
150 
151                     case RE.OP_BACKREF:
152                         flags |= OPT_HASBACKREFS;
153                         break BackrefScanLoop;
154                 }
155             }
156         }
157     }
158 }