Mercurial > hg > openjdk > jigsaw > nashorn
changeset 114:e42fd1640ff9
8006028: Integrate Joni regexp engine with Nashorn
Reviewed-by: lagergren, attila
line wrap: on
line diff
--- a/THIRD_PARTY_README Fri Feb 22 16:31:10 2013 +0100 +++ b/THIRD_PARTY_README Fri Feb 22 17:00:22 2013 +0100 @@ -98,3 +98,26 @@ OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --- end of LICENSE --- + +%% This notice is provided with respect to Joni library which is included +with the Nashorn technology. + +--- begin of LICENSE --- +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +--- end of LICENSE ---
--- a/docs/DEVELOPER_README Fri Feb 22 16:31:10 2013 +0100 +++ b/docs/DEVELOPER_README Fri Feb 22 17:00:22 2013 +0100 @@ -338,6 +338,15 @@ this system property. +SYSTEM_PROPERTY: nashorn.regexp.impl=[jdk|joni] + +This property defines the regular expression engine to be used by +Nashorn. The default implementation is "jdk" which is based on the +JDK's java.util.regex package. Set this property to "joni" to install +an implementation based on Joni, the regular expression engine used by +the JRuby project. + + =============== 2. The loggers. ===============
// From changeset hunk: --- /dev/null  +++ b/src/jdk/nashorn/internal/runtime/regexp/JoniRegExp.java (new file)
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.runtime.regexp;

import jdk.nashorn.internal.runtime.ParserException;
import jdk.nashorn.internal.runtime.regexp.joni.Matcher;
import jdk.nashorn.internal.runtime.regexp.joni.Option;
import jdk.nashorn.internal.runtime.regexp.joni.Regex;
import jdk.nashorn.internal.runtime.regexp.joni.Region;
import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Regular expression implementation based on the Joni engine from the JRuby project.
 */
public class JoniRegExp extends RegExp {

    /** Compiled Joni Regex; stays {@code null} when the scanner yields no parsed pattern. */
    private Regex regex;

    /** Most recently created matcher, reused while the same input instance is matched again. */
    private RegExpMatcher matcher;

    /**
     * Construct a Regular expression from the given {@code pattern} and {@code flags} strings.
     *
     * @param pattern RegExp pattern string
     * @param flags   RegExp flag string
     * @throws ParserException if flags is invalid or pattern string has syntax error.
     */
    public JoniRegExp(final String pattern, final String flags) throws ParserException {
        super(pattern, flags);

        // Start in single-line mode; the multiline flag swaps it for its negation.
        int option = Option.SINGLELINE;

        if (this.isIgnoreCase()) {
            option |= Option.IGNORECASE;
        }
        if (this.isMultiline()) {
            option &= ~Option.SINGLELINE;
            option |= Option.NEGATE_SINGLELINE;
        }

        try {
            final RegExpScanner parsed;

            try {
                parsed = RegExpScanner.scan(pattern);
            } catch (final PatternSyntaxException e) {
                // refine the exception with a better syntax error, if this
                // passes, just rethrow what we have
                Pattern.compile(pattern, 0);
                throw e;
            }

            if (parsed != null) {
                final char[] javaPattern = parsed.getJavaPattern().toCharArray();
                this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
                this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
            }
        } catch (final PatternSyntaxException e2) {
            throwParserException("syntax", e2.getMessage());
        } catch (final JOniException e2) {
            throwParserException("syntax", e2.getMessage());
        }
    }

    /**
     * Returns a matcher for {@code input}.
     *
     * <p>The previously created matcher is handed back when {@code input} is the very same
     * {@code String} instance (reference comparison, not {@code equals}); otherwise a new
     * matcher is created and cached.</p>
     *
     * @param input the string to match against
     * @return a matcher for {@code input}, or {@code null} if no regex was compiled
     */
    @Override
    public RegExpMatcher match(final String input) {
        if (regex == null) {
            return null;
        }

        RegExpMatcher current = this.matcher;

        // Identity comparison is intentional: it is a cheap "same input object" cache check.
        if (current == null || input != current.getInput()) {
            current = new JoniMatcher(input);
            this.matcher = current;
        }

        return current;
    }

    /**
     * RegExp Factory class for Joni regexp engine.
     */
    public static class Factory extends RegExpFactory {

        @Override
        protected RegExp compile(final String pattern, final String flags) throws ParserException {
            return new JoniRegExp(pattern, flags);
        }

        /**
         * Rewrites the token {@code [^]} to {@code [\s\S]}; every other token is returned unchanged.
         */
        @Override
        protected String replaceToken(final String str) {
            // Constant-first equals: a null token is passed through instead of raising an NPE.
            return "[^]".equals(str) ? "[\\s\\S]" : str;
        }
    }

    /**
     * {@link RegExpMatcher} backed by a Joni {@link Matcher} created from the enclosing
     * instance's compiled {@code regex}.
     */
    class JoniMatcher implements RegExpMatcher {
        final String input;
        // NOTE: inside this class the name "matcher" is the Joni matcher, not the outer cache field.
        final Matcher matcher;

        JoniMatcher(final String input) {
            this.input = input;
            this.matcher = regex.matcher(input.toCharArray());
        }

        @Override
        public boolean search(final int start) {
            return matcher.search(start, input.length(), Option.NONE) > -1;
        }

        @Override
        public String getInput() {
            return input;
        }

        @Override
        public int start() {
            return matcher.getBegin();
        }

        @Override
        public int start(final int group) {
            return group == 0 ? start() : matcher.getRegion().beg[group];
        }

        @Override
        public int end() {
            return matcher.getEnd();
        }

        @Override
        public int end(final int group) {
            return group == 0 ? end() : matcher.getRegion().end[group];
        }

        @Override
        public String group() {
            return input.substring(matcher.getBegin(), matcher.getEnd());
        }

        /**
         * Returns the text captured by {@code group}.
         *
         * @param group capture group index; {@code 0} denotes the whole match
         * @return the captured substring, or {@code null} when the group's recorded start
         *         index is negative (presumably a group that did not take part in the match)
         */
        @Override
        public String group(final int group) {
            if (group == 0) {
                return group();
            }
            final Region region = matcher.getRegion();
            final int begin = region.beg[group];
            if (begin < 0) {
                // Previously substring() was called with the negative index and threw
                // StringIndexOutOfBoundsException; report "no capture" instead.
                return null;
            }
            return input.substring(begin, region.end[group]);
        }

        @Override
        public int groupCount() {
            final Region region = matcher.getRegion();
            // No region object means no capture groups are tracked.
            return region == null ? 0 : region.numRegs - 1;
        }
    }
}
--- a/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java Fri Feb 22 16:31:10 2013 +0100 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java Fri Feb 22 17:00:22 2013 +0100 @@ -27,14 +27,34 @@ import jdk.nashorn.internal.parser.Lexer; import jdk.nashorn.internal.runtime.ParserException; +import jdk.nashorn.internal.runtime.options.Options; /** * Factory class for regular expressions. This class creates instances of {@link DefaultRegExp}. + * An alternative factory can be installed using the {@code nashorn.regexp.impl} system property. */ public class RegExpFactory { - private final static RegExpFactory instance = new RegExpFactory(); + private final static RegExpFactory instance; + + private final static String JDK = "jdk"; + private final static String JONI = "joni"; + + static { + final String impl = Options.getStringProperty("nashorn.regexp.impl", JDK); + switch (impl) { + case JONI: + instance = new JoniRegExp.Factory(); + break; + case JDK: + instance = new RegExpFactory(); + break; + default: + instance = null; + throw new InternalError("Unsupported RegExp factory: " + impl); + } + } /** * Creates a Regular expression from the given {@code pattern} and {@code flags} strings.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Analyser.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,2162 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll; +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsClear; +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt; +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAtSimple; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; +import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode; +import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; + +import java.util.HashSet; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; +import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; +import 
jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr; + +final class Analyser extends Parser { + + protected Analyser(ScanEnvironment env, char[] chars, int p, int end) { + super(env, chars, p, end); + } + + protected final void compile() { + regex.state = RegexState.COMPILING; + + if (Config.DEBUG) { + Config.log.println(new String(chars, getBegin(), getEnd())); + } + + reset(); + + regex.numMem = 0; + regex.numRepeat = 0; + regex.numNullCheck = 0; + //regex.repeatRangeAlloc = 0; + regex.repeatRangeLo = null; + regex.repeatRangeHi = null; + regex.numCombExpCheck = 0; + + if (Config.USE_COMBINATION_EXPLOSION_CHECK) regex.numCombExpCheck = 0; + + parse(); + + if (Config.USE_NAMED_GROUP) { + /* mixed use named group and no-named group */ + if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) { + if (env.numNamed != env.numMem) { + root = disableNoNameGroupCapture(root); + } else { + numberedRefCheck(root); + } + } + } // USE_NAMED_GROUP + + if (Config.USE_NAMED_GROUP) { + if (env.numCall > 0) { + env.unsetAddrList = new UnsetAddrList(env.numCall); + setupSubExpCall(root); + // r != 0 ??? + subexpRecursiveCheckTrav(root); + // r < 0 -< err, FOUND_CALLED_NODE = 1 + subexpInfRecursiveCheckTrav(root); + // r != 0 recursion infinite ??? 
+ regex.numCall = env.numCall; + } else { + regex.numCall = 0; + } + } // USE_NAMED_GROUP + + if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { + Config.log.println("<RAW TREE>"); + Config.log.println(root + "\n"); + } + + root = setupTree(root, 0); + if (Config.DEBUG_PARSE_TREE) { + if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println("<TREE>"); + root.verifyTree(new HashSet<Node>(), env.reg.warnings); + Config.log.println(root + "\n"); + } + + regex.captureHistory = env.captureHistory; + regex.btMemStart = env.btMemStart; + regex.btMemEnd = env.btMemEnd; + + if (isFindCondition(regex.options)) { + regex.btMemEnd = bsAll(); + } else { + regex.btMemEnd = env.btMemEnd; + regex.btMemEnd |= regex.captureHistory; + } + + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) { + setupCombExpCheck(root, 0); + + if (Config.USE_SUBEXP_CALL && env.hasRecursion) { + env.numCombExpCheck = 0; + } else { // USE_SUBEXP_CALL + if (env.combExpMaxRegNum > 0) { + for (int i=1; i<env.combExpMaxRegNum; i++) { + if (bsAt(env.backrefedMem, i)) { + env.numCombExpCheck = 0; + break; + } + } + } + } + + } // USE_SUBEXP_CALL + regex.numCombExpCheck = env.numCombExpCheck; + } // USE_COMBINATION_EXPLOSION_CHECK + + regex.clearOptimizeInfo(); + + if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root); + + env.memNodes = null; + + new ArrayCompiler(this).compile(); + //new AsmCompiler(this).compile(); + + if (regex.numRepeat != 0 || regex.btMemEnd != 0) { + regex.stackPopLevel = StackPopLevel.ALL; + } else { + if (regex.btMemStart != 0) { + regex.stackPopLevel = StackPopLevel.MEM_START; + } else { + regex.stackPopLevel = StackPopLevel.FREE; + } + } + + if (Config.DEBUG_COMPILE) { + if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString()); + Config.log.println("stack used: " + regex.stackNeeded); + if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n"); + 
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); + + } // DEBUG_COMPILE + + regex.state = RegexState.NORMAL; + } + + private void noNameDisableMapFor_cosAlt(Node node, int[]map, Ptr counter) { + ConsAltNode can = (ConsAltNode)node; + do { + can.setCar(noNameDisableMap(can.car, map, counter)); + } while ((can = can.cdr) != null); + } + + private void noNameDisableMapFor_quantifier(Node node, int[]map, Ptr counter) { + QuantifierNode qn = (QuantifierNode)node; + Node target = qn.target; + Node old = target; + target = noNameDisableMap(target, map, counter); + + if (target != old) { + qn.setTarget(target); + if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target); + } + } + + private Node noNameDisableMapFor_enclose(Node node, int[]map, Ptr counter) { + EncloseNode en = (EncloseNode)node; + if (en.type == EncloseType.MEMORY) { + if (en.isNamedGroup()) { + counter.p++; + map[en.regNum] = counter.p; + en.regNum = counter.p; + //en.target = noNameDisableMap(en.target, map, counter); + en.setTarget(noNameDisableMap(en.target, map, counter)); // ??? + } else { + node = en.target; + en.target = null; // remove first enclose: /(a)(?<b>c)/ + node = noNameDisableMap(node, map, counter); + } + } else { + //en.target = noNameDisableMap(en.target, map, counter); + en.setTarget(noNameDisableMap(en.target, map, counter)); // ??? 
+ } + return node; + } + + private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) { + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorNode.PREC_READ: + case AnchorNode.PREC_READ_NOT: + case AnchorNode.LOOK_BEHIND: + case AnchorNode.LOOK_BEHIND_NOT: + an.setTarget(noNameDisableMap(an.target, map, counter)); + } + } + + private Node noNameDisableMap(Node node, int[]map, Ptr counter) { + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + noNameDisableMapFor_cosAlt(node, map, counter); + break; + case NodeType.QTFR: + noNameDisableMapFor_quantifier(node, map, counter); + break; + case NodeType.ENCLOSE: + node = noNameDisableMapFor_enclose(node, map, counter); + break; + case NodeType.ANCHOR: + noNameDisableMapFor_anchor(node, map, counter); + break; + } // switch + return node; + } + + private void renumberByMap(Node node, int[]map) { + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + renumberByMap(can.car, map); + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + renumberByMap(((QuantifierNode)node).target, map); + break; + + case NodeType.ENCLOSE: + renumberByMap(((EncloseNode)node).target, map); + break; + + case NodeType.BREF: + ((BackRefNode)node).renumber(map); + break; + } // switch + } + + protected final void numberedRefCheck(Node node) { + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + numberedRefCheck(can.car); + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + numberedRefCheck(((QuantifierNode)node).target); + break; + + case NodeType.ENCLOSE: + numberedRefCheck(((EncloseNode)node).target); + break; + + case NodeType.BREF: + BackRefNode br = (BackRefNode)node; + if (!br.isNameRef()) newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); + break; + } // switch + } + + protected final Node disableNoNameGroupCapture(Node 
root) { + int[]map = new int[env.numMem + 1]; + + for (int i=1; i<=env.numMem; i++) map[i] = 0; + + root = noNameDisableMap(root, map, new Ptr(0)); + renumberByMap(root, map); + + for (int i=1, pos=1; i<=env.numMem; i++) { + if (map[i] > 0) { + env.memNodes[pos] = env.memNodes[i]; + pos++; + } + } + + int loc = env.captureHistory; + env.captureHistory = bsClear(); + + for (int i=1; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { + if (bsAt(loc, i)) { + env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]); + } + } + + env.numMem = env.numNamed; + regex.numMem = env.numNamed; + + regex.renumberNameTable(map); + + return root; + } + + private void swap(Node a, Node b) { + a.swap(b); + + if (root == b) { + root = a; + } else if (root == a) { + root = b; + } + } + + // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + private int quantifiersMemoryInfo(Node node) { + int info = 0; + + switch(node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + int v = quantifiersMemoryInfo(can.car); + if (v > info) info = v; + } while ((can = can.cdr) != null); + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (cn.isRecursion()) { + return TargetInfo.IS_EMPTY_REC; /* tiny version */ + } else { + info = quantifiersMemoryInfo(cn.target); + } + } // USE_SUBEXP_CALL + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + if (qn.upper != 0) { + info = quantifiersMemoryInfo(qn.target); + } + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch (en.type) { + case EncloseType.MEMORY: + return TargetInfo.IS_EMPTY_MEM; + + case EncloseType.OPTION: + case EncloseNode.STOP_BACKTRACK: + info = quantifiersMemoryInfo(en.target); + break; + + default: + break; + } // inner switch + break; + + case NodeType.BREF: + case NodeType.STR: + case NodeType.CTYPE: + case NodeType.CCLASS: + case NodeType.CANY: + case NodeType.ANCHOR: + default: + 
break; + } // switch + + return info; + } + + private int getMinMatchLength(Node node) { + int min = 0; + + switch (node.getType()) { + case NodeType.BREF: + BackRefNode br = (BackRefNode)node; + if (br.isRecursion()) break; + + if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF); + min = getMinMatchLength(env.memNodes[br.back[0]]); + + for (int i=1; i<br.backNum; i++) { + if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); + int tmin = getMinMatchLength(env.memNodes[br.back[i]]); + if (min > tmin) min = tmin; + } + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (cn.isRecursion()) { + EncloseNode en = (EncloseNode)cn.target; + if (en.isMinFixed()) min = en.minLength; + } else { + min = getMinMatchLength(cn.target); + } + } // USE_SUBEXP_CALL + break; + + case NodeType.LIST: + ConsAltNode can = (ConsAltNode)node; + do { + min += getMinMatchLength(can.car); + } while ((can = can.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode y = (ConsAltNode)node; + do { + Node x = y.car; + int tmin = getMinMatchLength(x); + if (y == node) { + min = tmin; + } else if (min > tmin) { + min = tmin; + } + } while ((y = y.cdr) != null); + break; + + case NodeType.STR: + min = ((StringNode)node).length(); + break; + + case NodeType.CTYPE: + min = 1; + break; + + case NodeType.CCLASS: + case NodeType.CANY: + min = 1; + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + if (qn.lower > 0) { + min = getMinMatchLength(qn.target); + min = MinMaxLen.distanceMultiply(min, qn.lower); + } + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch (en.type) { + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL) { + if (en.isMinFixed()) { + min = en.minLength; + } else { + min = getMinMatchLength(en.target); + en.minLength = min; + en.setMinFixed(); + } + } // USE_SUBEXP_CALL + break; + + case EncloseType.OPTION: + case EncloseType.STOP_BACKTRACK: + 
min = getMinMatchLength(en.target); + break; + } // inner switch + break; + + case NodeType.ANCHOR: + default: + break; + } // switch + + return min; + } + + private int getMaxMatchLength(Node node) { + int max = 0; + + switch (node.getType()) { + case NodeType.LIST: + ConsAltNode ln = (ConsAltNode)node; + do { + int tmax = getMaxMatchLength(ln.car); + max = MinMaxLen.distanceAdd(max, tmax); + } while ((ln = ln.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode an = (ConsAltNode)node; + do { + int tmax = getMaxMatchLength(an.car); + if (max < tmax) max = tmax; + } while ((an = an.cdr) != null); + break; + + case NodeType.STR: + max = ((StringNode)node).length(); + break; + + case NodeType.CTYPE: + max = 1; + break; + + case NodeType.CCLASS: + case NodeType.CANY: + max = 1; + break; + + case NodeType.BREF: + BackRefNode br = (BackRefNode)node; + if (br.isRecursion()) { + max = MinMaxLen.INFINITE_DISTANCE; + break; + } + + for (int i=0; i<br.backNum; i++) { + if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); + int tmax = getMaxMatchLength(env.memNodes[br.back[i]]); + if (max < tmax) max = tmax; + } + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (!cn.isRecursion()) { + max = getMaxMatchLength(cn.target); + } else { + max = MinMaxLen.INFINITE_DISTANCE; + } + } // USE_SUBEXP_CALL + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + if (qn.upper != 0) { + max = getMaxMatchLength(qn.target); + if (max != 0) { + if (!isRepeatInfinite(qn.upper)) { + max = MinMaxLen.distanceMultiply(max, qn.upper); + } else { + max = MinMaxLen.INFINITE_DISTANCE; + } + } + } + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch (en.type) { + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL) { + if (en.isMaxFixed()) { + max = en.maxLength; + } else { + max = getMaxMatchLength(en.target); + en.maxLength = max; + en.setMaxFixed(); + } + } // 
USE_SUBEXP_CALL + break; + + case EncloseType.OPTION: + case EncloseType.STOP_BACKTRACK: + max = getMaxMatchLength(en.target); + break; + } // inner switch + break; + + case NodeType.ANCHOR: + default: + break; + } // switch + + return max; + } + + private static final int GET_CHAR_LEN_VARLEN = -1; + private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; + protected final int getCharLengthTree(Node node) { + return getCharLengthTree(node, 0); + } + + private int getCharLengthTree(Node node, int level) { + level++; + + int len = 0; + returnCode = 0; + + switch(node.getType()) { + case NodeType.LIST: + ConsAltNode ln = (ConsAltNode)node; + do { + int tlen = getCharLengthTree(ln.car, level); + if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen); + } while (returnCode == 0 && (ln = ln.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode an = (ConsAltNode)node; + boolean varLen = false; + + int tlen = getCharLengthTree(an.car, level); + while (returnCode == 0 && (an = an.cdr) != null) { + int tlen2 = getCharLengthTree(an.car, level); + if (returnCode == 0) { + if (tlen != tlen2) varLen = true; + } + } + + if (returnCode == 0) { + if (varLen) { + if (level == 1) { + returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; + } else { + returnCode = GET_CHAR_LEN_VARLEN; + } + } else { + len = tlen; + } + } + break; + + case NodeType.STR: + StringNode sn = (StringNode)node; + len = sn.length(); + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + if (qn.lower == qn.upper) { + tlen = getCharLengthTree(qn.target, level); + if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower); + } else { + returnCode = GET_CHAR_LEN_VARLEN; + } + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (!cn.isRecursion()) { + len = getCharLengthTree(cn.target, level); + } else { + returnCode = GET_CHAR_LEN_VARLEN; + } + } // USE_SUBEXP_CALL + break; + + case NodeType.CTYPE: + len = 1; + + case 
NodeType.CCLASS: + case NodeType.CANY: + len = 1; + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch(en.type) { + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL) { + if (en.isCLenFixed()) { + len = en.charLength; + } else { + len = getCharLengthTree(en.target, level); + if (returnCode == 0) { + en.charLength = len; + en.setCLenFixed(); + } + } + } // USE_SUBEXP_CALL + break; + + case EncloseType.OPTION: + case EncloseType.STOP_BACKTRACK: + len = getCharLengthTree(en.target, level); + break; + } // inner switch + break; + + case NodeType.ANCHOR: + break; + + default: + returnCode = GET_CHAR_LEN_VARLEN; + } // switch + return len; + } + + /* x is not included y ==> 1 : 0 */ + private boolean isNotIncluded(Node x, Node y) { + Node tmp; + + // !retry:! + retry: while(true) { + + int yType = y.getType(); + + switch(x.getType()) { + case NodeType.CTYPE: + switch(yType) { + case NodeType.CTYPE: + CTypeNode cny = (CTypeNode)y; + CTypeNode cnx = (CTypeNode)x; + return cny.ctype == cnx.ctype && cny.not != cnx.not; + + case NodeType.CCLASS: + // !swap:! + tmp = x; + x = y; + y = tmp; + // !goto retry;! + continue retry; + + case NodeType.STR: + // !goto swap;! 
+ tmp = x; + x = y; + y = tmp; + continue retry; + + default: + break; + } // inner switch + break; + + case NodeType.CCLASS: + CClassNode xc = (CClassNode)x; + + switch(yType) { + case NodeType.CTYPE: + switch(((CTypeNode)y).ctype) { + case CharacterType.WORD: + if (!((CTypeNode)y).not) { + if (xc.mbuf == null && !xc.isNot()) { + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + if (xc.bs.at(i)) { + if (EncodingHelper.isWord(i)) return false; + } + } + return true; + } + return false; + } else { + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + if (!EncodingHelper.isWord(i)) { + if (!xc.isNot()) { + if (xc.bs.at(i)) return false; + } else { + if (!xc.bs.at(i)) return false; + } + } + } + return true; + } + // break; not reached + + default: + break; + } // inner switch + break; + + case NodeType.CCLASS: + CClassNode yc = (CClassNode)y; + + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + boolean v = xc.bs.at(i); + if ((v && !xc.isNot()) || (!v && xc.isNot())) { + v = yc.bs.at(i); + if ((v && !yc.isNot()) || (!v && yc.isNot())) return false; + } + } + if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true; + return false; + // break; not reached + + case NodeType.STR: + // !goto swap;! 
+ tmp = x; + x = y; + y = tmp; + continue retry; + + default: + break; + + } // inner switch + break; // case NodeType.CCLASS + + case NodeType.STR: + StringNode xs = (StringNode)x; + if (xs.length() == 0) break; + + switch (yType) { + case NodeType.CTYPE: + CTypeNode cy = ((CTypeNode)y); + switch (cy.ctype) { + case CharacterType.WORD: + return !cy.not; + + default: + break; + + } // inner switch + break; + + case NodeType.CCLASS: + CClassNode cc = (CClassNode)y; + int code = xs.chars[xs.p]; + return !cc.isCodeInCC(code); + + case NodeType.STR: + StringNode ys = (StringNode)y; + int len = xs.length(); + if (len > ys.length()) len = ys.length(); + if (xs.isAmbig() || ys.isAmbig()) { + /* tiny version */ + return false; + } else { + for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) { + if (ys.chars[p] != xs.chars[q]) return true; + } + } + break; + + default: + break; + } // inner switch + + break; // case NodeType.STR + + } // switch + + break; + } // retry: while + return false; + } + + private Node getHeadValueNode(Node node, boolean exact) { + Node n = null; + + switch(node.getType()) { + case NodeType.BREF: + case NodeType.ALT: + case NodeType.CANY: + break; + + case NodeType.CALL: + break; // if (Config.USE_SUBEXP_CALL) + + case NodeType.CTYPE: + case NodeType.CCLASS: + if (!exact) n = node; + break; + + case NodeType.LIST: + n = getHeadValueNode(((ConsAltNode)node).car, exact); + break; + + case NodeType.STR: + StringNode sn = (StringNode)node; + if (sn.end <= sn.p) break; // ??? 
+ + if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ + // nothing + } else { + n = node; + } + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + if (qn.lower > 0) { + if (qn.headExact != null) { + n = qn.headExact; + } else { + n = getHeadValueNode(qn.target, exact); + } + } + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + + switch (en.type) { + case EncloseType.OPTION: + int options = regex.options; + regex.options = en.option; + n = getHeadValueNode(en.target, exact); + regex.options = options; + break; + + case EncloseType.MEMORY: + case EncloseType.STOP_BACKTRACK: + n = getHeadValueNode(en.target, exact); + break; + } // inner switch + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact); + break; + + default: + break; + } // switch + + return n; + } + + // true: invalid + private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) { + if ((node.getType2Bit() & typeMask) == 0) return true; + + boolean invalid = false; + + switch(node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); + } while (!invalid && (can = can.cdr) != null); + break; + + case NodeType.QTFR: + invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if ((en.type & encloseMask) == 0) return true; + invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + if ((an.type & anchorMask) == 0) return true; + + if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); + break; + + default: + break; + + } // switch + + return invalid; + } + + private static final int 
RECURSION_EXIST = 1; + private static final int RECURSION_INFINITE = 2; + private int subexpInfRecursiveCheck(Node node, boolean head) { + int r = 0; + + switch (node.getType()) { + case NodeType.LIST: + int min; + ConsAltNode x = (ConsAltNode)node; + do { + int ret = subexpInfRecursiveCheck(x.car, head); + if (ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + min = getMinMatchLength(x.car); + if (min != 0) head = false; + } + } while ((x = x.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + r = RECURSION_EXIST; + do { + int ret = subexpInfRecursiveCheck(can.car, head); + if (ret == RECURSION_INFINITE) return ret; + r &= ret; + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + r = subexpInfRecursiveCheck(qn.target, head); + if (r == RECURSION_EXIST) { + if (qn.lower == 0) r = 0; + } + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: + r = subexpInfRecursiveCheck(an.target, head); + break; + } // inner switch + break; + + case NodeType.CALL: + r = subexpInfRecursiveCheck(((CallNode)node).target, head); + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if (en.isMark2()) { + return 0; + } else if (en.isMark1()) { + return !head ? RECURSION_EXIST : RECURSION_INFINITE; + // throw exception here ??? 
+ } else { + en.setMark2(); + r = subexpInfRecursiveCheck(en.target, head); + en.clearMark2(); + } + break; + + default: + break; + } // switch + return r; + } + + protected final int subexpInfRecursiveCheckTrav(Node node) { + int r = 0; + + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + r = subexpInfRecursiveCheckTrav(can.car); + } while (r == 0 && (can = can.cdr) != null); + break; + + case NodeType.QTFR: + r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target); + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: + r = subexpInfRecursiveCheckTrav(an.target); + break; + } // inner switch + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if (en.isRecursion()) { + en.setMark1(); + r = subexpInfRecursiveCheck(en.target, true); + if (r > 0) newValueException(ERR_NEVER_ENDING_RECURSION); + en.clearMark1(); + } + r = subexpInfRecursiveCheckTrav(en.target); + break; + + default: + break; + } // switch + + return r; + } + + private int subexpRecursiveCheck(Node node) { + int r = 0; + + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + r |= subexpRecursiveCheck(can.car); + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + r = subexpRecursiveCheck(((QuantifierNode)node).target); + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: + r = subexpRecursiveCheck(an.target); + break; + } // inner switch + break; + + case NodeType.CALL: + CallNode cn = (CallNode)node; + r = subexpRecursiveCheck(cn.target); + if (r != 0) cn.setRecursion(); + break; + + case 
NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if (en.isMark2()) { + return 0; + } else if (en.isMark1()) { + return 1; /* recursion */ + } else { + en.setMark2(); + r = subexpRecursiveCheck(en.target); + en.clearMark2(); + } + break; + + default: + break; + } // switch + + return r; + } + + private static final int FOUND_CALLED_NODE = 1; + protected final int subexpRecursiveCheckTrav(Node node) { + int r = 0; + + switch (node.getType()) { + case NodeType.LIST: + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + int ret = subexpRecursiveCheckTrav(can.car); + if (ret == FOUND_CALLED_NODE) { + r = FOUND_CALLED_NODE; + } + // else if (ret < 0) return ret; ??? + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + r = subexpRecursiveCheckTrav(qn.target); + if (qn.upper == 0) { + if (r == FOUND_CALLED_NODE) qn.isRefered = true; + } + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: + r = subexpRecursiveCheckTrav(an.target); + break; + } // inner switch + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if (!en.isRecursion()) { + if (en.isCalled()) { + en.setMark1(); + r = subexpRecursiveCheck(en.target); + if (r != 0) en.setRecursion(); + en.clearMark1(); + } + } + r = subexpRecursiveCheckTrav(en.target); + if (en.isCalled()) r |= FOUND_CALLED_NODE; + break; + + default: + break; + } // switch + + return r; + } + + private void setCallAttr(CallNode cn) { + cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes! 
+ if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); + + ((EncloseNode)cn.target).setCalled(); + env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum); + cn.unsetAddrList = env.unsetAddrList; + } + + protected final void setupSubExpCall(Node node) { + + switch(node.getType()) { + case NodeType.LIST: + ConsAltNode ln = (ConsAltNode)node; + do { + setupSubExpCall(ln.car); + } while ((ln = ln.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode can = (ConsAltNode)node; + do { + setupSubExpCall(can.car); + } while ((can = can.cdr) != null); + break; + + case NodeType.QTFR: + setupSubExpCall(((QuantifierNode)node).target); + break; + + case NodeType.ENCLOSE: + setupSubExpCall(((EncloseNode)node).target); + break; + + case NodeType.CALL: + CallNode cn = (CallNode)node; + + if (cn.groupNum != 0) { + int gNum = cn.groupNum; + + if (Config.USE_NAMED_GROUP) { + if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { + newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); + } + } // USE_NAMED_GROUP + if (gNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); + setCallAttr(cn); + } else { + if (Config.USE_NAMED_GROUP) { + NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd); + + if (ne == null) { + newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); + } else if (ne.backNum > 1) { + newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); + } else { + cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ? 
+ setCallAttr(cn); + } + } + } + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: + setupSubExpCall(an.target); + break; + } + break; + + } // switch + } + + /* divide different length alternatives in look-behind. + (?<=A|B) ==> (?<=A)|(?<=B) + (?<!A|B) ==> (?<!A)(?<!B) + */ + private Node divideLookBehindAlternatives(Node node) { + AnchorNode an = (AnchorNode)node; + int anchorType = an.type; + Node head = an.target; + Node np = ((ConsAltNode)head).car; + + swap(node, head); + + Node tmp = node; + node = head; + head = tmp; + + ((ConsAltNode)node).setCar(head); + ((AnchorNode)head).setTarget(np); + np = node; + + while ((np = ((ConsAltNode)np).cdr) != null) { + AnchorNode insert = new AnchorNode(anchorType); + insert.setTarget(((ConsAltNode)np).car); + ((ConsAltNode)np).setCar(insert); + } + + if (anchorType == AnchorType.LOOK_BEHIND_NOT) { + np = node; + do { + ((ConsAltNode)np).toListNode(); /* alt -> list */ + } while ((np = ((ConsAltNode)np).cdr) != null); + } + + return node; + } + + private Node setupLookBehind(Node node) { + AnchorNode an = (AnchorNode)node; + int len = getCharLengthTree(an.target); + switch(returnCode) { + case 0: + an.charLength = len; + break; + case GET_CHAR_LEN_VARLEN: + newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + break; + case GET_CHAR_LEN_TOP_ALT_VARLEN: + if (syntax.differentLengthAltLookBehind()) { + return divideLookBehindAlternatives(node); + } else { + newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + } + } + return node; + } + + private void nextSetup(Node node, Node nextNode) { + // retry: + retry: while(true) { + + int type = node.getType(); + if (type == NodeType.QTFR) { + QuantifierNode qn = (QuantifierNode)node; + if (qn.greedy && isRepeatInfinite(qn.upper)) { + if (Config.USE_QTFR_PEEK_NEXT) { + StringNode n = 
(StringNode)getHeadValueNode(nextNode, true); + /* '\0': for UTF-16BE etc... */ + if (n != null && n.chars[n.p] != 0) { // ????????? + qn.nextHeadExact = n; + } + } // USE_QTFR_PEEK_NEXT + /* automatic posseivation a*b ==> (?>a*)b */ + if (qn.lower <= 1) { + if (qn.target.isSimple()) { + Node x = getHeadValueNode(qn.target, false); + if (x != null) { + Node y = getHeadValueNode(nextNode, false); + if (y != null && isNotIncluded(x, y)) { + EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose + en.setStopBtSimpleRepeat(); + //en.setTarget(qn.target); // optimize it ?? + swap(node, en); + + en.setTarget(node); + } + } + } + } + } + } else if (type == NodeType.ENCLOSE) { + EncloseNode en = (EncloseNode)node; + if (en.isMemory()) { + node = en.target; + // !goto retry;! + continue retry; + } + } + + break; + } // while + } + + private void updateStringNodeCaseFoldMultiByte(StringNode sn) { + char[] chars = sn.chars; + int end = sn.end; + value = sn.p; + int sp = 0; + char buf; + + while (value < end) { + int ovalue = value; + buf = Character.toLowerCase(chars[value++]); + + if (chars[ovalue] != buf) { + + char[] sbuf = new char[sn.length() << 1]; + System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p); + value = ovalue; + while (value < end) { + buf = Character.toLowerCase(chars[value++]); + if (sp >= sbuf.length) { + char[]tmp = new char[sbuf.length << 1]; + System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); + sbuf = tmp; + } + sbuf[sp++] = buf; + } + sn.set(sbuf, 0, sp); + return; + } + sp++; + } + } + + private void updateStringNodeCaseFold(Node node) { + StringNode sn = (StringNode)node; + updateStringNodeCaseFoldMultiByte(sn); + } + + private Node expandCaseFoldMakeRemString(char[] chars, int p, int end) { + StringNode node = new StringNode(chars, p, end); + + updateStringNodeCaseFold(node); + node.setAmbig(); + node.setDontGetOptInfo(); + return node; + } + + private boolean expandCaseFoldStringAlt(int itemNum, char[] items, + char[] 
chars, int p, int slen, int end, ObjPtr<Node> node) { + + ConsAltNode altNode; + node.p = altNode = newAltNode(null, null); + + StringNode snode = new StringNode(chars, p, p + slen); + altNode.setCar(snode); + + for (int i=0; i<itemNum; i++) { + snode = new StringNode(); + + snode.catCode(items[i]); + + ConsAltNode an = newAltNode(null, null); + an.setCar(snode); + altNode.setCdr(an); + altNode = an; + } + return false; + } + + private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; + private Node expandCaseFoldString(Node node) { + StringNode sn = (StringNode)node; + + if (sn.isAmbig() || sn.length() <= 0) return node; + + char[] chars = sn.chars; + int p = sn.p; + int end = sn.end; + int altNum = 1; + + ConsAltNode topRoot = null, root = null; + ObjPtr<Node> prevNode = new ObjPtr<Node>(); + StringNode stringNode = null; + + while (p < end) { + char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars[p]); + + if (items.length == 0) { + if (stringNode == null) { + if (root == null && prevNode.p != null) { + topRoot = root = ConsAltNode.listAdd(null, prevNode.p); + } + + prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); + + if (root != null) ConsAltNode.listAdd(root, stringNode); + + } + + stringNode.cat(chars, p, p + 1); + } else { + altNum *= (items.length + 1); + if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + + if (root == null && prevNode.p != null) { + topRoot = root = ConsAltNode.listAdd(null, prevNode.p); + } + + expandCaseFoldStringAlt(items.length, items, chars, p, 1, end, prevNode); + if (root != null) ConsAltNode.listAdd(root, prevNode.p); + stringNode = null; + } + p++; + } + + if (p < end) { + Node srem = expandCaseFoldMakeRemString(chars, p, end); + + if (prevNode.p != null && root == null) { + topRoot = root = ConsAltNode.listAdd(null, prevNode.p); + } + + if (root == null) { + prevNode.p = srem; + } else { + ConsAltNode.listAdd(root, srem); + } + } + /* ending */ + Node xnode 
= topRoot != null ? topRoot : prevNode.p; + + swap(node, xnode); + return xnode; + } + + private static final int CEC_THRES_NUM_BIG_REPEAT = 512; + private static final int CEC_INFINITE_NUM = 0x7fffffff; + + private static final int CEC_IN_INFINITE_REPEAT = (1<<0); + private static final int CEC_IN_FINITE_REPEAT = (1<<1); + private static final int CEC_CONT_BIG_REPEAT = (1<<2); + + protected final int setupCombExpCheck(Node node, int state) { + int r = state; + int ret; + + switch (node.getType()) { + case NodeType.LIST: + ConsAltNode ln = (ConsAltNode)node; + + do { + r = setupCombExpCheck(ln.car, r); + //prev = ((ConsAltNode)node).car; + } while (r >= 0 && (ln = ln.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode an = (ConsAltNode)node; + do { + ret = setupCombExpCheck(an.car, state); + r |= ret; + } while (ret >= 0 && (an = an.cdr) != null); + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + int childState = state; + int addState = 0; + int varNum; + + if (!isRepeatInfinite(qn.upper)) { + if (qn.upper > 1) { + /* {0,1}, {1,1} are allowed */ + childState |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env.backrefedMem == 0) { + if (qn.target.getType() == NodeType.ENCLOSE) { + EncloseNode en = (EncloseNode)qn.target; + if (en.type == EncloseType.MEMORY) { + if (en.target.getType() == NodeType.QTFR) { + QuantifierNode q = (QuantifierNode)en.target; + if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) { + qn.upper = qn.lower == 0 ? 
1 : qn.lower; + if (qn.upper == 1) childState = state; + } + } + } + } + } + } + } + + if ((state & CEC_IN_FINITE_REPEAT) != 0) { + qn.combExpCheckNum = -1; + } else { + if (isRepeatInfinite(qn.upper)) { + varNum = CEC_INFINITE_NUM; + childState |= CEC_IN_INFINITE_REPEAT; + } else { + varNum = qn.upper - qn.lower; + } + + if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn.combExpCheckNum == 0) { + env.numCombExpCheck++; + qn.combExpCheckNum = env.numCombExpCheck; + if (env.currMaxRegNum > env.combExpMaxRegNum) { + env.combExpMaxRegNum = env.currMaxRegNum; + } + } + } + } + r = setupCombExpCheck(qn.target, childState); + r |= addState; + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch( en.type) { + case EncloseNode.MEMORY: + if (env.currMaxRegNum < en.regNum) { + env.currMaxRegNum = en.regNum; + } + r = setupCombExpCheck(en.target, state); + break; + + default: + r = setupCombExpCheck(en.target, state); + } // inner switch + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (cn.isRecursion()) { + env.hasRecursion = true; + } else { + r = setupCombExpCheck(cn.target, state); + } + } // USE_SUBEXP_CALL + break; + + default: + break; + + } // switch + + return r; + } + + private static final int IN_ALT = (1<<0); + private static final int IN_NOT = (1<<1); + private static final int IN_REPEAT = (1<<2); + private static final int IN_VAR_REPEAT = (1<<3); + private static final int EXPAND_STRING_MAX_LENGTH = 100; + + /* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. 
expand repeated string. + */ + protected final Node setupTree(Node node, int state) { + restart: while (true) { + switch (node.getType()) { + case NodeType.LIST: + ConsAltNode lin = (ConsAltNode)node; + Node prev = null; + do { + setupTree(lin.car, state); + if (prev != null) { + nextSetup(prev, lin.car); + } + prev = lin.car; + } while ((lin = lin.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode aln = (ConsAltNode)node; + do { + setupTree(aln.car, (state | IN_ALT)); + } while ((aln = aln.cdr) != null); + break; + + case NodeType.CCLASS: + break; + + case NodeType.STR: + if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { + node = expandCaseFoldString(node); + } + break; + + case NodeType.CTYPE: + case NodeType.CANY: + break; + + case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ? + break; + + case NodeType.BREF: + BackRefNode br = (BackRefNode)node; + for (int i=0; i<br.backNum; i++) { + if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); + env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]); + env.btMemStart = bsOnAt(env.btMemStart, br.back[i]); + if (Config.USE_BACKREF_WITH_LEVEL) { + if (br.isNestLevel()) { + env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]); + } + } // USE_BACKREF_AT_LEVEL + ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed(); + } + break; + + case NodeType.QTFR: + QuantifierNode qn = (QuantifierNode)node; + Node target = qn.target; + + if ((state & IN_REPEAT) != 0) qn.setInRepeat(); + + if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { + int d = getMinMatchLength(target); + if (d == 0) { + qn.targetEmptyInfo = TargetInfo.IS_EMPTY; + if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { + int info = quantifiersMemoryInfo(target); + if (info > 0) qn.targetEmptyInfo = info; + } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + // strange stuff here (turned off) + } + } + + state |= IN_REPEAT; + if (qn.lower != qn.upper) state |= IN_VAR_REPEAT; + + target = setupTree(target, state); 
+ + /* expand string */ + if (target.getType() == NodeType.STR) { + if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && + qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { + StringNode sn = (StringNode)target; + int len = sn.length(); + + if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { + StringNode str = qn.convertToString(sn.flag); + int n = qn.lower; + for (int i = 0; i < n; i++) { + str.cat(sn.chars, sn.p, sn.end); + } + break; /* break case NT_QTFR: */ + } + + } + } + if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { + if (qn.greedy && qn.targetEmptyInfo != 0) { + if (target.getType() == NodeType.QTFR) { + QuantifierNode tqn = (QuantifierNode)target; + if (tqn.headExact != null) { + qn.headExact = tqn.headExact; + tqn.headExact = null; + } + } else { + qn.headExact = getHeadValueNode(qn.target, true); + } + } + } // USE_OP_PUSH_OR_JUMP_EXACT + break; + + case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + switch (en.type) { + case EncloseType.OPTION: + int options = regex.options; + regex.options = en.option; + setupTree(en.target, state); + regex.options = options; + break; + + case EncloseType.MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + env.btMemStart = bsOnAt(env.btMemStart, en.regNum); + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + + } + setupTree(en.target, state); + break; + + case EncloseType.STOP_BACKTRACK: + setupTree(en.target, state); + if (en.target.getType() == NodeType.QTFR) { + QuantifierNode tqn = (QuantifierNode)en.target; + if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { + /* (?>a*), a*+ etc... 
*/ + if (tqn.target.isSimple()) en.setStopBtSimpleRepeat(); + } + } + break; + + } // inner switch + break; + + case NodeType.ANCHOR: + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.PREC_READ: + setupTree(an.target, state); + break; + + case AnchorType.PREC_READ_NOT: + setupTree(an.target, (state | IN_NOT)); + break; + + case AnchorType.LOOK_BEHIND: + if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + node = setupLookBehind(node); + if (node.getType() != NodeType.ANCHOR) continue restart; + setupTree(((AnchorNode)node).target, state); + break; + + case AnchorType.LOOK_BEHIND_NOT: + if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + node = setupLookBehind(node); + if (node.getType() != NodeType.ANCHOR) continue restart; + setupTree(((AnchorNode)node).target, (state | IN_NOT)); + break; + + } // inner switch + break; + } // switch + return node; + } // restart: while + } + + private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; + private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd + opt.clear(); + opt.setBoundNode(oenv.mmd); + + switch (node.getType()) { + case NodeType.LIST: { + OptEnvironment nenv = new OptEnvironment(); + NodeOptInfo nopt = new NodeOptInfo(); + nenv.copy(oenv); + ConsAltNode lin = (ConsAltNode)node; + do { + optimizeNodeLeft(lin.car, nopt, nenv); + nenv.mmd.add(nopt.length); + opt.concatLeftNode(nopt); + } while ((lin = lin.cdr) != null); + break; + } + + case NodeType.ALT: { + NodeOptInfo nopt = new NodeOptInfo(); + ConsAltNode aln = (ConsAltNode)node; + do { + optimizeNodeLeft(aln.car, nopt, oenv); + if (aln == node) { + opt.copy(nopt); + } else { + opt.altMerge(nopt, oenv); + } + } while ((aln = aln.cdr) != null); + break; + } + + case 
NodeType.STR: { + StringNode sn = (StringNode)node; + + int slen = sn.length(); + + if (!sn.isAmbig()) { + opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); + + if (slen > 0) { + opt.map.addChar(sn.chars[sn.p]); + } + + opt.length.set(slen, slen); + } else { + int max; + if (sn.isDontGetOptInfo()) { + max = sn.length(); + } else { + opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); + opt.exb.ignoreCase = true; + + if (slen > 0) { + opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); + } + + max = slen; + } + opt.length.set(slen, max); + } + + if (opt.exb.length == slen) { + opt.exb.reachEnd = true; + } + break; + } + + case NodeType.CCLASS: { + CClassNode cc = (CClassNode)node; + /* no need to check ignore case. (setted in setup_tree()) */ + if (cc.mbuf != null || cc.isNot()) { + opt.length.set(1, 1); + } else { + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + boolean z = cc.bs.at(i); + if ((z && !cc.isNot()) || (!z && cc.isNot())) { + opt.map.addChar(i); + } + } + opt.length.set(1, 1); + } + break; + } + + case NodeType.CTYPE: { + int min; + int max = 1; + if (max == 1) { + min = 1; + CTypeNode cn = (CTypeNode)node; + + switch (cn.ctype) { + case CharacterType.WORD: + if (cn.not) { + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + if (!EncodingHelper.isWord(i)) { + opt.map.addChar(i); + } + } + } else { + for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { + if (EncodingHelper.isWord(i)) { + opt.map.addChar(i); + } + } + } + break; + } // inner switch + } else { + min = 1; + } + opt.length.set(min, max); + break; + } + + case NodeType.CANY: { + opt.length.set(1, 1); + break; + } + + case NodeType.ANCHOR: { + AnchorNode an = (AnchorNode)node; + switch (an.type) { + case AnchorType.BEGIN_BUF: + case AnchorType.BEGIN_POSITION: + case AnchorType.BEGIN_LINE: + case AnchorType.END_BUF: + case AnchorType.SEMI_END_BUF: + case AnchorType.END_LINE: + opt.anchor.add(an.type); + break; + + case AnchorType.PREC_READ: + NodeOptInfo nopt = new 
NodeOptInfo(); + optimizeNodeLeft(an.target, nopt, oenv); + if (nopt.exb.length > 0) { + opt.expr.copy(nopt.exb); + } else if (nopt.exm.length > 0) { + opt.expr.copy(nopt.exm); + } + opt.expr.reachEnd = false; + if (nopt.map.value > 0) opt.map.copy(nopt.map); + break; + + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ + case AnchorType.LOOK_BEHIND_NOT: + break; + + } // inner switch + break; + } + + case NodeType.BREF: { + BackRefNode br = (BackRefNode)node; + + if (br.isRecursion()) { + opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); + break; + } + + Node[]nodes = oenv.scanEnv.memNodes; + + int min = getMinMatchLength(nodes[br.back[0]]); + int max = getMaxMatchLength(nodes[br.back[0]]); + + for (int i=1; i<br.backNum; i++) { + int tmin = getMinMatchLength(nodes[br.back[i]]); + int tmax = getMaxMatchLength(nodes[br.back[i]]); + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + opt.length.set(min, max); + break; + } + + case NodeType.CALL: { + if (Config.USE_SUBEXP_CALL) { + CallNode cn = (CallNode)node; + if (cn.isRecursion()) { + opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); + } else { + int safe = oenv.options; + oenv.options = ((EncloseNode)cn.target).option; + optimizeNodeLeft(cn.target, opt, oenv); + oenv.options = safe; + } + } // USE_SUBEXP_CALL + break; + } + + case NodeType.QTFR: { + NodeOptInfo nopt = new NodeOptInfo(); + QuantifierNode qn = (QuantifierNode)node; + optimizeNodeLeft(qn.target, nopt, oenv); + if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { + if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { + if (isMultiline(oenv.options)) { + opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); + } else { + opt.anchor.add(AnchorType.ANYCHAR_STAR); + } + } + } else { + if (qn.lower > 0) { + opt.copy(nopt); + if (nopt.exb.length > 0) { + if (nopt.exb.reachEnd) { + int i; + for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { + opt.exb.concat(nopt.exb); + } + if (i < 
qn.lower) { + opt.exb.reachEnd = false; + } + } + } + if (qn.lower != qn.upper) { + opt.exb.reachEnd = false; + opt.exm.reachEnd = false; + } + if (qn.lower > 1) { + opt.exm.reachEnd = false; + } + + } + } + int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); + int max; + if (isRepeatInfinite(qn.upper)) { + max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; + } else { + max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); + } + opt.length.set(min, max); + break; + } + + case NodeType.ENCLOSE: { + EncloseNode en = (EncloseNode)node; + switch (en.type) { + case EncloseType.OPTION: + int save = oenv.options; + oenv.options = en.option; + optimizeNodeLeft(en.target, opt, oenv); + oenv.options = save; + break; + + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { + int min = 0; + int max = MinMaxLen.INFINITE_DISTANCE; + if (en.isMinFixed()) min = en.minLength; + if (en.isMaxFixed()) max = en.maxLength; + opt.length.set(min, max); + } else { // USE_SUBEXP_CALL + optimizeNodeLeft(en.target, opt, oenv); + if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { + if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { + opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); + } + } + } + break; + + case EncloseType.STOP_BACKTRACK: + optimizeNodeLeft(en.target, opt, oenv); + break; + } // inner switch + break; + } + + default: + newInternalException(ERR_PARSER_BUG); + } // switch + } + + protected final void setOptimizedInfoFromTree(Node node) { + NodeOptInfo opt = new NodeOptInfo(); + OptEnvironment oenv = new OptEnvironment(); + + oenv.options = regex.options; + oenv.caseFoldFlag = regex.caseFoldFlag; + oenv.scanEnv = env; + oenv.mmd.clear(); // ?? 
+ + optimizeNodeLeft(node, opt, oenv); + + regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | + AnchorType.BEGIN_POSITION | + AnchorType.ANYCHAR_STAR | + AnchorType.ANYCHAR_STAR_ML); + + regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | + AnchorType.SEMI_END_BUF); + + if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { + regex.anchorDmin = opt.length.min; + regex.anchorDmax = opt.length.max; + } + + if (opt.exb.length > 0 || opt.exm.length > 0) { + opt.exb.select(opt.exm); + if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { + // !goto set_map;! + regex.setOptimizeMapInfo(opt.map); + regex.setSubAnchor(opt.map.anchor); + } else { + regex.setExactInfo(opt.exb); + regex.setSubAnchor(opt.exb.anchor); + } + } else if (opt.map.value > 0) { + // !set_map:! + regex.setOptimizeMapInfo(opt.map); + regex.setSubAnchor(opt.map.anchor); + } else { + regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; + if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; + } + + if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { + Config.log.println(regex.optimizeInfoToString()); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ApplyCaseFold.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,91 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; + +final class ApplyCaseFold { + + // i_apply_case_fold + public void apply(int from, int[]to, int length, Object o) { + ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o; + + ScanEnvironment env = arg.env; + CClassNode cc = arg.cc; + BitSet bs = cc.bs; + + if (length == 1) { + boolean inCC = cc.isCodeInCC(from); + + if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) { + if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) { + if (to[0] >= BitSet.SINGLE_BYTE_SIZE) { + cc.addCodeRange(env, to[0], to[0]); + } else { + /* /(?i:[^A-C])/.match("a") ==> fail. */ + bs.set(to[0]); + } + } + } else { + if (inCC) { + if (to[0] >= BitSet.SINGLE_BYTE_SIZE) { + if (cc.isNot()) cc.clearNotFlag(); + cc.addCodeRange(env, to[0], to[0]); + } else { + if (cc.isNot()) { + bs.clear(to[0]); + } else { + bs.set(to[0]); + } + } + } + } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + } else { + if (cc.isCodeInCC(from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) { + StringNode node = null; + for (int i=0; i<length; i++) { + if (i == 0) { + node = new StringNode(); + /* char-class expanded multi-char only + compare with string folded at match time. */ + node.setAmbig(); + } + node.catCode(to[i]); + } + + ConsAltNode alt = ConsAltNode.newAltNode(node, null); + + if (arg.tail == null) { + arg.altRoot = alt; + } else { + arg.tail.setCdr(alt); + } + arg.tail = alt; + } + + } + + } + + static final ApplyCaseFold INSTANCE = new ApplyCaseFold(); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ApplyCaseFoldArg.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,35 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; + +public final class ApplyCaseFoldArg { + final ScanEnvironment env; + final CClassNode cc; + ConsAltNode altRoot; + ConsAltNode tail; + + public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc) { + this.env = env; + this.cc = cc; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ArrayCompiler.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,1263 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDynamic; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; +import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; +import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; + +final class ArrayCompiler extends Compiler { + private int[] code; + private int codeLength; + + private char[][] templates; + private int templateNum; + + ArrayCompiler(Analyser analyser) { + super(analyser); + } + + @Override + protected final void prepare() { + int codeSize = Config.USE_STRING_TEMPLATES ? 
8 : ((analyser.getEnd() - analyser.getBegin()) * 2 + 2); + code = new int[codeSize]; + codeLength = 0; + } + + @Override + protected final void finish() { + addOpcode(OPCode.END); + addOpcode(OPCode.FINISH); // for stack bottom + + regex.code = code; + regex.codeLength = codeLength; + regex.templates = templates; + regex.templateNum = templateNum; + regex.factory = MatcherFactory.DEFAULT; + + if (Config.USE_SUBEXP_CALL && analyser.env.unsetAddrList != null) { + analyser.env.unsetAddrList.fix(regex); + analyser.env.unsetAddrList = null; + } + } + + @Override + protected void compileAltNode(ConsAltNode node) { + ConsAltNode aln = node; + int len = 0; + + do { + len += compileLengthTree(aln.car); + if (aln.cdr != null) { + len += OPSize.PUSH + OPSize.JUMP; + } + } while ((aln = aln.cdr) != null); + + int pos = codeLength + len; /* goal position */ + + aln = node; + do { + len = compileLengthTree(aln.car); + if (aln.cdr != null) { + addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP); + } + compileTree(aln.car); + if (aln.cdr != null) { + len = pos - (codeLength + OPSize.JUMP); + addOpcodeRelAddr(OPCode.JUMP, len); + } + } while ((aln = aln.cdr) != null); + } + + private boolean isNeedStrLenOpExact(int op) { + return op == OPCode.EXACTN || + op == OPCode.EXACTMB2N || + op == OPCode.EXACTMB3N || + op == OPCode.EXACTMBN || + op == OPCode.EXACTN_IC || + op == OPCode.EXACTN_IC_SB; + } + + private boolean opTemplated(int op) { + return isNeedStrLenOpExact(op); + } + + private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) { + int op; + + if (ignoreCase) { + switch(strLength) { + case 1: op = OPCode.EXACT1_IC; break; + default:op = OPCode.EXACTN_IC; break; + } // switch + } else { + switch (mbLength) { + case 1: + switch (strLength) { + case 1: op = OPCode.EXACT1; break; + case 2: op = OPCode.EXACT2; break; + case 3: op = OPCode.EXACT3; break; + case 4: op = OPCode.EXACT4; break; + case 5: op = OPCode.EXACT5; break; + default:op = OPCode.EXACTN; break; + } 
// inner switch + break; + case 2: + switch (strLength) { + case 1: op = OPCode.EXACTMB2N1; break; + case 2: op = OPCode.EXACTMB2N2; break; + case 3: op = OPCode.EXACTMB2N3; break; + default:op = OPCode.EXACTMB2N; break; + } // inner switch + break; + case 3: + op = OPCode.EXACTMB3N; + break; + default: + op = OPCode.EXACTMBN; + } // switch + } + return op; + } + + private void compileTreeEmptyCheck(Node node, int emptyInfo) { + int savedNumNullCheck = regex.numNullCheck; + + if (emptyInfo != 0) { + addOpcode(OPCode.NULL_CHECK_START); + addMemNum(regex.numNullCheck); /* NULL CHECK ID */ + regex.numNullCheck++; + } + + compileTree(node); + + if (emptyInfo != 0) { + switch(emptyInfo) { + case TargetInfo.IS_EMPTY: + addOpcode(OPCode.NULL_CHECK_END); + break; + case TargetInfo.IS_EMPTY_MEM: + addOpcode(OPCode.NULL_CHECK_END_MEMST); + break; + case TargetInfo.IS_EMPTY_REC: + addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH); + break; + } // switch + + addMemNum(savedNumNullCheck); /* NULL CHECK ID */ + } + } + + private int addCompileStringlength(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) { + int op = selectStrOpcode(mbLength, strLength, ignoreCase); + int len = OPSize.OPCODE; + + if (Config.USE_STRING_TEMPLATES && opTemplated(op)) { + // string length, template index, template string pointer + len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX; + } else { + if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH; + len += mbLength * strLength; + } + if (op == OPCode.EXACTMBN) len += OPSize.LENGTH; + return len; + } + + @Override + protected final void addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) { + int op = selectStrOpcode(mbLength, strLength, ignoreCase); + addOpcode(op); + + if (op == OPCode.EXACTMBN) addLength(mbLength); + + if (isNeedStrLenOpExact(op)) { + if (op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB) { + addLength(mbLength * strLength); + } else { + addLength(strLength); + } + } + + if 
(Config.USE_STRING_TEMPLATES && opTemplated(op)) { + addInt(templateNum); + addInt(p); + addTemplate(chars); + } else { + addChars(chars, p, mbLength * strLength); + } + } + + private int compileLengthStringNode(Node node) { + StringNode sn = (StringNode)node; + if (sn.length() <= 0) return 0; + boolean ambig = sn.isAmbig(); + + int p, prev; + p = prev = sn.p; + int end = sn.end; + char[] chars = sn.chars; + p++; + + int slen = 1; + int rlen = 0; + + while (p < end) { + slen++; + p++; + } + int r = addCompileStringlength(chars, prev, 1, slen, ambig); + rlen += r; + return rlen; + } + + private int compileLengthStringRawNode(StringNode sn) { + if (sn.length() <= 0) return 0; + return addCompileStringlength(sn.chars, sn.p, 1 /*sb*/, sn.length(), false); + } + + private void addMultiByteCClass(CodeRangeBuffer mbuf) { + addLength(mbuf.used); + addInts(mbuf.p, mbuf.used); + } + + private int compileLengthCClassNode(CClassNode cc) { + if (cc.isShare()) return OPSize.OPCODE + OPSize.POINTER; + + int len; + if (cc.mbuf == null) { + len = OPSize.OPCODE + BitSet.BITSET_SIZE; + } else { + if (cc.bs.isEmpty()) { + len = OPSize.OPCODE; + } else { + len = OPSize.OPCODE + BitSet.BITSET_SIZE; + } + + len += OPSize.LENGTH + cc.mbuf.used; + } + return len; + } + + @Override + protected void compileCClassNode(CClassNode cc) { + if (cc.isShare()) { // shared char class + addOpcode(OPCode.CCLASS_NODE); + addPointer(cc); + return; + } + + if (cc.mbuf == null) { + if (cc.isNot()) { + addOpcode(OPCode.CCLASS_NOT); + } else { + addOpcode(OPCode.CCLASS); + } + addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset + } else { + if (cc.bs.isEmpty()) { + if (cc.isNot()) { + addOpcode(OPCode.CCLASS_MB_NOT); + } else { + addOpcode(OPCode.CCLASS_MB); + } + addMultiByteCClass(cc.mbuf); + } else { + if (cc.isNot()) { + addOpcode(OPCode.CCLASS_MIX_NOT); + } else { + addOpcode(OPCode.CCLASS_MIX); + } + // store the bit set and mbuf themself! 
+ addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset + addMultiByteCClass(cc.mbuf); + } + } + } + + @Override + protected void compileCTypeNode(CTypeNode node) { + CTypeNode cn = node; + int op; + switch (cn.ctype) { + case CharacterType.WORD: + if (cn.not) { + op = OPCode.NOT_WORD; + } else { + op = OPCode.WORD; + } + break; + + default: + newInternalException(ERR_PARSER_BUG); + return; // not reached + } // inner switch + addOpcode(op); + } + + @Override + protected void compileAnyCharNode() { + if (isMultiline(regex.options)) { + addOpcode(OPCode.ANYCHAR_ML); + } else { + addOpcode(OPCode.ANYCHAR); + } + } + + @Override + protected void compileCallNode(CallNode node) { + addOpcode(OPCode.CALL); + node.unsetAddrList.add(codeLength, node.target); + addAbsAddr(0); /*dummy addr.*/ + } + + @Override + protected void compileBackrefNode(BackRefNode node) { + BackRefNode br = node; + if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) { + addOpcode(OPCode.BACKREF_WITH_LEVEL); + addOption(regex.options & Option.IGNORECASE); + addLength(br.nestLevel); + // !goto add_bacref_mems;! + addLength(br.backNum); + for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); + return; + } else { // USE_BACKREF_AT_LEVEL + if (br.backNum == 1) { + if (isIgnoreCase(regex.options)) { + addOpcode(OPCode.BACKREFN_IC); + addMemNum(br.back[0]); + } else { + switch (br.back[0]) { + case 1: + addOpcode(OPCode.BACKREF1); + break; + case 2: + addOpcode(OPCode.BACKREF2); + break; + default: + addOpcode(OPCode.BACKREFN); + addOpcode(br.back[0]); + break; + } // switch + } + } else { + if (isIgnoreCase(regex.options)) { + addOpcode(OPCode.BACKREF_MULTI_IC); + } else { + addOpcode(OPCode.BACKREF_MULTI); + } + // !add_bacref_mems:! 
+ addLength(br.backNum); + for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); + } + } + } + + private static final int REPEAT_RANGE_ALLOC = 8; + private void entryRepeatRange(int id, int lower, int upper) { + if (regex.repeatRangeLo == null) { + regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC]; + regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC]; + } else if (id >= regex.repeatRangeLo.length){ + int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC]; + System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length); + regex.repeatRangeLo = tmp; + tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC]; + System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length); + regex.repeatRangeHi = tmp; + } + + regex.repeatRangeLo[id] = lower; + regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper; + } + + private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) { + int numRepeat = regex.numRepeat; + addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG); + addMemNum(numRepeat); /* OP_REPEAT ID */ + regex.numRepeat++; + addRelAddr(targetLen + OPSize.REPEAT_INC); + + entryRepeatRange(numRepeat, qn.lower, qn.upper); + + compileTreeEmptyCheck(qn.target, emptyInfo); + + if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) { + addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG); + } else { + addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG); + } + + addMemNum(numRepeat); /* OP_REPEAT ID */ + } + + private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50 + + private static boolean cknOn(int ckn) { + return ckn > 0; + } + + private int compileCECLengthQuantifierNode(QuantifierNode qn) { + boolean infinite = isRepeatInfinite(qn.upper); + int emptyInfo = qn.targetEmptyInfo; + + int tlen = compileLengthTree(qn.target); + int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; + int cklen = cknOn(ckn) ? 
OPSize.STATE_CHECK_NUM : 0; + + /* anychar repeat */ + if (qn.target.getType() == NodeType.CANY) { + if (qn.greedy && infinite) { + if (qn.nextHeadExact != null && !cknOn(ckn)) { + return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen; + } else { + return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen; + } + } + } + + int modTLen; + if (emptyInfo != 0) { + modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); + } else { + modTLen = tlen; + } + + int len; + if (infinite && qn.lower <= 1) { + if (qn.greedy) { + if (qn.lower == 1) { + len = OPSize.JUMP; + } else { + len = 0; + } + len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP; + } else { + if (qn.lower == 0) { + len = OPSize.JUMP; + } else { + len = 0; + } + len += modTLen + OPSize.PUSH + cklen; + } + } else if (qn.upper == 0) { + if (qn.isRefered) { /* /(?<n>..){0}/ */ + len = OPSize.JUMP + tlen; + } else { + len = 0; + } + } else if (qn.upper == 1 && qn.greedy) { + if (qn.lower == 0) { + if (cknOn(ckn)) { + len = OPSize.STATE_CHECK_PUSH + tlen; + } else { + len = OPSize.PUSH + tlen; + } + } else { + len = tlen; + } + } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ + len = OPSize.PUSH + cklen + OPSize.JUMP + tlen; + } else { + len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; + + if (cknOn(ckn)) { + len += OPSize.STATE_CHECK; + } + } + return len; + } + + @Override + protected void compileCECQuantifierNode(QuantifierNode qn) { + boolean infinite = isRepeatInfinite(qn.upper); + int emptyInfo = qn.targetEmptyInfo; + + int tlen = compileLengthTree(qn.target); + + int ckn = regex.numCombExpCheck > 0 ? 
qn.combExpCheckNum : 0; + + if (qn.isAnyCharStar()) { + compileTreeNTimes(qn.target, qn.lower); + if (qn.nextHeadExact != null && !cknOn(ckn)) { + if (isMultiline(regex.options)) { + addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); + } else { + addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT); + } + if (cknOn(ckn)) { + addStateCheckNum(ckn); + } + StringNode sn = (StringNode)qn.nextHeadExact; + addChars(sn.chars, sn.p, 1); + return; + } else { + if (isMultiline(regex.options)) { + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_ANYCHAR_ML_STAR); + } else { + addOpcode(OPCode.ANYCHAR_ML_STAR); + } + } else { + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_ANYCHAR_STAR); + } else { + addOpcode(OPCode.ANYCHAR_STAR); + } + } + if (cknOn(ckn)) { + addStateCheckNum(ckn); + } + return; + } + } + + int modTLen; + if (emptyInfo != 0) { + modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); + } else { + modTLen = tlen; + } + if (infinite && qn.lower <= 1) { + if (qn.greedy) { + if (qn.lower == 1) { + addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH : + OPSize.PUSH); + } + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_PUSH); + addStateCheckNum(ckn); + addRelAddr(modTLen + OPSize.JUMP); + } else { + addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); + } + compileTreeEmptyCheck(qn.target, emptyInfo); + addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ? + OPSize.STATE_CHECK_PUSH : + OPSize.PUSH))); + } else { + if (qn.lower == 0) { + addOpcodeRelAddr(OPCode.JUMP, modTLen); + } + compileTreeEmptyCheck(qn.target, emptyInfo); + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP); + addStateCheckNum(ckn); + addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP)); + } else { + addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); + } + } + } else if (qn.upper == 0) { + if (qn.isRefered) { /* /(?<n>..){0}/ */ + addOpcodeRelAddr(OPCode.JUMP, tlen); + compileTree(qn.target); + } // else r=0 ??? 
+ } else if (qn.upper == 1 && qn.greedy) { + if (qn.lower == 0) { + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_PUSH); + addStateCheckNum(ckn); + addRelAddr(tlen); + } else { + addOpcodeRelAddr(OPCode.PUSH, tlen); + } + } + compileTree(qn.target); + } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */ + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK_PUSH); + addStateCheckNum(ckn); + addRelAddr(OPSize.JUMP); + } else { + addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); + } + + addOpcodeRelAddr(OPCode.JUMP, tlen); + compileTree(qn.target); + } else { + compileRangeRepeatNode(qn, modTLen, emptyInfo); + if (cknOn(ckn)) { + addOpcode(OPCode.STATE_CHECK); + addStateCheckNum(ckn); + } + } + } + + private int compileNonCECLengthQuantifierNode(QuantifierNode qn) { + boolean infinite = isRepeatInfinite(qn.upper); + int emptyInfo = qn.targetEmptyInfo; + + int tlen = compileLengthTree(qn.target); + + /* anychar repeat */ + if (qn.target.getType() == NodeType.CANY) { + if (qn.greedy && infinite) { + if (qn.nextHeadExact != null) { + return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower; + } else { + return OPSize.ANYCHAR_STAR + tlen * qn.lower; + } + } + } + + int modTLen = 0; + if (emptyInfo != 0) { + modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); + } else { + modTLen = tlen; + } + + int len; + if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + len = OPSize.JUMP; + } else { + len = tlen * qn.lower; + } + + if (qn.greedy) { + if (qn.headExact != null) { + len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP; + } else if (qn.nextHeadExact != null) { + len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP; + } else { + len += OPSize.PUSH + modTLen + OPSize.JUMP; + } + } else { + len += OPSize.JUMP + modTLen + OPSize.PUSH; + } + + } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */ + len = OPSize.JUMP + tlen; + } else if 
(!infinite && qn.greedy && + (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { + len = tlen * qn.lower; + len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower); + } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ + len = OPSize.PUSH + OPSize.JUMP + tlen; + } else { + len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; + } + return len; + } + + @Override + protected void compileNonCECQuantifierNode(QuantifierNode qn) { + boolean infinite = isRepeatInfinite(qn.upper); + int emptyInfo = qn.targetEmptyInfo; + + int tlen = compileLengthTree(qn.target); + + if (qn.isAnyCharStar()) { + compileTreeNTimes(qn.target, qn.lower); + if (qn.nextHeadExact != null) { + if (isMultiline(regex.options)) { + addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); + } else { + addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT); + } + StringNode sn = (StringNode)qn.nextHeadExact; + addChars(sn.chars, sn.p, 1); + return; + } else { + if (isMultiline(regex.options)) { + addOpcode(OPCode.ANYCHAR_ML_STAR); + } else { + addOpcode(OPCode.ANYCHAR_STAR); + } + return; + } + } + + int modTLen; + if (emptyInfo != 0) { + modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); + } else { + modTLen = tlen; + } + if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (qn.greedy) { + if (qn.headExact != null) { + addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1); + } else if (qn.nextHeadExact != null) { + addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT); + } else { + addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH); + } + } else { + addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP); + } + } else { + compileTreeNTimes(qn.target, qn.lower); + } + + if (qn.greedy) { + if (qn.headExact != null) { + addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP); + StringNode sn = (StringNode)qn.headExact; + addChars(sn.chars, 
sn.p, 1); + compileTreeEmptyCheck(qn.target, emptyInfo); + addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1)); + } else if (qn.nextHeadExact != null) { + addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP); + StringNode sn = (StringNode)qn.nextHeadExact; + addChars(sn.chars, sn.p, 1); + compileTreeEmptyCheck(qn.target, emptyInfo); + addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT)); + } else { + addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); + compileTreeEmptyCheck(qn.target, emptyInfo); + addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH)); + } + } else { + addOpcodeRelAddr(OPCode.JUMP, modTLen); + compileTreeEmptyCheck(qn.target, emptyInfo); + addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); + } + } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */ + addOpcodeRelAddr(OPCode.JUMP, tlen); + compileTree(qn.target); + } else if (!infinite && qn.greedy && + (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn.upper - qn.lower; + compileTreeNTimes(qn.target, qn.lower); + + for (int i=0; i<n; i++) { + addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH); + compileTree(qn.target); + } + } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' 
*/ + addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); + addOpcodeRelAddr(OPCode.JUMP, tlen); + compileTree(qn.target); + } else { + compileRangeRepeatNode(qn, modTLen, emptyInfo); + } + } + + private int compileLengthOptionNode(EncloseNode node) { + int prev = regex.options; + regex.options = node.option; + int tlen = compileLengthTree(node.target); + regex.options = prev; + + if (isDynamic(prev ^ node.option)) { + return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION; + } else { + return tlen; + } + } + + @Override + protected void compileOptionNode(EncloseNode node) { + int prev = regex.options; + + if (isDynamic(prev ^ node.option)) { + addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option); + addOpcodeOption(OPCode.SET_OPTION, prev); + addOpcode(OPCode.FAIL); + } + + regex.options = node.option; + compileTree(node.target); + regex.options = prev; + + if (isDynamic(prev ^ node.option)) { + addOpcodeOption(OPCode.SET_OPTION, prev); + } + } + + private int compileLengthEncloseNode(EncloseNode node) { + if (node.isOption()) { + return compileLengthOptionNode(node); + } + + int tlen; + if (node.target != null) { + tlen = compileLengthTree(node.target); + } else { + tlen = 0; + } + + int len; + switch (node.type) { + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL && node.isCalled()) { + len = OPSize.MEMORY_START_PUSH + tlen + OPSize.CALL + OPSize.JUMP + OPSize.RETURN; + if (bsAt(regex.btMemEnd, node.regNum)) { + len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH; + } else { + len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END; + } + } else { // USE_SUBEXP_CALL + if (bsAt(regex.btMemStart, node.regNum)) { + len = OPSize.MEMORY_START_PUSH; + } else { + len = OPSize.MEMORY_START; + } + len += tlen + (bsAt(regex.btMemEnd, node.regNum) ? 
OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END); + } + break; + + case EncloseType.STOP_BACKTRACK: + if (node.isStopBtSimpleRepeat()) { + QuantifierNode qn = (QuantifierNode)node.target; + tlen = compileLengthTree(qn.target); + len = tlen * qn.lower + OPSize.PUSH + tlen + OPSize.POP + OPSize.JUMP; + } else { + len = OPSize.PUSH_STOP_BT + tlen + OPSize.POP_STOP_BT; + } + break; + + default: + newInternalException(ERR_PARSER_BUG); + return 0; // not reached + } // switch + return len; + } + + @Override + protected void compileEncloseNode(EncloseNode node) { + int len; + switch (node.type) { + case EncloseType.MEMORY: + if (Config.USE_SUBEXP_CALL) { + if (node.isCalled()) { + addOpcode(OPCode.CALL); + node.callAddr = codeLength + OPSize.ABSADDR + OPSize.JUMP; + node.setAddrFixed(); + addAbsAddr(node.callAddr); + len = compileLengthTree(node.target); + len += OPSize.MEMORY_START_PUSH + OPSize.RETURN; + if (bsAt(regex.btMemEnd, node.regNum)) { + len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH; + } else { + len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END; + } + addOpcodeRelAddr(OPCode.JUMP, len); + } + } // USE_SUBEXP_CALL + + if (bsAt(regex.btMemStart, node.regNum)) { + addOpcode(OPCode.MEMORY_START_PUSH); + } else { + addOpcode(OPCode.MEMORY_START); + } + + addMemNum(node.regNum); + compileTree(node.target); + + if (Config.USE_SUBEXP_CALL && node.isCalled()) { + if (bsAt(regex.btMemEnd, node.regNum)) { + addOpcode(node.isRecursion() ? OPCode.MEMORY_END_PUSH_REC : OPCode.MEMORY_END_PUSH); + } else { + addOpcode(node.isRecursion() ? 
OPCode.MEMORY_END_REC : OPCode.MEMORY_END); + } + addMemNum(node.regNum); + addOpcode(OPCode.RETURN); + } else { // USE_SUBEXP_CALL + if (bsAt(regex.btMemEnd, node.regNum)) { + addOpcode(OPCode.MEMORY_END_PUSH); + } else { + addOpcode(OPCode.MEMORY_END); + } + addMemNum(node.regNum); + } + break; + + case EncloseType.STOP_BACKTRACK: + if (node.isStopBtSimpleRepeat()) { + QuantifierNode qn = (QuantifierNode)node.target; + + compileTreeNTimes(qn.target, qn.lower); + + len = compileLengthTree(qn.target); + addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP); + compileTree(qn.target); + addOpcode(OPCode.POP); + addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP)); + } else { + addOpcode(OPCode.PUSH_STOP_BT); + compileTree(node.target); + addOpcode(OPCode.POP_STOP_BT); + } + break; + + default: + newInternalException(ERR_PARSER_BUG); + break; + } // switch + } + + private int compileLengthAnchorNode(AnchorNode node) { + int tlen; + if (node.target != null) { + tlen = compileLengthTree(node.target); + } else { + tlen = 0; + } + + int len; + switch (node.type) { + case AnchorType.PREC_READ: + len = OPSize.PUSH_POS + tlen + OPSize.POP_POS; + break; + + case AnchorType.PREC_READ_NOT: + len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS; + break; + + case AnchorType.LOOK_BEHIND: + len = OPSize.LOOK_BEHIND + tlen; + break; + + case AnchorType.LOOK_BEHIND_NOT: + len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = OPSize.OPCODE; + break; + } // switch + return len; + } + + @Override + protected void compileAnchorNode(AnchorNode node) { + int len; + int n; + + switch (node.type) { + case AnchorType.BEGIN_BUF: addOpcode(OPCode.BEGIN_BUF); break; + case AnchorType.END_BUF: addOpcode(OPCode.END_BUF); break; + case AnchorType.BEGIN_LINE: addOpcode(OPCode.BEGIN_LINE); break; + case AnchorType.END_LINE: addOpcode(OPCode.END_LINE); break; + case AnchorType.SEMI_END_BUF: addOpcode(OPCode.SEMI_END_BUF); 
break; + case AnchorType.BEGIN_POSITION: addOpcode(OPCode.BEGIN_POSITION); break; + + case AnchorType.WORD_BOUND: + addOpcode(OPCode.WORD_BOUND); + break; + + case AnchorType.NOT_WORD_BOUND: + addOpcode(OPCode.NOT_WORD_BOUND); + break; + + case AnchorType.WORD_BEGIN: + if (Config.USE_WORD_BEGIN_END) + addOpcode(OPCode.WORD_BEGIN); + break; + + case AnchorType.WORD_END: + if (Config.USE_WORD_BEGIN_END) + addOpcode(OPCode.WORD_END); + break; + + case AnchorType.PREC_READ: + addOpcode(OPCode.PUSH_POS); + compileTree(node.target); + addOpcode(OPCode.POP_POS); + break; + + case AnchorType.PREC_READ_NOT: + len = compileLengthTree(node.target); + addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS); + compileTree(node.target); + addOpcode(OPCode.FAIL_POS); + break; + + case AnchorType.LOOK_BEHIND: + addOpcode(OPCode.LOOK_BEHIND); + if (node.charLength < 0) { + n = analyser.getCharLengthTree(node.target); + if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + } else { + n = node.charLength; + } + addLength(n); + compileTree(node.target); + break; + + case AnchorType.LOOK_BEHIND_NOT: + len = compileLengthTree(node.target); + addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT); + if (node.charLength < 0) { + n = analyser.getCharLengthTree(node.target); + if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); + } else { + n = node.charLength; + } + addLength(n); + compileTree(node.target); + addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT); + break; + + default: + newInternalException(ERR_PARSER_BUG); + } // switch + } + + private int compileLengthTree(Node node) { + int len = 0; + + switch (node.getType()) { + case NodeType.LIST: + ConsAltNode lin = (ConsAltNode)node; + do { + len += compileLengthTree(lin.car); + } while ((lin = lin.cdr) != null); + break; + + case NodeType.ALT: + ConsAltNode aln = (ConsAltNode)node; + int n = 0; + do { + len += compileLengthTree(aln.car); + n++; + } while 
((aln = aln.cdr) != null); + len += (OPSize.PUSH + OPSize.JUMP) * (n - 1); + break; + + case NodeType.STR: + StringNode sn = (StringNode)node; + if (sn.isRaw()) { + len = compileLengthStringRawNode(sn); + } else { + len = compileLengthStringNode(sn); + } + break; + + case NodeType.CCLASS: + len = compileLengthCClassNode((CClassNode)node); + break; + + case NodeType.CTYPE: + case NodeType.CANY: + len = OPSize.OPCODE; + break; + + case NodeType.BREF: + BackRefNode br = (BackRefNode)node; + + if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) { + len = OPSize.OPCODE + OPSize.OPTION + OPSize.LENGTH + + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum); + } else { // USE_BACKREF_AT_LEVEL + if (br.backNum == 1) { + len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2) + ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM)); + } else { + len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum); + } + } + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + len = OPSize.CALL; + break; + } // USE_SUBEXP_CALL + break; + + case NodeType.QTFR: + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + len = compileCECLengthQuantifierNode((QuantifierNode)node); + } else { + len = compileNonCECLengthQuantifierNode((QuantifierNode)node); + } + break; + + case NodeType.ENCLOSE: + len = compileLengthEncloseNode((EncloseNode)node); + break; + + case NodeType.ANCHOR: + len = compileLengthAnchorNode((AnchorNode)node); + break; + + default: + newInternalException(ERR_PARSER_BUG); + + } //switch + return len; + } + + private void ensure(int size) { + if (size >= code.length) { + int length = code.length << 1; + while (length <= size) length <<= 1; + int[]tmp = new int[length]; + System.arraycopy(code, 0, tmp, 0, code.length); + code = tmp; + } + } + + private void addInt(int i) { + if (codeLength >= code.length) { + int[]tmp = new int[code.length << 1]; + System.arraycopy(code, 0, tmp, 0, code.length); + code = tmp; + } + code[codeLength++] = i; + } + + void setInt(int i, 
int offset) { + ensure(offset); + regex.code[offset] = i; + } + + private void addObject(Object o) { + if (regex.operands == null) { + regex.operands = new Object[4]; + } else if (regex.operandLength >= regex.operands.length) { + Object[]tmp = new Object[regex.operands.length << 1]; + System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length); + regex.operands = tmp; + } + addInt(regex.operandLength); + regex.operands[regex.operandLength++] = o; + } + + private void addChars(char[] chars, int p ,int length) { + ensure(codeLength + length); + int end = p + length; + + while (p < end) code[codeLength++] = chars[p++]; + } + + private void addInts(int[]ints, int length) { + ensure(codeLength + length); + System.arraycopy(ints, 0, code, codeLength, length); + codeLength += length; + } + + private void addOpcode(int opcode) { + addInt(opcode); + + switch(opcode) { + case OPCode.ANYCHAR_STAR: + case OPCode.ANYCHAR_STAR_SB: + case OPCode.ANYCHAR_ML_STAR: + case OPCode.ANYCHAR_ML_STAR_SB: + case OPCode.ANYCHAR_STAR_PEEK_NEXT: + case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: + case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: + case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: + case OPCode.STATE_CHECK_ANYCHAR_STAR: + case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: + case OPCode.STATE_CHECK_ANYCHAR_ML_STAR: + case OPCode.MEMORY_START_PUSH: + case OPCode.MEMORY_END_PUSH: + case OPCode.MEMORY_END_PUSH_REC: + case OPCode.MEMORY_END_REC: + case OPCode.NULL_CHECK_START: + case OPCode.NULL_CHECK_END_MEMST_PUSH: + case OPCode.PUSH: + case OPCode.STATE_CHECK_PUSH: + case OPCode.STATE_CHECK_PUSH_OR_JUMP: + case OPCode.STATE_CHECK: + case OPCode.PUSH_OR_JUMP_EXACT1: + case OPCode.PUSH_IF_PEEK_NEXT: + case OPCode.REPEAT: + case OPCode.REPEAT_NG: + case OPCode.REPEAT_INC_SG: + case OPCode.REPEAT_INC_NG: + case OPCode.REPEAT_INC_NG_SG: + case OPCode.PUSH_POS: + case OPCode.PUSH_POS_NOT: + case OPCode.PUSH_STOP_BT: + case OPCode.PUSH_LOOK_BEHIND_NOT: + case OPCode.CALL: + case OPCode.RETURN: // it will appear only 
with CALL though + regex.stackNeeded = true; + } + } + + private void addStateCheckNum(int num) { + addInt(num); + } + + private void addRelAddr(int addr) { + addInt(addr); + } + + private void addAbsAddr(int addr) { + addInt(addr); + } + + private void addLength(int length) { + addInt(length); + } + + private void addMemNum(int num) { + addInt(num); + } + + private void addPointer(Object o) { + addObject(o); + } + + private void addOption(int option) { + addInt(option); + } + + private void addOpcodeRelAddr(int opcode, int addr) { + addOpcode(opcode); + addRelAddr(addr); + } + + private void addOpcodeOption(int opcode, int option) { + addOpcode(opcode); + addOption(option); + } + + private void addTemplate(char[] chars) { + if (templateNum == 0) { + templates = new char[2][]; + } else if (templateNum == templates.length) { + char[][] tmp = new char[templateNum * 2][]; + System.arraycopy(templates, 0, tmp, 0, templateNum); + templates = tmp; + } + templates[templateNum++] = chars; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/AsmCompiler.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,109 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; + +final class AsmCompiler extends AsmCompilerSupport { + + public AsmCompiler(Analyser analyser) { + super(analyser); + } + + @Override + protected void prepare() { + REG_NUM++; + prepareMachine(); + prepareMachineInit(); + prepareMachineMatch(); + + prepareFactory(); + prepareFactoryInit(); + } + + @Override + protected void finish() { + setupFactoryInit(); + + setupMachineInit(); + setupMachineMatch(); + + setupClasses(); + } + + @Override + protected void compileAltNode(ConsAltNode node) { + } + + @Override + protected void addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) { + String template = installTemplate(chars, p, strLength); + } + + @Override + protected void compileCClassNode(CClassNode node) { + if (node.bs != null) { + String bitsetName = installBitSet(node.bs.bits); + } + } + + @Override + protected void compileCTypeNode(CTypeNode node) { + } + + @Override + protected void compileAnyCharNode() { + } + + @Override + protected void compileBackrefNode(BackRefNode node) { + } + + @Override + protected void compileCallNode(CallNode node) { + } + + @Override + protected void compileCECQuantifierNode(QuantifierNode node) { + } + + @Override + protected void compileNonCECQuantifierNode(QuantifierNode node) { + } + + @Override + protected void compileOptionNode(EncloseNode node) { + } + + @Override + protected void compileEncloseNode(EncloseNode node) 
{ + } + + @Override + protected void compileAnchorNode(AnchorNode node) { + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/AsmCompilerSupport.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,267 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import java.io.FileOutputStream; +import java.io.IOException; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.AsmConstants; +import jdk.internal.org.objectweb.asm.ClassWriter; +import jdk.internal.org.objectweb.asm.MethodVisitor; +import jdk.internal.org.objectweb.asm.Opcodes; + +abstract class AsmCompilerSupport extends Compiler implements Opcodes, AsmConstants { + protected ClassWriter factory; // matcher allocator, also bit set, code rage and string template container + protected MethodVisitor factoryInit;// factory constructor + protected String factoryName; + + protected ClassWriter machine; // matcher + protected MethodVisitor machineInit;// matcher constructor + protected MethodVisitor match; // actual matcher implementation (the matchAt method) + protected String machineName; + + // we will? try to manage visitMaxs ourselves for efficiency + protected int maxStack = 1; + protected int maxVars = LAST_INDEX; + + // for field generation + protected int bitsets, ranges, templates; + + // simple class name postfix scheme for now + static int REG_NUM = 0; + + // dummy class loader for now + private static final class DummyClassLoader extends ClassLoader { + public Class<?> defineClass(String name, byte[] bytes) { + return super.defineClass(name, bytes, 0, bytes.length); + } + }; + + private static final DummyClassLoader loader = new DummyClassLoader(); + + AsmCompilerSupport(Analyser analyser) { + super(analyser); + } + + protected final void prepareFactory() { + factory = new ClassWriter(ClassWriter.COMPUTE_MAXS); + factoryName = "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory" + REG_NUM; + + factory.visit(V1_4, ACC_PUBLIC + ACC_FINAL, factoryName, null, "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory", null); + + MethodVisitor create = factory.visitMethod(ACC_SYNTHETIC, "create", "(Lorg/joni/Regex;[BII)Lorg/joni/Matcher;", null, null); + create.visitTypeInsn(NEW, 
machineName); + create.visitInsn(DUP); // instance + create.visitVarInsn(ALOAD, 1); // Regex + create.visitVarInsn(ALOAD, 2); // bytes[] + create.visitVarInsn(ILOAD, 3); // p + create.visitVarInsn(ILOAD, 4); // end + create.visitMethodInsn(INVOKESPECIAL, machineName, "<init>", "(Lorg/joni/Regex;[BII)V"); + create.visitInsn(ARETURN); + create.visitMaxs(0, 0); + //create.visitMaxs(6, 5); + create.visitEnd(); + } + + protected final void prepareFactoryInit() { + factoryInit = factory.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null); + factoryInit.visitVarInsn(ALOAD, 0); + factoryInit.visitMethodInsn(INVOKESPECIAL, "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory", "<init>", "()V"); + } + + protected final void setupFactoryInit() { + factoryInit.visitInsn(RETURN); + factoryInit.visitMaxs(0, 0); + //init.visitMaxs(1, 1); + factoryInit.visitEnd(); + } + + protected final void prepareMachine() { + machine = new ClassWriter(ClassWriter.COMPUTE_MAXS); + machineName = "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine" + REG_NUM; + } + + protected final void prepareMachineInit() { + machine.visit(V1_4, ACC_PUBLIC + ACC_FINAL, machineName, null, "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine", null); + machineInit = machine.visitMethod(ACC_PROTECTED, "<init>", "(Lorg/joni/Regex;[BII)V", null, null); + machineInit.visitVarInsn(ALOAD, THIS); // this + machineInit.visitVarInsn(ALOAD, 1); // Regex + machineInit.visitVarInsn(ALOAD, 2); // bytes[] + machineInit.visitVarInsn(ILOAD, 3); // p + machineInit.visitVarInsn(ILOAD, 4); // end + machineInit.visitMethodInsn(INVOKESPECIAL, "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine", "<init>", "(Lorg/joni/Regex;[BII)V"); + } + + protected final void setupMachineInit() { + if (bitsets + ranges + templates > 0) { // ok, some of these are in use, we'd like to cache the factory + machine.visitField(ACC_PRIVATE + ACC_FINAL, "factory", "L" + factoryName + ";", null, null); + machineInit.visitVarInsn(ALOAD, 
THIS); // this + machineInit.visitVarInsn(ALOAD, 1); // this, Regex + machineInit.visitFieldInsn(GETFIELD, "jdk/nashorn/internal/runtime/regexp/joni/Regex", "factory", "Lorg/joni/MatcherFactory;"); // this, factory + machineInit.visitTypeInsn(CHECKCAST, factoryName); + machineInit.visitFieldInsn(PUTFIELD, machineName, "factory", "L" + factoryName + ";"); // [] + } + + machineInit.visitInsn(RETURN); + machineInit.visitMaxs(0, 0); + //init.visitMaxs(5, 5); + machineInit.visitEnd(); + } + + protected final void prepareMachineMatch() { + match = machine.visitMethod(ACC_SYNTHETIC, "matchAt", "(III)I", null, null); + move(S, SSTART); // s = sstart + load("bytes", "[B"); // + astore(BYTES); // byte[]bytes = this.bytes + } + + protected final void setupMachineMatch() { + match.visitInsn(ICONST_M1); + match.visitInsn(IRETURN); + + match.visitMaxs(maxStack, maxVars); + match.visitEnd(); + } + + protected final void setupClasses() { + byte[]factoryCode = factory.toByteArray(); + byte[]machineCode = machine.toByteArray(); + + if (Config.DEBUG_ASM) { + try { + FileOutputStream fos; + fos = new FileOutputStream(factoryName.substring(factoryName.lastIndexOf('/') + 1) + ".class"); + fos.write(factoryCode); + fos.close(); + fos = new FileOutputStream(machineName.substring(machineName.lastIndexOf('/') + 1) + ".class"); + fos.write(machineCode); + fos.close(); + } catch (IOException ioe) { + ioe.printStackTrace(Config.err); + } + } + + loader.defineClass(machineName.replace('/', '.'), machineCode); + Class<?> cls = loader.defineClass(factoryName.replace('/', '.'), factoryCode); + try { + regex.factory = (MatcherFactory)cls.newInstance(); + } catch(Exception e) { + e.printStackTrace(Config.err); + } + } + + protected final void aload(int var) { + match.visitVarInsn(ALOAD, var); + } + + protected final void astore(int var) { + match.visitVarInsn(ASTORE, var); + } + + protected final void loadThis() { + match.visitVarInsn(ALOAD, THIS); + } + + protected final void load(int var) { + 
match.visitVarInsn(ILOAD, var); + } + + protected final void store(int var) { + match.visitVarInsn(ISTORE, var); + } + + protected final void move(int to, int from) { + load(from); + store(to); + } + + protected final void load(String field, String singature) { + loadThis(); + match.visitFieldInsn(GETFIELD, machineName, field, singature); + } + + protected final void load(String field) { + load(field, "I"); + } + + protected final void store(String field, String singature) { + loadThis(); + match.visitFieldInsn(PUTFIELD, machineName, field, singature); + } + + protected final void store(String field) { + store(field, "I"); + } + + protected final String installTemplate(char[] arr, int p, int length) { + String templateName = TEMPLATE + ++templates; + installArray(templateName, arr, p, length); + return templateName; + } + + protected final String installCodeRange(int[]arr) { + String coreRangeName = CODERANGE + ++ranges; + installArray(coreRangeName, arr); + return coreRangeName; + } + + protected final String installBitSet(int[]arr) { + String bitsetName = BITSET + ++bitsets; + installArray(bitsetName, arr); + return bitsetName; + } + + private void installArray(String name, int[]arr) { + factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[I", null, null); + factoryInit.visitVarInsn(ALOAD, THIS); // this; + loadInt(factoryInit, arr.length); // this, length + factoryInit.visitIntInsn(NEWARRAY, T_INT); // this, arr + for (int i=0;i < arr.length; i++) buildArray(i, arr[i], IASTORE); + factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[I"); + } + + private void installArray(String name, char[]arr, int p, int length) { + factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[B", null, null); + factoryInit.visitVarInsn(ALOAD, THIS); // this; + loadInt(factoryInit, arr.length); // this, length + factoryInit.visitIntInsn(NEWARRAY, T_BYTE); // this, arr + for (int i=p, j=0; i < p + length; i++, j++) buildArray(j, arr[i] & 0xff, BASTORE); + 
factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[B"); + } + + private void buildArray(int index, int value, int type) { + factoryInit.visitInsn(DUP); // ... arr, arr + loadInt(factoryInit, index); // ... arr, arr, index + loadInt(factoryInit, value); // ... arr, arr, index, value + factoryInit.visitInsn(type); // ... arr + } + + private void loadInt(MethodVisitor mv, int value) { + if (value >= -1 && value <= 5) { + mv.visitInsn(value + ICONST_0); // ICONST_0 == 3 + } else if (value >= 6 && value <= 127 || value >= -128 && value <= -2) { + mv.visitIntInsn(BIPUSH, value); + } else if (value >= 128 && value <= 32767 || value >= -32768 && value <= -129) { + mv.visitIntInsn(SIPUSH, value); + } else { + mv.visitLdcInsn(new Integer(value)); + } + } +}
/**
 * Fixed-size set over the {@value #SINGLE_BYTE_SIZE} single-byte code points,
 * stored as {@code BITSET_SIZE} 32-bit words ("rooms").
 *
 * <p>Positions are not range-checked: a position outside
 * {@code [0, SINGLE_BYTE_SIZE)} makes the single-position methods throw
 * {@link ArrayIndexOutOfBoundsException}.
 */
public final class BitSet {
    static final int BITS_PER_BYTE = 8;
    public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE);
    private static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE;
    static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM);
    static final int ROOM_SHIFT = log2(BITS_IN_ROOM);
    /** Mask selecting a position's bit index inside its room. */
    private static final int ROOM_MASK = BITS_IN_ROOM - 1;

    final int[] bits = new int[BITSET_SIZE];

    private static final int BITS_TO_STRING_WRAP = 4;

    /**
     * Renders all positions as '0'/'1' characters, split over
     * {@code BITS_TO_STRING_WRAP} lines.
     */
    @Override
    public String toString() {
        final int perLine = SINGLE_BYTE_SIZE / BITS_TO_STRING_WRAP;
        StringBuilder buffer = new StringBuilder();
        buffer.append("BitSet");
        for (int i = 0; i < SINGLE_BYTE_SIZE; i++) {
            if (i % perLine == 0) {
                buffer.append("\n ");
            }
            buffer.append(at(i) ? '1' : '0');
        }
        return buffer.toString();
    }

    /** Returns whether position {@code pos} is in the set. */
    public boolean at(int pos) {
        return (bits[pos >>> ROOM_SHIFT] & bit(pos)) != 0;
    }

    /** Adds position {@code pos} to the set. */
    public void set(int pos) {
        bits[pos >>> ROOM_SHIFT] |= bit(pos);
    }

    /** Removes position {@code pos} from the set. */
    public void clear(int pos) {
        bits[pos >>> ROOM_SHIFT] &= ~bit(pos);
    }

    /** Toggles membership of position {@code pos}. */
    public void invert(int pos) {
        bits[pos >>> ROOM_SHIFT] ^= bit(pos);
    }

    /** Removes every position from the set. */
    public void clear() {
        for (int i = 0; i < BITSET_SIZE; i++) {
            bits[i] = 0;
        }
    }

    /** Returns {@code true} when no position is in the set. */
    public boolean isEmpty() {
        for (int i = 0; i < BITSET_SIZE; i++) {
            if (bits[i] != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds every position in {@code [from, to]}; positions at or above
     * {@code SINGLE_BYTE_SIZE} are silently skipped.
     */
    public void setRange(int from, int to) {
        for (int i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
            set(i);
        }
    }

    /** Adds every position to the set. */
    public void setAll() {
        for (int i = 0; i < BITSET_SIZE; i++) {
            bits[i] = ~0;
        }
    }

    /** Replaces the set by its complement. */
    public void invert() {
        for (int i = 0; i < BITSET_SIZE; i++) {
            bits[i] = ~bits[i];
        }
    }

    /** Stores the complement of this set into {@code to}; this set is unchanged. */
    public void invertTo(BitSet to) {
        for (int i = 0; i < BITSET_SIZE; i++) {
            to.bits[i] = ~bits[i];
        }
    }

    /** Intersects this set with {@code other} in place. */
    public void and(BitSet other) {
        for (int i = 0; i < BITSET_SIZE; i++) {
            bits[i] &= other.bits[i];
        }
    }

    /** Unions {@code other} into this set in place. */
    public void or(BitSet other) {
        for (int i = 0; i < BITSET_SIZE; i++) {
            bits[i] |= other.bits[i];
        }
    }

    /** Overwrites this set with the contents of {@code other}. */
    public void copy(BitSet other) {
        System.arraycopy(other.bits, 0, bits, 0, BITSET_SIZE);
    }

    /** Returns the number of positions in the set. */
    public int numOn() {
        int num = 0;
        for (int i = 0; i < BITSET_SIZE; i++) {
            num += Integer.bitCount(bits[i]);
        }
        return num;
    }

    /**
     * Returns the single-bit mask of {@code pos} inside its room.
     * The index is reduced by the room width explicitly; the previous
     * {@code pos % SINGLE_BYTE_SIZE} produced the same masks only because Java
     * uses just the low five bits of an int shift distance.
     */
    static int bit(int pos) {
        return 1 << (pos & ROOM_MASK);
    }

    /** Floor of the base-2 logarithm of {@code n}; 0 for {@code n <= 1}. */
    private static int log2(int n) {
        int log = 0;
        while ((n >>>= 1) != 0) {
            log++;
        }
        return log;
    }

}
/**
 * Static helpers for a 32-flag status word kept in a plain {@code int}.
 *
 * <p>Flag numbers at or above {@link #BIT_STATUS_BITS_NUM} do not have a bit of
 * their own; {@link #bsAt} and {@link #bsOnAt} map all of them onto bit 0, while
 * {@link #bsOnAtSimple} ignores them.
 */
final class BitStatus {
    public static final int BIT_STATUS_BITS_NUM = 4 * 8;

    /** Not instantiable: this class only groups static helpers. */
    private BitStatus() {
    }

    /** Returns the status word with no flag set. */
    public static int bsClear() {
        return 0;
    }

    /** Returns the status word with every flag set. */
    public static int bsAll() {
        return -1;
    }

    /**
     * Tests flag {@code n} of {@code stats}; for {@code n >= BIT_STATUS_BITS_NUM}
     * bit 0 is tested instead.
     */
    public static boolean bsAt(int stats, int n) {
        return (n < BIT_STATUS_BITS_NUM ? stats & (1 << n) : (stats & 1)) != 0;
    }

    /**
     * Returns {@code stats} with flag {@code n} set; for
     * {@code n >= BIT_STATUS_BITS_NUM} bit 0 is set instead.
     */
    public static int bsOnAt(int stats, int n) {
        if (n < BIT_STATUS_BITS_NUM) {
            stats |= (1 << n);
        } else {
            stats |= 1;
        }
        return stats;
    }

    /**
     * Returns {@code stats} with flag {@code n} set; for
     * {@code n >= BIT_STATUS_BITS_NUM} the word is returned unchanged.
     */
    public static int bsOnAtSimple(int stats, int n) {
        if (n < BIT_STATUS_BITS_NUM) {
            stats |= (1 << n);
        }
        return stats;
    }

    /**
     * Returns {@code v} with the bits of mask {@code f} cleared when
     * {@code negative} is {@code true}, and set otherwise.
     */
    public static int bsOnOff(int v, int f, boolean negative) {
        if (negative) {
            v &= ~f;
        } else {
            v |= f;
        }
        return v;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,1462 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; +import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl; +import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; + +class ByteCodeMachine extends StackMachine { + private int bestLen; // return value + private int s = 0; // current char + + private int range; // right range + private int sprev; + private int sstart; + private int sbegin; + + private final int[]code; // byte code + private int ip; // instruction pointer + + ByteCodeMachine(Regex regex, char[] chars, int p, int end) { + super(regex, chars, p, end); + this.code = regex.code; + } + + protected int stkp; // a temporary + private boolean makeCaptureHistoryTree(CaptureTreeNode node) { + //CaptureTreeNode child; + int k = stkp; + //int k = kp; + + while (k < stk) { + StackEntry e = stack[k]; + if (e.type == MEM_START) { + int n = e.getMemNum(); + if (n <= Config.MAX_CAPTURE_HISTORY_GROUP && bsAt(regex.captureHistory, n)) { + CaptureTreeNode child = new 
CaptureTreeNode(); + child.group = n; + child.beg = e.getMemPStr() - str; + node.addChild(child); + stkp = k + 1; + if (makeCaptureHistoryTree(child)) return true; + + k = stkp; + child.end = e.getMemPStr() - str; + } + } else if (e.type == MEM_END) { + if (e.getMemNum() == node.group) { + node.end = e.getMemPStr() - str; + stkp = k; + return false; + } + } + } + return true; /* 1: root node ending. */ + } + + private void checkCaptureHistory(Region region) { + CaptureTreeNode node; + if (region.historyRoot == null) { + node = region.historyRoot = new CaptureTreeNode(); + } else { + node = region.historyRoot; + node.clear(); + } + + // was clear ??? + node.group = 0; + node.beg = sstart - str; + node.end = s - str; + + stkp = 0; + makeCaptureHistoryTree(region.historyRoot); + } + + private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) { + + int s2 = ps2.value; + int end1 = s1 + mbLen; + + while (s1 < end1) { + char c1 = Character.toLowerCase(chars[s1++]); + char c2 = Character.toLowerCase(chars[s2++]); + + if (c1 != c2) { + return false; + } + } + ps2.value = s2; + return true; + } + + private void debugMatchBegin() { + Config.log.println("match_at: " + + "str: " + str + + ", end: " + end + + ", start: " + this.sstart + + ", sprev: " + this.sprev); + Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); + } + + private void debugMatchLoop() { + if (Config.DEBUG_MATCH) { + Config.log.printf("%4d", (s - str)).print("> \""); + int q, i; + for (i=0, q=s; i<7 && q<end && s>=0; i++) { + if (q < end) Config.log.print(new String(new char[]{chars[q++]})); + } + String str = q < end ? 
"...\"" : "\""; + q += str.length(); + Config.log.print(str); + for (i=0; i<20-(q-s);i++) Config.log.print(" "); + StringBuilder sb = new StringBuilder(); + new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); + Config.log.println(sb.toString()); + } + } + + protected final int matchAt(int range, int sstart, int sprev) { + this.range = range; + this.sstart = sstart; + this.sprev = sprev; + + stk = 0; + ip = 0; + + if (Config.DEBUG_MATCH) debugMatchBegin(); + + init(); + + bestLen = -1; + s = sstart; + + final int[]code = this.code; + while (true) { + if (Config.DEBUG_MATCH) debugMatchLoop(); + + sbegin = s; + switch (code[ip++]) { + case OPCode.END: if (opEnd()) return finish(); break; + case OPCode.EXACT1: opExact1(); break; + case OPCode.EXACT2: opExact2(); continue; + case OPCode.EXACT3: opExact3(); continue; + case OPCode.EXACT4: opExact4(); continue; + case OPCode.EXACT5: opExact5(); continue; + case OPCode.EXACTN: opExactN(); continue; + + case OPCode.EXACTMB2N1: opExactMB2N1(); break; + case OPCode.EXACTMB2N2: opExactMB2N2(); continue; + case OPCode.EXACTMB2N3: opExactMB2N3(); continue; + case OPCode.EXACTMB2N: opExactMB2N(); continue; + case OPCode.EXACTMB3N: opExactMB3N(); continue; + case OPCode.EXACTMBN: opExactMBN(); continue; + + case OPCode.EXACT1_IC: opExact1IC(); break; + case OPCode.EXACTN_IC: opExactNIC(); continue; + + case OPCode.CCLASS: opCClass(); break; + case OPCode.CCLASS_MB: opCClassMB(); break; + case OPCode.CCLASS_MIX: opCClassMIX(); break; + case OPCode.CCLASS_NOT: opCClassNot(); break; + case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; + case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; + case OPCode.CCLASS_NODE: opCClassNode(); break; + + case OPCode.ANYCHAR: opAnyChar(); break; + case OPCode.ANYCHAR_ML: opAnyCharML(); break; + case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; + case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; + case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; + case 
OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; + case OPCode.STATE_CHECK_ANYCHAR_STAR: opStateCheckAnyCharStar(); break; + case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:opStateCheckAnyCharMLStar();break; + + case OPCode.WORD: opWord(); break; + case OPCode.NOT_WORD: opNotWord(); break; + case OPCode.WORD_BOUND: opWordBound(); continue; + case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; + case OPCode.WORD_BEGIN: opWordBegin(); continue; + case OPCode.WORD_END: opWordEnd(); continue; + + case OPCode.BEGIN_BUF: opBeginBuf(); continue; + case OPCode.END_BUF: opEndBuf(); continue; + case OPCode.BEGIN_LINE: opBeginLine(); continue; + case OPCode.END_LINE: opEndLine(); continue; + case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; + case OPCode.BEGIN_POSITION: opBeginPosition(); continue; + + case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; + case OPCode.MEMORY_START: opMemoryStart(); continue; + case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; + case OPCode.MEMORY_END: opMemoryEnd(); continue; + case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; + case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; + + case OPCode.BACKREF1: opBackRef1(); continue; + case OPCode.BACKREF2: opBackRef2(); continue; + case OPCode.BACKREFN: opBackRefN(); continue; + case OPCode.BACKREFN_IC: opBackRefNIC(); continue; + case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; + case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; + case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; + + case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; + case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; + case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; + case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue; + + case OPCode.JUMP: opJump(); continue; + case OPCode.PUSH: opPush(); continue; + + // CEC + case OPCode.STATE_CHECK_PUSH: opStateCheckPush(); continue; + case 
OPCode.STATE_CHECK_PUSH_OR_JUMP: opStateCheckPushOrJump(); continue; + case OPCode.STATE_CHECK: opStateCheck(); continue; + + case OPCode.POP: opPop(); continue; + case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; + case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; + + case OPCode.REPEAT: opRepeat(); continue; + case OPCode.REPEAT_NG: opRepeatNG(); continue; + case OPCode.REPEAT_INC: opRepeatInc(); continue; + case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; + case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; + case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; + + case OPCode.PUSH_POS: opPushPos(); continue; + case OPCode.POP_POS: opPopPos(); continue; + case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; + case OPCode.FAIL_POS: opFailPos(); continue; + case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; + case OPCode.POP_STOP_BT: opPopStopBT(); continue; + + case OPCode.LOOK_BEHIND: opLookBehind(); continue; + case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; + case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; + + // USE_SUBEXP_CALL + case OPCode.CALL: opCall(); continue; + case OPCode.RETURN: opReturn(); continue; + + case OPCode.FINISH: + return finish(); + + case OPCode.FAIL: opFail(); continue; + + default: + throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); + + } // main switch + } // main while + } + + private boolean opEnd() { + int n = s - sstart; + + if (n > bestLen) { + if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { + if (isFindLongest(regex.options)) { + if (n > msaBestLen) { + msaBestLen = n; + msaBestS = sstart; + } else { + // goto end_best_len; + return endBestLength(); + } + } + } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + + bestLen = n; + final Region region = msaRegion; + if (region != null) { + // USE_POSIX_REGION_OPTION ... else ... 
+ region.beg[0] = msaBegin = sstart - str; + region.end[0] = msaEnd = s - str; + for (int i = 1; i <= regex.numMem; i++) { + // opt! + if (repeatStk[memEndStk + i] != INVALID_INDEX) { + region.beg[i] = bsAt(regex.btMemStart, i) ? + stack[repeatStk[memStartStk + i]].getMemPStr() - str : + repeatStk[memStartStk + i] - str; + + + region.end[i] = bsAt(regex.btMemEnd, i) ? + stack[repeatStk[memEndStk + i]].getMemPStr() : + repeatStk[memEndStk + i] - str; + + } else { + region.beg[i] = region.end[i] = Region.REGION_NOTPOS; + } + + } + + if (Config.USE_CAPTURE_HISTORY) { + if (regex.captureHistory != 0) checkCaptureHistory(region); + } + } else { + msaBegin = sstart - str; + msaEnd = s - str; + } + } else { + Region region = msaRegion; + if (Config.USE_POSIX_API_REGION_OPTION) { + if (!isPosixRegion(regex.options)) { + if (region != null) { + region.clear(); + } else { + msaBegin = msaEnd = 0; + } + } + } else { + if (region != null) { + region.clear(); + } else { + msaBegin = msaEnd = 0; + } + } // USE_POSIX_REGION_OPTION + } + // end_best_len: + /* default behavior: return first-matching result. 
*/ + return endBestLength(); + } + + private boolean endBestLength() { + if (isFindCondition(regex.options)) { + if (isFindNotEmpty(regex.options) && s == sstart) { + bestLen = -1; + {opFail(); return false;} /* for retry */ + } + if (isFindLongest(regex.options) && s < range) { + {opFail(); return false;} /* for retry */ + } + } + // goto finish; + return true; + } + + private void opExact1() { + if (s >= range || code[ip] != chars[s++]) {opFail(); return;} + //if (s > range) {opFail(); return;} + ip++; + sprev = sbegin; // break; + } + + private void opExact2() { + if (s + 2 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + sprev = s; + ip++; s++; + } + + private void opExact3() { + if (s + 3 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + sprev = s; + ip++; s++; + } + + private void opExact4() { + if (s + 4 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + sprev = s; + ip++; s++; + } + + private void opExact5() { + if (s + 5 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + sprev = s; + ip++; s++; + } + + private void opExactN() { + int tlen = code[ip++]; + if (s + tlen > range) {opFail(); return;} + + if (Config.USE_STRING_TEMPLATES) { + char[] bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while (tlen-- > 0) if (bs[ps++] != 
chars[s++]) {opFail(); return;} + + } else { + while (tlen-- > 0) if (code[ip++] != chars[s++]) {opFail(); return;} + } + sprev = s - 1; + } + + private void opExactMB2N1() { + if (s + 2 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + sprev = sbegin; // break; + } + + private void opExactMB2N2() { + if (s + 4 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + sprev = s; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + } + + private void opExactMB2N3() { + if (s + 6 > range) {opFail(); return;} + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + sprev = s; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + } + + private void opExactMB2N() { + int tlen = code[ip++]; + if (s + tlen * 2 > range) {opFail(); return;} + + if (Config.USE_STRING_TEMPLATES) { + char[] bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while(tlen-- > 0) { + if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; + if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; + } + } else { + while(tlen-- > 0) { + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + } + } + sprev = s - 2; + } + + private void opExactMB3N() { + int tlen = code[ip++]; + if (s + tlen * 3 > range) {opFail(); return;} + + if (Config.USE_STRING_TEMPLATES) { + char[] bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while (tlen-- > 0) { + if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; 
+ if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; + if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; + } + } else { + while (tlen-- > 0) { + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + } + } + + sprev = s - 3; + } + + private void opExactMBN() { + int tlen = code[ip++]; /* mb-len */ + int tlen2= code[ip++]; /* string len */ + + tlen2 *= tlen; + if (s + tlen2 > range) {opFail(); return;} + + if (Config.USE_STRING_TEMPLATES) { + char[] bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while (tlen2-- > 0) { + if (bs[ps] != chars[s]) {opFail(); return;} + ps++; s++; + } + } else { + while (tlen2-- > 0) { + if (code[ip] != chars[s]) {opFail(); return;} + ip++; s++; + } + } + + sprev = s - tlen; + } + + private void opExact1IC() { + if (s >= range || code[ip] != Character.toLowerCase(chars[s++])) {opFail(); return;} + ip++; + sprev = sbegin; // break; + } + + private void opExactNIC() { + int tlen = code[ip++]; + if (s + tlen > range) {opFail(); return;} + + if (Config.USE_STRING_TEMPLATES) { + char[] bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while (tlen-- > 0) if (bs[ps++] != Character.toLowerCase(chars[s++])) {opFail(); return;} + } else { + + while (tlen-- > 0) if (code[ip++] != Character.toLowerCase(chars[s++])) {opFail(); return;} + } + sprev = s - 1; + } + + private boolean isInBitSet() { + int c = chars[s]; + return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); + } + + private void opCClass() { + if (s >= range || !isInBitSet()) {opFail(); return;} + ip += BitSet.BITSET_SIZE; + s++; + sprev = sbegin; // break; + } + + private boolean isInClassMB() { + int tlen = code[ip++]; + if (s >= range) return false; + int ss = s; + s++; + int c = chars[ss]; + if (!EncodingHelper.isInCodeRange(code, ip, c)) return false; + ip += tlen; + return true; + } + + private 
void opCClassMB() { + // beyond string check + if (s >= range || chars[s] <= 0xff) {opFail(); return;} + if (!isInClassMB()) {opFail(); return;} // not!!! + sprev = sbegin; // break; + } + + private void opCClassMIX() { + if (s >= range) {opFail(); return;} + if (chars[s] > 0xff) { + ip += BitSet.BITSET_SIZE; + if (!isInClassMB()) {opFail(); return;} + } else { + if (!isInBitSet()) {opFail(); return;} + ip += BitSet.BITSET_SIZE; + int tlen = code[ip++]; // by code range length + ip += tlen; + s++; + } + sprev = sbegin; // break; + } + + private void opCClassNot() { + if (s >= range || isInBitSet()) {opFail(); return;} + ip += BitSet.BITSET_SIZE; + s++; + sprev = sbegin; // break; + } + + private boolean isNotInClassMB() { + int tlen = code[ip++]; + + if (!(s + 1 <= range)) { + if (s >= range) return false; + s = end; + ip += tlen; + return true; + } + + int ss = s; + s++; + int c = chars[ss]; + + if (EncodingHelper.isInCodeRange(code, ip, c)) return false; + ip += tlen; + return true; + } + + private void opCClassMBNot() { + if (s >= range) {opFail(); return;} + if (chars[s] <= 0xff) { + s++; + int tlen = code[ip++]; + ip += tlen; + sprev = sbegin; // break; + return; + } + if (!isNotInClassMB()) {opFail(); return;} + sprev = sbegin; // break; + } + + private void opCClassMIXNot() { + if (s >= range) {opFail(); return;} + if (chars[s] > 0xff) { + ip += BitSet.BITSET_SIZE; + if (!isNotInClassMB()) {opFail(); return;} + } else { + if (isInBitSet()) {opFail(); return;} + ip += BitSet.BITSET_SIZE; + int tlen = code[ip++]; + ip += tlen; + s++; + } + sprev = sbegin; // break; + } + + private void opCClassNode() { + if (s >= range) {opFail(); return;} + CClassNode cc = (CClassNode)regex.operands[code[ip++]]; + int ss = s; + s++; + int c = chars[ss]; + if (!cc.isCodeInCCLength(c)) {opFail(); return;} + sprev = sbegin; // break; + } + + private void opAnyChar() { + if (s >= range) {opFail(); return;} + if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;} + s++; + 
sprev = sbegin; // break; + } + + private void opAnyCharML() { + if (s >= range) {opFail(); return;} + s++; + sprev = sbegin; // break; + } + + private void opAnyCharStar() { + final char[] chars = this.chars; + while (s < range) { + pushAlt(ip, s, sprev); + if (isNewLine(chars, s, end)) {opFail(); return;} + sprev = s; + s++; + } + sprev = sbegin; // break; + } + + private void opAnyCharMLStar() { + while (s < range) { + pushAlt(ip, s, sprev); + sprev = s; + s++; + } + sprev = sbegin; // break; + } + + private void opAnyCharStarPeekNext() { + final char c = (char)code[ip]; + final char[] chars = this.chars; + + while (s < range) { + char b = chars[s]; + if (c == b) pushAlt(ip + 1, s, sprev); + if (b == EncodingHelper.NEW_LINE) {opFail(); return;} + sprev = s; + s++; + } + ip++; + sprev = sbegin; // break; + } + + private void opAnyCharMLStarPeekNext() { + final char c = (char)code[ip]; + final char[] chars = this.chars; + + while (s < range) { + if (c == chars[s]) pushAlt(ip + 1, s, sprev); + sprev = s; + s++; + } + ip++; + sprev = sbegin; // break; + } + + // CEC + private void opStateCheckAnyCharStar() { + int mem = code[ip++]; + final char[] chars = this.chars; + + while (s < range) { + if (stateCheckVal(s, mem)) {opFail(); return;} + pushAltWithStateCheck(ip, s, sprev, mem); + if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;} + sprev = s; + s++; + } + sprev = sbegin; // break; + } + + // CEC + private void opStateCheckAnyCharMLStar() { + int mem = code[ip++]; + + while (s < range) { + if (stateCheckVal(s, mem)) {opFail(); return;} + pushAltWithStateCheck(ip, s, sprev, mem); + sprev = s; + s++; + } + sprev = sbegin; // break; + } + + private void opWord() { + if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} + s++; + sprev = sbegin; // break; + } + + private void opNotWord() { + if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} + s++; + sprev = sbegin; // break; + } + + private void opWordBound() { + if (s 
== str) { + if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} + } else if (s == end) { + if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} + } else { + if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} + } + } + + private void opNotWordBound() { + if (s == str) { + if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} + } else if (s == end) { + if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} + } else { + if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} + } + } + + private void opWordBegin() { + if (s < range && EncodingHelper.isWord(chars[s])) { + if (s == str || !EncodingHelper.isWord(chars[sprev])) return; + } + opFail(); + } + + private void opWordEnd() { + if (s != str && EncodingHelper.isWord(chars[sprev])) { + if (s == end || !EncodingHelper.isWord(chars[s])) return; + } + opFail(); + } + + private void opBeginBuf() { + if (s != str) opFail(); + } + + private void opEndBuf() { + if (s != end) opFail(); + } + + private void opBeginLine() { + if (s == str) { + if (isNotBol(msaOptions)) opFail(); + return; + } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) { + return; + } + opFail(); + } + + private void opEndLine() { + if (s == end) { + if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { + if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) { + if (isNotEol(msaOptions)) opFail(); + } + return; + } else { + if (isNotEol(msaOptions)) opFail(); + return; + } + } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) { + return; + } + opFail(); + } + + private void opSemiEndBuf() { + if (s == end) { + if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { + if (str == end || !isNewLine(chars, sprev, end)) { + if (isNotEol(msaOptions)) opFail(); + } + return; + } else { + if (isNotEol(msaOptions)) 
opFail(); + return; + } + } else if (isNewLine(chars, s, end) && s + 1 == end) { + return; + } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) { + int ss = s + 2; + if (ss == end) return; + } + opFail(); + } + + private void opBeginPosition() { + if (s != msaStart) opFail(); + } + + private void opMemoryStartPush() { + int mem = code[ip++]; + pushMemStart(mem, s); + } + + private void opMemoryStart() { + int mem = code[ip++]; + repeatStk[memStartStk + mem] = s; + } + + private void opMemoryEndPush() { + int mem = code[ip++]; + pushMemEnd(mem, s); + } + + private void opMemoryEnd() { + int mem = code[ip++]; + repeatStk[memEndStk + mem] = s; + } + + private void opMemoryEndPushRec() { + int mem = code[ip++]; + int stkp = getMemStart(mem); /* should be before push mem-end. */ + pushMemEnd(mem, s); + repeatStk[memStartStk + mem] = stkp; + } + + private void opMemoryEndRec() { + int mem = code[ip++]; + repeatStk[memEndStk + mem] = s; + int stkp = getMemStart(mem); + + if (BitStatus.bsAt(regex.btMemStart, mem)) { + repeatStk[memStartStk + mem] = stkp; + } else { + repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); + } + + pushMemEndMark(mem); + } + + private boolean backrefInvalid(int mem) { + return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; + } + + private int backrefStart(int mem) { + return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; + } + + private int backrefEnd(int mem) { + return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; + } + + private void backref(int mem) { + /* if you want to remove following line, + you should check in parse and compile time. 
(numMem) */ + if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} + + int pstart = backrefStart(mem); + int pend = backrefEnd(mem); + + int n = pend - pstart; + if (s + n > range) {opFail(); return;} + sprev = s; + + // STRING_CMP + while(n-- > 0) if (chars[pstart++] != chars[s++]) {opFail(); return;} + + int len; + + // beyond string check + if (sprev < range) { + while (sprev + 1 < s) sprev++; + } + } + + private void opBackRef1() { + backref(1); + } + + private void opBackRef2() { + backref(2); + } + + private void opBackRefN() { + backref(code[ip++]); + } + + private void opBackRefNIC() { + int mem = code[ip++]; + /* if you want to remove following line, + you should check in parse and compile time. (numMem) */ + if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} + + int pstart = backrefStart(mem); + int pend = backrefEnd(mem); + + int n = pend - pstart; + if (s + n > range) {opFail(); return;} + sprev = s; + + value = s; + if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} + s = value; + + int len; + // if (sprev < chars.length) + while (sprev + 1 < s) sprev++; + } + + private void opBackRefMulti() { + int tlen = code[ip++]; + + int i; + loop:for (i=0; i<tlen; i++) { + int mem = code[ip++]; + if (backrefInvalid(mem)) continue; + + int pstart = backrefStart(mem); + int pend = backrefEnd(mem); + + int n = pend - pstart; + if (s + n > range) {opFail(); return;} + + sprev = s; + int swork = s; + + while (n-- > 0) { + if (chars[pstart++] != chars[swork++]) continue loop; + } + + s = swork; + + int len; + + // beyond string check + if (sprev < range) { + while (sprev + 1 < s) sprev++; + } + + ip += tlen - i - 1; // * SIZE_MEMNUM (1) + break; /* success */ + } + if (i == tlen) {opFail(); return;} + } + + private void opBackRefMultiIC() { + int tlen = code[ip++]; + + int i; + loop:for (i=0; i<tlen; i++) { + int mem = code[ip++]; + if (backrefInvalid(mem)) continue; + + int pstart = backrefStart(mem); + int 
pend = backrefEnd(mem); + + int n = pend - pstart; + if (s + n > range) {opFail(); return;} + + sprev = s; + + value = s; + if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC + s = value; + + int len; + // if (sprev < chars.length) + while (sprev + 1 < s) sprev++; + + ip += tlen - i - 1; // * SIZE_MEMNUM (1) + break; /* success */ + } + if (i == tlen) {opFail(); return;} + } + + private boolean memIsInMemp(int mem, int num, int memp) { + for (int i=0; i<num; i++) { + int m = code[memp++]; + if (mem == m) return true; + } + return false; + } + + // USE_BACKREF_AT_LEVEL // (s) and (end) implicit + private boolean backrefMatchAtNestedLevel(boolean ignoreCase, int caseFoldFlag, + int nest, int memNum, int memp) { + int pend = -1; + int level = 0; + int k = stk - 1; + + while (k >= 0) { + StackEntry e = stack[k]; + + if (e.type == CALL_FRAME) { + level--; + } else if (e.type == RETURN) { + level++; + } else if (level == nest) { + if (e.type == MEM_START) { + if (memIsInMemp(e.getMemNum(), memNum, memp)) { + int pstart = e.getMemPStr(); + if (pend != -1) { + if (pend - pstart > end - s) return false; /* or goto next_mem; */ + int p = pstart; + + value = s; + if (ignoreCase) { + if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { + return false; /* or goto next_mem; */ + } + } else { + while (p < pend) { + if (chars[p++] != chars[value++]) return false; /* or goto next_mem; */ + } + } + s = value; + + return true; + } + } + } else if (e.type == MEM_END) { + if (memIsInMemp(e.getMemNum(), memNum, memp)) { + pend = e.getMemPStr(); + } + } + } + k--; + } + return false; + } + + private void opBackRefAtLevel() { + int ic = code[ip++]; + int level = code[ip++]; + int tlen = code[ip++]; + + sprev = s; + if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit + int len; + while (sprev + 1 < s) sprev++; + ip += tlen; // * SIZE_MEMNUM + } else { + {opFail(); return;} + } 
+ } + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + private void opSetOptionPush() { + // option = code[ip++]; // final for now + pushAlt(ip, s, sprev); + ip += OPSize.SET_OPTION + OPSize.FAIL; + } + + private void opSetOption() { + // option = code[ip++]; // final for now + } + + private void opNullCheckStart() { + int mem = code[ip++]; + pushNullCheckStart(mem, s); + } + + private void nullCheckFound() { + // null_check_found: + /* empty loop founded, skip next instruction */ + switch(code[ip++]) { + case OPCode.JUMP: + case OPCode.PUSH: + ip++; // p += SIZE_RELADDR; + break; + case OPCode.REPEAT_INC: + case OPCode.REPEAT_INC_NG: + case OPCode.REPEAT_INC_SG: + case OPCode.REPEAT_INC_NG_SG: + ip++; // p += SIZE_MEMNUM; + break; + default: + throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); + } // switch + } + + private void opNullCheckEnd() { + int mem = code[ip++]; + int isNull = nullCheck(mem, s); /* mem: null check id */ + + if (isNull != 0) { + if (Config.DEBUG_MATCH) { + Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); + } + + nullCheckFound(); + } + } + + // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + private void opNullCheckEndMemST() { + int mem = code[ip++]; /* mem: null check id */ + int isNull = nullCheckMemSt(mem, s); + + if (isNull != 0) { + if (Config.DEBUG_MATCH) { + Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); + } + + if (isNull == -1) {opFail(); return;} + nullCheckFound(); + } + } + + // USE_SUBEXP_CALL + private void opNullCheckEndMemSTPush() { + int mem = code[ip++]; /* mem: null check id */ + + int isNull; + if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { + isNull = nullCheckMemStRec(mem, s); + } else { + isNull = nullCheckRec(mem, s); + } + + if (isNull != 0) { + if (Config.DEBUG_MATCH) { + Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s); + } + + if (isNull == -1) {opFail(); return;} + nullCheckFound(); + } else { + 
pushNullCheckEnd(mem); + } + } + + private void opJump() { + ip += code[ip] + 1; + } + + private void opPush() { + int addr = code[ip++]; + pushAlt(ip + addr, s, sprev); + } + + // CEC + private void opStateCheckPush() { + int mem = code[ip++]; + if (stateCheckVal(s, mem)) {opFail(); return;} + int addr = code[ip++]; + pushAltWithStateCheck(ip + addr, s, sprev, mem); + } + + // CEC + private void opStateCheckPushOrJump() { + int mem = code[ip++]; + int addr= code[ip++]; + + if (stateCheckVal(s, mem)) { + ip += addr; + } else { + pushAltWithStateCheck(ip + addr, s, sprev, mem); + } + } + + // CEC + private void opStateCheck() { + int mem = code[ip++]; + if (stateCheckVal(s, mem)) {opFail(); return;} + pushStateCheck(s, mem); + } + + private void opPop() { + popOne(); + } + + private void opPushOrJumpExact1() { + int addr = code[ip++]; + // beyond string check + if (s < range && code[ip] == chars[s]) { + ip++; + pushAlt(ip + addr, s, sprev); + return; + } + ip += addr + 1; + } + + private void opPushIfPeekNext() { + int addr = code[ip++]; + // beyond string check + if (s < range && code[ip] == chars[s]) { + ip++; + pushAlt(ip + addr, s, sprev); + return; + } + ip++; + } + + private void opRepeat() { + int mem = code[ip++]; /* mem: OP_REPEAT ID */ + int addr= code[ip++]; + + // ensure1(); + repeatStk[mem] = stk; + pushRepeat(mem, ip); + + if (regex.repeatRangeLo[mem] == 0) { // lower + pushAlt(ip + addr, s, sprev); + } + } + + private void opRepeatNG() { + int mem = code[ip++]; /* mem: OP_REPEAT ID */ + int addr= code[ip++]; + + // ensure1(); + repeatStk[mem] = stk; + pushRepeat(mem, ip); + + if (regex.repeatRangeLo[mem] == 0) { + pushAlt(ip, s, sprev); + ip += addr; + } + } + + private void repeatInc(int mem, int si) { + StackEntry e = stack[si]; + + e.increaseRepeatCount(); + + if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { + /* end of repeat. Nothing to do. 
*/ + } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { + pushAlt(ip, s, sprev); + ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ + } else { + ip = e.getRepeatPCode(); + } + pushRepeatInc(si); + } + + private void opRepeatInc() { + int mem = code[ip++]; /* mem: OP_REPEAT ID */ + int si = repeatStk[mem]; + repeatInc(mem, si); + } + + private void opRepeatIncSG() { + int mem = code[ip++]; /* mem: OP_REPEAT ID */ + int si = getRepeat(mem); + repeatInc(mem, si); + } + + private void repeatIncNG(int mem, int si) { + StackEntry e = stack[si]; + + e.increaseRepeatCount(); + + if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { + if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { + int pcode = e.getRepeatPCode(); + pushRepeatInc(si); + pushAlt(pcode, s, sprev); + } else { + ip = e.getRepeatPCode(); + pushRepeatInc(si); + } + } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { + pushRepeatInc(si); + } + } + + private void opRepeatIncNG() { + int mem = code[ip++]; + int si = repeatStk[mem]; + repeatIncNG(mem, si); + } + + private void opRepeatIncNGSG() { + int mem = code[ip++]; + int si = getRepeat(mem); + repeatIncNG(mem, si); + } + + private void opPushPos() { + pushPos(s, sprev); + } + + private void opPopPos() { + StackEntry e = stack[posEnd()]; + s = e.getStatePStr(); + sprev= e.getStatePStrPrev(); + } + + private void opPushPosNot() { + int addr = code[ip++]; + pushPosNot(ip + addr, s, sprev); + } + + private void opFailPos() { + popTilPosNot(); + opFail(); + } + + private void opPushStopBT() { + pushStopBT(); + } + + private void opPopStopBT() { + stopBtEnd(); + } + + private void opLookBehind() { + int tlen = code[ip++]; + s = EncodingHelper.stepBack(str, s, tlen); + if (s == -1) {opFail(); return;} + sprev = EncodingHelper.prevCharHead(str, s); + } + + private void opLookBehindSb() { + int tlen = code[ip++]; + s -= tlen; + if (s < str) {opFail(); return;} + sprev = s == str ? 
-1 : s - 1; + } + + private void opPushLookBehindNot() { + int addr = code[ip++]; + int tlen = code[ip++]; + int q = EncodingHelper.stepBack(str, s, tlen); + if (q == -1) { + /* too short case -> success. ex. /(?<!XXX)a/.match("a") + If you want to change to fail, replace following line. */ + ip += addr; + // return FAIL; + } else { + pushLookBehindNot(ip + addr, s, sprev); + s = q; + sprev = EncodingHelper.prevCharHead(str, s); + } + } + + private void opFailLookBehindNot() { + popTilLookBehindNot(); + opFail(); + } + + private void opCall() { + int addr = code[ip++]; + pushCallFrame(ip); + ip = addr; // absolute address + } + + private void opReturn() { + ip = sreturn(); + pushReturn(); + } + + private void opFail() { + if (stack == null) { + ip = regex.codeLength - 1; + return; + } + + + StackEntry e = pop(); + ip = e.getStatePCode(); + s = e.getStatePStr(); + sprev = e.getStatePStrPrev(); + + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + if (e.getStateCheck() != 0) { + e.type = STATE_CHECK_MARK; + stk++; + } + } + } + + private int finish() { + return bestLen; + } +} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodePrinter.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,416 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
package jdk.nashorn.internal.runtime.regexp.joni;

import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments;
import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;

/**
 * Renders the compiled instruction array of a {@link Regex} as text.
 *
 * <p>Each instruction is printed as {@code [NAME<operands>]}. The opcode name is looked up in
 * {@code OPCode.OpCodeNames} and the operand layout in {@code OPCode.OpCodeArgTypes}; opcodes
 * whose argument type is {@code Arguments.SPECIAL} are decoded case by case.
 */
class ByteCodePrinter {
    final int[] code;
    final int codeLength;
    final char[][] templates;

    Object[] operands;
    int operantCount;
    WarnCallback warnings;

    /**
     * Captures the code array, operand table, string templates and warning callback of
     * {@code regex}. The arrays are referenced, not copied.
     *
     * @param regex the compiled expression to print
     */
    public ByteCodePrinter(Regex regex) {
        code = regex.code;
        codeLength = regex.codeLength;
        operands = regex.operands;
        operantCount = regex.operandLength;

        templates = regex.templates;
        warnings = regex.warnings;
    }

    /**
     * Prints every instruction in {@code code[0 .. codeLength)}.
     *
     * @return the listing, starting with a {@code "code length: N"} line
     */
    public String byteCodeListToString() {
        return compiledByteCodeListToString();
    }

    /**
     * Appends {@code ":"} followed by {@code len} code units read from {@code code} at {@code s}.
     *
     * <p>Code units are appended as {@code char}s. Going through a one-element {@code byte[]}
     * would drop the upper eight bits and decode with the platform default charset, which
     * garbles every non-ASCII pattern character.
     */
    private void pString(StringBuilder sb, int len, int s) {
        sb.append(':');
        for (int i = 0; i < len; i++) {
            sb.append((char) code[s + i]);
        }
    }

    /**
     * Appends {@code ":T:"} followed by {@code len} chars of the template {@code tm} starting at
     * {@code idx}. Not called from within this class at present.
     */
    private void pStringFromTemplate(StringBuilder sb, int len, char[] tm, int idx) {
        sb.append(":T:");
        sb.append(tm, idx, len);
    }

    /**
     * Appends {@code ":<len>:"} followed by {@code len * mbLen} code units read from
     * {@code code} at {@code s}.
     */
    private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
        final int units = len * mbLen;
        sb.append(':').append(len).append(':');
        for (int i = 0; i < units; i++) {
            sb.append((char) code[s + i]);
        }
    }

    /**
     * Appends {@code ":T:<len>:"} followed by {@code len * mbLen} chars of the template
     * {@code tm} starting at {@code idx}.
     */
    private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, char[] tm, int idx) {
        final int units = len * mbLen;
        sb.append(":T:").append(len).append(':');
        for (int i = 0; i < units; i++) {
            sb.append(tm[idx + i]);
        }
    }

    /**
     * Prints a length-prefixed string operand (the EXACTN family) located at {@code p}.
     *
     * <p>With {@code Config.USE_STRING_TEMPLATES} the operand is {@code <len> <template> <index>}
     * and the text comes from {@link #templates}; otherwise the text follows the length inline.
     *
     * @param mbLen code units per character
     * @return the position just past the operand
     */
    private int pLenPrefixedString(StringBuilder sb, int p, int mbLen) {
        int pos = p;
        final int len = code[pos];
        pos += OPSize.LENGTH;
        if (Config.USE_STRING_TEMPLATES) {
            final int tm = code[pos];
            pos += OPSize.INDEX;
            final int idx = code[pos];
            pos += OPSize.INDEX;
            pLenStringFromTemplate(sb, len, mbLen, templates[tm], idx);
        } else {
            pLenString(sb, len, mbLen, pos);
            pos += len * mbLen;
        }
        return pos;
    }

    /**
     * Prints a counted list of memory (group) numbers as {@code " m1, m2, ..."}.
     *
     * @return the position just past the list
     */
    private int pMemNumList(StringBuilder sb, int p) {
        int pos = p;
        sb.append(' ');
        final int count = code[pos];
        pos += OPSize.LENGTH;
        for (int i = 0; i < count; i++) {
            final int mem = code[pos];
            pos += OPSize.MEMNUM;
            if (i > 0) {
                sb.append(", ");
            }
            sb.append(mem);
        }
        return pos;
    }

    /** Returns the number of set bits in the bit set stored inline in {@code code} at {@code p}. */
    private int bitSetCardinalityAt(int p) {
        final BitSet bs = new BitSet();
        System.arraycopy(code, p, bs.bits, 0, BitSet.BITSET_SIZE);
        return bs.numOn();
    }

    /**
     * Appends the textual form of the single instruction starting at {@code bp}.
     *
     * @param sb destination buffer
     * @param bp index of the opcode within {@code code}
     * @return the index of the next instruction
     * @throws InternalException if a {@code SPECIAL} opcode has no decoder here
     */
    public int compiledByteCodeToString(StringBuilder sb, int bp) {
        final int start = bp;
        final int opcode = code[start];
        int p = start + 1; // first operand slot

        sb.append('[').append(OPCode.OpCodeNames[opcode]);
        final int argType = OPCode.OpCodeArgTypes[opcode];

        if (argType != Arguments.SPECIAL) {
            switch (argType) {
            case Arguments.NON:
                break;

            case Arguments.RELADDR:
                sb.append(":(").append(code[p]).append(')');
                p += OPSize.RELADDR;
                break;

            case Arguments.ABSADDR:
                sb.append(":(").append(code[p]).append(')');
                p += OPSize.ABSADDR;
                break;

            case Arguments.LENGTH:
                sb.append(':').append(code[p]);
                p += OPSize.LENGTH;
                break;

            case Arguments.MEMNUM:
                sb.append(':').append(code[p]);
                p += OPSize.MEMNUM;
                break;

            case Arguments.OPTION:
                sb.append(':').append(code[p]);
                p += OPSize.OPTION;
                break;

            case Arguments.STATE_CHECK:
                sb.append(':').append(code[p]);
                p += OPSize.STATE_CHECK;
                break;
            }
        } else {
            switch (opcode) {
            // One inline code unit.
            case OPCode.EXACT1:
            case OPCode.ANYCHAR_STAR_PEEK_NEXT:
            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
            case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
            case OPCode.EXACT1_IC:
            case OPCode.EXACT1_IC_SB:
                pString(sb, 1, p);
                p += 1;
                break;

            // Fixed number of inline code units.
            case OPCode.EXACT2:
            case OPCode.EXACTMB2N1:
                pString(sb, 2, p);
                p += 2;
                break;

            case OPCode.EXACT3:
                pString(sb, 3, p);
                p += 3;
                break;

            case OPCode.EXACT4:
            case OPCode.EXACTMB2N2:
                pString(sb, 4, p);
                p += 4;
                break;

            case OPCode.EXACT5:
                pString(sb, 5, p);
                p += 5;
                break;

            case OPCode.EXACTMB2N3:
                pString(sb, 6, p);
                p += 6;
                break;

            // Length-prefixed strings.
            case OPCode.EXACTN:
            case OPCode.EXACTN_IC:
            case OPCode.EXACTN_IC_SB:
                p = pLenPrefixedString(sb, p, 1);
                break;

            case OPCode.EXACTMB2N:
                p = pLenPrefixedString(sb, p, 2);
                break;

            case OPCode.EXACTMB3N:
                p = pLenPrefixedString(sb, p, 3);
                break;

            case OPCode.EXACTMBN: {
                final int mbLen = code[p];
                p += OPSize.LENGTH;
                final int len = code[p];
                p += OPSize.LENGTH;
                final int units = len * mbLen;

                if (Config.USE_STRING_TEMPLATES) {
                    final int tm = code[p];
                    p += OPSize.INDEX;
                    final int idx = code[p];
                    p += OPSize.INDEX;
                    sb.append(":T:").append(mbLen).append(':').append(len).append(':');
                    sb.append(templates[tm], idx, units);
                } else {
                    sb.append(':').append(mbLen).append(':').append(len).append(':');
                    for (int i = 0; i < units; i++) {
                        sb.append((char) code[p++]);
                    }
                }
                break;
            }

            // Character classes: print the number of set bits rather than the bits themselves.
            case OPCode.CCLASS:
            case OPCode.CCLASS_SB:
            case OPCode.CCLASS_NOT:
            case OPCode.CCLASS_NOT_SB:
                sb.append(':').append(bitSetCardinalityAt(p));
                p += BitSet.BITSET_SIZE;
                break;

            case OPCode.CCLASS_MB:
            case OPCode.CCLASS_MB_NOT: {
                final int len = code[p];
                p += OPSize.LENGTH;
                final int cod = code[p]; // first word of the code-range table
                p += len;
                sb.append(':').append(cod).append(':').append(len);
                break;
            }

            case OPCode.CCLASS_MIX:
            case OPCode.CCLASS_MIX_NOT: {
                final int n = bitSetCardinalityAt(p);
                p += BitSet.BITSET_SIZE;
                final int len = code[p];
                p += OPSize.LENGTH;
                final int cod = code[p]; // first word of the code-range table
                p += len;
                sb.append(':').append(n).append(':').append(cod).append(':').append(len);
                break;
            }

            case OPCode.CCLASS_NODE: {
                final CClassNode cc = (CClassNode) operands[code[p]];
                p += OPSize.POINTER;
                sb.append(':').append(cc).append(':').append(cc.bs.numOn());
                break;
            }

            case OPCode.BACKREFN_IC:
                sb.append(':').append(code[p]);
                p += OPSize.MEMNUM;
                break;

            case OPCode.BACKREF_MULTI_IC:
            case OPCode.BACKREF_MULTI:
                p = pMemNumList(sb, p);
                break;

            case OPCode.BACKREF_WITH_LEVEL: {
                final int option = code[p];
                p += OPSize.OPTION;
                sb.append(':').append(option);
                final int level = code[p];
                p += OPSize.LENGTH;
                sb.append(':').append(level);
                p = pMemNumList(sb, p);
                break;
            }

            case OPCode.REPEAT:
            case OPCode.REPEAT_NG: {
                final int mem = code[p];
                p += OPSize.MEMNUM;
                final int addr = code[p];
                p += OPSize.RELADDR;
                sb.append(':').append(mem).append(':').append(addr);
                break;
            }

            case OPCode.PUSH_OR_JUMP_EXACT1:
            case OPCode.PUSH_IF_PEEK_NEXT: {
                final int addr = code[p];
                p += OPSize.RELADDR;
                sb.append(":(").append(addr).append(')');
                pString(sb, 1, p);
                p += 1;
                break;
            }

            case OPCode.LOOK_BEHIND:
            case OPCode.LOOK_BEHIND_SB:
                sb.append(':').append(code[p]);
                p += OPSize.LENGTH;
                break;

            case OPCode.PUSH_LOOK_BEHIND_NOT: {
                final int addr = code[p];
                p += OPSize.RELADDR;
                final int len = code[p];
                p += OPSize.LENGTH;
                sb.append(':').append(len).append(":(").append(addr).append(')');
                break;
            }

            case OPCode.STATE_CHECK_PUSH:
            case OPCode.STATE_CHECK_PUSH_OR_JUMP: {
                final int scn = code[p];
                p += OPSize.STATE_CHECK_NUM;
                final int addr = code[p];
                p += OPSize.RELADDR;
                sb.append(':').append(scn).append(":(").append(addr).append(')');
                break;
            }

            default:
                throw new InternalException("undefined code: " + opcode);
            }
        }

        sb.append(']');

        // @opcode_address(opcode_size)
        if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
            sb.append('@').append(start).append('(').append(p - start).append(')');
        }

        return p;
    }

    /**
     * Builds the full listing: a {@code "code length: N"} header, then the instructions
     * separated by a space, with a line break in place of the space before every fifth one.
     */
    private String compiledByteCodeListToString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("code length: ").append(codeLength).append('\n');

        int ncode = 0;
        int bp = 0;

        while (bp < codeLength) {
            ncode++;

            if (bp > 0) {
                sb.append(ncode % 5 == 0 ? "\n" : " ");
            }

            bp = compiledByteCodeToString(sb, bp);
        }
        sb.append('\n');
        return sb.toString();
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/CaptureTreeNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,74 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public class CaptureTreeNode { + + + int group; + int beg; + int end; + // int allocated; + int numChildren; + CaptureTreeNode[]children; + + CaptureTreeNode() { + beg = Region.REGION_NOTPOS; + end = Region.REGION_NOTPOS; + group = -1; + } + + static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8; + void addChild(CaptureTreeNode child) { + if (children == null) { + children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE]; + } else if (numChildren >= children.length) { + CaptureTreeNode[]tmp = new CaptureTreeNode[children.length << 1]; + System.arraycopy(children, 0, tmp, 0, children.length); + children = tmp; + } + + children[numChildren] = child; + numChildren++; + } + + void clear() { + for (int i=0; i<numChildren; i++) { + children[i] = null; // ??? + } + numChildren = 0; + beg = end = Region.REGION_NOTPOS; + group = -1; + } + + CaptureTreeNode cloneTree() { + CaptureTreeNode clone = new CaptureTreeNode(); + clone.beg = beg; + clone.end = end; + + for (int i=0; i<numChildren; i++) { + CaptureTreeNode child = children[i].cloneTree(); + clone.addChild(child); + } + return clone; + } + + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,378 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; + +public final class CodeRangeBuffer { + private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; + private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff; + + int[]p; + int used; + + public CodeRangeBuffer(int[]ranges) { + p = ranges; + used = ranges[0] + 1; + } + + public CodeRangeBuffer() { + p = new int[INIT_MULTI_BYTE_RANGE_SIZE]; + writeCodePoint(0, 0); + } + + public int[]getCodeRange() { + return p; + } + + private CodeRangeBuffer(CodeRangeBuffer orig) { + p = new int[orig.p.length]; + System.arraycopy(orig.p, 0, p, 0, p.length); + used = orig.used; + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("CodeRange"); + buf.append("\n used: " + used); + buf.append("\n code point: " + p[0]); + buf.append("\n ranges: "); + + for (int i=0; i<p[0]; i++) { + buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." 
+ rangeNumToString(p[i * 2 + 2]) + "]"); + if (i > 0 && i % 6 == 0) buf.append("\n "); + } + + return buf.toString(); + } + + private static String rangeNumToString(int num){ + return "0x" + Integer.toString(num, 16); + } + + public void expand(int low) { + int length = p.length; + do { length <<= 1; } while (length < low); + int[]tmp = new int[length]; + System.arraycopy(p, 0, tmp, 0, used); + p = tmp; + } + + public void ensureSize(int size) { + int length = p.length; + while (length < size ) { length <<= 1; } + if (p.length != length) { + int[]tmp = new int[length]; + System.arraycopy(p, 0, tmp, 0, used); + p = tmp; + } + } + + private void moveRight(int from, int to, int n) { + if (to + n > p.length) expand(to + n); + System.arraycopy(p, from, p, to, n); + if (to + n > used) used = to + n; + } + + protected void moveLeft(int from, int to, int n) { + System.arraycopy(p, from, p, to, n); + } + + private void moveLeftAndReduce(int from, int to) { + System.arraycopy(p, from, p, to, used - from); + used -= from - to; + } + + public void writeCodePoint(int pos, int b) { + int u = pos + 1; + if (p.length < u) expand(u); + p[pos] = b; + if (used < u) used = u; + } + + public CodeRangeBuffer clone() { + return new CodeRangeBuffer(this); + } + + // ugly part: these methods should be made OO + // add_code_range_to_buf + public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) { + if (from > to) { + int n = from; + from = to; + to = n; + } + + if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode + + int[]p = pbuf.p; + int n = p[0]; + + int low = 0; + int bound = n; + + while (low < bound) { + int x = (low + bound) >>> 1; + if (from > p[x * 2 + 2]) { + low = x + 1; + } else { + bound = x; + } + } + + int high = low; + bound = n; + + while (high < bound) { + int x = (high + bound) >>> 1; + if (to >= p[x * 2 + 1] - 1) { + high = x + 1; + } else { + bound = x; + } + } + + int incN = low + 1 - high; + + if (n + incN > 
Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES); + + if (incN != 1) { + if (from > p[low * 2 + 1]) from = p[low * 2 + 1]; + if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2]; + } + + if (incN != 0 && high < n) { + int fromPos = 1 + high * 2; + int toPos = 1 + (low + 1) * 2; + int size = (n - high) * 2; + + if (incN > 0) { + pbuf.moveRight(fromPos, toPos, size); + } else { + pbuf.moveLeftAndReduce(fromPos, toPos); + } + } + + int pos = 1 + low * 2; + // pbuf.ensureSize(pos + 2); + pbuf.writeCodePoint(pos, from); + pbuf.writeCodePoint(pos + 1, to); + n += incN; + pbuf.writeCodePoint(0, n); + + return pbuf; + } + + // add_code_range, be aware of it returning null! + public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) { + if (from >to) { + if (env.syntax.allowEmptyRangeInCC()) { + return pbuf; + } else { + throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); + } + } + return addCodeRangeToBuff(pbuf, from, to); + } + + // SET_ALL_MULTI_BYTE_RANGE + protected static CodeRangeBuffer setAllMultiByteRange(CodeRangeBuffer pbuf) { + return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE); + } + + // ADD_ALL_MULTI_BYTE_RANGE + public static CodeRangeBuffer addAllMultiByteRange(CodeRangeBuffer pbuf) { + return setAllMultiByteRange(pbuf); + } + + // not_code_range_buf + public static CodeRangeBuffer notCodeRangeBuff(CodeRangeBuffer bbuf) { + CodeRangeBuffer pbuf = null; + + if (bbuf == null) return setAllMultiByteRange(pbuf); + + int[]p = bbuf.p; + int n = p[0]; + + if (n <= 0) return setAllMultiByteRange(pbuf); + + int pre = EncodingHelper.mbcodeStartPosition(); + + int from; + int to = 0; + for (int i=0; i<n; i++) { + from = p[i * 2 + 1]; + to = p[i * 2 + 2]; + if (pre <= from - 1) { + pbuf = addCodeRangeToBuff(pbuf, pre, from - 1); + } + if (to == ALL_MULTI_BYTE_RANGE) break; + pre = to + 1; + } + + if (to 
< ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE); + return pbuf; + } + + // or_code_range_buf + public static CodeRangeBuffer orCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1, + CodeRangeBuffer bbuf2, boolean not2) { + CodeRangeBuffer pbuf = null; + + if (bbuf1 == null && bbuf2 == null) { + if (not1 || not2) { + return setAllMultiByteRange(pbuf); + } + return null; + } + + if (bbuf2 == null) { + CodeRangeBuffer tbuf; + boolean tnot; + // swap + tnot = not1; not1 = not2; not2 = tnot; + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; + } + + if (bbuf1 == null) { + if (not1) { + return setAllMultiByteRange(pbuf); + } else { + if (!not2) { + return bbuf2.clone(); + } else { + return notCodeRangeBuff(bbuf2); + } + } + } + + if (not1) { + CodeRangeBuffer tbuf; + boolean tnot; + // swap + tnot = not1; not1 = not2; not2 = tnot; + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; + } + + if (!not2 && !not1) { /* 1 OR 2 */ + pbuf = bbuf2.clone(); + } else if (!not1) { /* 1 OR (not 2) */ + pbuf = notCodeRangeBuff(bbuf2); + } + + int[]p1 = bbuf1.p; + int n1 = p1[0]; + + for (int i=0; i<n1; i++) { + int from = p1[i * 2 + 1]; + int to = p1[i * 2 + 2]; + pbuf = addCodeRangeToBuff(pbuf, from, to); + } + + return pbuf; + } + + // and_code_range1 + public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, int[]data, int n) { + for (int i=0; i<n; i++) { + int from2 = data[i * 2 + 1]; + int to2 = data[i * 2 + 2]; + if (from2 < from1) { + if (to2 < from1) { + continue; + } else { + from1 = to2 + 1; + } + } else if (from2 <= to1) { + if (to2 < to1) { + if (from1 <= from2 - 1) { + pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1); + } + from1 = to2 + 1; + } else { + to1 = from2 - 1; + } + } else { + from1 = from2; + } + if (from1 > to1) break; + } + + if (from1 <= to1) { + pbuf = addCodeRangeToBuff(pbuf, from1, to1); + } + + return pbuf; + } + + // and_code_range_buf + public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer 
bbuf1, boolean not1, + CodeRangeBuffer bbuf2, boolean not2) { + CodeRangeBuffer pbuf = null; + + if (bbuf1 == null) { + if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */ + return null; + } else if (bbuf2 == null) { + if (not2) return bbuf1.clone(); + return null; + } + + if (not1) { + CodeRangeBuffer tbuf; + boolean tnot; + // swap + tnot = not1; not1 = not2; not2 = tnot; + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; + } + + int[]p1 = bbuf1.p; + int n1 = p1[0]; + int[]p2 = bbuf2.p; + int n2 = p2[0]; + + if (!not2 && !not1) { /* 1 AND 2 */ + for (int i=0; i<n1; i++) { + int from1 = p1[i * 2 + 1]; + int to1 = p1[i * 2 + 2]; + + for (int j=0; j<n2; j++) { + int from2 = p2[j * 2 + 1]; + int to2 = p2[j * 2 + 2]; + + if (from2 > to1) break; + if (to2 < from1) continue; + int from = from1 > from2 ? from1 : from2; + int to = to1 < to2 ? to1 : to2; + pbuf = addCodeRangeToBuff(pbuf, from, to); + } + } + } else if (!not1) { /* 1 AND (not 2) */ + for (int i=0; i<n1; i++) { + int from1 = p1[i * 2 + 1]; + int to1 = p1[i * 2 + 2]; + pbuf = andCodeRange1(pbuf, from1, to1, p2, n2); + } + } + + return pbuf; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Compiler.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,178 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; +import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; + +abstract class Compiler implements ErrorMessages { + protected final Analyser analyser; + protected final Regex regex; + + protected Compiler(Analyser analyser) { + this.analyser = analyser; + this.regex = analyser.regex; + } + + final void compile() { + prepare(); + compileTree(analyser.root); + finish(); + } + + protected abstract void prepare(); + protected abstract void finish(); + + protected abstract void compileAltNode(ConsAltNode node); + + private void compileStringRawNode(StringNode sn) { + if (sn.length() <= 0) return; + addCompileString(sn.chars, sn.p, 1 /*sb*/, sn.length(), false); + } + + private void compileStringNode(StringNode node) { + StringNode sn = node; + if (sn.length() <= 0) return; + + boolean ambig = sn.isAmbig(); + + int p, prev; + p = prev = sn.p; + int end = sn.end; + char[] chars = sn.chars; + p++; + int slen = 1; + + while (p < end) { + slen++; + p++; + } + addCompileString(chars, prev, 1, slen, ambig); + } + + protected abstract void 
addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase); + + protected abstract void compileCClassNode(CClassNode node); + protected abstract void compileCTypeNode(CTypeNode node); + protected abstract void compileAnyCharNode(); + protected abstract void compileCallNode(CallNode node); + protected abstract void compileBackrefNode(BackRefNode node); + protected abstract void compileCECQuantifierNode(QuantifierNode node); + protected abstract void compileNonCECQuantifierNode(QuantifierNode node); + protected abstract void compileOptionNode(EncloseNode node); + protected abstract void compileEncloseNode(EncloseNode node); + protected abstract void compileAnchorNode(AnchorNode node); + + protected final void compileTree(Node node) { + switch (node.getType()) { + case NodeType.LIST: + ConsAltNode lin = (ConsAltNode)node; + do { + compileTree(lin.car); + } while ((lin = lin.cdr) != null); + break; + + case NodeType.ALT: + compileAltNode((ConsAltNode)node); + break; + + case NodeType.STR: + StringNode sn = (StringNode)node; + if (sn.isRaw()) { + compileStringRawNode(sn); + } else { + compileStringNode(sn); + } + break; + + case NodeType.CCLASS: + compileCClassNode((CClassNode)node); + break; + + case NodeType.CTYPE: + compileCTypeNode((CTypeNode)node); + break; + + case NodeType.CANY: + compileAnyCharNode(); + break; + + case NodeType.BREF: + compileBackrefNode((BackRefNode)node); + break; + + case NodeType.CALL: + if (Config.USE_SUBEXP_CALL) { + compileCallNode((CallNode)node); + break; + } // USE_SUBEXP_CALL + break; + + case NodeType.QTFR: + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + compileCECQuantifierNode((QuantifierNode)node); + } else { + compileNonCECQuantifierNode((QuantifierNode)node); + } + break; + + case NodeType.ENCLOSE: + EncloseNode enode = (EncloseNode)node; + if (enode.isOption()) { + compileOptionNode(enode); + } else { + compileEncloseNode(enode); + } + break; + + case NodeType.ANCHOR: + 
compileAnchorNode((AnchorNode)node); + break; + + default: + // undefined node type + newInternalException(ERR_PARSER_BUG); + } // switch + } + + protected final void compileTreeNTimes(Node node, int n) { + for (int i=0; i<n; i++) compileTree(node); + } + + protected void newSyntaxException(String message) { + throw new SyntaxException(message); + } + + protected void newInternalException(String message) { + throw new InternalException(message); + } +}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,100 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

import java.io.PrintStream;

/**
 * Compile-time switches and limits for the regexp engine. Every member is an
 * interface field and therefore implicitly {@code public static final}.
 */
public interface Config {
    int CHAR_TABLE_SIZE = 256;

    // ---- from jcodings ----
    boolean VANILLA = false;
    int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1 << 30);
    int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
    int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
    boolean USE_CRNL_AS_LINE_TERMINATOR = false;

    // ---- optional syntax features ----
    boolean USE_NAMED_GROUP = true;
    boolean USE_SUBEXP_CALL = true;
    // \k<name+n>, \k<name-n>
    boolean USE_BACKREF_WITH_LEVEL = true;

    // /(?:()|())*\2/
    boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true;
    // /\n$/ =~ "\n"
    boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true;
    boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false;

    boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true;

    boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false;
    boolean USE_CAPTURE_HISTORY = false;
    boolean USE_VARIABLE_META_CHARS = true;
    // "\<": word-begin, "\>": word-end
    boolean USE_WORD_BEGIN_END = true;
    // needed for POSIX API support
    boolean USE_POSIX_API_REGION_OPTION = true;
    boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
    boolean USE_COMBINATION_EXPLOSION_CHECK = false;

    // ---- hard limits ----
    int NREGION = 10;
    int MAX_BACKREF_NUM = 1000;
    int MAX_REPEAT_NUM = 100000;
    int MAX_MULTI_BYTE_RANGES_NUM = 10000;

    boolean USE_WARN = true;

    // ---- internal config ----
    boolean USE_PARSE_TREE_NODE_RECYCLE = true;
    boolean USE_OP_PUSH_OR_JUMP_EXACT = true;
    boolean USE_SHARED_CCLASS_TABLE = false;
    boolean USE_QTFR_PEEK_NEXT = true;

    int INIT_MATCH_STACK_SIZE = 64;
    // 0 means unlimited
    int DEFAULT_MATCH_STACK_LIMIT_SIZE = 0;
    int NUMBER_OF_POOLED_STACKS = 4;

    boolean DONT_OPTIMIZE = false;

    // use embedded string templates in the Regex object as byte arrays
    // instead of compiling them into the int bytecode array
    boolean USE_STRING_TEMPLATES = true;

    int MAX_CAPTURE_HISTORY_GROUP = 31;

    int CHECK_STRING_THRESHOLD_LEN = 7;
    int CHECK_BUFF_MAX_SIZE = 0x4000;

    boolean NON_UNICODE_SDW = true;

    // ---- diagnostics ----
    PrintStream log = System.out;
    PrintStream err = System.err;

    boolean DEBUG_ALL = false;

    boolean DEBUG = DEBUG_ALL;
    boolean DEBUG_PARSE_TREE = DEBUG_ALL;
    boolean DEBUG_PARSE_TREE_RAW = true;
    boolean DEBUG_COMPILE = DEBUG_ALL;
    boolean DEBUG_COMPILE_BYTE_CODE_INFO = DEBUG_ALL;
    boolean DEBUG_SEARCH = DEBUG_ALL;
    boolean DEBUG_MATCH = DEBUG_ALL;
    boolean DEBUG_ASM = true;
    boolean DEBUG_ASM_EXEC = true;
}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,285 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;

import java.util.Arrays;

/**
 * Static character-classification and string helpers operating on
 * {@code char[]} input and {@code int} code values.
 */
public class EncodingHelper {

    public final static char NEW_LINE = 0xa;
    public final static char RETURN = 0xd;

    final static char[] EMPTYCHARS = new char[0];
    /** Lazily filled cache of {@link #ctypeCodeRange} results, indexed by character type. */
    final static int[][] codeRanges = new int[15][];

    /** @return the numeric value of an ASCII decimal digit; the caller must have checked the input */
    public static int digitVal(int code) {
        return code - '0';
    }

    /** @return the numeric value of an octal digit; same computation as {@link #digitVal(int)} */
    public static int odigitVal(int code) {
        return digitVal(code);
    }

    /**
     * Tests for an ASCII hexadecimal digit ({@code 0-9}, {@code a-f},
     * {@code A-F}).
     *
     * <p>This deliberately uses the ASCII-only {@link #isDigit(int)} rather
     * than {@link Character#isDigit(int)}. The latter also accepts non-ASCII
     * decimal digits (for example Arabic-Indic digits), which are not
     * hexadecimal digits and for which {@link #xdigitVal(int)} would return a
     * meaningless value.
     */
    public static boolean isXDigit(int code) {
        return isDigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F');
    }

    /**
     * @param code a code for which {@link #isXDigit(int)} is {@code true}
     * @return its value in the range 0..15
     */
    public static int xdigitVal(int code) {
        if (isDigit(code)) {
            return code - '0';
        } else if (code >= 'a' && code <= 'f') {
            return code - 'a' + 10;
        } else {
            return code - 'A' + 10;
        }
    }

    /** @return {@code true} for ASCII {@code '0'} through {@code '9'} only */
    public static boolean isDigit(int code) {
        return code >= '0' && code <= '9';
    }

    public static boolean isWord(int code) {
        // letter, digit, or '_'
        return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;
    }

    public static boolean isNewLine(int code) {
        return code == NEW_LINE;
    }

    /** @return {@code true} if there is a char at {@code p} (before {@code end}) and it is a line feed */
    public static boolean isNewLine(char[] chars, int p, int end) {
        return p < end && chars[p] == NEW_LINE;
    }

    /** @return {@code true} if a CR LF pair starts at {@code p} and lies entirely before {@code end} */
    public static boolean isCrnl(char[] chars, int p, int end) {
        return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
    }

    // Encoding.prevCharHead
    /** @return the position before {@code s}, or {@code -1} if {@code s} is already at or before {@code p} */
    public static int prevCharHead(int p, int s) {
        return s <= p ? -1 : s - 1;
    }

    /* onigenc_get_right_adjust_char_head_with_prev */
    /** Returns {@code s} unchanged; a non-null {@code prev} is set to {@code -1}. */
    public static int rightAdjustCharHeadWithPrev(int s, IntHolder prev) {
        if (prev != null) prev.value = -1; /* Sorry */
        return s;
    }

    // Encoding.stepBack
    /**
     * Moves {@code s} back by {@code n} positions without passing {@code p}.
     *
     * @return the new position, or {@code -1} if fewer than {@code n} steps are available
     */
    public static int stepBack(int p, int s, int n) {
        while (s != -1 && n-- > 0) {
            if (s <= p) return -1;
            s--;
        }
        return s;
    }

    /* onigenc_with_ascii_strncmp */
    /**
     * Compares up to {@code n} chars of {@code chars1} (from {@code p1},
     * bounded by {@code end}) with {@code chars2} (from {@code p2}).
     *
     * @return 0 if all {@code n} chars match, otherwise the difference at the
     *         first mismatch ({@code chars2} minus {@code chars1}); if
     *         {@code chars1} runs out first, the current {@code chars2} char
     */
    public static int strNCmp(char[] chars1, int p1, int end, char[] chars2, int p2, int n) {
        while (n-- > 0) {
            if (p1 >= end) return chars2[p2];
            int c = chars1[p1];
            int x = chars2[p2] - c;
            if (x != 0) return x;

            p2++;
            p1++;
        }
        return 0;
    }

    /** Folds the bytes in {@code [p, end)} into one int, most significant byte first. */
    public static int mbcToCode(byte[] bytes, int p, int end) {
        int code = 0;
        for (int i = p; i < end; i++) {
            code = (code << 8) | (bytes[i] & 0xff);
        }
        return code;
    }

    public static int mbcodeStartPosition() {
        return 0x80;
    }

    /**
     * @param flag unused by this implementation
     * @return a one-element array holding the opposite-case form of
     *         {@code c}, or an empty array if {@code c} is neither upper nor
     *         lower case
     */
    public static char[] caseFoldCodesByString(int flag, char c) {
        if (Character.isUpperCase(c)) {
            return new char[] {Character.toLowerCase(c)};
        } else if (Character.isLowerCase(c)) {
            return new char[] {Character.toUpperCase(c)};
        } else {
            return EMPTYCHARS;
        }
    }

    /**
     * For each code in {@code [0, 0xffff)} of type
     * {@link Character#LOWERCASE_LETTER}, calls {@code fun} twice, once for
     * each direction of the lower/upper pair. The same one-element array is
     * reused for every call.
     *
     * @param flag unused by this implementation
     */
    public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) {
        int[] code = new int[1];

        for (int c = 0; c < 0xffff; c++) {
            if (Character.getType(c) == Character.LOWERCASE_LETTER) {

                int upper = code[0] = Character.toUpperCase(c);
                fun.apply(c, code, 1, arg);

                code[0] = c;
                fun.apply(upper, code, 1, arg);
            }
        }
    }

    // CodeRange.isInCodeRange
    /**
     * Binary search for {@code code} in a sorted range array.
     *
     * @param p array in the {@code [numberOfRanges, from1, to1, ...]} layout
     */
    public static boolean isInCodeRange(int[] p, int code) {
        int low = 0;
        int n = p[0];
        int high = n;

        while (low < high) {
            int x = (low + high) >> 1;
            if (code > p[(x << 1) + 2]) {
                low = x + 1;
            } else {
                high = x;
            }
        }
        return low < n && code >= p[(low << 1) + 1];
    }

    /**
     * Returns the cached range array for a character type, scanning all codes
     * {@code 0..0xffff} with {@link #isCodeCType(int, int)} on first use.
     *
     * @param ctype character type constant
     * @param sbOut always set to {@code 0x100}
     * @return range array in the {@code [numberOfRanges, from1, to1, ...]}
     *         layout, or {@code null} if {@code ctype} is beyond the cache table
     */
    public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
        sbOut.value = 0x100; // use bitset for codes smaller than 256
        int[] range = null;

        if (ctype < codeRanges.length) {
            range = codeRanges[ctype];

            if (range == null) {
                // format: [numberOfRanges, rangeStart, rangeEnd, ...]
                range = new int[16];
                int rangeCount = 0;
                int lastCode = -2;

                for (int code = 0; code <= 0xffff; code++) {
                    if (isCodeCType(code, ctype)) {
                        if (lastCode < code - 1) {
                            // not adjacent to the previous hit: open a new range
                            if (rangeCount * 2 + 2 >= range.length) {
                                range = Arrays.copyOf(range, range.length * 2);
                            }
                            range[rangeCount * 2 + 1] = code;
                            rangeCount++;
                        }
                        // extend the end of the current range
                        range[rangeCount * 2] = lastCode = code;
                    }
                }

                if (rangeCount * 2 + 1 < range.length) {
                    range = Arrays.copyOf(range, rangeCount * 2 + 1);
                }

                range[0] = rangeCount;
                codeRanges[ctype] = range;
            }
        }

        return range;
    }

    // CodeRange.isInCodeRange
    /** Same as {@link #isInCodeRange(int[], int)}, with the range data starting at {@code p[offset]}. */
    public static boolean isInCodeRange(int[] p, int offset, int code) {
        int low = 0;
        int n = p[offset];
        int high = n;

        while (low < high) {
            int x = (low + high) >> 1;
            if (code > p[(x << 1) + 2 + offset]) {
                low = x + 1;
            } else {
                high = x;
            }
        }
        return low < n && code >= p[(low << 1) + 1 + offset];
    }

    /**
     * Tests whether {@code code} belongs to the character type {@code ctype}.
     *
     * @throws RuntimeException if {@code ctype} is not a known character type
     * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">Oniguruma RE documentation</a>
     */
    public static boolean isCodeCType(int code, int ctype) {
        int type;
        switch (ctype) {
            case CharacterType.NEWLINE:
                return code == EncodingHelper.NEW_LINE;
            case CharacterType.ALPHA:
                return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;
            case CharacterType.BLANK:
                return code == 0x09 || Character.getType(code) == Character.SPACE_SEPARATOR;
            case CharacterType.CNTRL:
                type = Character.getType(code);
                return ((1 << type) & CharacterType.CNTRL_MASK) != 0 || type == Character.UNASSIGNED;
            case CharacterType.DIGIT:
                return EncodingHelper.isDigit(code);
            case CharacterType.GRAPH:
                switch (code) {
                    case 0x09:
                    case 0x0a:
                    case 0x0b:
                    case 0x0c:
                    case 0x0d:
                        return false;
                    default:
                        type = Character.getType(code);
                        return ((1 << type) & CharacterType.GRAPH_MASK) == 0 && type != Character.UNASSIGNED;
                }
            case CharacterType.LOWER:
                return Character.isLowerCase(code);
            case CharacterType.PRINT:
                type = Character.getType(code);
                return ((1 << type) & CharacterType.PRINT_MASK) == 0 && type != Character.UNASSIGNED;
            case CharacterType.PUNCT:
                return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;
            case CharacterType.SPACE:
                // ECMA 7.2 and 7.3
                switch (code) {
                    case 0x09:
                    case 0x0a:
                    case 0x0b:
                    case 0x0c:
                    case 0x0d:
                        return true;
                    default:
                        // true if Unicode separator or BOM
                        return ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0 || code == 0xfeff;
                }
            case CharacterType.UPPER:
                return Character.isUpperCase(code);
            case CharacterType.XDIGIT:
                return EncodingHelper.isXDigit(code);
            case CharacterType.WORD:
                return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;
            case CharacterType.ALNUM:
                return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;
            case CharacterType.ASCII:
                return code < 0x80;
            default:
                throw new RuntimeException("illegal character type: " + ctype);
        }
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,1274 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline; +import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar; +import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.PosixBracket; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException; + +class Lexer extends ScannerSupport { + protected final ScanEnvironment env; + protected final Syntax syntax; // fast access to syntax + protected final Token token = new Token(); // current token + + protected Lexer(ScanEnvironment env, char[] chars, int p, int end) { + super(chars, p, end); + this.env = env; + this.syntax = env.syntax; + } + + /** + * @return 0: normal {n,m}, 2: fixed {n} + * !introduce returnCode here + */ + private int fetchRangeQuantifier() { + mark(); + boolean synAllow = syntax.allowInvalidInterval(); + + if (!left()) { + if (synAllow) { + return 1; /* "....{" : OK! 
*/ + } else { + newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); + } + } + + if (!synAllow) { + c = peek(); + if (c == ')' || c == '(' || c == '|') { + newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); + } + } + + int low = scanUnsignedNumber(); + if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + + boolean nonLow = false; + if (p == _p) { /* can't read low */ + if (syntax.allowIntervalLowAbbrev()) { + low = 0; + nonLow = true; + } else { + return invalidRangeQuantifier(synAllow); + } + } + + if (!left()) return invalidRangeQuantifier(synAllow); + + fetch(); + int up; + int ret = 0; + if (c == ',') { + int prev = p; // ??? last + up = scanUnsignedNumber(); + if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + + if (p == prev) { + if (nonLow) return invalidRangeQuantifier(synAllow); + up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } else { + if (nonLow) return invalidRangeQuantifier(synAllow); + unfetch(); + up = low; /* {n} : exact n times */ + ret = 2; /* fixed */ + } + + if (!left()) return invalidRangeQuantifier(synAllow); + fetch(); + + if (syntax.opEscBraceInterval()) { + if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow); + fetch(); + } + + if (c != '}') return invalidRangeQuantifier(synAllow); + + if (!isRepeatInfinite(up) && low > up) { + newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); + } + + token.type = TokenType.INTERVAL; + token.setRepeatLower(low); + token.setRepeatUpper(up); + + return ret; /* 0: normal {n,m}, 2: fixed {n} */ + } + + private int invalidRangeQuantifier(boolean synAllow) { + if (synAllow) { + restore(); + return 1; + } else { + newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN); + return 0; // not reached + } + } + + /* \M-, 
\C-, \c, or \... */ + private int fetchEscapedValue() { + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); + fetch(); + + switch(c) { + + case 'M': + if (syntax.op2EscCapitalMBarMeta()) { + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); + fetch(); + if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX); + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); + fetch(); + if (c == syntax.metaCharTable.esc) { + c = fetchEscapedValue(); + } + c = ((c & 0xff) | 0x80); + } else { + fetchEscapedValueBackSlash(); + } + break; + + case 'C': + if (syntax.op2EscCapitalCBarControl()) { + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); + fetch(); + if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX); + fetchEscapedValueControl(); + } else { + fetchEscapedValueBackSlash(); + } + break; + + case 'c': + if (syntax.opEscCControl()) { + fetchEscapedValueControl(); + } + /* fall through */ + + default: + fetchEscapedValueBackSlash(); + } // switch + + return c; // ??? 
+ } + + private void fetchEscapedValueBackSlash() { + c = env.convertBackslashValue(c); + } + + private void fetchEscapedValueControl() { + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); + fetch(); + if (c == '?') { + c = 0177; + } else { + if (c == syntax.metaCharTable.esc) { + c = fetchEscapedValue(); + } + c &= 0x9f; + } + } + + private int nameEndCodePoint(int start) { + switch(start) { + case '<': + return '>'; + case '\'': + return '\''; + default: + return 0; + } + } + + // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL + /* + \k<name+n>, \k<name-n> + \k<num+n>, \k<num-n> + \k<-num+n>, \k<-num-n> + */ + + // value implicit (rnameEnd) + private boolean fetchNameWithLevel(int startCode, Ptr rbackNum, Ptr rlevel) { + int src = p; + boolean existLevel = false; + int isNum = 0; + int sign = 1; + + int endCode = nameEndCodePoint(startCode); + int pnumHead = p; + int nameEnd = stop; + + String err = null; + if (!left()) { + newValueException(ERR_EMPTY_GROUP_NAME); + } else { + fetch(); + if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); + if (Character.isDigit(c)) { + isNum = 1; + } else if (c == '-') { + isNum = 2; + sign = -1; + pnumHead = p; + } else if (!EncodingHelper.isWord(c)) { + err = ERR_INVALID_GROUP_NAME; + } + } + + while (left()) { + nameEnd = p; + fetch(); + if (c == endCode || c == ')' || c == '+' || c == '-') { + if (isNum == 2) err = ERR_INVALID_GROUP_NAME; + break; + } + + if (isNum != 0) { + if (EncodingHelper.isDigit(c)) { + isNum = 1; + } else { + err = ERR_INVALID_GROUP_NAME; + // isNum = 0; + } + } else if (!EncodingHelper.isWord(c)) { + err = ERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + boolean isEndCode = false; + if (err == null && c != endCode) { + if (c == '+' || c == '-') { + int flag = c == '-' ? 
-1 : 1; + + fetch(); + if (!EncodingHelper.isDigit(c)) newValueException(ERR_INVALID_GROUP_NAME, src, stop); + unfetch(); + int level = scanUnsignedNumber(); + if (level < 0) newValueException(ERR_TOO_BIG_NUMBER); + rlevel.p = level * flag; + existLevel = true; + + fetch(); + isEndCode = c == endCode; + } + + if (!isEndCode) { + err = ERR_INVALID_GROUP_NAME; + nameEnd = stop; + } + } + + if (err == null) { + if (isNum != 0) { + mark(); + p = pnumHead; + int backNum = scanUnsignedNumber(); + restore(); + if (backNum < 0) { + newValueException(ERR_TOO_BIG_NUMBER); + } else if (backNum == 0) { + newValueException(ERR_INVALID_GROUP_NAME, src, stop); + } + rbackNum.p = backNum * sign; + } + value = nameEnd; + return existLevel; + } else { + newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); + return false; // not reached + } + } + + // USE_NAMED_GROUP + // ref: 0 -> define name (don't allow number name) + // 1 -> reference name (allow number name) + private int fetchNameForNamedGroup(int startCode, boolean ref) { + int src = p; + value = 0; + + int isNum = 0; + int sign = 1; + + int endCode = nameEndCodePoint(startCode); + int pnumHead = p; + int nameEnd = stop; + + String err = null; + if (!left()) { + newValueException(ERR_EMPTY_GROUP_NAME); + } else { + fetch(); + if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); + if (EncodingHelper.isDigit(c)) { + if (ref) { + isNum = 1; + } else { + err = ERR_INVALID_GROUP_NAME; + // isNum = 0; + } + } else if (c == '-') { + if (ref) { + isNum = 2; + sign = -1; + pnumHead = p; + } else { + err = ERR_INVALID_GROUP_NAME; + // isNum = 0; + } + } else if (!EncodingHelper.isWord(c)) { + err = ERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (err == null) { + while (left()) { + nameEnd = p; + fetch(); + if (c == endCode || c == ')') { + if (isNum == 2) err = ERR_INVALID_GROUP_NAME; + break; + } + + if (isNum != 0) { + if (EncodingHelper.isDigit(c)) { + isNum = 1; + } else { + if (!EncodingHelper.isWord(c)) { + err = 
ERR_INVALID_CHAR_IN_GROUP_NAME; + } else { + err = ERR_INVALID_GROUP_NAME; + } + // isNum = 0; + } + } else { + if (!EncodingHelper.isWord(c)) { + err = ERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != endCode) { + err = ERR_INVALID_GROUP_NAME; + nameEnd = stop; + } + + int backNum = 0; + if (isNum != 0) { + mark(); + p = pnumHead; + backNum = scanUnsignedNumber(); + restore(); + if (backNum < 0) { + newValueException(ERR_TOO_BIG_NUMBER); + } else if (backNum == 0) { + newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); + } + backNum *= sign; + } + value = nameEnd; + return backNum; + } else { + while (left()) { + nameEnd = p; + fetch(); + if (c == endCode || c == ')') break; + } + if (!left()) nameEnd = stop; + newValueException(err, src, nameEnd); + return 0; // not reached + } + } + + // #else USE_NAMED_GROUP + // make it return nameEnd! + private final int fetchNameForNoNamedGroup(int startCode, boolean ref) { + int src = p; + value = 0; + + int isNum = 0; + int sign = 1; + + int endCode = nameEndCodePoint(startCode); + int pnumHead = p; + int nameEnd = stop; + + String err = null; + if (!left()) { + newValueException(ERR_EMPTY_GROUP_NAME); + } else { + fetch(); + if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); + + if (EncodingHelper.isDigit(c)) { + isNum = 1; + } else if (c == '-') { + isNum = 2; + sign = -1; + pnumHead = p; + } else { + err = ERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while(left()) { + nameEnd = p; + + fetch(); + if (c == endCode || c == ')') break; + if (!EncodingHelper.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME; + } + + if (err == null && c != endCode) { + err = ERR_INVALID_GROUP_NAME; + nameEnd = stop; + } + + if (err == null) { + mark(); + p = pnumHead; + int backNum = scanUnsignedNumber(); + restore(); + if (backNum < 0) { + newValueException(ERR_TOO_BIG_NUMBER); + } else if (backNum == 0){ + newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); + } + backNum *= sign; + + value = nameEnd; + return 
backNum; + } else { + newValueException(err, src, nameEnd); + return 0; // not reached + } + } + + protected final int fetchName(int startCode, boolean ref) { + if (Config.USE_NAMED_GROUP) { + return fetchNameForNamedGroup(startCode, ref); + } else { + return fetchNameForNoNamedGroup(startCode, ref); + } + } + + private boolean strExistCheckWithEsc(int[]s, int n, int bad) { + int p = this.p; + int to = this.stop; + + boolean inEsc = false; + int i=0; + while(p < to) { + if (inEsc) { + inEsc = false; + p ++; + } else { + int x = chars[p]; + int q = p + 1; + if (x == s[0]) { + for (i=1; i<n && q < to; i++) { + x = chars[q]; + if (x != s[i]) break; + q++; + } + if (i >= n) return true; + p++; + } else { + x = chars[p]; + if (x == bad) return false; + else if (x == syntax.metaCharTable.esc) inEsc = true; + p = q; + } + } + } + return false; + } + + private static final int send[] = new int[]{':', ']'}; + + private void fetchTokenInCCFor_charType(boolean flag, int type) { + token.type = TokenType.CHAR_TYPE; + token.setPropCType(type); + token.setPropNot(flag); + } + + private void fetchTokenInCCFor_p() { + int c2 = peek(); // !!! 
migrate to peekIs + if (c2 == '{' && syntax.op2EscPBraceCharProperty()) { + inc(); + token.type = TokenType.CHAR_PROPERTY; + token.setPropNot(c == 'P'); + + if (syntax.op2EscPBraceCircumflexNot()) { + c2 = fetchTo(); + if (c2 == '^') { + token.setPropNot(!token.getPropNot()); + } else { + unfetch(); + } + } + } else { + syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c); + } + } + + private void fetchTokenInCCFor_x() { + if (!left()) return; + int last = p; + + if (peekIs('{') && syntax.opEscXBraceHex8()) { + inc(); + int num = scanUnsignedHexadecimalNumber(8); + if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); + if (left()) { + int c2 = peek(); + if (EncodingHelper.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); + } + + if (p > last + 1 && left() && peekIs('}')) { + inc(); + token.type = TokenType.CODE_POINT; + token.base = 16; + token.setCode(num); + } else { + /* can't read nothing or invalid format */ + p = last; + } + } else if (syntax.opEscXHex2()) { + int num = scanUnsignedHexadecimalNumber(2); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + token.type = TokenType.RAW_BYTE; + token.base = 16; + token.setC(num); + } + } + + private void fetchTokenInCCFor_u() { + if (!left()) return; + int last = p; + + if (syntax.op2EscUHex4()) { + int num = scanUnsignedHexadecimalNumber(4); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + token.type = TokenType.CODE_POINT; + token.base = 16; + token.setCode(num); + } + } + + private void fetchTokenInCCFor_digit() { + if (syntax.opEscOctal3()) { + unfetch(); + int last = p; + int num = scanUnsignedOctalNumber(3); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + token.type = TokenType.RAW_BYTE; + token.base = 8; + token.setC(num); + } + } + + private void fetchTokenInCCFor_posixBracket() { + if (syntax.opPosixBracket() && peekIs(':')) { + token.backP = p; /* point at '[' is readed */ + inc(); + if (strExistCheckWithEsc(send, send.length, ']')) { + token.type = TokenType.POSIX_BRACKET_OPEN; + } else { + unfetch(); + // remove duplication, goto cc_in_cc; + if (syntax.op2CClassSetOp()) { + token.type = TokenType.CC_CC_OPEN; + } else { + env.ccEscWarn("["); + } + } + } else { // cc_in_cc: + if (syntax.op2CClassSetOp()) { + token.type = TokenType.CC_CC_OPEN; + } else { + env.ccEscWarn("["); + } + } + } + + private void fetchTokenInCCFor_and() { + if (syntax.op2CClassSetOp() && left() && peekIs('&')) { + inc(); + token.type = TokenType.CC_AND; + } + } + + protected final TokenType fetchTokenInCC() { + if (!left()) { + token.type = TokenType.EOT; + return token.type; + } + + fetch(); + token.type = TokenType.CHAR; + token.base = 0; + token.setC(c); + token.escaped = false; + + if (c == ']') { + token.type = TokenType.CC_CLOSE; + } else if (c == '-') { + token.type = TokenType.CC_RANGE; + } else if (c == syntax.metaCharTable.esc) { + if (!syntax.backSlashEscapeInCC()) return token.type; + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); + fetch(); + token.escaped = true; + token.setC(c); + + switch (c) { + case 'w': + fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); + break; + case 'W': + fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); + break; + case 'd': + fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); + break; + case 'D': + fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); + break; + case 's': + fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? 
CharacterType.S : CharacterType.SPACE); + break; + case 'S': + fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); + break; + case 'h': + if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); + break; + case 'H': + if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); + break; + case 'p': + case 'P': + fetchTokenInCCFor_p(); + break; + case 'x': + fetchTokenInCCFor_x(); + break; + case 'u': + fetchTokenInCCFor_u(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + fetchTokenInCCFor_digit(); + break; + + default: + unfetch(); + int num = fetchEscapedValue(); + if (token.getC() != num) { + token.setCode(num); + token.type = TokenType.CODE_POINT; + } + break; + } // switch + + } else if (c == '[') { + fetchTokenInCCFor_posixBracket(); + } else if (c == '&') { + fetchTokenInCCFor_and(); + } + return token.type; + } + + protected final int backrefRelToAbs(int relNo) { + return env.numMem + 1 + relNo; + } + + private void fetchTokenFor_repeat(int lower, int upper) { + token.type = TokenType.OP_REPEAT; + token.setRepeatLower(lower); + token.setRepeatUpper(upper); + greedyCheck(); + } + + private void fetchTokenFor_openBrace() { + switch (fetchRangeQuantifier()) { + case 0: + greedyCheck(); + break; + case 2: + if (syntax.fixedIntervalIsGreedyOnly()) { + possessiveCheck(); + } else { + greedyCheck(); + } + break; + default: /* 1 : normal char */ + } // inner switch + } + + private void fetchTokenFor_anchor(int subType) { + token.type = TokenType.ANCHOR; + token.setAnchor(subType); + } + + private void fetchTokenFor_xBrace() { + if (!left()) return; + + int last = p; + if (peekIs('{') && syntax.opEscXBraceHex8()) { + inc(); + int num = scanUnsignedHexadecimalNumber(8); + if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); + if (left()) { + if (EncodingHelper.isXDigit(peek())) 
newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); + } + + if (p > last + 1 && left() && peekIs('}')) { + inc(); + token.type = TokenType.CODE_POINT; + token.setCode(num); + } else { + /* can't read nothing or invalid format */ + p = last; + } + } else if (syntax.opEscXHex2()) { + int num = scanUnsignedHexadecimalNumber(2); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + token.type = TokenType.RAW_BYTE; + token.base = 16; + token.setC(num); + } + } + + private void fetchTokenFor_uHex() { + if (!left()) return; + int last = p; + + if (syntax.op2EscUHex4()) { + int num = scanUnsignedHexadecimalNumber(4); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + token.type = TokenType.CODE_POINT; + token.base = 16; + token.setCode(num); + } + } + + private void fetchTokenFor_digit() { + unfetch(); + int last = p; + int num = scanUnsignedNumber(); + if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref + } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */ + if (syntax.strictCheckBackref()) { + if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF); + } + token.type = TokenType.BACKREF; + token.setBackrefNum(1); + token.setBackrefRef1(num); + token.setBackrefByName(false); + if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false); + return; + } + + if (c == '8' || c == '9') { /* normal char */ // skip_backref: + p = last; + inc(); + return; + } + p = last; + + fetchTokenFor_zero(); /* fall through */ + } + + private void fetchTokenFor_zero() { + if (syntax.opEscOctal3()) { + int last = p; + int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3); + if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); + if (p == last) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + token.type = TokenType.RAW_BYTE; + token.base = 8; + token.setC(num); + } else if (c != '0') { + inc(); + } + } + + private void fetchTokenFor_namedBackref() { + if (syntax.op2EscKNamedBackref()) { + if (left()) { + fetch(); + if (c =='<' || c == '\'') { + int last = p; + int backNum; + if (Config.USE_BACKREF_WITH_LEVEL) { + Ptr rbackNum = new Ptr(); + Ptr rlevel = new Ptr(); + token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel)); + token.setBackrefLevel(rlevel.p); + backNum = rbackNum.p; + } else { + backNum = fetchName(c, true); + } // USE_BACKREF_AT_LEVEL + int nameEnd = value; // set by fetchNameWithLevel/fetchName + + if (backNum != 0) { + if (backNum < 0) { + backNum = backrefRelToAbs(backNum); + if (backNum <= 0) newValueException(ERR_INVALID_BACKREF); + } + + if (syntax.strictCheckBackref() && (backNum > env.numMem || env.memNodes == null)) { + newValueException(ERR_INVALID_BACKREF); + } + token.type = TokenType.BACKREF; + token.setBackrefByName(false); + token.setBackrefNum(1); + token.setBackrefRef1(backNum); + } else { + NameEntry e = env.reg.nameToGroupNumbers(chars, last, nameEnd); + if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, last, nameEnd); + + if (syntax.strictCheckBackref()) { + if (e.backNum == 1) { + if (e.backRef1 > env.numMem || + env.memNodes == null || + env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF); + } else { + for (int i=0; i<e.backNum; i++) { + if (e.backRefs[i] > env.numMem || + env.memNodes == null || + env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF); + } + } + } + + token.type = TokenType.BACKREF; + token.setBackrefByName(true); + + if (e.backNum == 1) { + token.setBackrefNum(1); + token.setBackrefRef1(e.backRef1); + } else { + token.setBackrefNum(e.backNum); + token.setBackrefRefs(e.backRefs); + } + } + } else { + unfetch(); + syntaxWarn(Warnings.INVALID_BACKREFERENCE); + } + } else { + 
syntaxWarn(Warnings.INVALID_BACKREFERENCE); + } + } + } + + private void fetchTokenFor_subexpCall() { + if (syntax.op2EscGSubexpCall()) { + if (left()) { + fetch(); + if (c == '<' || c == '\'') { + int last = p; + int gNum = fetchName(c, true); + int nameEnd = value; + token.type = TokenType.CALL; + token.setCallNameP(last); + token.setCallNameEnd(nameEnd); + token.setCallGNum(gNum); + } else { + unfetch(); + syntaxWarn(Warnings.INVALID_SUBEXP_CALL); + } + } else { + syntaxWarn(Warnings.INVALID_SUBEXP_CALL); + } + } + } + + private void fetchTokenFor_charProperty() { + if (peekIs('{') && syntax.op2EscPBraceCharProperty()) { + inc(); + token.type = TokenType.CHAR_PROPERTY; + token.setPropNot(c == 'P'); + + if (syntax.op2EscPBraceCircumflexNot()) { + fetch(); + if (c == '^') { + token.setPropNot(!token.getPropNot()); + } else { + unfetch(); + } + } + } else { + syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c); + } + } + + private void fetchTokenFor_metaChars() { + if (c == syntax.metaCharTable.anyChar) { + token.type = TokenType.ANYCHAR; + } else if (c == syntax.metaCharTable.anyTime) { + fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); + } else if (c == syntax.metaCharTable.zeroOrOneTime) { + fetchTokenFor_repeat(0, 1); + } else if (c == syntax.metaCharTable.oneOrMoreTime) { + fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); + } else if (c == syntax.metaCharTable.anyCharAnyTime) { + token.type = TokenType.ANYCHAR_ANYTIME; + // goto out + } + } + + protected final TokenType fetchToken() { + // mark(); // out + start: + while(true) { + if (!left()) { + token.type = TokenType.EOT; + return token.type; + } + + token.type = TokenType.STRING; + token.base = 0; + token.backP = p; + + fetch(); + + if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) + if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); + + token.backP = p; + fetch(); + + token.setC(c); + token.escaped = true; + switch(c) { + + case 
'*': + if (syntax.opEscAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); + break; + case '+': + if (syntax.opEscPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); + break; + case '?': + if (syntax.opEscQMarkZeroOne()) fetchTokenFor_repeat(0, 1); + break; + case '{': + if (syntax.opEscBraceInterval()) fetchTokenFor_openBrace(); + break; + case '|': + if (syntax.opEscVBarAlt()) token.type = TokenType.ALT; + break; + case '(': + if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; + break; + case ')': + if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; + break; + case 'w': + if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); + break; + case 'W': + if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); + break; + case 'b': + if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.WORD_BOUND); + break; + case 'B': + if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); + break; + case '<': + if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_BEGIN); + break; + case '>': + if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_END); + break; + case 's': + if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); + break; + case 'S': + if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); + break; + case 'd': + if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); + break; + case 'D': + if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? 
CharacterType.D : CharacterType.DIGIT); + break; + case 'h': + if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); + break; + case 'H': + if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); + break; + case 'A': + if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); + break; + case 'Z': + if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); + break; + case 'z': + if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); + break; + case 'G': + if (syntax.opEscCapitalGBeginAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); + break; + case '`': + if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); + break; + case '\'': + if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); + break; + case 'x': + fetchTokenFor_xBrace(); + break; + case 'u': + fetchTokenFor_uHex(); + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + fetchTokenFor_digit(); + break; + case '0': + fetchTokenFor_zero(); + break; + case 'k': + if (Config.USE_NAMED_GROUP) fetchTokenFor_namedBackref(); + break; + case 'g': + if (Config.USE_SUBEXP_CALL) fetchTokenFor_subexpCall(); + break; + case 'Q': + if (syntax.op2EscCapitalQQuote()) token.type = TokenType.QUOTE_OPEN; + break; + case 'p': + case 'P': + fetchTokenFor_charProperty(); + break; + + default: + unfetch(); + int num = fetchEscapedValue(); + + /* set_raw: */ + if (token.getC() != num) { + token.type = TokenType.CODE_POINT; + token.setCode(num); + } else { /* string */ + p = token.backP + 1; + } + break; + + } // switch (c) + + } else { + token.setC(c); + token.escaped = false; + + if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { + fetchTokenFor_metaChars(); + break; + } + + { + switch(c) { + case '.': + if (syntax.opDotAnyChar()) token.type 
= TokenType.ANYCHAR; + break; + case '*': + if (syntax.opAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); + break; + case '+': + if (syntax.opPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); + break; + case '?': + if (syntax.opQMarkZeroOne()) fetchTokenFor_repeat(0, 1); + break; + case '{': + if (syntax.opBraceInterval()) fetchTokenFor_openBrace(); + break; + case '|': + if (syntax.opVBarAlt()) token.type = TokenType.ALT; + break; + + case '(': + if (peekIs('?') && syntax.op2QMarkGroupEffect()) { + inc(); + if (peekIs('#')) { + fetch(); + while (true) { + if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); + fetch(); + if (c == syntax.metaCharTable.esc) { + if (left()) fetch(); + } else { + if (c == ')') break; + } + } + continue start; // goto start + } + unfetch(); + } + + if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; + break; + case ')': + if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; + break; + case '^': + if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); + break; + case '$': + if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE); + break; + case '[': + if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN; + break; + case ']': + //if (*src > env->pattern) /* /].../ is allowed. 
*/ + //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); + break; + case '#': + if (Option.isExtend(env.option)) { + while (left()) { + fetch(); + if (EncodingHelper.isNewLine(c)) break; + } + continue start; // goto start + } + break; + + case ' ': + case '\t': + case '\n': + case '\r': + case '\f': + if (Option.isExtend(env.option)) continue start; // goto start + break; + + default: // string + break; + + } // switch + } + } + + break; + } // while + return token.type; + } + + private void greedyCheck() { + if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) { + + fetch(); + + token.setRepeatGreedy(false); + token.setRepeatPossessive(false); + } else { + possessiveCheck(); + } + } + + private void possessiveCheck() { + if (left() && peekIs('+') && + (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL || + syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) { + + fetch(); + + token.setRepeatGreedy(true); + token.setRepeatPossessive(true); + } else { + token.setRepeatGreedy(true); + token.setRepeatPossessive(false); + } + } + + protected final int fetchCharPropertyToCType() { + mark(); + + while (left()) { + int last = p; + fetch(); + if (c == '}') { + String name = new String(chars, _p, last - _p); + return PosixBracket.propertyNameToCType(name); + } else if (c == '(' || c == ')' || c == '{' || c == '|') { + String name = new String(chars, _p, last - _p); + throw new JOniException(ERR_INVALID_CHAR_PROPERTY_NAME.replaceAll("%n", name)); + } + } + newInternalException(ERR_PARSER_BUG); + return 0; // not reached + } + + protected final void syntaxWarn(String message, char c) { + syntaxWarn(message.replace("<%n>", Character.toString(c))); + } + + protected final void syntaxWarn(String message) { + if (Config.USE_WARN) { + env.reg.warnings.warn(message + ": /" + new String(chars, getBegin(), getEnd()) + "/"); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,556 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; + +public abstract class Matcher extends IntHolder { + protected final Regex regex; + + protected final char[] chars; + protected final int str; + protected final int end; + + protected int msaStart; + protected int msaOptions; + protected final Region msaRegion; + protected int msaBestLen; + protected int msaBestS; + + protected int msaBegin; + protected int msaEnd; + + public Matcher(Regex regex, char[] chars) { + this(regex, chars, 0, chars.length); + } + + public Matcher(Regex regex, char[] chars, int p, int end) { + this.regex = regex; + + this.chars = chars; + this.str = p; + this.end = end; + + this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); + } + + // main matching method + protected abstract int matchAt(int range, int sstart, int sprev); + + protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum); + protected abstract void stateCheckBuffClear(); + + public final Region getRegion() { + return msaRegion; + } + + public final Region getEagerRegion() { + return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd); + } + + public final int getBegin() { + return msaBegin; + } + + public final int getEnd() { + return msaEnd; + } + + protected final void msaInit(int option, int start) { + msaOptions = option; + msaStart = start; + if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; + } + + public final int match(int at, int range, int option) { + msaInit(option, at); + + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + int offset = at = str; + stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction? 
+ } // USE_COMBINATION_EXPLOSION_CHECK + + int prev = EncodingHelper.prevCharHead(str, at); + + if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { + return matchAt(end /*range*/, at, prev); + } else { + return matchAt(range /*range*/, at, prev); + } + } + + int low, high; // these are the return values + private boolean forwardSearchRange(char[] chars, int str, int end, int s, int range, IntHolder lowPrev) { + int pprev = -1; + int p = s; + + if (Config.DEBUG_SEARCH) { + Config.log.println("forward_search_range: "+ + "str: " + str + + ", end: " + end + + ", s: " + s + + ", range: " + range); + } + + if (regex.dMin > 0) { + p += regex.dMin; + } + + retry:while (true) { + p = regex.searchAlgorithm.search(regex, chars, p, end, range); + + if (p != -1 && p < range) { + if (p - regex.dMin < s) { + // retry_gate: + pprev = p; + p++; + continue retry; + } + + if (regex.subAnchor != 0) { + switch (regex.subAnchor) { + case AnchorType.BEGIN_LINE: + if (p != str) { + int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); + if (!EncodingHelper.isNewLine(chars, prev, end)) { + // goto retry_gate; + pprev = p; + p++; + continue retry; + } + } + break; + + case AnchorType.END_LINE: + if (p == end) { + if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { + int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); + if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) { + // goto retry_gate; + pprev = p; + p++; + continue retry; + } + } + } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { + //if () break; + // goto retry_gate; + pprev = p; + p++; + continue retry; + } + break; + } // switch + } + + if (regex.dMax == 0) { + low = p; + if (lowPrev != null) { // ??? // remove null checks + if (low > s) { + lowPrev.value = EncodingHelper.prevCharHead(s, p); + } else { + lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? 
pprev : str, p); + } + } + } else { + if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { + low = p - regex.dMax; + + if (low > s) { + low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); + if (lowPrev != null && lowPrev.value == -1) { + lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); + } + } else { + if (lowPrev != null) { + lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low); + } + } + } + } + /* no needs to adjust *high, *high is used as range check only */ + high = p - regex.dMin; + + if (Config.DEBUG_SEARCH) { + Config.log.println("forward_search_range success: "+ + "low: " + (low - str) + + ", high: " + (high - str) + + ", dmin: " + regex.dMin + + ", dmax: " + regex.dMax); + } + + return true; /* success */ + } + + return false; /* fail */ + } //while + } + + // low, high + private boolean backwardSearchRange(char[] chars, int str, int end, int s, int range, int adjrange) { + range += regex.dMin; + int p = s; + + retry:while (true) { + p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range); + + if (p != -1) { + if (regex.subAnchor != 0) { + switch (regex.subAnchor) { + case AnchorType.BEGIN_LINE: + if (p != str) { + int prev = EncodingHelper.prevCharHead(str, p); + if (!EncodingHelper.isNewLine(chars, prev, end)) { + p = prev; + continue retry; + } + } + break; + + case AnchorType.END_LINE: + if (p == end) { + if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { + int prev = EncodingHelper.prevCharHead(adjrange, p); + if (prev == -1) return false; + if (EncodingHelper.isNewLine(chars, prev, end)) { + p = prev; + continue retry; + } + } + } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { + p = EncodingHelper.prevCharHead(adjrange, p); + if (p == -1) return false; + continue retry; + } + break; + } // switch + } + + /* no needs to adjust *high, *high is used as range check 
only */ + if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { + low = p - regex.dMax; + high = p - regex.dMin; + } + + if (Config.DEBUG_SEARCH) { + Config.log.println("backward_search_range: "+ + "low: " + (low - str) + + ", high: " + (high - str)); + } + + return true; + } + + if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail."); + return false; + } // while + } + + // MATCH_AND_RETURN_CHECK + private boolean matchCheck(int upperRange, int s, int prev) { + if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { + if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { + //range = upperRange; + if (matchAt(upperRange, s, prev) != -1) { + if (!isFindLongest(regex.options)) return true; + } + } else { + //range = upperRange; + if (matchAt(upperRange, s, prev) != -1) return true; + } + } else { + if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { + if (matchAt(end, s, prev) != -1) { + //range = upperRange; + if (!isFindLongest(regex.options)) return true; + } + } else { + //range = upperRange; + if (matchAt(end, s, prev) != -1) return true; + } + } + return false; + } + + public final int search(int start, int range, int option) { + int s, prev; + int origStart = start; + int origRange = range; + + if (Config.DEBUG_SEARCH) { + Config.log.println("onig_search (entry point): "+ + "str: " + str + + ", end: " + (end - str) + + ", start: " + (start - str) + + ", range " + (range - str)); + } + + if (start > end || start < str) return -1; + + /* anchor optimize: resume search range */ + if (regex.anchor != 0 && str < end) { + int minSemiEnd, maxSemiEnd; + + if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { + /* search start-position only */ + // !begin_position:! 
+ if (range > start) { + range = start + 1; + } else { + range = start; + } + } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { + /* search str-position only */ + if (range > start) { + if (start != str) return -1; // mismatch_no_msa; + range = str + 1; + } else { + if (range <= str) { + start = str; + range = str; + } else { + return -1; // mismatch_no_msa; + } + } + } else if ((regex.anchor & AnchorType.END_BUF) != 0) { + minSemiEnd = maxSemiEnd = end; + // !end_buf:! + if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; + } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { + int preEnd = EncodingHelper.stepBack(str, end, 1); + maxSemiEnd = end; + if (EncodingHelper.isNewLine(chars, preEnd, end)) { + minSemiEnd = preEnd; + if (Config.USE_CRNL_AS_LINE_TERMINATOR) { + preEnd = EncodingHelper.stepBack(str, preEnd, 1); + if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) { + minSemiEnd = preEnd; + } + } + if (minSemiEnd > str && start <= minSemiEnd) { + // !goto end_buf;! + if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; + } + } else { + minSemiEnd = end; + // !goto end_buf;! + if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; + } + } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { + // goto !begin_position;! + if (range > start) { + range = start + 1; + } else { + range = start; + } + } + + } else if (str == end) { /* empty string */ + // empty address ? 
+ if (Config.DEBUG_SEARCH) { + Config.log.println("onig_search: empty string."); + } + + if (regex.thresholdLength == 0) { + s = start = str; + prev = -1; + msaInit(option, start); + + if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear(); + + if (matchCheck(end, s, prev)) return match(s); + return mismatch(); + } + return -1; // goto mismatch_no_msa; + } + + if (Config.DEBUG_SEARCH) { + Config.log.println("onig_search(apply anchor): " + + "end: " + (end - str) + + ", start " + (start - str) + + ", range " + (range - str)); + } + + msaInit(option, origStart); + if (Config.USE_COMBINATION_EXPLOSION_CHECK) { + int offset = Math.min(start, range) - str; + stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); + } + + s = start; + if (range > start) { /* forward search */ + if (s > str) { + prev = EncodingHelper.prevCharHead(str, s); + } else { + prev = 0; // -1 + } + + if (regex.searchAlgorithm != SearchAlgorithm.NONE) { + int schRange = range; + if (regex.dMax != 0) { + if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { + schRange = end; + } else { + schRange += regex.dMax; + if (schRange > end) schRange = end; + } + } + if ((end - start) < regex.thresholdLength) return mismatch(); + + if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { + do { + if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev + if (s < low) { + s = low; + prev = value; + } + while (s <= high) { + if (matchCheck(origRange, s, prev)) return match(s); // ??? + prev = s; + s++; + } + } while (s < range); + return mismatch(); + + } else { /* check only. 
*/ + if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch(); + + if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { + do { + if (matchCheck(origRange, s, prev)) return match(s); + prev = s; + s++; + } while (s < range); + return mismatch(); + } + + } + } + + do { + if (matchCheck(origRange, s, prev)) return match(s); + prev = s; + s++; + } while (s < range); + + if (s == range) { /* because empty match with /$/. */ + if (matchCheck(origRange, s, prev)) return match(s); + } + } else { /* backward search */ + if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { + if (origStart < end) { + origStart++; // /* is upper range */ + } + } + + if (regex.searchAlgorithm != SearchAlgorithm.NONE) { + int adjrange; + if (range < end) { + adjrange = range; + } else { + adjrange = end; + } + if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { + do { + int schStart = s + regex.dMax; + if (schStart > end) schStart = end; + if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high + if (s > high) s = high; + while (s != -1 && s >= low) { + prev = EncodingHelper.prevCharHead(str, s); + if (matchCheck(origStart, s, prev)) return match(s); + s = prev; + } + } while (s >= range); + return mismatch(); + } else { /* check only. 
*/ + if ((end - range) < regex.thresholdLength) return mismatch(); + + int schStart = s; + if (regex.dMax != 0) { + if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { + schStart = end; + } else { + schStart += regex.dMax; + if (schStart > end) { + schStart = end; + } + } + } + if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); + } + } + + do { + prev = EncodingHelper.prevCharHead(str, s); + if (matchCheck(origStart, s, prev)) return match(s); + s = prev; + } while (s >= range); + + } + return mismatch(); + } + + private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) { + if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa; + + if (range > start) { + if ((minSemiEnd - start) > regex.anchorDmax) { + start = minSemiEnd - regex.anchorDmax; + if (start >= end) { + /* match with empty at end */ + start = EncodingHelper.prevCharHead(str, end); + } + } + if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { + range = maxSemiEnd - regex.anchorDmin + 1; + } + if (start >= range) return true; // mismatch_no_msa; + } else { + if ((minSemiEnd - range) > regex.anchorDmax) { + range = minSemiEnd - regex.anchorDmax; + } + if ((maxSemiEnd - start) < regex.anchorDmin) { + start = maxSemiEnd - regex.anchorDmin; + } + if (range > start) return true; // mismatch_no_msa; + } + return false; + } + + private int match(int s) { + return s - str; // sstart ??? + } + + private int mismatch() { + if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { + if (msaBestLen >= 0) { + int s = msaBestS; + return match(s); + } + } + // falls through finish: + return -1; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,31 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public abstract class MatcherFactory { + public abstract Matcher create(Regex regex, char[] chars, int p, int end); + + static final MatcherFactory DEFAULT = new MatcherFactory() { + @Override + public Matcher create(Regex regex, char[] chars, int p, int end) { + return new ByteCodeMachine(regex, chars, p, end); + } + }; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/MinMaxLen.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,139 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +final class MinMaxLen { + int min; /* min byte length */ + int max; /* max byte length */ + + MinMaxLen() { + } + + MinMaxLen(int min, int max) { + this.min = min; + this.max = max; + } + + /* 1000 / (min-max-dist + 1) */ + private static final short distValues[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int distanceValue() { + if (max == INFINITE_DISTANCE) return 0; + int d = max - min; + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return d < distValues.length ? distValues[d] : 1; + } + + int compareDistanceValue(MinMaxLen other, int v1, int v2) { + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distanceValue(); + v2 *= other.distanceValue(); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (other.min < min) return 1; + if (other.min > min) return -1; + return 0; + } + + boolean equal(MinMaxLen other) { + return min == other.min && max == other.max; + } + + void set(int min, int max) { + this.min = min; + this.max = max; + } + + void clear() { + min = max = 0; + } + + void copy(MinMaxLen other) { + min = other.min; + max = other.max; + } + + void add(MinMaxLen other) { + min = distanceAdd(min, other.min); + max = distanceAdd(max, other.max); + } + + void addLength(int len) { + min = distanceAdd(min, len); + max = distanceAdd(max, len); + } + + void altMerge(MinMaxLen other) { + if (min > other.min) min = other.min; + if (max < other.max) max = other.max; + } + + static final int INFINITE_DISTANCE = 0x7FFFFFFF; + static int distanceAdd(int d1, int d2) { + if (d1 == INFINITE_DISTANCE 
|| d2 == INFINITE_DISTANCE) { + return INFINITE_DISTANCE; + } else { + if (d1 <= INFINITE_DISTANCE - d2) return d1 + d2; + else return INFINITE_DISTANCE; + } + } + + static int distanceMultiply(int d, int m) { + if (m == 0) return 0; + if (d < INFINITE_DISTANCE / m) { + return d * m; + } else { + return INFINITE_DISTANCE; + } + } + + static String distanceRangeToString(int a, int b) { + String s = ""; + if (a == INFINITE_DISTANCE) { + s += "inf"; + } else { + s += "(" + a + ")"; + } + + s += "-"; + + if (b == INFINITE_DISTANCE) { + s += "inf"; + } else { + s += "(" + b + ")"; + } + return s; + } +}
// src/jdk/nashorn/internal/runtime/regexp/joni/NameEntry.java
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * A name, given as the slice {@code [nameP, nameEnd)} of a character array,
 * together with the list of back-reference numbers added to it.
 *
 * <p>The first number is kept in {@link #backRef1}; from the second number on
 * all numbers (including the first) live in the {@link #backRefs} array, which
 * doubles in size whenever it is full.</p>
 */
public final class NameEntry {
    /** Initial capacity of {@link #backRefs}. */
    static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;

    public final char[] name;
    public final int nameP;
    public final int nameEnd;

    /** Number of back references added so far. */
    int backNum;
    /** The first back reference added. */
    int backRef1;
    /** All back references once there are at least two; unallocated before that. */
    int[] backRefs;

    /**
     * @param chars array holding the name (stored by reference, not copied)
     * @param p     index of the first character of the name
     * @param end   index one past the last character of the name
     */
    public NameEntry(char[] chars, int p, int end) {
        name = chars;
        nameP = p;
        nameEnd = end;
    }

    /**
     * @return a new array holding the back references in the order they were added
     */
    public int[] getBackRefs() {
        if (backNum == 0) {
            return new int[]{};
        }
        if (backNum == 1) {
            return new int[]{backRef1};
        }
        final int[] snapshot = new int[backNum];
        System.arraycopy(backRefs, 0, snapshot, 0, backNum);
        return snapshot;
    }

    /**
     * Appends a back-reference number.
     *
     * @param backRef the number to add
     */
    public void addBackref(int backRef) {
        final int count = ++backNum;

        if (count == 1) {
            backRef1 = backRef;
            return;
        }

        if (count == 2) {
            // Second entry: switch from the single field to the array.
            backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
            backRefs[0] = backRef1;
            backRefs[1] = backRef;
            return;
        }

        if (count > backRefs.length) {
            final int[] grown = new int[backRefs.length << 1];
            System.arraycopy(backRefs, 0, grown, 0, backRefs.length);
            backRefs = grown;
        }
        backRefs[count - 1] = backRef;
    }

    /**
     * @return the name followed by a space and then {@code "-"} (no back
     *         references), the single back reference, or the comma-separated list
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(new String(name, nameP, nameEnd - nameP)).append(" ");

        switch (backNum) {
        case 0:
            text.append("-");
            break;
        case 1:
            text.append(backRef1);
            break;
        default:
            String separator = "";
            for (int i = 0; i < backNum; i++) {
                text.append(separator).append(backRefs[i]);
                separator = ", ";
            }
        }
        return text.toString();
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/NativeMachine.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,27 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public abstract class NativeMachine extends Matcher { + + protected NativeMachine(Regex regex, char[] chars, int p, int end) { + super(regex, chars, p, end); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/NodeOptInfo.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,125 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public final class NodeOptInfo { + final MinMaxLen length = new MinMaxLen(); + final OptAnchorInfo anchor = new OptAnchorInfo(); + final OptExactInfo exb = new OptExactInfo(); /* boundary */ + final OptExactInfo exm = new OptExactInfo(); /* middle */ + final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) 
*/ + final OptMapInfo map = new OptMapInfo(); /* boundary */ + + public void setBoundNode(MinMaxLen mmd) { + exb.mmd.copy(mmd); + expr.mmd.copy(mmd); + map.mmd.copy(mmd); + } + + public void clear() { + length.clear(); + anchor.clear(); + exb.clear(); + exm.clear(); + expr.clear(); + map.clear(); + } + + public void copy(NodeOptInfo other) { + length.copy(other.length); + anchor.copy(other.anchor); + exb.copy(other.exb); + exm.copy(other.exm); + expr.copy(other.expr); + map.copy(other.map); + } + + public void concatLeftNode(NodeOptInfo other) { + OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ? + tanchor.concat(anchor, other.anchor, length.max, other.length.max); + anchor.copy(tanchor); + + if (other.exb.length > 0 && length.max == 0) { + tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max); + other.exb.anchor.copy(tanchor); + } + + if (other.map.value > 0 && length.max == 0) { + if (other.map.mmd.max == 0) { + other.map.anchor.leftAnchor |= anchor.leftAnchor; + } + } + + boolean exbReach = exb.reachEnd; + boolean exmReach = exm.reachEnd; + + if (other.length.max != 0) { + exb.reachEnd = exm.reachEnd = false; + } + + if (other.exb.length > 0) { + if (exbReach) { + exb.concat(other.exb); + other.exb.clear(); + } else if (exmReach) { + exm.concat(other.exb); + other.exb.clear(); + } + } + + exm.select(other.exb); + exm.select(other.exm); + + if (expr.length > 0) { + if (other.length.max > 0) { + // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison) + int otherLengthMax = other.length.max; + if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1; + if (expr.length > otherLengthMax) expr.length = otherLengthMax; + if (expr.mmd.max == 0) { + exb.select(expr); + } else { + exm.select(expr); + } + } + } else if (other.expr.length > 0) { + expr.copy(other.expr); + } + + map.select(other.map); + length.add(other.length); + } + + public void altMerge(NodeOptInfo other, 
OptEnvironment env) { + anchor.altMerge(other.anchor); + exb.altMerge(other.exb, env); + exm.altMerge(other.exm, env); + expr.altMerge(other.expr, env); + map.altMerge(other.map); + length.altMerge(other.length); + } + + public void setBound(MinMaxLen mmd) { + exb.mmd.copy(mmd); + expr.mmd.copy(mmd); + map.mmd.copy(mmd); + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/OptAnchorInfo.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,92 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; + +final class OptAnchorInfo implements AnchorType { + int leftAnchor; + int rightAnchor; + + void clear() { + leftAnchor = rightAnchor = 0; + } + + void copy(OptAnchorInfo other) { + leftAnchor = other.leftAnchor; + rightAnchor = other.rightAnchor; + } + + void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) { + leftAnchor = left.leftAnchor; + if (leftLength == 0) leftAnchor |= right.leftAnchor; + + rightAnchor = right.rightAnchor; + if (rightLength == 0) rightAnchor |= left.rightAnchor; + } + + boolean isSet(int anchor) { + if ((leftAnchor & anchor) != 0) return true; + return (rightAnchor & anchor) != 0; + } + + void add(int anchor) { + if (isLeftAnchor(anchor)) { + leftAnchor |= anchor; + } else { + rightAnchor |= anchor; + } + } + + void remove(int anchor) { + if (isLeftAnchor(anchor)) { + leftAnchor &= ~anchor; + } else { + rightAnchor &= ~anchor; + } + } + + void altMerge(OptAnchorInfo other) { + leftAnchor &= other.leftAnchor; + rightAnchor &= other.rightAnchor; + } + + static boolean isLeftAnchor(int anchor) { // make a mask for it ? 
+ return !(anchor == END_BUF || anchor == SEMI_END_BUF || + anchor == END_LINE || anchor == PREC_READ || + anchor == PREC_READ_NOT); + } + + static String anchorToString(int anchor) { + StringBuffer s = new StringBuffer("["); + + if ((anchor & AnchorType.BEGIN_BUF) !=0 ) s.append("begin-buf "); + if ((anchor & AnchorType.BEGIN_LINE) !=0 ) s.append("begin-line "); + if ((anchor & AnchorType.BEGIN_POSITION) !=0 ) s.append("begin-pos "); + if ((anchor & AnchorType.END_BUF) !=0 ) s.append("end-buf "); + if ((anchor & AnchorType.SEMI_END_BUF) !=0 ) s.append("semi-end-buf "); + if ((anchor & AnchorType.END_LINE) !=0 ) s.append("end-line "); + if ((anchor & AnchorType.ANYCHAR_STAR) !=0 ) s.append("anychar-star "); + if ((anchor & AnchorType.ANYCHAR_STAR_ML) !=0 ) s.append("anychar-star-pl "); + s.append("]"); + + return s.toString(); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/OptEnvironment.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,35 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni; + +// remove this one in future and pass mmd directly +final class OptEnvironment { + final MinMaxLen mmd = new MinMaxLen(); + int options; + int caseFoldFlag; + ScanEnvironment scanEnv; + + void copy(OptEnvironment other) { + mmd.copy(other.mmd); + options = other.options; + caseFoldFlag = other.caseFoldFlag; + scanEnv = other.scanEnv; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/OptExactInfo.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,153 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +final class OptExactInfo { + static final int OPT_EXACT_MAXLEN = 24; + + final MinMaxLen mmd = new MinMaxLen(); + final OptAnchorInfo anchor = new OptAnchorInfo(); + + boolean reachEnd; + boolean ignoreCase; + + final char chars[] = new char[OPT_EXACT_MAXLEN]; + int length; + + boolean isFull() { + return length >= OPT_EXACT_MAXLEN; + } + + void clear() { + mmd.clear(); + anchor.clear(); + + reachEnd = false; + ignoreCase = false; + length = 0; + } + + void copy(OptExactInfo other) { + mmd.copy(other.mmd); + anchor.copy(other.anchor); + reachEnd = other.reachEnd; + ignoreCase = other.ignoreCase; + length = other.length; + + System.arraycopy(other.chars, 0, chars, 0, OPT_EXACT_MAXLEN); + } + + void concat(OptExactInfo other) { + if (!ignoreCase && other.ignoreCase) { + if (length >= other.length) return; /* avoid */ + ignoreCase = true; + } + + int p = 0; // add->s; + int end = p + other.length; + + int i; + for (i = length; p < end;) { + if (i + 1 > OPT_EXACT_MAXLEN) break; + chars[i++] = other.chars[p++]; + } + + length = i; + reachEnd = (p == end ? other.reachEnd : false); + + OptAnchorInfo tmp = new OptAnchorInfo(); + tmp.concat(anchor, other.anchor, 1, 1); + if (!other.reachEnd) tmp.rightAnchor = 0; + anchor.copy(tmp); + } + + // ?? 
raw is not used here + void concatStr(char[] lchars, int p, int end, boolean raw) { + int i; + for (i = length; p < end && i < OPT_EXACT_MAXLEN;) { + if (i + 1 > OPT_EXACT_MAXLEN) break; + chars[i++] = lchars[p++]; + } + + length = i; + } + + void altMerge(OptExactInfo other, OptEnvironment env) { + if (other.length == 0 || length == 0) { + clear(); + return; + } + + if (!mmd.equal(other.mmd)) { + clear(); + return; + } + + int i; + for (i = 0; i < length && i < other.length; i++) { + if (chars[i] != other.chars[i]) break; + } + + if (!other.reachEnd || i<other.length || i<length) reachEnd = false; + + length = i; + ignoreCase |= other.ignoreCase; + + anchor.altMerge(other.anchor); + + if (!reachEnd) anchor.rightAnchor = 0; + } + + + void select(OptExactInfo alt) { + int v1 = length; + int v2 = alt.length; + + if (v2 == 0) { + return; + } else if (v1 == 0) { + copy(alt); + return; + } else if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = OptMapInfo.positionValue(chars[0] & 0xff); + v1 = OptMapInfo.positionValue(alt.chars[0] & 0xff); + + if (length > 1) v1 += 5; + if (alt.length > 1) v2 += 5; + } + + if (!ignoreCase) v1 *= 2; + if (!alt.ignoreCase) v2 *= 2; + + if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); + } + + // comp_opt_exact_or_map_info + private static final int COMP_EM_BASE = 20; + int compare(OptMapInfo m) { + if (m.value <= 0) return -1; + + int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2); + int vm = COMP_EM_BASE * 5 * 2 / m.value; + + return mmd.compareDistanceValue(m.mmd, ve, vm); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/OptMapInfo.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,120 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +final class OptMapInfo { + + final MinMaxLen mmd = new MinMaxLen(); /* info position */ + final OptAnchorInfo anchor = new OptAnchorInfo(); + + int value; /* weighted value */ + final byte map[] = new byte[Config.CHAR_TABLE_SIZE]; + + void clear() { + mmd.clear(); + anchor.clear(); + value = 0; + for (int i=0; i<map.length; i++) map[i] = 0; + } + + void copy(OptMapInfo other) { + mmd.copy(other.mmd); + anchor.copy(other.anchor); + value = other.value; + //for(int i=0; i<map.length; i++) map[i] = other.map[i]; + System.arraycopy(other.map, 0, map, 0, other.map.length); + } + + void addChar(int c) { + int c_ = c & 0xff; + if (map[c_] == 0) { + map[c_] = 1; + value += positionValue(c_); + } + } + + void addCharAmb(char[] chars, int p, int end, int caseFoldFlag) { + addChar(chars[p]); + + caseFoldFlag &= ~Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR; + char[]items = EncodingHelper.caseFoldCodesByString(caseFoldFlag, chars[p]); + + for (int i=0; i<items.length; i++) { + addChar(items[i]); + } + } + + // select_opt_map_info + private static final int z = 1<<15; /* 32768: something big value */ + void select(OptMapInfo alt) { + if (alt.value == 0) return; + if (value == 0) { + copy(alt); + return; + } + + int v1 = z / value; + int v2 = z /alt.value; + + if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); + } + + // alt_merge_opt_map_info + void altMerge(OptMapInfo other) { + /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ + if (value == 0) return; + if (other.value == 0 || mmd.max < other.mmd.max) { + clear(); + return; + } + + mmd.altMerge(other.mmd); + + int val = 0; + for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { + if (other.map[i] != 0) map[i] = 1; + if (map[i] != 0) val += positionValue(i); + } + + value = val; + anchor.altMerge(other.anchor); + } + + static final short ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + // map_position_value + static int positionValue(int i) { + if (i < ByteValTable.length) { + return ByteValTable[i]; + } else { + return 4; /* Take it easy. */ + } + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Option.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,122 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public class Option { + + /* options */ + public static final int NONE = 0; + public static final int IGNORECASE = (1<<0); + public static final int EXTEND = (1<<1); + public static final int MULTILINE = (1<<2); + public static final int SINGLELINE = (1<<3); + public static final int FIND_LONGEST = (1<<4); + public static final int FIND_NOT_EMPTY = (1<<5); + public static final int NEGATE_SINGLELINE = (1<<6); + public static final int DONT_CAPTURE_GROUP = (1<<7); + public static final int CAPTURE_GROUP = (1<<8); + + /* options (search time) */ + public static final int NOTBOL = (1<<9); + public static final int NOTEOL = (1<<10); + public static final int POSIX_REGION = (1<<11); + public static final int MAXBIT = (1<<12); /* limit */ + + public static final int DEFAULT = NONE; + + public static String toString(int option) { + String options = ""; + if (isIgnoreCase(option)) options += "IGNORECASE "; + if (isExtend(option)) options += "EXTEND "; + if (isMultiline(option)) options += "MULTILINE "; + if (isSingleline(option)) options += "SINGLELINE "; + if (isFindLongest(option)) options += "FIND_LONGEST "; + if (isFindNotEmpty(option)) options += "FIND_NOT_EMPTY "; + if (isNegateSingleline(option)) options += "NEGATE_SINGLELINE "; + if (isDontCaptureGroup(option)) options += "DONT_CAPTURE_GROUP "; + if (isCaptureGroup(option)) options += "CAPTURE_GROUP "; + + if (isNotBol(option)) options += "NOTBOL "; + if (isNotEol(option)) options += "NOTEOL "; + if (isPosixRegion(option)) options += "POSIX_REGION "; + + return options; + } + + public static boolean isIgnoreCase(int option) { + return (option & IGNORECASE) != 0; + } + + public static boolean isExtend(int option) { + return (option & EXTEND) != 0; + } + + public static boolean isSingleline(int option) { + return (option & SINGLELINE) != 0; + } + + public static boolean isMultiline(int option) { + return (option & MULTILINE) != 0; + } + + public static 
boolean isFindLongest(int option) { + return (option & FIND_LONGEST) != 0; + } + + public static boolean isFindNotEmpty(int option) { + return (option & FIND_NOT_EMPTY) != 0; + } + + public static boolean isFindCondition(int option) { + return (option & (FIND_LONGEST | FIND_NOT_EMPTY)) != 0; + } + + public static boolean isNegateSingleline(int option) { + return (option & NEGATE_SINGLELINE) != 0; + } + + public static boolean isDontCaptureGroup(int option) { + return (option & DONT_CAPTURE_GROUP) != 0; + } + + public static boolean isCaptureGroup(int option) { + return (option & CAPTURE_GROUP) != 0; + } + + public static boolean isNotBol(int option) { + return (option & NOTBOL) != 0; + } + + public static boolean isNotEol(int option) { + return (option & NOTEOL) != 0; + } + + public static boolean isPosixRegion(int option) { + return (option & POSIX_REGION) != 0; + } + + /* OP_SET_OPTION is required for these options. ??? */ + // public static boolean isDynamic(int option) { + // return (option & (MULTILINE | IGNORECASE)) != 0; + // } + public static boolean isDynamic(int option) { + return false; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,953 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAtSimple; +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup; +import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; + +import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.PosixBracket; +import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr; +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg; +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; +import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; +import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; +import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; + +class Parser extends Lexer { + + protected final Regex regex; + protected Node root; + + protected int returnCode; // return code used by parser methods (they itself return parsed nodes) + // this 
approach will not affect recursive calls + + protected Parser(ScanEnvironment env, char[] chars, int p, int end) { + super(env, chars, p, end); + regex = env.reg; + } + + // onig_parse_make_tree + protected final Node parse() { + root = parseRegexp(); + regex.numMem = env.numMem; + return root; + } + + private static final int POSIX_BRACKET_NAME_MIN_LEN = 4; + private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20; + private static final char BRACKET_END[] = ":]".toCharArray(); + private boolean parsePosixBracket(CClassNode cc) { + mark(); + + boolean not; + if (peekIs('^')) { + inc(); + not = true; + } else { + not = false; + } + if (stop - p >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket + char[][] pbs = PosixBracket.PBSNamesLower; + for (int i=0; i<pbs.length; i++) { + char[] name = pbs[i]; + // hash lookup here ? + if (EncodingHelper.strNCmp(chars, p, stop, name, 0, name.length) == 0) { + p += name.length; + if (EncodingHelper.strNCmp(chars, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) { + newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE); + } + cc.addCType(PosixBracket.PBSValues[i], not, env, this); + inc(); + inc(); + return false; + } + } + + } + + // not_posix_bracket: + c = 0; + int i= 0; + while (left() && ((c=peek()) != ':') && c != ']') { + inc(); + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + + if (c == ':' && left()) { + inc(); + if (left()) { + fetch(); + if (c == ']') newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE); + } + } + restore(); + return true; /* 1: is not POSIX bracket, but no error. 
*/ + } + + private CClassNode parseCharProperty() { + int ctype = fetchCharPropertyToCType(); + CClassNode n = new CClassNode(); + n.addCType(ctype, false, env, this); + if (token.getPropNot()) n.setNot(); + return n; + } + + private boolean codeExistCheck(int code, boolean ignoreEscaped) { + mark(); + + boolean inEsc = false; + while (left()) { + if (ignoreEscaped && inEsc) { + inEsc = false; + } else { + fetch(); + if (c == code) { + restore(); + return true; + } + if (c == syntax.metaCharTable.esc) inEsc = true; + } + } + + restore(); + return false; + } + + private CClassNode parseCharClass() { + fetchTokenInCC(); + + final boolean neg; + if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { + neg = true; + fetchTokenInCC(); + } else { + neg = false; + } + + if (token.type == TokenType.CC_CLOSE) { + if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS); + env.ccEscWarn("]"); + token.type = TokenType.CHAR; /* allow []...] */ + } + + CClassNode cc = new CClassNode(); + CClassNode prevCC = null; + CClassNode workCC = null; + + CCStateArg arg = new CCStateArg(); + + boolean andStart = false; + arg.state = CCSTATE.START; + + while (token.type != TokenType.CC_CLOSE) { + boolean fetched = false; + + switch (token.type) { + + case CHAR: + if (token.getC() > 0xff) { + arg.inType = CCVALTYPE.CODE_POINT; + } else { + arg.inType = CCVALTYPE.SB; // sb_char: + } + arg.v = token.getC(); + arg.vIsRaw = false; + parseCharClassValEntry2(cc, arg); // goto val_entry2 + break; + + case RAW_BYTE: + if (token.base != 0) { /* tok->base != 0 : octal or hexadec. 
*/ + byte[] buf = new byte[4]; + int psave = p; + int base = token.base; + buf[0] = (byte)token.getC(); + int i; + for (i=1; i<4; i++) { + fetchTokenInCC(); + if (token.type != TokenType.RAW_BYTE || token.base != base) { + fetched = true; + break; + } + buf[i] = (byte)token.getC(); + } + + if (i == 1) { + arg.v = buf[0] & 0xff; + arg.inType = CCVALTYPE.SB; // goto raw_single + } else { + arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length); + arg.inType = CCVALTYPE.CODE_POINT; + } + } else { + arg.v = token.getC(); + arg.inType = CCVALTYPE.SB; // raw_single: + } + arg.vIsRaw = true; + parseCharClassValEntry2(cc, arg); // goto val_entry2 + break; + + case CODE_POINT: + arg.v = token.getCode(); + arg.vIsRaw = true; + parseCharClassValEntry(cc, arg); // val_entry:, val_entry2 + break; + + case POSIX_BRACKET_OPEN: + if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */ + env.ccEscWarn("["); + p = token.backP; + arg.v = token.getC(); + arg.vIsRaw = false; + parseCharClassValEntry(cc, arg); // goto val_entry + break; + } + cc.nextStateClass(arg, env); // goto next_class + break; + + case CHAR_TYPE: + cc.addCType(token.getPropCType(), token.getPropNot(), env, this); + cc.nextStateClass(arg, env); // next_class: + break; + + case CHAR_PROPERTY: + int ctype = fetchCharPropertyToCType(); + cc.addCType(ctype, token.getPropNot(), env, this); + cc.nextStateClass(arg, env); // goto next_class + break; + + case CC_RANGE: + if (arg.state == CCSTATE.VALUE) { + fetchTokenInCC(); + fetched = true; + if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */ + parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry; + break; + } else if (token.type == TokenType.CC_AND) { + env.ccEscWarn("-"); + parseCharClassRangeEndVal(cc, arg); // goto range_end_val + break; + } + arg.state = CCSTATE.RANGE; + } else if (arg.state == CCSTATE.START) { + arg.v = token.getC(); /* [-xa] is allowed */ + arg.vIsRaw = false; + fetchTokenInCC(); + fetched = true; + if (token.type == 
TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */ + parseCharClassValEntry(cc, arg); // goto val_entry + break; + } else if (arg.state == CCSTATE.RANGE) { + env.ccEscWarn("-"); + parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */ + break; + } else { /* CCS_COMPLETE */ + fetchTokenInCC(); + fetched = true; + if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */ + parseCharClassRangeEndVal(cc, arg); // goto range_end_val + break; + } else if (token.type == TokenType.CC_AND) { + env.ccEscWarn("-"); + parseCharClassRangeEndVal(cc, arg); // goto range_end_val + break; + } + + if (syntax.allowDoubleRangeOpInCC()) { + env.ccEscWarn("-"); + parseCharClassSbChar(cc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */ + break; + } + newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); + } + break; + + case CC_CC_OPEN: /* [ */ + CClassNode acc = parseCharClass(); + cc.or(acc); + break; + + case CC_AND: /* && */ + if (arg.state == CCSTATE.VALUE) { + arg.v = 0; // ??? safe v ? + arg.vIsRaw = false; + cc.nextStateValue(arg, env); + } + /* initialize local variables */ + andStart = true; + arg.state = CCSTATE.START; + if (prevCC != null) { + prevCC.and(cc); + } else { + prevCC = cc; + if (workCC == null) workCC = new CClassNode(); + cc = workCC; + } + cc.clear(); + break; + + case EOT: + newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); + + default: + newInternalException(ERR_PARSER_BUG); + } // switch + + if (!fetched) fetchTokenInCC(); + + } // while + + if (arg.state == CCSTATE.VALUE) { + arg.v = 0; // ??? safe v ? 
+ arg.vIsRaw = false; + cc.nextStateValue(arg, env); + } + + if (prevCC != null) { + prevCC.and(cc); + cc = prevCC; + } + + if (neg) { + cc.setNot(); + } else { + cc.clearNot(); + } + + if (cc.isNot() && syntax.notNewlineInNegativeCC()) { + if (!cc.isEmpty()) { + final int NEW_LINE = 0x0a; + if (EncodingHelper.isNewLine(NEW_LINE)) { + cc.bs.set(NEW_LINE); + } + } + } + + return cc; + } + + private void parseCharClassSbChar(CClassNode cc, CCStateArg arg) { + arg.inType = CCVALTYPE.SB; + arg.v = token.getC(); + arg.vIsRaw = false; + parseCharClassValEntry2(cc, arg); // goto val_entry2 + } + + private void parseCharClassRangeEndVal(CClassNode cc, CCStateArg arg) { + arg.v = '-'; + arg.vIsRaw = false; + parseCharClassValEntry(cc, arg); // goto val_entry + } + + private void parseCharClassValEntry(CClassNode cc, CCStateArg arg) { + arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; + parseCharClassValEntry2(cc, arg); // val_entry2: + } + + private void parseCharClassValEntry2(CClassNode cc, CCStateArg arg) { + cc.nextStateValue(arg, env); + } + + private Node parseEnclose(TokenType term) { + Node node = null; + + if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); + + int option = env.option; + + if (peekIs('?') && syntax.op2QMarkGroupEffect()) { + inc(); + if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); + + boolean listCapture = false; + + fetch(); + switch(c) { + case ':': /* (?:...) grouping only */ + fetchToken(); // group: + node = parseSubExp(term); + returnCode = 1; /* group */ + return node; + case '=': + node = new AnchorNode(AnchorType.PREC_READ); + break; + case '!': /* preceding read */ + node = new AnchorNode(AnchorType.PREC_READ_NOT); + break; + case '>': /* (?>...) 
stop backtrack */ + node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose + break; + case '\'': + if (Config.USE_NAMED_GROUP) { + if (syntax.op2QMarkLtNamedGroup()) { + listCapture = false; // goto named_group1 + node = parseEncloseNamedGroup2(listCapture); + break; + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + } // USE_NAMED_GROUP + break; + case '<': /* look behind (?<=...), (?<!...) */ + fetch(); + if (c == '=') { + node = new AnchorNode(AnchorType.LOOK_BEHIND); + } else if (c == '!') { + node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT); + } else { + if (Config.USE_NAMED_GROUP) { + if (syntax.op2QMarkLtNamedGroup()) { + unfetch(); + c = '<'; + + listCapture = false; // named_group1: + node = parseEncloseNamedGroup2(listCapture); // named_group2: + break; + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + + } else { // USE_NAMED_GROUP + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } // USE_NAMED_GROUP + } + break; + case '@': + if (syntax.op2AtMarkCaptureHistory()) { + if (Config.USE_NAMED_GROUP) { + if (syntax.op2QMarkLtNamedGroup()) { + fetch(); + if (c == '<' || c == '\'') { + listCapture = true; + node = parseEncloseNamedGroup2(listCapture); // goto named_group2 /* (?@<name>...) 
*/ + } + unfetch(); + } + } // USE_NAMED_GROUP + EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory + int num = env.addMemEntry(); + if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); + en.regNum = num; + node = en; + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + break; + + // case 'p': #ifdef USE_POSIXLINE_OPTION + case '-': + case 'i': + case 'm': + case 's': + case 'x': + boolean neg = false; + while (true) { + switch(c) { + case ':': + case ')': + break; + case '-': + neg = true; + break; + case 'x': + option = bsOnOff(option, Option.EXTEND, neg); + break; + case 'i': + option = bsOnOff(option, Option.IGNORECASE, neg); + break; + case 's': + if (syntax.op2OptionPerl()) { + option = bsOnOff(option, Option.MULTILINE, neg); + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + break; + case 'm': + if (syntax.op2OptionPerl()) { + option = bsOnOff(option, Option.SINGLELINE, !neg); + } else if (syntax.op2OptionRuby()) { + option = bsOnOff(option, Option.MULTILINE, neg); + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + break; + // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined + // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); + // break; + + default: + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } // switch + + if (c == ')') { + EncloseNode en = new EncloseNode(option, 0); // node_new_option + node = en; + returnCode = 2; /* option only */ + return node; + } else if (c == ':') { + int prev = env.option; + env.option = option; + fetchToken(); + Node target = parseSubExp(term); + env.option = prev; + EncloseNode en = new EncloseNode(option, 0); // node_new_option + en.setTarget(target); + node = en; + returnCode = 0; + return node; + } + if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); + fetch(); + } // while + + default: + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } // switch + + } else { 
+ if (isDontCaptureGroup(env.option)) { + fetchToken(); // goto group + node = parseSubExp(term); + returnCode = 1; /* group */ + return node; + } + EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory + int num = env.addMemEntry(); + en.regNum = num; + node = en; + } + + fetchToken(); + Node target = parseSubExp(term); + + if (node.getType() == NodeType.ANCHOR) { + AnchorNode an = (AnchorNode) node; + an.setTarget(target); + } else { + EncloseNode en = (EncloseNode)node; + en.setTarget(target); + if (en.type == EncloseType.MEMORY) { + /* Don't move this to previous of parse_subexp() */ + env.setMemNode(en.regNum, node); + } + } + returnCode = 0; + return node; // ?? + } + + private Node parseEncloseNamedGroup2(boolean listCapture) { + int nm = p; + int num = fetchName(c, false); + int nameEnd = value; + num = env.addMemEntry(); + if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); + + regex.nameAdd(chars, nm, nameEnd, num, syntax); + EncloseNode en = new EncloseNode(env.option, true); // node_new_enclose_memory + en.regNum = num; + + Node node = en; + + if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num); + env.numNamed++; + return node; + } + + private int findStrPosition(int[]s, int n, int from, int to, Ptr nextChar) { + int x; + int q; + int p = from; + int i = 0; + while (p < to) { + x = chars[p]; + q = p + 1; + if (x == s[0]) { + for (i=1; i<n && q<to; i++) { + x = chars[q]; + if (x != s[i]) break; + q++; + } + if (i >= n) { + if (chars[nextChar.p] != 0) nextChar.p = q; // we may need zero term semantics... 
+ return p; + } + } + p = q; + } + return -1; + } + + private Node parseExp(TokenType term) { + if (token.type == term) return StringNode.EMPTY; // goto end_of_token + + Node node = null; + boolean group = false; + + switch(token.type) { + case ALT: + case EOT: + return StringNode.EMPTY; // end_of_token:, node_new_empty + + case SUBEXP_OPEN: + node = parseEnclose(TokenType.SUBEXP_CLOSE); + if (returnCode == 1) { + group = true; + } else if (returnCode == 2) { /* option only */ + int prev = env.option; + EncloseNode en = (EncloseNode)node; + env.option = en.option; + fetchToken(); + Node target = parseSubExp(term); + env.option = prev; + en.setTarget(target); + return node; + } + break; + case SUBEXP_CLOSE: + if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); + if (token.escaped) { + return parseExpTkRawByte(group); // goto tk_raw_byte + } else { + return parseExpTkByte(group); // goto tk_byte + } + case STRING: + return parseExpTkByte(group); // tk_byte: + + case RAW_BYTE: + return parseExpTkRawByte(group); // tk_raw_byte: + case CODE_POINT: + char[] buf = new char[] {(char)token.getCode()}; + // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... 
// setRaw() #else + node = new StringNode(buf, 0, 1); + break; + + case QUOTE_OPEN: + int[] endOp = new int[] {syntax.metaCharTable.esc, 'E'}; + int qstart = p; + Ptr nextChar = new Ptr(); + int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar); + if (qend == -1) nextChar.p = qend = stop; + node = new StringNode(chars, qstart, qend); + p = nextChar.p; + break; + + case CHAR_TYPE: + switch(token.getPropCType()) { + case CharacterType.D: + case CharacterType.S: + case CharacterType.W: + if (Config.NON_UNICODE_SDW) { + CClassNode cc = new CClassNode(); + cc.addCType(token.getPropCType(), false, env, this); + if (token.getPropNot()) cc.setNot(); + node = cc; + } + break; + + case CharacterType.WORD: + node = new CTypeNode(token.getPropCType(), token.getPropNot()); + break; + + case CharacterType.SPACE: + case CharacterType.DIGIT: + case CharacterType.XDIGIT: + // #ifdef USE_SHARED_CCLASS_TABLE ... #endif + CClassNode ccn = new CClassNode(); + ccn.addCType(token.getPropCType(), false, env, this); + if (token.getPropNot()) ccn.setNot(); + node = ccn; + break; + + default: + newInternalException(ERR_PARSER_BUG); + + } // inner switch + break; + + case CHAR_PROPERTY: + node = parseCharProperty(); + break; + + case CC_CC_OPEN: + CClassNode cc = parseCharClass(); + node = cc; + if (isIgnoreCase(env.option)) { + ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc); + EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg); + + if (arg.altRoot != null) { + node = ConsAltNode.newAltNode(node, arg.altRoot); + } + } + break; + + case ANYCHAR: + node = new AnyCharNode(); + break; + + case ANYCHAR_ANYTIME: + node = new AnyCharNode(); + QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); + qn.setTarget(node); + node = qn; + break; + + case BACKREF: + int[]backRefs = token.getBackrefNum() > 1 ? 
token.getBackrefRefs() : new int[]{token.getBackrefRef1()}; + node = new BackRefNode(token.getBackrefNum(), + backRefs, + token.getBackrefByName(), + token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL + token.getBackrefLevel(), // ... + env); + + break; + + case CALL: + if (Config.USE_SUBEXP_CALL) { + int gNum = token.getCallGNum(); + + if (gNum < 0) { + gNum = backrefRelToAbs(gNum); + if (gNum <= 0) newValueException(ERR_INVALID_BACKREF); + } + node = new CallNode(chars, token.getCallNameP(), token.getCallNameEnd(), gNum); + env.numCall++; + } // USE_SUBEXP_CALL + break; + + case ANCHOR: + node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma + break; + + case OP_REPEAT: + case INTERVAL: + if (syntax.contextIndepRepeatOps()) { + if (syntax.contextInvalidRepeatOps()) { + newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + } else { + node = StringNode.EMPTY; // node_new_empty + } + } else { + return parseExpTkByte(group); // goto tk_byte + } + break; + + default: + newInternalException(ERR_PARSER_BUG); + } //switch + + //targetp = node; + + fetchToken(); // re_entry: + + return parseExpRepeat(node, group); // repeat: + } + + private Node parseExpTkByte(boolean group) { + StringNode node = new StringNode(chars, token.backP, p); // tk_byte: + while (true) { + fetchToken(); + if (token.type != TokenType.STRING) break; + + if (token.backP == node.end) { + node.end = p; // non escaped character, remain shared, just increase shared range + } else { + node.cat(chars, token.backP, p); // non continuous string stream, need to COW + } + } + // targetp = node; + return parseExpRepeat(node, group); // string_end:, goto repeat + } + + private Node parseExpTkRawByte(boolean group) { + // tk_raw_byte: + + // important: we don't use 0xff mask here neither in the compiler + // (in the template string) so we won't have to mask target + // strings when comparing against them in the matcher + StringNode node = new 
StringNode((char)token.getC()); + node.setRaw(); + + int len = 1; + while (true) { + if (len >= 1) { + if (len == 1) { + fetchToken(); + node.clearRaw(); + // !goto string_end;! + return parseExpRepeat(node, group); + } + } + + fetchToken(); + if (token.type != TokenType.RAW_BYTE) { + /* Don't use this, it is wrong for little endian encodings. */ + // USE_PAD_TO_SHORT_BYTE_CHAR ... + + newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING); + } + + // important: we don't use 0xff mask here neither in the compiler + // (in the template string) so we won't have to mask target + // strings when comparing against them in the matcher + node.cat((char)token.getC()); + len++; + } // while + } + + private Node parseExpRepeat(Node target, boolean group) { + while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: + if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); + + QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), + token.getRepeatUpper(), + token.type == TokenType.INTERVAL); + + qtfr.greedy = token.getRepeatGreedy(); + int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd()); + Node qn = qtfr; + + if (token.getRepeatPossessive()) { + EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose + en.setTarget(qn); + qn = en; + } + + if (ret == 0) { + target = qn; + } else if (ret == 2) { /* split case: /abc+/ */ + target = ConsAltNode.newListNode(target, null); + ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null)); + + fetchToken(); + return parseExpRepeatForCar(target, tmp, group); + } + fetchToken(); // goto re_entry + } + return target; + } + + private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) { + while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: + if (target.car.isInvalidQuantifier()) 
newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); + + QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), + token.getRepeatUpper(), + token.type == TokenType.INTERVAL); + + qtfr.greedy = token.getRepeatGreedy(); + int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd()); + Node qn = qtfr; + + if (token.getRepeatPossessive()) { + EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose + en.setTarget(qn); + qn = en; + } + + if (ret == 0) { + target.setCar(qn); + } else if (ret == 2) { /* split case: /abc+/ */ + assert false; + } + fetchToken(); // goto re_entry + } + return top; + } + + private Node parseBranch(TokenType term) { + Node node = parseExp(term); + + if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) { + return node; + } else { + ConsAltNode top = ConsAltNode.newListNode(node, null); + ConsAltNode t = top; + + while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) { + node = parseExp(term); + if (node.getType() == NodeType.LIST) { + t.setCdr((ConsAltNode)node); + while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr; + + t = ((ConsAltNode)node); + } else { + t.setCdr(ConsAltNode.newListNode(node, null)); + t = t.cdr; + } + } + return top; + } + } + + /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ + private Node parseSubExp(TokenType term) { + Node node = parseBranch(term); + + if (token.type == term) { + return node; + } else if (token.type == TokenType.ALT) { + ConsAltNode top = ConsAltNode.newAltNode(node, null); + ConsAltNode t = top; + while (token.type == TokenType.ALT) { + fetchToken(); + node = parseBranch(term); + + t.setCdr(ConsAltNode.newAltNode(node, null)); + t = t.cdr; + } + + if (token.type != term) parseSubExpError(term); + return top; + } else { + parseSubExpError(term); + return null; //not reached + } + } + + private void parseSubExpError(TokenType term) { + if (term == 
TokenType.SUBEXP_CLOSE) { + newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); + } else { + newInternalException(ERR_PARSER_BUG); + } + } + + private Node parseRegexp() { + fetchToken(); + return parseSubExp(TokenType.EOT); + } +}
// File: src/jdk/nashorn/internal/runtime/regexp/joni/Regex.java (new file in this changeset)
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;

import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;

import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;

/**
 * A regular expression together with the state produced while compiling it.
 *
 * <p>Every public constructor funnels into
 * {@link #Regex(char[], int, int, int, int, Syntax, WarnCallback)}, which normalizes the
 * option bits and runs an {@code Analyser} over the pattern. Most fields are
 * package-visible; this class itself only writes the name table and the
 * search-optimization fields (see the {@code set*Info} methods).
 */
public final class Regex implements RegexState {

    int[] code;                 /* compiled pattern */
    int codeLength;
    boolean stackNeeded;
    Object[]operands;           /* e.g. shared CClassNode */
    int operandLength;

    int state;                  /* normal, searching, compiling */ // remove
    int numMem;                 /* used memory(...) num counted from 1 */
    int numRepeat;              /* OP_REPEAT/OP_REPEAT_NG id-counter */
    int numNullCheck;           /* OP_NULL_CHECK_START/END id counter */
    int numCombExpCheck;        /* combination explosion check */
    int numCall;                /* number of subexp call */
    int captureHistory;         /* (?@...) flag (1-31) */
    int btMemStart;             /* need backtrack flag */
    int btMemEnd;               /* need backtrack flag */

    int stackPopLevel;

    int[]repeatRangeLo;
    int[]repeatRangeHi;

    public WarnCallback warnings;
    // Never assigned in this class; matcher() dereferences it.
    // NOTE(review): presumably installed during compilation - confirm in Analyser.
    public MatcherFactory factory;

    int options;
    int userOptions;
    Object userObject;
    //final Syntax syntax;
    final int caseFoldFlag;

    HashMap<String,NameEntry> nameTable; // named entries

    /* optimization info (string search, char-map and anchors) */
    SearchAlgorithm searchAlgorithm;    /* optimize flag */
    int thresholdLength;                /* search str-length for apply optimize */
    int anchor;                         /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
    int anchorDmin;                     /* (SEMI_)END_BUF anchor distance */
    int anchorDmax;                     /* (SEMI_)END_BUF anchor distance */
    int subAnchor;                      /* start-anchor for exact or map */

    char[] exact;
    int exactP;
    int exactEnd;

    byte[] map;                         /* used as BM skip or char-map */
    int[] intMap;                       /* BM skip for exact_len > 255 */
    int[] intMapBackward;               /* BM skip for backward search */
    int dMin;                           /* min-distance of exact or map */
    int dMax;                           /* max-distance of exact or map */

    char[][] templates;
    int templateNum;

    /** Compiles the characters of {@code cs} with no options. */
    public Regex(CharSequence cs) {
        this(cs.toString());
    }

    /** Compiles {@code str} with no options. */
    public Regex(String str) {
        this(str.toCharArray(), 0, str.length(), 0);
    }

    /** Compiles the whole of {@code chars} with no options. */
    public Regex(char[] chars) {
        this(chars, 0, chars.length, 0);
    }

    /** Compiles {@code chars[p, end)} with no options. */
    public Regex(char[] chars, int p, int end) {
        this(chars, p, end, 0);
    }

    /** Compiles {@code chars[p, end)} with {@link Syntax#RUBY} and the default warn callback. */
    public Regex(char[] chars, int p, int end, int option) {
        this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
    }

    // onig_new
    public Regex(char[] chars, int p, int end, int option, Syntax syntax) {
        this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
    }

    public Regex(char[]chars, int p, int end, int option, WarnCallback warnings) {
        this(chars, p, end, option, Syntax.RUBY, warnings);
    }

    // onig_new
    public Regex(char[] chars, int p, int end, int option, Syntax syntax, WarnCallback warnings) {
        this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
    }

    // onig_alloc_init
    /**
     * Normalizes {@code option}, stores the settings and compiles {@code chars[p, end)}.
     *
     * @param chars        pattern characters
     * @param p            index of the first pattern character
     * @param end          index one past the last pattern character
     * @param option       bit set of {@code Option} flags
     * @param caseFoldFlag case-fold flag stored on this instance
     * @param syntax       syntax whose {@code options} are merged into {@code option}
     * @param warnings     callback available while compiling; reset to {@code null} afterwards
     * @throws ValueException if both DONT_CAPTURE_GROUP and CAPTURE_GROUP are requested
     */
    public Regex(char[] chars, int p, int end, int option, int caseFoldFlag, Syntax syntax, WarnCallback warnings) {
        final int captureConflict = Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP;
        if ((option & captureConflict) == captureConflict) {
            throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
        }

        // The NEGATE_SINGLELINE test must look at the caller's bits only, i.e. before
        // the syntax defaults are merged in.
        final boolean negateSingleLine = (option & Option.NEGATE_SINGLELINE) != 0;
        option |= syntax.options;
        if (negateSingleLine) {
            option &= ~Option.SINGLELINE;
        }

        this.options = option;
        this.caseFoldFlag = caseFoldFlag;
        this.warnings = warnings;

        new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile();

        // The callback is only needed while compiling.
        this.warnings = null;
    }

    /** Creates a matcher over the whole of {@code chars}. */
    public Matcher matcher(char[] chars) {
        return matcher(chars, 0, chars.length);
    }

    /** Creates a matcher over {@code chars[p, end)} via {@link #factory}. */
    public Matcher matcher(char[] chars, int p, int end) {
        return factory.create(this, chars, p, end);
    }

    /** @return the value of {@code numMem} */
    public int numberOfCaptures() {
        return numMem;
    }

    /**
     * @return how many bits 0..{@code Config.MAX_CAPTURE_HISTORY_GROUP} are set in
     *         {@code captureHistory}, or 0 when {@code Config.USE_CAPTURE_HISTORY} is off
     */
    public int numberOfCaptureHistories() {
        if (Config.USE_CAPTURE_HISTORY) {
            int n = 0;
            for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
                if (bsAt(captureHistory, i)) n++;
            }
            return n;
        } else {
            return 0;
        }
    }

    /** @return a debug listing of the name table, or an empty string when there is none */
    String nameTableToString() {
        StringBuilder sb = new StringBuilder();

        if (nameTable != null) {
            sb.append("name table\n");
            for (NameEntry ne : nameTable.values()) {
                sb.append(" " + ne + "\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /** @return the entry registered for {@code name[nameP, nameEnd)}, or {@code null} */
    NameEntry nameFind(char[] name, int nameP, int nameEnd) {
        if (nameTable != null) return nameTable.get(new String(name, nameP, nameEnd - nameP));
        return null;
    }

    /** Rewrites every back-reference number {@code n} stored in the name table to {@code map[n]}. */
    void renumberNameTable(int[]map) {
        if (nameTable != null) {
            for (NameEntry e : nameTable.values()) {
                if (e.backNum > 1) {
                    for (int i=0; i<e.backNum; i++) {
                        e.backRefs[i] = map[e.backRefs[i]];
                    }
                } else if (e.backNum == 1) {
                    e.backRef1 = map[e.backRef1];
                }
            }
        }
    }

    /** @return the number of distinct names registered, 0 when there is no name table */
    public int numberOfNames() {
        return nameTable == null ? 0 : nameTable.size();
    }

    /**
     * Registers {@code backRef} under the name {@code name[nameP, nameEnd)}.
     *
     * @throws ValueException if the name is empty, or if it already has a back-reference
     *         and {@code syntax} does not allow multiplex definitions
     */
    void nameAdd(char[] name, int nameP, int nameEnd, int backRef, Syntax syntax) {
        if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME);

        NameEntry e = null;
        if (nameTable == null) {
            nameTable = new HashMap<String,NameEntry>(); // 13, oni defaults to 5
        } else {
            e = nameFind(name, nameP, nameEnd);
        }

        if (e == null) {
            // dup the name here as oni does ?, what for ? (it has to manage it, we don't)
            e = new NameEntry(name, nameP, nameEnd);
            nameTable.put(new String(name, nameP, nameEnd - nameP), e);
        } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
            throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
        }

        e.addBackref(backRef);
    }

    /** Same lookup as {@link #nameFind(char[], int, int)}. */
    NameEntry nameToGroupNumbers(char[] name, int nameP, int nameEnd) {
        return nameFind(name, nameP, nameEnd);
    }

    /**
     * Resolves a group name to a single back-reference number.
     *
     * <p>When the name has several back-references and {@code region} is given, the
     * highest-indexed one whose {@code region.beg} entry is not
     * {@code Region.REGION_NOTPOS} wins; otherwise the last one is returned.
     *
     * @throws ValueException    if the name is not registered
     * @throws InternalException if the entry holds no back-reference
     */
    public int nameToBackrefNumber(char[] name, int nameP, int nameEnd, Region region) {
        NameEntry e = nameToGroupNumbers(name, nameP, nameEnd);
        if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE,
                                                new String(name, nameP, nameEnd - nameP));

        switch(e.backNum) {
        case 0:
            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
        case 1:
            return e.backRef1;
        default:
            if (region != null) {
                for (int i = e.backNum - 1; i >= 0; i--) {
                    if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i];
                }
            }
            return e.backRefs[e.backNum - 1];
        }
    }

    /**
     * @return an iterator over the name entries; empty when no name was ever registered
     *         (consistent with {@link #numberOfNames()} returning 0 in that case)
     */
    public Iterator<NameEntry> namedBackrefIterator() {
        if (nameTable == null) {
            // Previously dereferenced the null table and threw NullPointerException.
            return Collections.<NameEntry>emptyList().iterator();
        }
        return nameTable.values().iterator();
    }

    /**
     * @return {@code false} when DONT_CAPTURE_GROUP is set, or when named groups are in
     *         use, {@code syntax} captures only named groups and CAPTURE_GROUP is not set;
     *         {@code true} otherwise
     */
    public boolean noNameGroupIsActive(Syntax syntax) {
        if (isDontCaptureGroup(options)) return false;

        if (Config.USE_NAMED_GROUP) {
            if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false;
        }
        return true;
    }

    /* set skip map for Boyer-Moore search */
    /**
     * Builds the skip table for {@code exact[exactP, exactEnd)}, indexed by the low
     * 8 bits of a character. Every slot defaults to the pattern length; each pattern
     * character except the last then gets its distance to the end of the pattern.
     */
    void setupBMSkipMap() {
        char[] chars = exact;
        int p = exactP;
        int end = exactEnd;
        int len = end - p;

        if (len < Config.CHAR_TABLE_SIZE) {
            // map/skip
            if (map == null) map = new byte[Config.CHAR_TABLE_SIZE];

            // NOTE(review): (byte)len is negative for len > 127; readers of map presumably
            // mask with 0xff - confirm in SearchAlgorithm.
            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
            for (int i=0; i<len-1; i++) map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
        } else {
            if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];

            // Default skip must be len here too: without it, characters that do not occur
            // in the pattern keep skip 0 (or a stale value when the table is reused).
            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) intMap[i] = len;
            for (int i=0; i<len-1; i++) intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
        }
    }

    /**
     * Installs an exact-string optimization: stores the string, picks the search
     * algorithm (SLOW_IC when ignoring case, BM for length >= 2, else SLOW) and
     * records the distance bounds. Does nothing for an empty string.
     */
    void setExactInfo(OptExactInfo e) {
        if (e.length == 0) return;

        // shall we copy that ?
        exact = e.chars;
        exactP = 0;
        exactEnd = e.length;

        if (e.ignoreCase) {
            searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
        } else {
            if (e.length >= 2) {
                setupBMSkipMap();
                searchAlgorithm = SearchAlgorithm.BM;
            } else {
                searchAlgorithm = SearchAlgorithm.SLOW;
            }
        }

        dMin = e.mmd.min;
        dMax = e.mmd.max;

        if (dMin != MinMaxLen.INFINITE_DISTANCE) {
            thresholdLength = dMin + (exactEnd - exactP);
        }
    }

    /** Installs a char-map optimization and switches the search algorithm to MAP. */
    void setOptimizeMapInfo(OptMapInfo m) {
        map = m.map;

        searchAlgorithm = SearchAlgorithm.MAP;
        dMin = m.mmd.min;
        dMax = m.mmd.max;

        if (dMin != MinMaxLen.INFINITE_DISTANCE) {
            thresholdLength = dMin + 1;
        }
    }

    /** ORs the BEGIN_LINE bit of the left anchor and the END_LINE bit of the right anchor into {@code subAnchor}. */
    void setSubAnchor(OptAnchorInfo anc) {
        subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
        subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
    }

    /** Resets the search algorithm, anchors and exact-string fields. */
    void clearOptimizeInfo() {
        searchAlgorithm = SearchAlgorithm.NONE;
        anchor = 0;
        anchorDmax = 0;
        anchorDmin = 0;
        subAnchor = 0;

        exact = null;
        exactP = exactEnd = 0;
    }

    /**
     * @return {@code "\nPATTERN: /"} followed by each byte of {@code bytes[p, end)} decoded
     *         on its own, followed by {@code "/"}
     */
    public String encStringToString(byte[]bytes, int p, int end) {
        StringBuilder sb = new StringBuilder("\nPATTERN: /");

        while (p < end) {
            // NOTE(review): single-byte decode with the platform default charset.
            sb.append(new String(new byte[]{bytes[p]}));
            p++;
        }
        return sb.append("/").toString();
    }

    /** @return a multi-line debug dump of the optimization fields */
    public String optimizeInfoToString() {
        String s = "";
        s += "optimize: " + searchAlgorithm.getName() + "\n";
        s += " anchor: " + OptAnchorInfo.anchorToString(anchor);

        if ((anchor & AnchorType.END_BUF_MASK) != 0) {
            s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax);
        }

        s += "\n";

        if (searchAlgorithm != SearchAlgorithm.NONE) {
            s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
        }

        s += "dmin: " + dMin + " dmax: " + dMax + "\n";
        s += "threshold length: " + thresholdLength + "\n";

        if (exact != null) {
            s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
        } else if (searchAlgorithm == SearchAlgorithm.MAP) {
            int n=0;
            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;

            s += "map: n = " + n + "\n";
            if (n > 0) {
                int c=0;
                s += "[";
                for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
                    if (map[i] != 0) {
                        if (c > 0) s += ", ";
                        c++;
                        // TODO if (enc.isPrint(i)
                        s += ((char)i);
                    }
                }
                s += "]\n";
            }
        }

        return s;
    }

    /** @return the normalized option bits computed by the constructor */
    public int getOptions() {
        return options;
    }

    public void setUserOptions(int options) {
        this.userOptions = options;
    }

    public int getUserOptions() {
        return userOptions;
    }

    public void setUserObject(Object object) {
        this.userObject = object;
    }

    public Object getUserObject() {
        return userObject;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Region.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,66 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public final class Region { + static final int REGION_NOTPOS = -1; + + public final int numRegs; + public final int[]beg; + public final int[]end; + public CaptureTreeNode historyRoot; + + public Region(int num) { + this.numRegs = num; + this.beg = new int[num]; + this.end = new int[num]; + } + + public Region(int begin, int end) { + this.numRegs = 1; + this.beg = new int[]{begin}; + this.end = new int[]{end}; + } + + public Region clone() { + Region region = new Region(numRegs); + System.arraycopy(beg, 0, region.beg, 0, beg.length); + System.arraycopy(end, 0, region.end, 0, end.length); + if (historyRoot != null) region.historyRoot = historyRoot.cloneTree(); + return region; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Region: \n"); + for (int i=0; i<beg.length; i++) sb.append(" " + i + ": (" + beg[i] + "-" + end[i] + ")"); + return sb.toString(); + } + + CaptureTreeNode getCaptureTree() { + return historyRoot; + } + + void clear() { + for (int i=0; i<beg.length; i++) { + beg[i] = end[i] = REGION_NOTPOS; + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ScanEnvironment.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,137 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsClear; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; + +public final class ScanEnvironment { + + private static final int SCANENV_MEMNODES_SIZE = 8; + + int option; + final int caseFoldFlag; + final public Syntax syntax; + int captureHistory; + int btMemStart; + int btMemEnd; + int backrefedMem; + + final public Regex reg; + + int numCall; + UnsetAddrList unsetAddrList; // USE_SUBEXP_CALL + public int numMem; + + int numNamed; // USE_NAMED_GROUP + + public Node memNodes[]; + + // USE_COMBINATION_EXPLOSION_CHECK + int numCombExpCheck; + int combExpMaxRegNum; + int currMaxRegNum; + boolean hasRecursion; + + public ScanEnvironment(Regex regex, Syntax syntax) { + this.reg = regex; + option = regex.options; + caseFoldFlag = regex.caseFoldFlag; + this.syntax = syntax; + } + + public void clear() { + captureHistory = bsClear(); + btMemStart = bsClear(); + btMemEnd = bsClear(); + backrefedMem = bsClear(); + + numCall = 0; + numMem = 0; + + numNamed = 0; + + memNodes = null; + + numCombExpCheck = 0; + combExpMaxRegNum = 0; + currMaxRegNum = 0; + hasRecursion = false; + } + + public int addMemEntry() { + if (numMem++ == 0) { + memNodes = new Node[SCANENV_MEMNODES_SIZE]; + } else if (numMem >= memNodes.length) { + Node[]tmp = new Node[memNodes.length << 1]; + System.arraycopy(memNodes, 0, tmp, 0, memNodes.length); + memNodes = tmp; + } + + return numMem; + } + + public void setMemNode(int num, Node node) { + if (numMem >= num) { + memNodes[num] = node; + } else { + throw new InternalException(ErrorMessages.ERR_PARSER_BUG); + } + } + + public int convertBackslashValue(int c) { + if (syntax.opEscControlChars()) { + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': 
return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; + case 'v': + if (syntax.op2EscVVtab()) return 11; // ??? + break; + default: + break; + } + } + return c; + } + + void ccEscWarn(String s) { + if (Config.USE_WARN) { + if (syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) { + reg.warnings.warn("character class has '" + s + "' without escape"); + } + } + } + + void closeBracketWithoutEscapeWarn(String s) { + if (Config.USE_WARN) { + if (syntax.warnCCOpNotEscaped()) { + reg.warnings.warn("regular expression has '" + s + "' without escape"); + } + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ScannerSupport.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,178 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; +import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; + +abstract class ScannerSupport extends IntHolder implements ErrorMessages { + + protected final char[] chars; // pattern + protected int p; // current scanner position + protected int stop; // pattern end (mutable) + private int lastFetched; // last fetched value for unfetch support + protected int c; // current code point + + private final int begin; // pattern begin position for reset() support + private final int end; // pattern end position for reset() support + protected int _p; // used by mark()/restore() to mark positions + + protected ScannerSupport(char[] chars, int p, int end) { + this.chars = chars; + this.begin = p; + this.end = end; + + reset(); + } + + protected int getBegin() { + return begin; + } + + protected int getEnd() { + return end; + } + + private final int INT_SIGN_BIT = 1 << 31; + + protected final int scanUnsignedNumber() { + int last = c; + int num = 0; // long ??? 
+ while(left()) { + fetch(); + if (Character.isDigit(c)) { + int onum = num; + num = num * 10 + EncodingHelper.digitVal(c); + if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; + } else { + unfetch(); + break; + } + } + c = last; + return num; + } + + protected final int scanUnsignedHexadecimalNumber(int maxLength) { + int last = c; + int num = 0; + while(left() && maxLength-- != 0) { + fetch(); + if (EncodingHelper.isXDigit(c)) { + int onum = num; + int val = EncodingHelper.xdigitVal(c); + num = (num << 4) + val; + if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; + } else { + unfetch(); + break; + } + } + c = last; + return num; + } + + protected final int scanUnsignedOctalNumber(int maxLength) { + int last = c; + int num = 0; + while(left() && maxLength-- != 0) { + fetch(); + if (Character.isDigit(c) && c < '8') { + int onum = num; + int val = EncodingHelper.odigitVal(c); + num = (num << 3) + val; + if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; + } else { + unfetch(); + break; + } + } + c = last; + return num; + } + + protected final void reset() { + p = begin; + stop = end; + } + + protected final void mark() { + _p = p; + } + + protected final void restore() { + p = _p; + } + + protected final void inc() { + lastFetched = p; + p++; + } + + protected final void fetch() { + lastFetched = p; + c = chars[p++]; + } + + protected int fetchTo() { + lastFetched = p; + return chars[p++]; + } + + protected final void unfetch() { + p = lastFetched; + } + + protected final int peek() { + return p < stop ? 
chars[p] : 0; + } + + protected final boolean peekIs(int c) { + return peek() == c; + } + + protected final boolean left() { + return p < stop; + } + + protected void newSyntaxException(String message) { + throw new SyntaxException(message); + } + + protected void newValueException(String message) { + throw new ValueException(message); + } + + protected void newValueException(String message, String str) { + throw new ValueException(message, str); + } + + protected void newValueException(String message, int p, int end) { + throw new ValueException(message, new String(chars, p, end - p)); + } + + protected void newInternalException(String message) { + throw new InternalException(message); + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/SearchAlgorithm.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,294 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +public abstract class SearchAlgorithm { + + public abstract String getName(); + public abstract int search(Regex regex, char[] text, int textP, int textEnd, int textRange); + public abstract int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_); + + + public static final SearchAlgorithm NONE = new SearchAlgorithm() { + + public final String getName() { + return "NONE"; + } + + public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) { + return textP; + } + + public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { + return textP; + } + + }; + + public static final SearchAlgorithm SLOW = new SearchAlgorithm() { + + public final String getName() { + return "EXACT"; + } + + public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + + int end = textEnd; + end -= targetEnd - targetP - 1; + + if (end > textRange) end = textRange; + + int s = textP; + + while (s < end) { + if (text[s] == target[targetP]) { + int p = s + 1; + int t = targetP + 1; + while (t < targetEnd) { + if (target[t] != text[p++]) break; + t++; + } + + if (t == targetEnd) return s; + } + s++; + } + + return -1; + } + + public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + int s = textEnd; + s -= targetEnd - targetP; + + if (s > textStart) { + s = textStart; + } + + while (s >= textP) { + if (text[s] == target[targetP]) { + int p = s + 1; + int t = targetP + 1; + while (t < targetEnd) { + if (target[t] != text[p++]) break; + t++; + } + if (t == targetEnd) return s; + 
} + // s = enc.prevCharHead or s = s <= adjustText ? -1 : s - 1; + s--; + } + return -1; + } + }; + + public static final class SLOW_IC extends SearchAlgorithm { + private final int caseFoldFlag; + + public SLOW_IC(Regex regex) { + this.caseFoldFlag = regex.caseFoldFlag; + } + + public final String getName() { + return "EXACT_IC"; + } + + public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + int end = textEnd; + end -= targetEnd - targetP - 1; + + if (end > textRange) end = textRange; + int s = textP; + + while (s < end) { + if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s; + s++; + } + return -1; + } + + public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + int s = textEnd; + s -= targetEnd - targetP; + + if (s > textStart) { + s = textStart; + } + + while (s >= textP) { + if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s; + s = EncodingHelper.prevCharHead(adjustText, s); + } + return -1; + } + + private boolean lowerCaseMatch(char[] t, int tP, int tEnd, + char[] chars, int p, int end) { + + while (tP < tEnd) { + if (t[tP++] != Character.toLowerCase(chars[p++])) return false; + } + return true; + } + }; + + public static final SearchAlgorithm BM = new SearchAlgorithm() { + + public final String getName() { + return "EXACT_BM"; + } + + public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + int end = textRange + (targetEnd - targetP) - 1; + if (end > textEnd) end = textEnd; + + int tail = targetEnd - 1; + int s = textP + (targetEnd - targetP) - 1; + + if (regex.intMap == 
null) { + while (s < end) { + int p = s; + int t = tail; + + while (text[p] == target[t]) { + if (t == targetP) return p; + p--; t--; + } + + s += regex.map[text[s] & 0xff]; + } + } else { /* see int_map[] */ + while (s < end) { + int p = s; + int t = tail; + + while (text[p] == target[t]) { + if (t == targetP) return p; + p--; t--; + } + + s += regex.intMap[text[s] & 0xff]; + } + } + return -1; + } + + private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100; + + public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { + char[] target = regex.exact; + int targetP = regex.exactP; + int targetEnd = regex.exactEnd; + + if (regex.intMapBackward == null) { + if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) { + // goto exact_method; + return SLOW.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_); + } + setBmBackwardSkip(regex, target, targetP, targetEnd); + } + + int s = textEnd - (targetEnd - targetP); + + if (textStart < s) { + s = textStart; + } + + while (s >= textP) { + int p = s; + int t = targetP; + while (t < targetEnd && text[p] == target[t]) { + p++; t++; + } + if (t == targetEnd) return s; + + s -= regex.intMapBackward[text[s] & 0xff]; + } + return -1; + } + + + private void setBmBackwardSkip(Regex regex, char[] chars, int p, int end) { + int[] skip; + if (regex.intMapBackward == null) { + skip = new int[Config.CHAR_TABLE_SIZE]; + regex.intMapBackward = skip; + } else { + skip = regex.intMapBackward; + } + + int len = end - p; + + for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) skip[i] = len; + for (int i=len-1; i>0; i--) skip[chars[i] & 0xff] = i; + } + }; + + public static final SearchAlgorithm MAP = new SearchAlgorithm() { + + public final String getName() { + return "MAP"; + } + + public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) { + byte[] map = regex.map; + int s = textP; + + while (s < textRange) 
{ + if (text[s] > 0xff || map[text[s]] != 0) return s; + s++; + } + return -1; + } + + public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { + byte[] map = regex.map; + int s = textStart; + + if (s >= textEnd) s = textEnd - 1; + while (s >= textP) { + if (text[s] > 0xff || map[text[s]] != 0) return s; + s--; + } + return -1; + } + }; + +}
// File: src/jdk/nashorn/internal/runtime/regexp/joni/StackEntry.java
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

/**
 * A single stack record made of a {@code type} tag plus four anonymous int slots.
 *
 * <p>The slots emulate a union: each group of accessors below gives the same four
 * storage cells different names. Accessors from different groups that map to the
 * same slot overwrite each other, so only the group matching the current
 * {@code type} should be used on a given entry.
 *
 * <pre>
 *   group          slot0          slot1          slot2          slot3
 *   state          pcode          pstr           pstrPrev       check
 *   repeat         count          pcode          num            -
 *   repeat-inc     si             -              -              -
 *   mem            num            pstr           start          end
 *   null-check     num            pstr           -              -
 *   call-frame     retAddr        num            pstr           -
 * </pre>
 */
final class StackEntry {
    int type;
    private int slot0, slot1, slot2, slot3;

    // ---- group 1: state ------------------------------------------------------

    /** Stores the byte code position. */
    void setStatePCode(int pcode) {
        this.slot0 = pcode;
    }

    /** @return the byte code position */
    int getStatePCode() {
        return this.slot0;
    }

    /** Stores the string position. */
    void setStatePStr(int pstr) {
        this.slot1 = pstr;
    }

    /** @return the string position */
    int getStatePStr() {
        return this.slot1;
    }

    /** Stores the position of the character preceding the string position. */
    void setStatePStrPrev(int pstrPrev) {
        this.slot2 = pstrPrev;
    }

    /** @return the position of the character preceding the string position */
    int getStatePStrPrev() {
        return this.slot2;
    }

    /** Stores the state-check value. */
    void setStateCheck(int check) {
        this.slot3 = check;
    }

    /** @return the state-check value */
    int getStateCheck() {
        return this.slot3;
    }

    // ---- group 2: repeat (for OP_REPEAT_INC, OP_REPEAT_INC_NG) ------------------

    /** Stores the repeat counter. */
    void setRepeatCount(int count) {
        this.slot0 = count;
    }

    /** @return the repeat counter */
    int getRepeatCount() {
        return this.slot0;
    }

    /** Lowers the repeat counter by one. */
    void decreaseRepeatCount() {
        this.slot0 -= 1;
    }

    /** Raises the repeat counter by one. */
    void increaseRepeatCount() {
        this.slot0 += 1;
    }

    /** Stores the byte code position of the head of the repeated target. */
    void setRepeatPCode(int pcode) {
        this.slot1 = pcode;
    }

    /** @return the byte code position of the head of the repeated target */
    int getRepeatPCode() {
        return this.slot1;
    }

    /** Stores the repeat id. */
    void setRepeatNum(int num) {
        this.slot2 = num;
    }

    /** @return the repeat id */
    int getRepeatNum() {
        return this.slot2;
    }

    // ---- group 3: repeat-inc -------------------------------------------------------

    /** Stores a stack index. */
    void setSi(int si) {
        this.slot0 = si;
    }

    /** @return the stored stack index */
    int getSi() {
        return this.slot0;
    }

    // ---- group 4: mem ----------------------------------------------------------------

    /** Stores the memory number. */
    void setMemNum(int num) {
        this.slot0 = num;
    }

    /** @return the memory number */
    int getMemNum() {
        return this.slot0;
    }

    /** Stores the start/end position. */
    void setMemPstr(int pstr) {
        this.slot1 = pstr;
    }

    /** @return the start/end position */
    int getMemPStr() {
        return this.slot1;
    }

    /* The two values below are previous info kept for backtracking "(...)*". */

    /** Stores the previous start info. */
    void setMemStart(int start) {
        this.slot2 = start;
    }

    /** @return the previous start info */
    int getMemStart() {
        return this.slot2;
    }

    /** Stores the previous end info. */
    void setMemEnd(int end) {
        this.slot3 = end;
    }

    /** @return the previous end info */
    int getMemEnd() {
        return this.slot3;
    }

    // ---- group 5: null-check -------------------------------------------------------

    /** Stores the null check id. */
    void setNullCheckNum(int num) {
        this.slot0 = num;
    }

    /** @return the null check id */
    int getNullCheckNum() {
        return this.slot0;
    }

    /** Stores the start position. */
    void setNullCheckPStr(int pstr) {
        this.slot1 = pstr;
    }

    /** @return the start position */
    int getNullCheckPStr() {
        return this.slot1;
    }

    // ---- group 6: call-frame -------------------------------------------------------

    /** Stores the byte code position to return to. */
    void setCallFrameRetAddr(int addr) {
        this.slot0 = addr;
    }

    /** @return the byte code position to return to */
    int getCallFrameRetAddr() {
        return this.slot0;
    }

    /** Stores the call frame number. */
    void setCallFrameNum(int num) {
        this.slot1 = num;
    }

    /** @return the call frame number */
    int getCallFrameNum() {
        return this.slot1;
    }

    /** Stores the string position. */
    void setCallFramePStr(int pstr) {
        this.slot2 = pstr;
    }

    /** @return the string position */
    int getCallFramePStr() {
        return this.slot2;
    }
}
// File: src/jdk/nashorn/internal/runtime/regexp/joni/StackMachine.java
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;

import java.lang.ref.WeakReference;

import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackType;

/**
 * Stack handling for the matcher: pushing typed {@link StackEntry} records, popping
 * them while undoing their side effects, and the backward scans over the stack
 * (null checks, repeat lookup, call-frame return).
 *
 * <p>The entry array is obtained from a per-thread cache (see {@link #fetchStack()})
 * and grown on demand by {@link #ensure1()}.
 */
abstract class StackMachine extends Matcher implements StackType {
    protected static final int INVALID_INDEX = -1;

    protected StackEntry[]stack;
    protected int stk;  // stkEnd: index of the next free entry

    /**
     * One int array holding the mem-start and mem-end tables behind the first
     * {@code regex.numRepeat} slots; {@code null} when the regex needs none of them.
     */
    protected final int[]repeatStk;
    /** Offsets such that {@code repeatStk[memStartStk + n]} / {@code repeatStk[memEndStk + n]} address group {@code n >= 1}. */
    protected final int memStartStk, memEndStk;

    // CEC
    protected byte[] stateCheckBuff; // move to int[] ?
    int stateCheckBuffSize;

    protected StackMachine(Regex regex, char[] chars, int p , int end) {
        super(regex, chars, p, end);

        this.stack = regex.stackNeeded ? fetchStack() : null;
        int n = regex.numRepeat + (regex.numMem << 1);
        this.repeatStk = n > 0 ? new int[n] : null;

        memStartStk = regex.numRepeat - 1;
        memEndStk   = memStartStk + regex.numMem;
        /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */
        /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */
    }

    /** Creates a stack of the initial size whose first entry is pre-allocated. */
    private static StackEntry[] allocateStack() {
        StackEntry[]stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE];
        stack[0] = new StackEntry();
        return stack;
    }

    /** Replaces {@link #stack} with an array of twice the length, keeping the entries. */
    private void doubleStack() {
        StackEntry[] newStack = new StackEntry[stack.length << 1];
        System.arraycopy(stack, 0, newStack, 0, stack.length);
        stack = newStack;
    }

    /** Per-thread, weakly referenced stack array handed out by {@link #fetchStack()}. */
    static final ThreadLocal<WeakReference<StackEntry[]>> stacks
            = new ThreadLocal<WeakReference<StackEntry[]>>() {
        @Override
        protected WeakReference<StackEntry[]> initialValue() {
            return new WeakReference<StackEntry[]>(allocateStack());
        }
    };

    /** Returns the current thread's cached stack, re-allocating it if it was collected. */
    private static StackEntry[] fetchStack() {
        WeakReference<StackEntry[]> ref = stacks.get();
        StackEntry[] stack = ref.get();
        if (stack == null) {
            ref = new WeakReference<StackEntry[]>(stack = allocateStack());
            stacks.set(ref);
        }
        return stack;
    }

    /** Pushes the bottom {@code ALT} entry and marks every group's start/end slot as invalid. */
    protected final void init() {
        if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */
        if (repeatStk != null) {
            for (int i=1; i<=regex.numMem; i++) {
                repeatStk[i + memStartStk] = repeatStk[i + memEndStk] = INVALID_INDEX;
            }
        }
    }

    /** Guarantees that {@code stack[stk]} exists (growing / allocating as needed) and returns it. */
    protected final StackEntry ensure1() {
        if (stk >= stack.length) doubleStack();
        StackEntry e = stack[stk];
        if (e == null) stack[stk] = e = new StackEntry();
        return e;
    }

    /** Pushes an entry carrying only a type tag. */
    protected final void pushType(int type) {
        ensure1().type = type;
        stk++;
    }

    // CEC

    // STATE_CHECK_POS
    private int stateCheckPos(int s, int snum) {
        return (s - str) * regex.numCombExpCheck + (snum - 1);
    }

    // STATE_CHECK_VAL
    /** @return whether the bit for ({@code s}, {@code snum}) is set; {@code false} without a buffer */
    protected final boolean stateCheckVal(int s, int snum) {
        if (stateCheckBuff != null) {
            int x = stateCheckPos(s, snum);
            return (stateCheckBuff[x / 8] & (1 << (x % 8))) != 0;
        }
        return false;
    }

    // ELSE_IF_STATE_CHECK_MARK
    /**
     * Sets the state-check bit described by {@code stack[stk]}. Callers invoke this
     * right after decrementing {@code stk}, so that entry is the one just popped.
     */
    private void stateCheckMark() {
        StackEntry e = stack[stk];
        int x = stateCheckPos(e.getStatePStr(), e.getStateCheck());
        stateCheckBuff[x / 8] |= (1 << (x % 8));
    }

    // STATE_CHECK_BUFF_INIT
    /**
     * Allocates the state-check bit buffer when {@code stateNum > 0}, the string is at
     * least {@code Config.CHECK_STRING_THRESHOLD_LEN} long and the computed size is
     * below {@code Config.CHECK_BUFF_MAX_SIZE}; otherwise clears it.
     *
     * <p>A freshly allocated array is already zero-filled, so no explicit clearing is
     * done. The previous {@code Arrays.fill(buf, offset, size - offset, 0)} passed a
     * length where an end index is expected and threw {@code IllegalArgumentException}
     * whenever {@code offset > size - offset}.
     */
    protected final void stateCheckBuffInit(int strLength, int offset, int stateNum) {
        stateCheckBuff = null; // reduce
        stateCheckBuffSize = 0;

        if (stateNum <= 0 || strLength < Config.CHECK_STRING_THRESHOLD_LEN) return;

        int size = ((strLength + 1) * stateNum + 7) >>> 3;
        int byteOffset = (offset * stateNum) >>> 3;

        if (size > 0 && byteOffset < size && size < Config.CHECK_BUFF_MAX_SIZE) {
            stateCheckBuff = new byte[size];
            stateCheckBuffSize = size;
        }
    }

    /** Drops the state-check buffer. */
    protected final void stateCheckBuffClear() {
        stateCheckBuff = null;
        stateCheckBuffSize = 0;
    }

    /** Pushes a state entry (type, byte code position, string position, previous position). */
    private void push(int type, int pat, int s, int prev) {
        StackEntry e = ensure1();
        e.type = type;
        e.setStatePCode(pat);
        e.setStatePStr(s);
        e.setStatePStrPrev(prev);
        if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
        stk++;
    }

    /**
     * Pushes a state entry without calling {@link #ensure1()}: {@code stack[stk]} must
     * already exist. String positions of the entry are left untouched.
     */
    protected final void pushEnsured(int type, int pat) {
        StackEntry e = stack[stk];
        e.type = type;
        e.setStatePCode(pat);
        if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
        stk++;
    }

    protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
        StackEntry e = ensure1();
        e.type = ALT;
        e.setStatePCode(pat);
        e.setStatePStr(s);
        e.setStatePStrPrev(sprev);
        if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? snum : 0);
        stk++;
    }

    /** Pushes a {@code STATE_CHECK_MARK} entry; does nothing when there is no buffer. */
    protected final void pushStateCheck(int s, int snum) {
        if (stateCheckBuff != null) {
            StackEntry e = ensure1();
            e.type = STATE_CHECK_MARK;
            e.setStatePStr(s);
            e.setStateCheck(snum);
            stk++;
        }
    }

    protected final void pushAlt(int pat, int s, int prev) {
        push(ALT, pat, s, prev);
    }

    protected final void pushPos(int s, int prev) {
        push(POS, -1 /*NULL_UCHARP*/, s, prev);
    }

    protected final void pushPosNot(int pat, int s, int prev) {
        push(POS_NOT, pat, s, prev);
    }

    protected final void pushStopBT() {
        pushType(STOP_BT);
    }

    protected final void pushLookBehindNot(int pat, int s, int sprev) {
        push(LOOK_BEHIND_NOT, pat, s, sprev);
    }

    /** Pushes a {@code REPEAT} entry with its counter set to zero. */
    protected final void pushRepeat(int id, int pat) {
        StackEntry e = ensure1();
        e.type = REPEAT;
        e.setRepeatNum(id);
        e.setRepeatPCode(pat);
        e.setRepeatCount(0);
        stk++;
    }

    /** Pushes a {@code REPEAT_INC} entry pointing at stack index {@code sindex}. */
    protected final void pushRepeatInc(int sindex) {
        StackEntry e = ensure1();
        e.type = REPEAT_INC;
        e.setSi(sindex);
        stk++;
    }

    /**
     * Pushes a {@code MEM_START} entry that remembers the group's previous start/end
     * slots, then points the start slot at this entry and invalidates the end slot.
     */
    protected final void pushMemStart(int mnum, int s) {
        StackEntry e = ensure1();
        e.type = MEM_START;
        e.setMemNum(mnum);
        e.setMemPstr(s);
        e.setMemStart(repeatStk[memStartStk + mnum]);
        e.setMemEnd(repeatStk[memEndStk + mnum]);
        repeatStk[memStartStk + mnum] = stk;
        repeatStk[memEndStk + mnum] = INVALID_INDEX;
        stk++;
    }

    /**
     * Pushes a {@code MEM_END} entry that remembers the group's previous start/end
     * slots, then points the end slot at this entry.
     */
    protected final void pushMemEnd(int mnum, int s) {
        StackEntry e = ensure1();
        e.type = MEM_END;
        e.setMemNum(mnum);
        e.setMemPstr(s);
        e.setMemStart(repeatStk[memStartStk + mnum]);
        e.setMemEnd(repeatStk[memEndStk + mnum]);
        repeatStk[memEndStk + mnum] = stk;
        stk++;
    }

    protected final void pushMemEndMark(int mnum) {
        StackEntry e = ensure1();
        e.type = MEM_END_MARK;
        e.setMemNum(mnum);
        stk++;
    }

    /**
     * Walks down the stack to the {@code MEM_START} entry of group {@code mnum} that is
     * not paired with a later end/mark entry of the same group.
     *
     * @return the index of that entry, or {@code 0} if the walk reaches the bottom
     */
    protected final int getMemStart(int mnum) {
        int level = 0;
        int stkp = stk;

        while (stkp > 0) {
            stkp--;
            StackEntry e = stack[stkp];
            if ((e.type & MASK_MEM_END_OR_MARK) != 0 && e.getMemNum() == mnum) {
                level++;
            } else if (e.type == MEM_START && e.getMemNum() == mnum) {
                if (level == 0) break;
                level--;
            }
        }
        return stkp;
    }

    protected final void pushNullCheckStart(int cnum, int s) {
        StackEntry e = ensure1();
        e.type = NULL_CHECK_START;
        e.setNullCheckNum(cnum);
        e.setNullCheckPStr(s);
        stk++;
    }

    protected final void pushNullCheckEnd(int cnum) {
        StackEntry e = ensure1();
        e.type = NULL_CHECK_END;
        e.setNullCheckNum(cnum);
        stk++;
    }

    protected final void pushCallFrame(int pat) {
        StackEntry e = ensure1();
        e.type = CALL_FRAME;
        e.setCallFrameRetAddr(pat);
        stk++;
    }

    protected final void pushReturn() {
        StackEntry e = ensure1();
        e.type = RETURN;
        stk++;
    }

    // stack debug routines here
    // ...

    /** Discards the top entry without undoing anything. */
    protected final void popOne() {
        stk--;
    }

    /** Pops down to the next entry matching {@code MASK_POP_USED}, using the regex's pop level. */
    protected final StackEntry pop() {
        switch (regex.stackPopLevel) {
        case StackPopLevel.FREE:
            return popFree();
        case StackPopLevel.MEM_START:
            return popMemStart();
        default:
            return popDefault();
        }
    }

    /** Writes the start/end slots saved in a {@code MEM_START} / {@code MEM_END} entry back into {@link #repeatStk}. */
    private void restoreMem(StackEntry e) {
        int mnum = e.getMemNum();
        repeatStk[memStartStk + mnum] = e.getMemStart();
        repeatStk[memEndStk + mnum] = e.getMemEnd();
    }

    private StackEntry popFree() {
        while (true) {
            StackEntry e = stack[--stk];

            if ((e.type & MASK_POP_USED) != 0) {
                return e;
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    private StackEntry popMemStart() {
        while (true) {
            StackEntry e = stack[--stk];

            if ((e.type & MASK_POP_USED) != 0) {
                return e;
            } else if (e.type == MEM_START) {
                restoreMem(e);
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    private StackEntry popDefault() {
        while (true) {
            StackEntry e = stack[--stk];

            if ((e.type & MASK_POP_USED) != 0) {
                return e;
            } else if (e.type == MEM_START) {
                restoreMem(e);
            } else if (e.type == REPEAT_INC) {
                //int si = stack[stk + IREPEAT_INC_SI];
                //stack[si + IREPEAT_COUNT]--;
                stack[e.getSi()].decreaseRepeatCount();
            } else if (e.type == MEM_END) {
                restoreMem(e);
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /**
     * Pops entries, undoing their effects, until a {@code POS_NOT} entry has been removed.
     *
     * <p>The mem-end slot is restored from the saved <em>end</em> value, as in
     * {@link #popDefault()} and {@link #popTilLookBehindNot()}; it was previously
     * overwritten with the saved start value.
     */
    protected final void popTilPosNot() {
        while (true) {
            stk--;
            StackEntry e = stack[stk];

            if (e.type == POS_NOT) {
                break;
            } else if (e.type == MEM_START) {
                restoreMem(e);
            } else if (e.type == REPEAT_INC) {
                //int si = stack[stk + IREPEAT_INC_SI];
                //stack[si + IREPEAT_COUNT]--;
                stack[e.getSi()].decreaseRepeatCount();
            } else if (e.type == MEM_END){
                restoreMem(e);
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /** Pops entries, undoing their effects, until a {@code LOOK_BEHIND_NOT} entry has been removed. */
    protected final void popTilLookBehindNot() {
        while (true) {
            stk--;
            StackEntry e = stack[stk];

            if (e.type == LOOK_BEHIND_NOT) {
                break;
            } else if (e.type == MEM_START) {
                restoreMem(e);
            } else if (e.type == REPEAT_INC) {
                //int si = stack[stk + IREPEAT_INC_SI];
                //stack[si + IREPEAT_COUNT]--;
                stack[e.getSi()].decreaseRepeatCount();
            } else if (e.type == MEM_END) {
                restoreMem(e);
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /**
     * Voids every {@code MASK_TO_VOID_TARGET} entry above the nearest {@code POS}
     * entry, and that entry itself, without moving {@code stk}.
     *
     * @return the index of the voided {@code POS} entry
     */
    protected final int posEnd() {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];
            if ((e.type & MASK_TO_VOID_TARGET) != 0) {
                e.type = VOID;
            } else if (e.type == POS) {
                e.type = VOID;
                break;
            }
        }
        return k;
    }

    /** Same as {@link #posEnd()} but stops at the nearest {@code STOP_BT} entry. */
    protected final void stopBtEnd() {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if ((e.type & MASK_TO_VOID_TARGET) != 0) {
                e.type = VOID;
            } else if (e.type == STOP_BT) {
                e.type = VOID;
                break;
            }
        }
    }

    // int for consistency with other null check routines
    /** @return 1 if the nearest {@code NULL_CHECK_START} for {@code id} was pushed at position {@code s}, else 0 */
    protected final int nullCheck(int id, int s) {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    return e.getNullCheckPStr() == s ? 1 : 0;
                }
            }
        }
    }

    /** Like {@link #nullCheck(int, int)}, but skips start entries that are balanced by a later {@code NULL_CHECK_END}. */
    protected final int nullCheckRec(int id, int s) {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    if (level == 0) {
                        return e.getNullCheckPStr() == s ? 1 : 0;
                    } else {
                        level--;
                    }
                }
            } else if (e.type == NULL_CHECK_END) {
                level++;
            }
        }
    }

    /**
     * Inspects the {@code MEM_START} entries in {@code stack[from .. stk)}.
     *
     * <p>Only indices strictly below {@code stk} are read; the previous inline loops
     * fetched {@code stack[k]} once more after the final increment, which could index
     * past the array when the stack was exactly full.
     *
     * @return 0 if some group is still open or non-empty, -1 if all are empty but one
     *         ends at a position other than {@code s}, otherwise 1
     */
    private int nullCheckMemScan(int from, int s) {
        int isNull = 1;
        for (int k = from; k < stk; k++) {
            StackEntry e = stack[k];
            if (e.type != MEM_START) continue;

            if (e.getMemEnd() == INVALID_INDEX) return 0;

            int endp;
            if (bsAt(regex.btMemEnd, e.getMemNum())) {
                endp = stack[e.getMemEnd()].getMemPStr();
            } else {
                endp = e.getMemEnd();
            }

            if (stack[e.getMemStart()].getMemPStr() != endp) return 0;
            if (endp != s) isNull = -1; /* empty, but position changed */
        }
        return isNull;
    }

    /**
     * Null check that also takes captured groups into account.
     *
     * @return 0 if the position moved since the matching {@code NULL_CHECK_START},
     *         otherwise the result of scanning the {@code MEM_START} entries above it
     *         (1, 0 or -1, see {@link #nullCheckMemScan(int, int)})
     */
    protected final int nullCheckMemSt(int id, int s) {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START && e.getNullCheckNum() == id) {
                return e.getNullCheckPStr() != s ? 0 : nullCheckMemScan(k, s);
            }
        }
    }

    /**
     * Like {@link #nullCheckMemSt(int, int)}, but skips start entries for {@code id}
     * that are balanced by a later {@code NULL_CHECK_END} with the same id.
     */
    protected final int nullCheckMemStRec(int id, int s) {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    if (level == 0) {
                        return e.getNullCheckPStr() != s ? 0 : nullCheckMemScan(k, s);
                    }
                    level--;
                }
            } else if (e.type == NULL_CHECK_END) {
                if (e.getNullCheckNum() == id) level++;
            }
        }
    }

    /** @return the stack index of the nearest {@code REPEAT} entry for {@code id} at call level 0 */
    protected final int getRepeat(int id) {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == REPEAT) {
                if (level == 0) {
                    if (e.getRepeatNum() == id) return k;
                }
            } else if (e.type == CALL_FRAME) {
                level--;
            } else if (e.type == RETURN) {
                level++;
            }
        }
    }

    /** @return the return address stored in the nearest {@code CALL_FRAME} not balanced by a later {@code RETURN} */
    protected final int sreturn() {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == CALL_FRAME) {
                if (level == 0) {
                    return e.getCallFrameRetAddr();
                } else {
                    level--;
                }
            } else if (e.type == RETURN) {
                level++;
            }
        }
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Syntax.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,628 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/
package jdk.nashorn.internal.runtime.regexp.joni;

import static jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar.INEFFECTIVE_META_CHAR;

import jdk.nashorn.internal.runtime.regexp.joni.constants.SyntaxProperties;

/**
 * Immutable description of a regular-expression dialect.
 *
 * <p>A syntax is three bit masks ({@code op}, {@code op2}, {@code behavior}) built from the
 * constants inherited from {@link SyntaxProperties}, a default option mask and a
 * {@link MetaCharTable}. Every boolean accessor below simply tests one constant against the
 * corresponding mask.
 */
public final class Syntax implements SyntaxProperties {
    private final int op;
    private final int op2;
    private final int behavior;
    public final int options;
    public final MetaCharTable metaCharTable;

    /**
     * @param op            mask tested by the {@code op*} accessors
     * @param op2           mask tested by the {@code op2*} accessors
     * @param behavior      mask tested by the behavior accessors
     * @param options       option mask stored as-is in {@link #options}
     * @param metaCharTable table stored as-is in {@link #metaCharTable}
     */
    public Syntax(int op, int op2, int behavior, int options, MetaCharTable metaCharTable) {
        this.op = op;
        this.op2 = op2;
        this.behavior = behavior;
        this.options = options;
        this.metaCharTable = metaCharTable;
    }

    /** Plain holder for the six variable meta character codes of a syntax. */
    public static class MetaCharTable {
        public final int esc;
        public final int anyChar;
        public final int anyTime;
        public final int zeroOrOneTime;
        public final int oneOrMoreTime;
        public final int anyCharAnyTime;

        public MetaCharTable(int esc, int anyChar, int anyTime,
                             int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) {
            this.esc = esc;
            this.anyChar = anyChar;
            this.anyTime = anyTime;
            this.zeroOrOneTime = zeroOrOneTime;
            this.oneOrMoreTime = oneOrMoreTime;
            this.anyCharAnyTime = anyCharAnyTime;
        }
    }

    /**
     * Builds the table used by every predefined syntax in this class: backslash as the escape
     * character and {@code INEFFECTIVE_META_CHAR} in all other slots. A fresh instance is
     * returned on each call so that the predefined syntaxes do not share a table object.
     */
    private static MetaCharTable ineffectiveMetaChars() {
        return new MetaCharTable(
            '\\',                  /* esc */
            INEFFECTIVE_META_CHAR, /* anychar '.' */
            INEFFECTIVE_META_CHAR, /* anytime '*' */
            INEFFECTIVE_META_CHAR, /* zero or one time '?' */
            INEFFECTIVE_META_CHAR, /* one or more time '+' */
            INEFFECTIVE_META_CHAR  /* anychar anytime */
        );
    }

    /**
     * OP
     *
     * @param opm mask of {@code OP_*} bits
     * @return {@code true} if any bit of {@code opm} is set in the {@code op} mask
     */
    protected boolean isOp(int opm) {
        return (op & opm) != 0;
    }

    public boolean opVariableMetaCharacters() {
        return isOp(OP_VARIABLE_META_CHARACTERS);
    }

    public boolean opDotAnyChar() {
        return isOp(OP_DOT_ANYCHAR);
    }

    public boolean opAsteriskZeroInf() {
        return isOp(OP_ASTERISK_ZERO_INF);
    }

    public boolean opEscAsteriskZeroInf() {
        return isOp(OP_ESC_ASTERISK_ZERO_INF);
    }

    public boolean opPlusOneInf() {
        return isOp(OP_PLUS_ONE_INF);
    }

    public boolean opEscPlusOneInf() {
        return isOp(OP_ESC_PLUS_ONE_INF);
    }

    public boolean opQMarkZeroOne() {
        return isOp(OP_QMARK_ZERO_ONE);
    }

    public boolean opEscQMarkZeroOne() {
        return isOp(OP_ESC_QMARK_ZERO_ONE);
    }

    public boolean opBraceInterval() {
        return isOp(OP_BRACE_INTERVAL);
    }

    public boolean opEscBraceInterval() {
        return isOp(OP_ESC_BRACE_INTERVAL);
    }

    public boolean opVBarAlt() {
        return isOp(OP_VBAR_ALT);
    }

    public boolean opEscVBarAlt() {
        return isOp(OP_ESC_VBAR_ALT);
    }

    public boolean opLParenSubexp() {
        return isOp(OP_LPAREN_SUBEXP);
    }

    public boolean opEscLParenSubexp() {
        return isOp(OP_ESC_LPAREN_SUBEXP);
    }

    public boolean opEscAZBufAnchor() {
        return isOp(OP_ESC_AZ_BUF_ANCHOR);
    }

    public boolean opEscCapitalGBeginAnchor() {
        return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR);
    }

    public boolean opDecimalBackref() {
        return isOp(OP_DECIMAL_BACKREF);
    }

    public boolean opBracketCC() {
        return isOp(OP_BRACKET_CC);
    }

    public boolean opEscWWord() {
        return isOp(OP_ESC_W_WORD);
    }

    public boolean opEscLtGtWordBeginEnd() {
        return isOp(OP_ESC_LTGT_WORD_BEGIN_END);
    }

    public boolean opEscBWordBound() {
        return isOp(OP_ESC_B_WORD_BOUND);
    }

    public boolean opEscSWhiteSpace() {
        return isOp(OP_ESC_S_WHITE_SPACE);
    }

    public boolean opEscDDigit() {
        return isOp(OP_ESC_D_DIGIT);
    }

    public boolean opLineAnchor() {
        return isOp(OP_LINE_ANCHOR);
    }

    public boolean opPosixBracket() {
        return isOp(OP_POSIX_BRACKET);
    }

    public boolean opQMarkNonGreedy() {
        return isOp(OP_QMARK_NON_GREEDY);
    }

    public boolean opEscControlChars() {
        return isOp(OP_ESC_CONTROL_CHARS);
    }

    public boolean opEscCControl() {
        return isOp(OP_ESC_C_CONTROL);
    }

    public boolean opEscOctal3() {
        return isOp(OP_ESC_OCTAL3);
    }

    public boolean opEscXHex2() {
        return isOp(OP_ESC_X_HEX2);
    }

    public boolean opEscXBraceHex8() {
        return isOp(OP_ESC_X_BRACE_HEX8);
    }

    /**
     * OP2
     *
     * @param opm mask of {@code OP2_*} bits
     * @return {@code true} if any bit of {@code opm} is set in the {@code op2} mask
     */
    protected boolean isOp2(int opm) {
        return (op2 & opm) != 0;
    }

    public boolean op2EscCapitalQQuote() {
        return isOp2(OP2_ESC_CAPITAL_Q_QUOTE);
    }

    public boolean op2QMarkGroupEffect() {
        return isOp2(OP2_QMARK_GROUP_EFFECT);
    }

    public boolean op2OptionPerl() {
        return isOp2(OP2_OPTION_PERL);
    }

    public boolean op2OptionRuby() {
        return isOp2(OP2_OPTION_RUBY);
    }

    public boolean op2PlusPossessiveRepeat() {
        return isOp2(OP2_PLUS_POSSESSIVE_REPEAT);
    }

    public boolean op2PlusPossessiveInterval() {
        return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL);
    }

    public boolean op2CClassSetOp() {
        return isOp2(OP2_CCLASS_SET_OP);
    }

    public boolean op2QMarkLtNamedGroup() {
        return isOp2(OP2_QMARK_LT_NAMED_GROUP);
    }

    public boolean op2EscKNamedBackref() {
        return isOp2(OP2_ESC_K_NAMED_BACKREF);
    }

    public boolean op2EscGSubexpCall() {
        return isOp2(OP2_ESC_G_SUBEXP_CALL);
    }

    public boolean op2AtMarkCaptureHistory() {
        return isOp2(OP2_ATMARK_CAPTURE_HISTORY);
    }

    public boolean op2EscCapitalCBarControl() {
        return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL);
    }

    public boolean op2EscCapitalMBarMeta() {
        return isOp2(OP2_ESC_CAPITAL_M_BAR_META);
    }

    public boolean op2EscVVtab() {
        return isOp2(OP2_ESC_V_VTAB);
    }

    public boolean op2EscUHex4() {
        return isOp2(OP2_ESC_U_HEX4);
    }

    public boolean op2EscGnuBufAnchor() {
        return isOp2(OP2_ESC_GNU_BUF_ANCHOR);
    }

    public boolean op2EscPBraceCharProperty() {
        return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY);
    }

    public boolean op2EscPBraceCircumflexNot() {
        return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT);
    }

    public boolean op2EscHXDigit() {
        return isOp2(OP2_ESC_H_XDIGIT);
    }

    public boolean op2IneffectiveEscape() {
        return isOp2(OP2_INEFFECTIVE_ESCAPE);
    }

    /**
     * BEHAVIOR
     *
     * @param bvm mask of behavior bits
     * @return {@code true} if any bit of {@code bvm} is set in the {@code behavior} mask
     */
    protected boolean isBehavior(int bvm) {
        return (behavior & bvm) != 0;
    }

    public boolean contextIndepRepeatOps() {
        return isBehavior(CONTEXT_INDEP_REPEAT_OPS);
    }

    public boolean contextInvalidRepeatOps() {
        return isBehavior(CONTEXT_INVALID_REPEAT_OPS);
    }

    public boolean allowUnmatchedCloseSubexp() {
        return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP);
    }

    public boolean allowInvalidInterval() {
        return isBehavior(ALLOW_INVALID_INTERVAL);
    }

    public boolean allowIntervalLowAbbrev() {
        return isBehavior(ALLOW_INTERVAL_LOW_ABBREV);
    }

    public boolean strictCheckBackref() {
        return isBehavior(STRICT_CHECK_BACKREF);
    }

    public boolean differentLengthAltLookBehind() {
        return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND);
    }

    public boolean captureOnlyNamedGroup() {
        return isBehavior(CAPTURE_ONLY_NAMED_GROUP);
    }

    public boolean allowMultiplexDefinitionName() {
        return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME);
    }

    public boolean fixedIntervalIsGreedyOnly() {
        return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY);
    }

    public boolean notNewlineInNegativeCC() {
        return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC);
    }

    public boolean backSlashEscapeInCC() {
        return isBehavior(BACKSLASH_ESCAPE_IN_CC);
    }

    public boolean allowEmptyRangeInCC() {
        return isBehavior(ALLOW_EMPTY_RANGE_IN_CC);
    }

    public boolean allowDoubleRangeOpInCC() {
        return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC);
    }

    public boolean warnCCOpNotEscaped() {
        return isBehavior(WARN_CC_OP_NOT_ESCAPED);
    }

    /**
     * Misspelled historical name, kept so existing callers continue to compile.
     *
     * @see #warnRedundantNestedRepeat()
     */
    public boolean warnReduntantNestedRepeat() {
        return isBehavior(WARN_REDUNDANT_NESTED_REPEAT);
    }

    /** Correctly spelled equivalent of {@link #warnReduntantNestedRepeat()}. */
    public boolean warnRedundantNestedRepeat() {
        return isBehavior(WARN_REDUNDANT_NESTED_REPEAT);
    }

    public static final Syntax RUBY = new Syntax(
        (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
           OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
           OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
           OP_ESC_C_CONTROL )
         & ~OP_ESC_LTGT_WORD_BEGIN_END ),

        ( OP2_QMARK_GROUP_EFFECT |
          OP2_OPTION_RUBY |
          OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF |
          OP2_ESC_G_SUBEXP_CALL |
          OP2_ESC_P_BRACE_CHAR_PROPERTY |
          OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
          OP2_PLUS_POSSESSIVE_REPEAT |
          OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
          OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
          OP2_ESC_H_XDIGIT ),

        ( GNU_REGEX_BV |
          ALLOW_INTERVAL_LOW_ABBREV |
          DIFFERENT_LEN_ALT_LOOK_BEHIND |
          CAPTURE_ONLY_NAMED_GROUP |
          ALLOW_MULTIPLEX_DEFINITION_NAME |
          FIXED_INTERVAL_IS_GREEDY_ONLY |
          WARN_CC_OP_NOT_ESCAPED |
          WARN_REDUNDANT_NESTED_REPEAT ),

        Option.NONE,

        ineffectiveMetaChars()
    );

    /** Alias of {@link #RUBY}; both names refer to the same instance. */
    public static final Syntax DEFAULT = RUBY;

    public static final Syntax ASIS = new Syntax(
        0,

        OP2_INEFFECTIVE_ESCAPE,

        0,

        Option.NONE,

        ineffectiveMetaChars()
    );

    public static final Syntax PosixBasic = new Syntax(
        ( POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP |
          OP_ESC_BRACE_INTERVAL ),

        0,

        0,

        ( Option.SINGLELINE | Option.MULTILINE ),

        ineffectiveMetaChars()
    );

    public static final Syntax PosixExtended = new Syntax(
        ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP |
          OP_BRACE_INTERVAL |
          OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE | OP_VBAR_ALT ),

        0,

        ( CONTEXT_INDEP_ANCHORS |
          CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS |
          ALLOW_UNMATCHED_CLOSE_SUBEXP |
          ALLOW_DOUBLE_RANGE_OP_IN_CC ),

        ( Option.SINGLELINE | Option.MULTILINE ),

        ineffectiveMetaChars()
    );

    public static final Syntax Emacs = new Syntax(
        ( OP_DOT_ANYCHAR | OP_BRACKET_CC |
          OP_ESC_BRACE_INTERVAL |
          OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT |
          OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
          OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF |
          OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ),

        OP2_ESC_GNU_BUF_ANCHOR,

        ALLOW_EMPTY_RANGE_IN_CC,

        Option.NONE,

        ineffectiveMetaChars()
    );

    public static final Syntax Grep = new Syntax(
        ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET |
          OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP |
          OP_ESC_VBAR_ALT |
          OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF |
          OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR |
          OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND |
          OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ),

        0,

        ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ),

        Option.NONE,

        ineffectiveMetaChars()
    );

    public static final Syntax GnuRegex = new Syntax(
        GNU_REGEX_OP,
        0,
        GNU_REGEX_BV,

        Option.NONE,

        ineffectiveMetaChars()
    );

    public static final Syntax Java = new Syntax(
        (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
           OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL |
           OP_ESC_OCTAL3 | OP_ESC_X_HEX2 )
         & ~OP_ESC_LTGT_WORD_BEGIN_END ),

        ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT |
          OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
          OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
          OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
          OP2_ESC_P_BRACE_CHAR_PROPERTY ),

        ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),

        Option.SINGLELINE,

        ineffectiveMetaChars()
    );

    public static final Syntax Perl = new Syntax(
        (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
           OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
           OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
           OP_ESC_C_CONTROL )
         & ~OP_ESC_LTGT_WORD_BEGIN_END ),

        ( OP2_ESC_CAPITAL_Q_QUOTE |
          OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
          OP2_ESC_P_BRACE_CHAR_PROPERTY |
          OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ),

        GNU_REGEX_BV,

        Option.SINGLELINE,

        ineffectiveMetaChars()
    );

    public static final Syntax PerlNG = new Syntax(
        (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
           OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
           OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
           OP_ESC_C_CONTROL )
         & ~OP_ESC_LTGT_WORD_BEGIN_END ),

        ( OP2_ESC_CAPITAL_Q_QUOTE |
          OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
          OP2_ESC_P_BRACE_CHAR_PROPERTY |
          OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
          OP2_QMARK_LT_NAMED_GROUP |
          OP2_ESC_K_NAMED_BACKREF |
          OP2_ESC_G_SUBEXP_CALL ),

        ( GNU_REGEX_BV |
          CAPTURE_ONLY_NAMED_GROUP |
          ALLOW_MULTIPLEX_DEFINITION_NAME ),

        Option.SINGLELINE,

        ineffectiveMetaChars()
    );

    public static final Syntax JAVASCRIPT = new Syntax(
        (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
           OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL | OP_ESC_X_HEX2 )
         & ~OP_ESC_LTGT_WORD_BEGIN_END ),

        ( OP2_QMARK_GROUP_EFFECT | OP2_CCLASS_SET_OP |
          OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 ),

        ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),

        Option.SINGLELINE,

        ineffectiveMetaChars()
    );
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Token.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,172 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; + +final class Token { + TokenType type; + boolean escaped; + int base; /* is number: 8, 16 (used in [....]) */ + int backP; + + // union fields + private int INT1, INT2, INT3, INT4, INT5; + private int []INTA1; + + // union accessors + int getC() { + return INT1; + } + void setC(int c) { + INT1 = c; + } + + int getCode() { + return INT1; + } + void setCode(int code) { + INT1 = code; + } + + int getAnchor() { + return INT1; + } + void setAnchor(int anchor) { + INT1 = anchor; + } + + int getSubtype() { + return INT1; + } + void setSubtype(int subtype) { + INT1 = subtype; + } + + // repeat union member + int getRepeatLower() { + return INT1; + } + void setRepeatLower(int lower) { + INT1 = lower; + } + + int getRepeatUpper() { + return INT2; + } + void setRepeatUpper(int upper) { + INT2 = upper; + } + + boolean getRepeatGreedy() { + return INT3 != 0; + } + void setRepeatGreedy(boolean greedy) { + INT3 = greedy ? 1 : 0; + } + + boolean getRepeatPossessive() { + return INT4 != 0; + } + void setRepeatPossessive(boolean possessive) { + INT4 = possessive ? 1 : 0; + } + + // backref union member + int getBackrefNum() { + return INT1; + } + void setBackrefNum(int num) { + INT1 = num; + } + + int getBackrefRef1() { + return INT2; + } + void setBackrefRef1(int ref1) { + INT2 = ref1; + } + + int[]getBackrefRefs() { + return INTA1; + } + void setBackrefRefs(int[]refs) { + INTA1 = refs; + } + + boolean getBackrefByName() { + return INT3 != 0; + } + void setBackrefByName(boolean byName) { + INT3 = byName ? 1 : 0; + } + + // USE_BACKREF_AT_LEVEL + boolean getBackrefExistLevel() { + return INT4 != 0; + } + void setBackrefExistLevel(boolean existLevel) { + INT4 = existLevel ? 
1 : 0; + } + + int getBackrefLevel() { + return INT5; + } + void setBackrefLevel(int level) { + INT5 = level; + } + + // call union member + int getCallNameP() { + return INT1; + } + void setCallNameP(int nameP) { + INT1 = nameP; + } + + int getCallNameEnd() { + return INT2; + } + void setCallNameEnd(int nameEnd) { + INT2 = nameEnd; + } + + int getCallGNum() { + return INT3; + } + void setCallGNum(int gnum) { + INT3 = gnum; + } + + // prop union member + int getPropCType() { + return INT1; + } + void setPropCType(int ctype) { + INT1 = ctype; + } + + boolean getPropNot() { + return INT2 != 0; + } + void setPropNot(boolean not) { + INT2 = not ? 1 : 0; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/UnsetAddrList.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,69 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni; + +import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; +import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; + +public final class UnsetAddrList { + int num; + Node[]targets; + int[]offsets; + + public UnsetAddrList(int size) { + targets = new Node[size]; + offsets = new int[size]; + } + + public void add(int offset, Node node) { + if (num >= offsets.length) { + Node []ttmp = new Node[targets.length << 1]; + System.arraycopy(targets, 0, ttmp, 0, num); + targets = ttmp; + int[]otmp = new int[offsets.length << 1]; + System.arraycopy(offsets, 0, otmp, 0, num); + offsets = otmp; + } + targets[num] = node; + offsets[num] = offset; + + num++; + } + + public void fix(Regex regex) { + for (int i=0; i<num; i++) { + EncloseNode en = (EncloseNode)targets[i]; + if (!en.isAddrFixed()) new InternalException(ErrorMessages.ERR_PARSER_BUG); + regex.code[offsets[i]] = en.callAddr; // is this safe ? + } + } + + public String toString() { + StringBuilder value = new StringBuilder(); + if (num > 0) { + for (int i=0; i<num; i++) { + value.append("offset + " + offsets[i] + " target: " + targets[i].getAddressName()); + } + } + return value.toString(); + } +}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

/**
 * Receiver for warning messages.
 *
 * @author <a href="mailto:ola.bini@gmail.com">Ola Bini</a>
 */
public interface WarnCallback {

    /**
     * Handles one warning.
     *
     * @param message the warning text
     */
    void warn(String message);

    /** Callback that prints each message on its own line to {@code System.err}. */
    WarnCallback DEFAULT = new WarnCallback() {
        @Override
        public void warn(final String message) {
            System.err.println(message);
        }
    };
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

/**
 * Warning message texts.
 *
 * <p>Interface fields are implicitly {@code public static final}, so no modifiers are
 * spelled out on the constants.
 */
public interface Warnings {
    /** Text for an invalid back reference. */
    String INVALID_BACKREFERENCE = "invalid back reference";

    /** Text for an invalid subexpression call. */
    String INVALID_SUBEXP_CALL = "invalid subexp call";

    /** Text for an invalid Unicode property; contains the literal placeholder {@code <%n>}. */
    String INVALID_UNICODE_PROPERTY = "invalid Unicode Property \\<%n>";
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/AnchorNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,92 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; + +public final class AnchorNode extends Node implements AnchorType { + public int type; + public Node target; + public int charLength; + + public AnchorNode(int type) { + this.type = type; + charLength = -1; + } + + @Override + public int getType() { + return ANCHOR; + } + + @Override + protected void setChild(Node newChild) { + target = newChild; + } + + @Override + protected Node getChild() { + return target; + } + + public void setTarget(Node tgt) { + target = tgt; + tgt.parent = this; + } + + @Override + public String getName() { + return "Anchor"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(); + value.append("\n type: " + typeToString()); + value.append("\n target: " + pad(target, level + 1)); + return value.toString(); + } + + public String typeToString() { + StringBuilder type = new StringBuilder(); + if (isType(BEGIN_BUF)) type.append("BEGIN_BUF "); + if (isType(BEGIN_LINE)) type.append("BEGIN_LINE "); + if (isType(BEGIN_POSITION)) type.append("BEGIN_POSITION "); + if (isType(END_BUF)) type.append("END_BUF "); + if (isType(SEMI_END_BUF)) type.append("SEMI_END_BUF "); + if (isType(END_LINE)) type.append("END_LINE "); + if (isType(WORD_BOUND)) type.append("WORD_BOUND "); + if (isType(NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); + if (isType(WORD_BEGIN)) type.append("WORD_BEGIN "); + if (isType(WORD_END)) type.append("WORD_END "); + if (isType(PREC_READ)) type.append("PREC_READ "); + if (isType(PREC_READ_NOT)) type.append("PREC_READ_NOT "); + if (isType(LOOK_BEHIND)) type.append("LOOK_BEHIND "); + if (isType(LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); + if (isType(ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); + if (isType(ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML "); + return type.toString(); + } + + private boolean isType(int type) { + return (this.type & 
type) != 0; + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/AnyCharNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,40 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +public final class AnyCharNode extends Node { + public AnyCharNode(){} + + @Override + public int getType() { + return CANY; + } + + @Override + public String getName() { + return "Any Char"; + } + + @Override + public String toString(int level) { + String value = ""; + return value; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/BackRefNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,98 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; + +public final class BackRefNode extends StateNode { + //private static int NODE_BACKREFS_SIZE = 6; + + //int state; + public int backNum; + public int back[]; + + public int nestLevel; + + public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) { + this.backNum = backNum; + if (byName) setNameRef(); + + for (int i=0; i<backNum; i++) { + if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) { + setRecursion(); /* /...(\1).../ */ + break; + } + } + + back = new int[backNum]; + System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ??? + } + + // #ifdef USE_BACKREF_AT_LEVEL + public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) { + this(backNum, backRefs, byName, env); + + if (existLevel) { + //state |= NST_NEST_LEVEL; + setNestLevel(); + this.nestLevel = nestLevel; + } + } + + @Override + public int getType() { + return BREF; + } + + @Override + public String getName() { + return "Back Ref"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(super.toString(level)); + value.append("\n backNum: " + backNum); + String backs = ""; + for (int i=0; i<back.length; i++) backs += back[i] + ", "; + value.append("\n back: " + backs); + value.append("\n nextLevel: " + nestLevel); + return value.toString(); + } + + public void renumber(int[]map) { + if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); + + int oldNum = backNum; + + int pos = 0; + for (int i=0; i<oldNum; i++) { + int n = map[back[i]]; + if (n > 0) { + back[pos] = n; + pos++; + } + } + backNum = pos; + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CClassNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,545 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/
package jdk.nashorn.internal.runtime.regexp.joni.ast;

import jdk.nashorn.internal.runtime.regexp.joni.*;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.AsciiTables;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;

/**
 * AST node for a character class.
 *
 * Membership is stored in two parts: the bit set {@link #bs}, consulted for
 * codes up to 0xff, and the optional code range buffer {@link #mbuf},
 * consulted for codes above 0xff (see {@link #isCodeInCCLength(int)}).
 * The NOT flag inverts the result of the lookup.
 */
public final class CClassNode extends Node {
    // Bit masks stored in 'flags'.
    private static final int FLAG_NCCLASS_NOT = 1<<0;
    private static final int FLAG_NCCLASS_SHARE = 1<<1;

    int flags;
    public final BitSet bs = new BitSet();  // conditional creation ?
    public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */

    private int ctype;                      // for hashing purposes

    // node_new_cclass
    /** Creates an empty class: no flags, empty bit set, no range buffer. */
    public CClassNode() {}

    /**
     * Same as {@link #CClassNode(boolean, int, int[])} but additionally records
     * the character type, which is used by {@link #equals(Object)} and
     * {@link #hashCode()}.
     */
    public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
        this(not, sbOut, ranges);
        this.ctype = ctype;
    }

    /** Resets the node: empties the bit set, clears all flags and drops the range buffer. */
    public void clear() {
        bs.clear();
        flags = 0;
        mbuf = null;
    }

    // node_new_cclass_by_codepoint_range, only used by shared Char Classes
    /**
     * Builds a class from a range table.
     *
     * @param not    when true the NOT flag is set
     * @param sbOut  codes below this value are set in the bit set; the first code
     *               at or above it stops the bit set fill
     * @param ranges table whose element 0 is the number of ranges, followed by
     *               (from, to) pairs; may be null
     */
    public CClassNode(boolean not, int sbOut, int[]ranges) {
        if (not) setNot();
        // bs.clear();

        if (sbOut > 0 && ranges != null) {
            int n = ranges[0];
            for (int i=0; i<n; i++) {
                int from = ranges[i * 2 + 1];
                int to = ranges[i * 2 + 2];
                for (int j=from; j<=to; j++) {
                    if (j >= sbOut) {
                        // Reached the bit set limit: the whole table goes into the buffer.
                        setupBuffer(ranges);
                        return;
                    }
                    bs.set(j);
                }
            }
        }
        setupBuffer(ranges);
    }

    @Override
    public int getType() {
        return CCLASS;
    }

    @Override
    public String getName() {
        return "Character Class";
    }

    /**
     * Two class nodes are equal when their recorded ctype and NOT flag match;
     * the bit set and range buffer are not compared.
     */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof CClassNode)) return false;
        CClassNode cc = (CClassNode)other;
        return ctype == cc.ctype && isNot() == cc.isNot();
    }

    /**
     * Hash derived from ctype and the NOT flag when the shared class table is
     * enabled, otherwise the inherited hash.
     *
     * NOTE(review): with Config.USE_SHARED_CCLASS_TABLE disabled, two nodes that
     * are equal per equals() can return different hash codes - confirm that no
     * hash-based collection is used in that configuration.
     */
    @Override
    public int hashCode() {
        if (Config.USE_SHARED_CCLASS_TABLE) {
            int hash = 0;
            hash += ctype;
            if (isNot()) hash++;
            return hash + (hash >> 5);
        } else {
            return super.hashCode();
        }
    }

    /** Debug dump of the flags, bit set and range buffer. */
    @Override
    public String toString(int level) {
        StringBuilder value = new StringBuilder();
        value.append("\n flags: " + flagsToString());
        value.append("\n bs: " + pad(bs, level + 1));
        value.append("\n mbuf: " + pad(mbuf, level + 1));

        return value.toString();
    }

    /** Returns the names of the flags that are set, each followed by a space. */
    public String flagsToString() {
        StringBuilder flags = new StringBuilder();
        if (isNot()) flags.append("NOT ");
        if (isShare()) flags.append("SHARE ");
        return flags.toString();
    }

    // Creates the range buffer from the table unless the table is null or has zero ranges.
    private void setupBuffer(int[]ranges) {
        if (ranges != null) {
            if (ranges[0] == 0) return;
            mbuf = new CodeRangeBuffer(ranges);
        }
    }

    /** True when there is no range buffer and the bit set is empty. */
    public boolean isEmpty() {
        return mbuf == null && bs.isEmpty();
    }

    /** Adds the range [from, to] to the range buffer via CodeRangeBuffer.addCodeRangeToBuff. */
    public void addCodeRangeToBuf(int from, int to) {
        mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
    }

    /** Adds the range [from, to] to the range buffer via CodeRangeBuffer.addCodeRange. */
    public void addCodeRange(ScanEnvironment env, int from, int to) {
        mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
    }

    /** Replaces the range buffer with the result of CodeRangeBuffer.addAllMultiByteRange. */
    public void addAllMultiByteRange() {
        mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
    }

    /**
     * If the NOT flag is set, folds the negation into the data: inverts the bit
     * set, negates the range buffer and clears the flag.
     */
    public void clearNotFlag() {
        if (isNot()) {
            bs.invert();

            mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
            clearNot();
        }
    }

    // and_cclass
    /**
     * Intersects this class with {@code other}, storing the result in this node.
     * {@code other} is not modified: negated operands are inverted into
     * temporary bit sets first.
     */
    public void and(CClassNode other) {
        boolean not1 = isNot();
        BitSet bsr1 = bs;
        CodeRangeBuffer buf1 = mbuf;
        boolean not2 = other.isNot();
        BitSet bsr2 = other.bs;
        CodeRangeBuffer buf2 = other.mbuf;

        if (not1) {
            BitSet bs1 = new BitSet();
            bsr1.invertTo(bs1);
            bsr1 = bs1;
        }

        if (not2) {
            BitSet bs2 = new BitSet();
            bsr2.invertTo(bs2);
            bsr2 = bs2;
        }

        bsr1.and(bsr2);

        // The result was computed in a temporary; copy it back into our own bit set.
        if (bsr1 != bs) {
            bs.copy(bsr1);
            bsr1 = bs;
        }

        // This node keeps its NOT flag, so store the result in inverted form.
        if (not1) {
            bs.invert();
        }

        CodeRangeBuffer pbuf = null;

        if (not1 && not2) {
            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
        } else {
            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);

            if (not1) {
                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
            }
        }
        mbuf = pbuf;

    }

    // or_cclass
    /**
     * Unions this class with {@code other}, storing the result in this node.
     * Mirrors {@link #and(CClassNode)} with the and/or operations exchanged.
     */
    public void or(CClassNode other) {
        boolean not1 = isNot();
        BitSet bsr1 = bs;
        CodeRangeBuffer buf1 = mbuf;
        boolean not2 = other.isNot();
        BitSet bsr2 = other.bs;
        CodeRangeBuffer buf2 = other.mbuf;

        if (not1) {
            BitSet bs1 = new BitSet();
            bsr1.invertTo(bs1);
            bsr1 = bs1;
        }

        if (not2) {
            BitSet bs2 = new BitSet();
            bsr2.invertTo(bs2);
            bsr2 = bs2;
        }

        bsr1.or(bsr2);

        // The result was computed in a temporary; copy it back into our own bit set.
        if (bsr1 != bs) {
            bs.copy(bsr1);
            bsr1 = bs;
        }

        // This node keeps its NOT flag, so store the result in inverted form.
        if (not1) {
            bs.invert();
        }

        CodeRangeBuffer pbuf = null;
        if (not1 && not2) {
            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
        } else {
            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
            if (not1) {
                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
            }
        }
        mbuf = pbuf;
    }

    // add_ctype_to_cc_by_range // Encoding out!
    /**
     * Adds a character type described by a range table to this class.
     *
     * @param ctype character type (not read by this method)
     * @param not   when true the complement of the ranges is added
     * @param sbOut codes below this value go into the bit set
     * @param mbr   table whose element 0 is the number of ranges, followed by
     *              (from, to) pairs
     */
    public void addCTypeByRange(int ctype, boolean not, int sbOut, int mbr[]) {
        int n = mbr[0];

        if (!not) {
            for (int i=0; i<n; i++) {
                for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
                    if (j >= sbOut) {
                        // Crossed the bit set limit inside range i: push the rest of
                        // this range, then all following ranges, into the buffer.
                        if (Config.VANILLA) {
                            if (j == mbr[i * 2 + 2]) {
                                i++;
                            } else if (j > mbr[i * 2 + 1]) {
                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
                                i++;
                            }
                        } else {
                            if (j >= mbr[i * 2 + 1]) {
                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
                                i++;
                            }
                        }
                        // !goto sb_end!, remove duplication!
                        for (; i<n; i++) {
                            addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
                        }
                        return;
                    }
                    bs.set(j);
                }
            }
            // !sb_end:!
            for (int i=0; i<n; i++) {
                addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
            }

        } else {
            // 'prev' is the first code not yet covered by a range, i.e. the start of the next gap.
            int prev = 0;

            for (int i=0; i<n; i++) {
                for (int j=prev; j < mbr[2 * i + 1]; j++) {
                    if (j >= sbOut) {
                        // !goto sb_end2!, remove duplication
                        prev = sbOut;
                        for (i=0; i<n; i++) {
                            if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
                            prev = mbr[i * 2 + 2] + 1;
                        }
                        if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
                        return;
                    }
                    bs.set(j);
                }
                prev = mbr[2 * i + 2] + 1;
            }

            // Gap after the last range, up to the bit set limit.
            for (int j=prev; j<sbOut; j++) {
                bs.set(j);
            }

            // !sb_end2:!
            // Add the gaps between ranges, starting at sbOut, to the buffer.
            prev = sbOut;
            for (int i=0; i<n; i++) {
                if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
                prev = mbr[i * 2 + 2] + 1;
            }
            if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
        }
    }

    /**
     * Adds a character type (optionally negated) to this class.
     *
     * Resolution order: the ASCII table path for the D/S/W types when
     * Config.NON_UNICODE_SDW is set, then a range table obtained from
     * EncodingHelper.ctypeCodeRange, then a per-code scan of the single byte range.
     *
     * @throws InternalException if the type is handled by none of these paths
     */
    public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
        if (Config.NON_UNICODE_SDW) {
            switch(ctype) {
            case CharacterType.D:
            case CharacterType.S:
            case CharacterType.W:
                ctype ^= CharacterType.SPECIAL_MASK;

                if (env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE) {
                    // \s in JavaScript includes unicode characters.
                    break;
                }

                if (not) {
                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
                        // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
                        if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c);
                    }
                    addAllMultiByteRange();
                } else {
                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
                        // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
                        if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c);
                    }
                }
                return;
            }
        }

        int[] ranges = EncodingHelper.ctypeCodeRange(ctype, sbOut);
        if (ranges != null) {
            addCTypeByRange(ctype, not, sbOut.value, ranges);
            return;
        }

        switch(ctype) {
        case CharacterType.ALPHA:
        case CharacterType.BLANK:
        case CharacterType.CNTRL:
        case CharacterType.DIGIT:
        case CharacterType.LOWER:
        case CharacterType.PUNCT:
        case CharacterType.SPACE:
        case CharacterType.UPPER:
        case CharacterType.XDIGIT:
        case CharacterType.ASCII:
        case CharacterType.ALNUM:
            // For these types the negated form also receives all multi byte ranges.
            if (not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
                }
                addAllMultiByteRange();
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
                }
            }
            break;

        case CharacterType.GRAPH:
        case CharacterType.PRINT:
            // For these types the positive form receives all multi byte ranges.
            if (not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
                }
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
                }
                addAllMultiByteRange();
            }
            break;

        case CharacterType.WORD:
            if (!not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isWord(c)) bs.set(c);
                }

                addAllMultiByteRange();
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isWord(c)) bs.set(c);
                }
            }
            break;

        default:
            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
        } // switch
    }

    /**
     * Mutable state passed to {@link #nextStateClass} and {@link #nextStateValue}.
     * nextStateValue reads 'v'/'inType'/'vIsRaw' as the incoming value and copies
     * them into 'vs'/'type'/'vsIsRaw' before returning.
     */
    public static final class CCStateArg {
        public int v;
        public int vs;
        public boolean vsIsRaw;
        public boolean vIsRaw;
        public CCVALTYPE inType;
        public CCVALTYPE type;
        public CCSTATE state;
    }

    /**
     * State transition for a nested class token. A pending single value
     * (state VALUE, type not CLASS) is added to this class first.
     *
     * @throws SyntaxException if the current state is RANGE
     */
    public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
        if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);

        if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
            if (arg.type == CCVALTYPE.SB) {
                bs.set(arg.vs);
            } else if (arg.type == CCVALTYPE.CODE_POINT) {
                addCodeRange(env, arg.vs, arg.vs);
            }
        }
        arg.state = CCSTATE.VALUE;
        arg.type = CCVALTYPE.CLASS;
    }

    /**
     * State transition for a value token.
     *
     * In state VALUE the pending value arg.vs is added to the class. In state
     * RANGE the range arg.vs..arg.v is added and the state becomes COMPLETE.
     * In states COMPLETE and START the state becomes VALUE.
     *
     * @throws ValueException for a single byte value above 0xff, or for an empty
     *                        range when the syntax does not allow one
     */
    public void nextStateValue(CCStateArg arg, ScanEnvironment env) {

        switch(arg.state) {
        case VALUE:
            if (arg.type == CCVALTYPE.SB) {
                if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
                bs.set(arg.vs);
            } else if (arg.type == CCVALTYPE.CODE_POINT) {
                addCodeRange(env, arg.vs, arg.vs);
            }
            break;

        case RANGE:
            if (arg.inType == arg.type) {
                if (arg.inType == CCVALTYPE.SB) {
                    if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);

                    if (arg.vs > arg.v) {
                        if (env.syntax.allowEmptyRangeInCC()) {
                            // goto ccs_range_end
                            arg.state = CCSTATE.COMPLETE;
                            break;
                        } else {
                            throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                        }
                    }
                    bs.setRange(arg.vs, arg.v);
                } else {
                    addCodeRange(env, arg.vs, arg.v);
                }
            } else {
                // Range endpoints have different value types.
                if (arg.vs > arg.v) {
                    if (env.syntax.allowEmptyRangeInCC()) {
                        // goto ccs_range_end
                        arg.state = CCSTATE.COMPLETE;
                        break;
                    } else {
                        throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                    }
                }
                // Bit set part is capped at 0xff; the full range also goes to the buffer.
                bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
                addCodeRange(env, arg.vs, arg.v);
            }
            // ccs_range_end:
            arg.state = CCSTATE.COMPLETE;
            break;

        case COMPLETE:
        case START:
            arg.state = CCSTATE.VALUE;
            break;

        default:
            break;

        } // switch

        // The incoming value becomes the pending value for the next call.
        arg.vsIsRaw = arg.vIsRaw;
        arg.vs = arg.v;
        arg.type = arg.inType;
    }

    // onig_is_code_in_cc_len
    /**
     * Tests whether {@code code} matches this class: codes above 0xff are looked
     * up in the range buffer (no buffer means not found), other codes in the bit
     * set; the result is inverted when the NOT flag is set.
     */
    public boolean isCodeInCCLength(int code) {
        boolean found;

        if (code > 0xff) {
            if (mbuf == null) {
                found = false;
            } else {
                found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
            }
        } else {
            found = bs.at(code);
        }

        if (isNot()) {
            return !found;
        } else {
            return found;
        }
    }

    // onig_is_code_in_cc
    /** Delegates to {@link #isCodeInCCLength(int)}. */
    public boolean isCodeInCC(int code) {
        return isCodeInCCLength(code);
    }

    public void setNot() {
        flags |= FLAG_NCCLASS_NOT;
    }

    public void clearNot() {
        flags &= ~FLAG_NCCLASS_NOT;
    }

    public boolean isNot() {
        return (flags & FLAG_NCCLASS_NOT) != 0;
    }

    public void setShare() {
        flags |= FLAG_NCCLASS_SHARE;
    }

    public void clearShare() {
        flags &= ~FLAG_NCCLASS_SHARE;
    }

    public boolean isShare() {
        return (flags & FLAG_NCCLASS_SHARE) != 0;
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CTypeNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,50 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +public final class CTypeNode extends Node { + public int ctype; + public boolean not; + + public CTypeNode(int type, boolean not) { + this.ctype= type; + this.not = not; + } + + @Override + public int getType() { + return CTYPE; + } + + @Override + public String getName() { + return "Character Type"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(); + value.append("\n ctype: " + ctype); + value.append("\n not: " + not); + + return value.toString(); + } + +}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CallNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,86 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.ast;

import java.util.Set;

import jdk.nashorn.internal.runtime.regexp.joni.UnsetAddrList;
import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;

/**
 * AST node for a call to a group, identified by name (a slice of a character
 * array) and/or by group number.
 */
public final class CallNode extends StateNode {
    /** Character array containing the called name at [nameP, nameEnd). */
    public char[] name;
    public int nameP;
    public int nameEnd;

    public int groupNum;
    public Node target;             // is it an EncloseNode always ?
    public UnsetAddrList unsetAddrList;

    /**
     * @param name    character array containing the called name
     * @param nameP   start index of the name in {@code name}
     * @param nameEnd end index (exclusive) of the name in {@code name}
     * @param gnum    group number; call by number if non-zero
     */
    public CallNode(char[] name, int nameP, int nameEnd, int gnum) {
        this.name = name;
        this.nameP = nameP;
        this.nameEnd = nameEnd;
        this.groupNum = gnum; /* call by number if gnum != 0 */
    }

    @Override
    public int getType() {
        return CALL;
    }

    @Override
    protected void setChild(Node newChild) {
        target = newChild;
    }

    @Override
    protected Node getChild() {
        return target;
    }

    /** Sets the target and makes this node its parent. */
    public void setTarget(Node tgt) {
        target = tgt;
        tgt.parent = this;
    }

    @Override
    public String getName() {
        return "Call";
    }

    /**
     * Emits a warning when there is no target, or when the target's parent is
     * this call node. Does not descend into the target.
     */
    @Override
    public void verifyTree(Set<Node> set, WarnCallback warnings) {
        if (target == null || target.parent == this)
            warnings.warn(this.getAddressName() + " doesn't point to a target or the target has been stolen");
        // do not recurse here
    }

    /**
     * Debug dump of the node. A missing target is printed as "NULL" (the value
     * pad() returns for null) instead of failing, since verifyTree() shows that
     * a null target is a state this node can be in.
     */
    @Override
    public String toString(int level) {
        StringBuilder value = new StringBuilder(super.toString(level));
        value.append("\n name: " + new String(name, nameP, nameEnd - nameP));
        value.append("\n groupNum: " + groupNum);
        // Only the address name is printed, not the whole target subtree.
        value.append("\n target: " + pad(target == null ? null : target.getAddressName(), level + 1));
        value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1));

        return value.toString();
    }

}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/ConsAltNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,152 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.ast;

import java.util.Set;

import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;

/**
 * Cons cell of the AST: {@link #car} holds an element and {@link #cdr} the
 * rest of the chain. The same class represents both list and alternation
 * nodes; {@link #getType()} tells which.
 */
public final class ConsAltNode extends Node {
    public Node car;
    public ConsAltNode cdr;
    private int type;           // List or Alt

    // Links both children (when present) back to this node.
    private ConsAltNode(Node car, ConsAltNode cdr, int type) {
        this.car = car;
        if (car != null) car.parent = this;
        this.cdr = cdr;
        if (cdr != null) cdr.parent = this;

        this.type = type;
    }

    /** Creates an ALT node with the given head and tail; either may be null. */
    public static ConsAltNode newAltNode(Node left, ConsAltNode right) {
        return new ConsAltNode(left, right, ALT);
    }

    /** Creates a LIST node with the given head and tail; either may be null. */
    public static ConsAltNode newListNode(Node left, ConsAltNode right) {
        return new ConsAltNode(left, right, LIST);
    }

    /**
     * Wraps {@code x} in a new list node and, when {@code list} is not null,
     * appends it after the last cell of {@code list}.
     *
     * @return the newly created cell (not the head of the list)
     */
    public static ConsAltNode listAdd(ConsAltNode list, Node x) {
        ConsAltNode n = newListNode(x, null);

        if (list != null) {
            while (list.cdr != null) {
                list = list.cdr;
            }
            list.setCdr(n);
        }
        return n;
    }

    public void toListNode() {
        type = LIST;
    }

    public void toAltNode() {
        type = ALT;
    }

    @Override
    public int getType() {
        return type;
    }

    @Override
    protected void setChild(Node newChild) {
        car = newChild;
    }

    @Override
    protected Node getChild() {
        return car;
    }

    /**
     * Exchanges this node with {@code with} in the tree. When this node has a
     * tail, the tail is re-parented to {@code with}; if {@code with} is also a
     * ConsAltNode the two tails are exchanged. The parent links are then
     * exchanged by {@link Node#swap(Node)}.
     */
    @Override
    public void swap(Node with) {
        if (cdr != null) {
            cdr.parent = with;
            if (with instanceof ConsAltNode) {
                ConsAltNode withCan = (ConsAltNode)with;
                // The other node may have no tail; only re-parent one that exists.
                if (withCan.cdr != null) withCan.cdr.parent = this;
                ConsAltNode tmp = cdr;
                cdr = withCan.cdr;
                withCan.cdr = tmp;
            }
        }
        super.swap(with);
    }

    /**
     * Checks that both children point back to this node, warning otherwise,
     * and recurses into them. {@code set} holds the nodes already visited.
     */
    @Override
    public void verifyTree(Set<Node> set, WarnCallback warnings) {
        if (!set.contains(this)) {
            set.add(this);
            if (car != null) {
                if (car.parent != this) {
                    warnings.warn("broken list car: " + this.getAddressName() + " -> " + car.getAddressName());
                }
                car.verifyTree(set,warnings);
            }
            if (cdr != null) {
                if (cdr.parent != this) {
                    warnings.warn("broken list cdr: " + this.getAddressName() + " -> " + cdr.getAddressName());
                }
                cdr.verifyTree(set,warnings);
            }
        }
    }

    /**
     * Sets the head element and links it back to this node. Null is accepted,
     * as in the constructor, and clears the head.
     *
     * @return the new head
     */
    public Node setCar(Node ca) {
        car = ca;
        if (ca != null) ca.parent = this;
        return car;
    }

    /**
     * Sets the tail and links it back to this node. Null is accepted, as in
     * the constructor, and clears the tail.
     *
     * @return the new tail
     */
    public ConsAltNode setCdr(ConsAltNode cd) {
        cdr = cd;
        if (cd != null) cd.parent = this;
        return cdr;
    }

    /**
     * @throws InternalException if the type is neither ALT nor LIST
     */
    @Override
    public String getName() {
        switch (type) {
        case ALT:
            return "Alt";
        case LIST:
            return "List";
        default:
            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
        }
    }

    /** Debug dump of the head and, via its own toString(), the tail. */
    @Override
    public String toString(int level) {
        StringBuilder value = new StringBuilder();
        value.append("\n car: " + pad(car, level + 1));
        value.append("\n cdr: " + (cdr == null ? "NULL" : cdr.toString()));

        return value.toString();
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/EncloseNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,151 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.Config; +import jdk.nashorn.internal.runtime.regexp.joni.Option; +import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; + +public final class EncloseNode extends StateNode implements EncloseType { + + public int type; // enclose type + public int regNum; + public int option; + public Node target; /* EncloseNode : ENCLOSE_MEMORY */ + public int callAddr; // AbsAddrType + public int minLength; // OnigDistance + public int maxLength; // OnigDistance + public int charLength; + public int optCount; // referenced count in optimize_node_left() + + // node_new_enclose / onig_node_new_enclose + public EncloseNode(int type) { + this.type = type; + callAddr = -1; + } + + // node_new_enclose_memory + public EncloseNode(int option, boolean isNamed) { + this(MEMORY); + if (isNamed) setNamedGroup(); + if (Config.USE_SUBEXP_CALL) this.option = option; + } + + // node_new_option + public EncloseNode(int option, int i) { + this(OPTION); + this.option = option; + } + + @Override + public int getType() { + return ENCLOSE; + } + + @Override + protected void setChild(Node newChild) { + target = newChild; + } + + @Override + protected Node getChild() { + return target; + } + + public void setTarget(Node tgt) { + target = tgt; + tgt.parent = this; + } + + @Override + public String getName() { + return "Enclose"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(super.toString(level)); + value.append("\n type: " + typeToString()); + value.append("\n regNum: " + regNum); + value.append("\n option: " + Option.toString(option)); + value.append("\n target: " + pad(target, level + 1)); + value.append("\n callAddr: " + callAddr); + value.append("\n minLength: " + minLength); + value.append("\n maxLength: " + maxLength); + value.append("\n charLength: " + charLength); + value.append("\n optCount: " + optCount); + + return 
value.toString(); + } + + public String typeToString() { + StringBuilder types = new StringBuilder(); + if (isStopBacktrack()) types.append("STOP_BACKTRACK "); + if (isMemory()) types.append("MEMORY "); + if (isOption()) types.append("OPTION "); + + return types.toString(); + } + + public void setEncloseStatus(int flag) { + state |= flag; + } + + public void clearEncloseStatus(int flag) { + state &= ~flag; + } + + public void clearMemory() { + type &= ~MEMORY; + } + + public void setMemory() { + type |= MEMORY; + } + + public boolean isMemory() { + return (type & MEMORY) != 0; + } + + public void clearOption() { + type &= ~OPTION; + } + + public void setOption() { + type |= OPTION; + } + + public boolean isOption() { + return (type & OPTION) != 0; + } + + public void clearStopBacktrack() { + type &= ~STOP_BACKTRACK; + } + + public void setStopBacktrack() { + type |= STOP_BACKTRACK; + } + + public boolean isStopBacktrack() { + return (type & STOP_BACKTRACK) != 0; + } + +}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/Node.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,135 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.ast;

import java.util.Set;

import jdk.nashorn.internal.runtime.regexp.joni.Config;
import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;

/**
 * Base class of all AST nodes. Every node has a parent link; subclasses that
 * own a single child expose it through {@link #getChild()} and
 * {@link #setChild(Node)}.
 */
public abstract class Node implements NodeType {
    public Node parent;

    /** Returns the node type constant of this node. */
    public abstract int getType();

    /** Returns the node type as a single-bit mask ({@code 1 << getType()}). */
    public final int getType2Bit() {
        return 1 << getType();
    }

    protected void setChild(Node tgt){}         // default definition
    protected Node getChild(){return null;};    // default definition

    /**
     * Exchanges the tree positions of this node and {@code with}: each parent's
     * child slot is pointed at the other node, then the two parent links are
     * exchanged. Children of the two nodes are not touched here.
     */
    public void swap(Node with) {
        Node tmp;

        //if (getChild() != null) getChild().parent = with;
        //if (with.getChild() != null) with.getChild().parent = this;

        //tmp = getChild();
        //setChild(with.getChild());
        //with.setChild(tmp);

        // Re-point the child slots first, while both parent links are still the old ones.
        if (parent != null) parent.setChild(with);

        if (with.parent != null) with.parent.setChild(this);

        tmp = parent;
        parent = with.parent;
        with.parent = tmp;
    }

    // overridden by ConsAltNode and CallNode
    /**
     * Warns when the child does not point back to this node, then recurses into
     * the child. {@code set} holds the nodes already visited; nodes without a
     * child are neither recorded nor checked.
     */
    public void verifyTree(Set<Node> set, WarnCallback warnings) {
        if (!set.contains(this) && getChild() != null) {
            set.add(this);
            if (getChild().parent != this) {
                warnings.warn("broken link to child: " + this.getAddressName() + " -> " + getChild().getAddressName());
            }
            getChild().verifyTree(set, warnings);
        }
    }

    /** Returns a short human-readable name of the node kind. */
    public abstract String getName();

    /** Returns the debug dump of this node's fields. */
    protected abstract String toString(int level);

    /** Returns the node name followed by ":0x" and the identity hash code in hex. */
    public String getAddressName() {
        return getName() + ":0x" + Integer.toHexString(System.identityHashCode(this));
    }

    /** Header line with this node's and its parent's address names, followed by {@code toString(0)}. */
    public final String toString() {
        StringBuilder s = new StringBuilder();
        s.append("<" + getAddressName() + " (" + (parent == null ? "NULL" : parent.getAddressName()) + ")>");
        return s + toString(0);
    }

    /**
     * Returns {@code value.toString()} with an indent inserted after every line
     * break, or "NULL" for a null value.
     *
     * NOTE(review): the builder appends itself on every iteration, so the indent
     * doubles per level rather than growing by a fixed step - confirm this is
     * intended.
     */
    protected static String pad(Object value, int level) {
        if (value == null) return "NULL";

        StringBuilder pad = new StringBuilder(" ");
        for (int i=0; i<level; i++) pad.append(pad);

        return value.toString().replace("\n", "\n" + pad);
    }

    /**
     * Tells whether this node is an invalid target for a quantifier. Always
     * false unless Config.VANILLA is set.
     */
    public final boolean isInvalidQuantifier() {
        if (!Config.VANILLA) return false;

        ConsAltNode node;

        switch(getType()) {

        case ANCHOR:
            return true;

        case ENCLOSE:
            /* allow enclosed elements */
            /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
            break;

        case LIST:
            node = (ConsAltNode)this;
            do {
                if (!node.car.isInvalidQuantifier()) return false;
            } while ((node = node.cdr) != null);
            // NOTE(review): a list whose elements are all invalid still yields false here.
            return false;

        case ALT:
            // An alternation is invalid as soon as one branch is invalid.
            node = (ConsAltNode)this;
            do {
                if (node.car.isInvalidQuantifier()) return true;
            } while ((node = node.cdr) != null);
            break;

        default:
            break;
        }

        return false;
    }

    /** True when this node's type bit is contained in the ALLOWED_IN_LB mask. */
    public final boolean isAllowedInLookBehind() {
        return (getType2Bit() & ALLOWED_IN_LB) != 0;
    }

    /** True when this node's type bit is contained in the SIMPLE mask. */
    public final boolean isSimple() {
        return (getType2Bit() & SIMPLE) != 0;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/QuantifierNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,272 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.Config; +import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment; +import jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce; +import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; + +public final class QuantifierNode extends StateNode { + + public Node target; + public int lower; + public int upper; + public boolean greedy; + + public int targetEmptyInfo; + + public Node headExact; + public Node nextHeadExact; + public boolean isRefered; /* include called node. 
don't eliminate even if {0} */ + + // USE_COMBINATION_EXPLOSION_CHECK + public int combExpCheckNum; /* 1,2,3...: check, 0: no check */ + + public QuantifierNode(int lower, int upper, boolean byNumber) { + this.lower = lower; + this.upper = upper; + greedy = true; + targetEmptyInfo = TargetInfo.ISNOT_EMPTY; + + if (byNumber) setByNumber(); + } + + @Override + public int getType() { + return QTFR; + } + + @Override + protected void setChild(Node newChild) { + target = newChild; + } + + @Override + protected Node getChild() { + return target; + } + + public void setTarget(Node tgt) { + target = tgt; + tgt.parent = this; + } + + public StringNode convertToString(int flag) { + StringNode sn = new StringNode(); + sn.flag = flag; + sn.swap(this); + return sn; + } + + @Override + public String getName() { + return "Quantifier"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(super.toString(level)); + value.append("\n target: " + pad(target, level + 1)); + value.append("\n lower: " + lower); + value.append("\n upper: " + upper); + value.append("\n greedy: " + greedy); + value.append("\n targetEmptyInfo: " + targetEmptyInfo); + value.append("\n headExact: " + pad(headExact, level + 1)); + value.append("\n nextHeadExact: " + pad(nextHeadExact, level + 1)); + value.append("\n isRefered: " + isRefered); + value.append("\n combExpCheckNum: " + combExpCheckNum); + + return value.toString(); + } + + public boolean isAnyCharStar() { + return greedy && isRepeatInfinite(upper) && target.getType() == CANY; + } + + /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ + protected int popularNum() { + if (greedy) { + if (lower == 0) { + if (upper == 1) return 0; + else if (isRepeatInfinite(upper)) return 1; + } else if (lower == 1) { + if (isRepeatInfinite(upper)) return 2; + } + } else { + if (lower == 0) { + if (upper == 1) return 3; + else if (isRepeatInfinite(upper)) return 4; + } else if (lower == 1) { + if (isRepeatInfinite(upper)) return 5; + } + 
} + return -1; + } + + protected void set(QuantifierNode other) { + setTarget(other.target); + other.target = null; + lower = other.lower; + upper = other.upper; + greedy = other.greedy; + targetEmptyInfo = other.targetEmptyInfo; + + //setHeadExact(other.headExact); + //setNextHeadExact(other.nextHeadExact); + headExact = other.headExact; + nextHeadExact = other.nextHeadExact; + isRefered = other.isRefered; + combExpCheckNum = other.combExpCheckNum; + } + + public void reduceNestedQuantifier(QuantifierNode other) { + int pnum = popularNum(); + int cnum = other.popularNum(); + + if (pnum < 0 || cnum < 0) return; + + switch(Reduce.REDUCE_TABLE[cnum][pnum]) { + case DEL: + // no need to set the parent here... + // swap ? + set(other); // *pnode = *cnode; ??? + break; + + case A: + setTarget(other.target); + lower = 0; + upper = REPEAT_INFINITE; + greedy = true; + break; + + case AQ: + setTarget(other.target); + lower = 0; + upper = REPEAT_INFINITE; + greedy = false; + break; + + case QQ: + setTarget(other.target); + lower = 0; + upper = 1; + greedy = false; + break; + + case P_QQ: + setTarget(other); + lower = 0; + upper = 1; + greedy = false; + other.lower = 1; + other.upper = REPEAT_INFINITE; + other.greedy = true; + return; + + case PQ_Q: + setTarget(other); + lower = 0; + upper = 1; + greedy = true; + other.lower = 1; + other.upper = REPEAT_INFINITE; + other.greedy = false; + return; + + case ASIS: + setTarget(other); + return; + } + // ??? remove the parent from target ??? + other.target = null; // remove target from reduced quantifier + } + + public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, char[] chars, int p, int end) { + if (lower == 1 && upper == 1) return 1; + + switch(tgt.getType()) { + + case STR: + if (!group) { + StringNode sn = (StringNode)tgt; + if (sn.canBeSplit()) { + StringNode n = sn.splitLastChar(); + if (n != null) { + setTarget(n); + return 2; + } + } + } + break; + + case QTFR: + /* check redundant double repeat. 
*/ + /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ + QuantifierNode qnt = (QuantifierNode)tgt; + int nestQNum = popularNum(); + int targetQNum = qnt.popularNum(); + + if (Config.USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR) { + if (!isByNumber() && !qnt.isByNumber() && env.syntax.warnReduntantNestedRepeat()) { + switch(Reduce.REDUCE_TABLE[targetQNum][nestQNum]) { + case ASIS: + break; + + case DEL: + env.reg.warnings.warn(new String(chars, p, end) + + " redundant nested repeat operator"); + break; + + default: + env.reg.warnings.warn(new String(chars, p, end) + + " nested repeat operator " + Reduce.PopularQStr[targetQNum] + + " and " + Reduce.PopularQStr[nestQNum] + " was replaced with '" + + Reduce.ReduceQStr[Reduce.REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "'"); + } + } + } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR + + if (targetQNum >= 0) { + if (nestQNum >= 0) { + reduceNestedQuantifier(qnt); + return 0; + } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (!isRepeatInfinite(upper) && upper > 1 && greedy) { + upper = lower == 0 ? 1 : lower; + } + } + } + + default: + break; + } + + setTarget(tgt); + return 0; + } + + public static final int REPEAT_INFINITE = -1; + public static boolean isRepeatInfinite(int n) { + return n == REPEAT_INFINITE; + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/StateNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,232 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeStatus; + +public abstract class StateNode extends Node implements NodeStatus { + protected int state; + + @Override + public String toString(int level) { + return "\n state: " + stateToString(); + } + + public String stateToString() { + StringBuilder states = new StringBuilder(); + if (isMinFixed()) states.append("MIN_FIXED "); + if (isMaxFixed()) states.append("MAX_FIXED "); + if (isMark1()) states.append("MARK1 "); + if (isMark2()) states.append("MARK2 "); + if (isMemBackrefed()) states.append("MEM_BACKREFED "); + if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT "); + if (isRecursion()) states.append("RECURSION "); + if (isCalled()) states.append("CALLED "); + if (isAddrFixed()) states.append("ADDR_FIXED "); + if (isNamedGroup()) states.append("NAMED_GROUP "); + if (isNameRef()) states.append("NAME_REF "); + if (isInRepeat()) states.append("IN_REPEAT "); + if (isNestLevel()) states.append("NEST_LEVEL "); + if (isByNumber()) states.append("BY_NUMBER "); + + return states.toString(); + } + + public boolean isMinFixed() { + return (state & NST_MIN_FIXED) != 0; + } + + public void setMinFixed() { + state |= NST_MIN_FIXED; + } + + public void clearMinFixed() { + state &= ~NST_MIN_FIXED; + } + + public boolean isMaxFixed() { + return (state & NST_MAX_FIXED) != 0; + } + + public void setMaxFixed() { + state |= NST_MAX_FIXED; + } + + public void clearMaxFixed() { + state &= ~NST_MAX_FIXED; + } + + public boolean isCLenFixed() { + return (state & NST_CLEN_FIXED) != 0; + } + + public void setCLenFixed() { + state |= NST_CLEN_FIXED; + } + + public void clearCLenFixed() { + state &= ~NST_CLEN_FIXED; + } + + public boolean isMark1() { + return (state & NST_MARK1) != 0; + } + + public void setMark1() { + state |= NST_MARK1; + } + + public void clearMark1() { + state &= ~NST_MARK1; + } + + public boolean isMark2() { + return (state & 
NST_MARK2) != 0; + } + + public void setMark2() { + state |= NST_MARK2; + } + + public void clearMark2() { + state &= ~NST_MARK2; + } + + public boolean isMemBackrefed() { + return (state & NST_MEM_BACKREFED) != 0; + } + + public void setMemBackrefed() { + state |= NST_MEM_BACKREFED; + } + + public void clearMemBackrefed() { + state &= ~NST_MEM_BACKREFED; + } + + public boolean isStopBtSimpleRepeat() { + return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0; + } + + public void setStopBtSimpleRepeat() { + state |= NST_STOP_BT_SIMPLE_REPEAT; + } + + public void clearStopBtSimpleRepeat() { + state &= ~NST_STOP_BT_SIMPLE_REPEAT; + } + + public boolean isRecursion() { + return (state & NST_RECURSION) != 0; + } + + public void setRecursion() { + state |= NST_RECURSION; + } + + public void clearRecursion() { + state &= ~NST_RECURSION; + } + + public boolean isCalled() { + return (state & NST_CALLED) != 0; + } + + public void setCalled() { + state |= NST_CALLED; + } + + public void clearCAlled() { + state &= ~NST_CALLED; + } + + public boolean isAddrFixed() { + return (state & NST_ADDR_FIXED) != 0; + } + + public void setAddrFixed() { + state |= NST_ADDR_FIXED; + } + + public void clearAddrFixed() { + state &= ~NST_ADDR_FIXED; + } + + public boolean isNamedGroup() { + return (state & NST_NAMED_GROUP) != 0; + } + + public void setNamedGroup() { + state |= NST_NAMED_GROUP; + } + + public void clearNamedGroup() { + state &= ~NST_NAMED_GROUP; + } + + public boolean isNameRef() { + return (state & NST_NAME_REF) != 0; + } + + public void setNameRef() { + state |= NST_NAME_REF; + } + + public void clearNameRef() { + state &= ~NST_NAME_REF; + } + + public boolean isInRepeat() { + return (state & NST_IN_REPEAT) != 0; + } + + public void setInRepeat() { + state |= NST_IN_REPEAT; + } + + public void clearInRepeat() { + state &= ~NST_IN_REPEAT; + } + + public boolean isNestLevel() { + return (state & NST_NEST_LEVEL) != 0; + } + + public void setNestLevel() { + state |= NST_NEST_LEVEL; + } 
+ + public void clearNestLevel() { + state &= ~NST_NEST_LEVEL; + } + + public boolean isByNumber() { + return (state & NST_BY_NUMBER) != 0; + } + + public void setByNumber() { + state |= NST_BY_NUMBER; + } + + public void clearByNumber() { + state &= ~NST_BY_NUMBER; + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ast/StringNode.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,207 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.ast; + +import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper; +import jdk.nashorn.internal.runtime.regexp.joni.constants.StringType; + +public final class StringNode extends Node implements StringType { + + private static final int NODE_STR_MARGIN = 16; + private static final int NODE_STR_BUF_SIZE = 24; + public static final StringNode EMPTY = new StringNode(null, Integer.MAX_VALUE, Integer.MAX_VALUE); + + public char[] chars; + public int p; + public int end; + + public int flag; + + public StringNode() { + this.chars = new char[NODE_STR_BUF_SIZE]; + } + + public StringNode(char[] chars, int p, int end) { + this.chars = chars; + this.p = p; + this.end = end; + setShared(); + } + + public StringNode(char c) { + this(); + chars[end++] = c; + } + + /* Ensure there is ahead bytes available in node's buffer + * (assumes that the node is not shared) + */ + public void ensure(int ahead) { + int len = (end - p) + ahead; + if (len >= chars.length) { + char[] tmp = new char[len + NODE_STR_MARGIN]; + System.arraycopy(chars, p, tmp, 0, end - p); + chars = tmp; + } + } + + /* COW and/or ensure there is ahead bytes available in node's buffer + */ + private void modifyEnsure(int ahead) { + if (isShared()) { + int len = (end - p) + ahead; + char[] tmp = new char[len + NODE_STR_MARGIN]; + System.arraycopy(chars, p, tmp, 0, end - p); + chars = tmp; + end = end - p; + p = 0; + clearShared(); + } else { + ensure(ahead); + } + } + + @Override + public int getType() { + return STR; + } + + @Override + public String getName() { + return "String"; + } + + @Override + public String toString(int level) { + StringBuilder value = new StringBuilder(); + value.append("\n bytes: '"); + for (int i=p; i<end; i++) { + if (chars[i] >= 0x20 && chars[i] < 0x7f) { + value.append(chars[i]); + } else { + value.append(String.format("[0x%04x]", chars[i])); + } + } + value.append("'"); + return value.toString(); + } + + public int length() { 
+ return end - p; + } + + public StringNode splitLastChar() { + StringNode n = null; + + if (end > p) { + int prev = EncodingHelper.prevCharHead(p, end); + if (prev != -1 && prev > p) { /* can be splitted. */ + n = new StringNode(chars, prev, end); + if (isRaw()) n.setRaw(); + end = prev; + } + } + return n; + } + + public boolean canBeSplit() { + if (end > p) { + return 1 < (end - p); + } + return false; + } + + public void set(char[] chars, int p, int end) { + this.chars = chars; + this.p = p; + this.end = end; + setShared(); + } + + public void cat(char[] cat, int catP, int catEnd) { + int len = catEnd - catP; + modifyEnsure(len); + System.arraycopy(cat, catP, chars, end, len); + end += len; + } + + public void cat(char c) { + modifyEnsure(1); + chars[end++] = c; + } + + public void catCode(int code) { + cat((char)code); + } + + public void clear() { + if (chars.length > NODE_STR_BUF_SIZE) chars = new char[NODE_STR_BUF_SIZE]; + flag = 0; + p = end = 0; + } + + public void setRaw() { + flag |= NSTR_RAW; + } + + public void clearRaw() { + flag &= ~NSTR_RAW; + } + + public boolean isRaw() { + return (flag & NSTR_RAW) != 0; + } + + public void setAmbig() { + flag |= NSTR_AMBIG; + } + + public void clearAmbig() { + flag &= ~NSTR_AMBIG; + } + + public boolean isAmbig() { + return (flag & NSTR_AMBIG) != 0; + } + + public void setDontGetOptInfo() { + flag |= NSTR_DONT_GET_OPT_INFO; + } + + public void clearDontGetOptInfo() { + flag &= ~NSTR_DONT_GET_OPT_INFO; + } + + public boolean isDontGetOptInfo() { + return (flag & NSTR_DONT_GET_OPT_INFO) != 0; + } + + public void setShared() { + flag |= NSTR_SHARED; + } + + public void clearShared() { + flag &= ~NSTR_SHARED; + } + + public boolean isShared() { + return (flag & NSTR_SHARED) != 0; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/bench/AbstractBench.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,49 @@ +package jdk.nashorn.internal.runtime.regexp.joni.bench; + +import jdk.nashorn.internal.runtime.regexp.joni.Option; +import jdk.nashorn.internal.runtime.regexp.joni.Regex; +import jdk.nashorn.internal.runtime.regexp.joni.Syntax; + +public abstract class AbstractBench { + protected void bench(String _reg, String _str, int warmup, int times) throws Exception { + char[] reg = _reg.toCharArray(); + char[] str = _str.toCharArray(); + + Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,Syntax.DEFAULT); + + System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times"); + + for(int j=0;j<warmup;j++) { + long before = System.currentTimeMillis(); + for(int i = 0; i < times; i++) { + p.matcher(str, 0, str.length).search(0, str.length, Option.NONE); + } + long time = System.currentTimeMillis() - before; + System.err.println(": " + time + "ms"); + } + } + + protected void benchBestOf(String _reg, String _str, int warmup, int times) throws Exception { + char[] reg = _reg.toCharArray(); + char[] str = _str.toCharArray(); + + Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,Syntax.DEFAULT); + + System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times"); + + long best = Long.MAX_VALUE; + + for(int j=0;j<warmup;j++) { + long before = System.currentTimeMillis(); + for(int i = 0; i < times; i++) { + p.matcher(str, 0, str.length).search(0, str.length, Option.NONE); + } + long time = System.currentTimeMillis() - before; + if(time < best) { + best = time; + } + System.err.print("."); + } + System.err.println(": " + best + "ms"); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchGreedyBacktrack.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,7 @@ +package jdk.nashorn.internal.runtime.regexp.joni.bench; + +public class BenchGreedyBacktrack extends AbstractBench { + public static void main(String[] args) throws Exception { + new BenchGreedyBacktrack().bench(".*_p","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,1000000); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchRailsRegs.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,31 @@ +package jdk.nashorn.internal.runtime.regexp.joni.bench; + +public class BenchRailsRegs extends AbstractBench { + public static void main(String[] args) throws Exception { + final String[][] regexps = {{"a.*?[b-z]{2,4}aaaaaa","afdgdsgderaabxxaaaaaaaaaaaaaaaaaaaaaaaa"}, + {"://","/shop/viewCategory.shtml?category=DOGS"}, + {"^\\w+\\://[^/]+(/.*|$)$","/shop/viewCategory.shtml?category=DOGS"}, + {"\\A/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/signonForm\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/newAccountForm\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/newAccount\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/viewCart\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/index\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A/shop/viewCategory\\.shtml/?\\Z","/shop/viewCategory.shtml"}, + {"\\A(?:::)?([A-Z]\\w*(?:::[A-Z]\\w*)*)\\z","CategoriesController"}, + {"\\Ainsert","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"}, + {"\\A\\(?\\s*(select|show)","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"}, + {".*?\n","1b341ffe23b5298676d535fcabd3d0d7"}, + {"^find_(all_by|by)_([_a-zA-Z]\\w*)$","find_by_string_id"}, + {"\\.rjs$","categories/show.rhtml"}, + {"^[-a-z]+://","petstore.css"}, + {"^get$",""}, + {"^post$",""}, + {"^[^:]+","www.example.com"}, + {"(=|\\?|_before_type_cast)$", "updated_on"}, + {"^(.*?)=(.*?);","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/"}}; + for(String[] reg : regexps) { + new BenchRailsRegs().benchBestOf(reg[0],reg[1],10,1000000); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchSeveralRegexps.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,17 @@ +package jdk.nashorn.internal.runtime.regexp.joni.bench; + +public class BenchSeveralRegexps extends AbstractBench { + public static void main(String[] args) throws Exception { + int BASE = 1000000; + + new BenchSeveralRegexps().benchBestOf("a"," a",10,4*BASE); + + new BenchSeveralRegexps().benchBestOf(".*?=","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,BASE); + + new BenchSeveralRegexps().benchBestOf("^(.*?)=(.*?);","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,BASE); + + new BenchSeveralRegexps().benchBestOf(".*_p","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,4*BASE); + + new BenchSeveralRegexps().benchBestOf(".*=","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,4*BASE); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/AnchorType.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,58 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface AnchorType { + final int BEGIN_BUF = (1<<0); + final int BEGIN_LINE = (1<<1); + final int BEGIN_POSITION = (1<<2); + final int END_BUF = (1<<3); + final int SEMI_END_BUF = (1<<4); + final int END_LINE = (1<<5); + + final int WORD_BOUND = (1<<6); + final int NOT_WORD_BOUND = (1<<7); + final int WORD_BEGIN = (1<<8); + final int WORD_END = (1<<9); + final int PREC_READ = (1<<10); + final int PREC_READ_NOT = (1<<11); + final int LOOK_BEHIND = (1<<12); + final int LOOK_BEHIND_NOT = (1<<13); + + final int ANYCHAR_STAR = (1<<14); /* ".*" optimize info */ + final int ANYCHAR_STAR_ML = (1<<15); /* ".*" optimize info (multi-line) */ + + final int ANYCHAR_STAR_MASK = (ANYCHAR_STAR | ANYCHAR_STAR_ML); + final int END_BUF_MASK = (END_BUF | SEMI_END_BUF); + + final int ALLOWED_IN_LB = ( LOOK_BEHIND | + BEGIN_LINE | + END_LINE | + BEGIN_BUF | + BEGIN_POSITION ); + + final int ALLOWED_IN_LB_NOT = ( LOOK_BEHIND | + LOOK_BEHIND_NOT | + BEGIN_LINE | + END_LINE | + BEGIN_BUF | + BEGIN_POSITION ); + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/Arguments.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,31 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface Arguments { + final int SPECIAL = -1; + final int NON = 0; + final int RELADDR = 1; + final int ABSADDR = 2; + final int LENGTH = 3; + final int MEMNUM = 4; + final int OPTION = 5; + final int STATE_CHECK = 6; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/AsmConstants.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,49 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface AsmConstants { + final int THIS = 0; + + // argument indexes + final int RANGE = 1; + final int SSTART = 2; + final int SPREV = 3; + + // local var indexes + final int S = 4; // current index + final int BYTES = 5; // string + final int LAST_INDEX = BYTES + 1; + + // frequently used field names (all ints) + final String STR = "str"; + final String END = "end"; + final String MSA_START = "msaStart"; + final String MSA_OPTONS = "msaOptions"; + final String MSA_BEST_LEN = "msaBestLen"; + final String MSA_BEST_S = "msaBestS"; + final String MSA_BEGIN = "msaBegin"; + final String MSA_END = "msaEnd"; + + // generated field names + final String BITSET = "bitset"; + final String CODERANGE = "range"; + final String TEMPLATE = "template"; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/CCSTATE.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,27 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public enum CCSTATE { + VALUE, + RANGE, + COMPLETE, + START +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/CCVALTYPE.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,26 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public enum CCVALTYPE { + SB, + CODE_POINT, + CLASS +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/EncloseType.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,29 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface EncloseType { + final int MEMORY = 1<<0; + final int OPTION = 1<<1; + final int STOP_BACKTRACK = 1<<2; + + final int ALLOWED_IN_LB = MEMORY; + final int ALLOWED_IN_LB_NOT = 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/MetaChar.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,31 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface MetaChar { + final int ESCAPE = 0; + final int ANYCHAR = 1; + final int ANYTIME = 2; + final int ZERO_OR_ONE_TIME = 3; + final int ONE_OR_MORE_TIME = 4; + final int ANYCHAR_ANYTIME = 5; + + final int INEFFECTIVE_META_CHAR = 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/NodeStatus.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,39 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public interface NodeStatus { + /* status bits */ + final int NST_MIN_FIXED = (1<<0); + final int NST_MAX_FIXED = (1<<1); + final int NST_CLEN_FIXED = (1<<2); + final int NST_MARK1 = (1<<3); + final int NST_MARK2 = (1<<4); + final int NST_MEM_BACKREFED = (1<<5); + final int NST_STOP_BT_SIMPLE_REPEAT= (1<<6); + final int NST_RECURSION = (1<<7); + final int NST_CALLED = (1<<8); + final int NST_ADDR_FIXED = (1<<9); + final int NST_NAMED_GROUP = (1<<10); + final int NST_NAME_REF = (1<<11); + final int NST_IN_REPEAT = (1<<12); /* STK_REPEAT is nested in stack. */ + final int NST_NEST_LEVEL = (1<<13); + final int NST_BY_NUMBER = (1<<14); /* {n,m} */ +}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Node type numbers, the matching one-bit masks ({@code BIT_x == 1 << x}),
 * and two composite masks built from them.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/NodeType.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface NodeType {
    // node type
    int STR     = 0;
    int CCLASS  = 1;
    int CTYPE   = 2;
    int CANY    = 3;
    int BREF    = 4;
    int QTFR    = 5;
    int ENCLOSE = 6;
    int ANCHOR  = 7;
    int LIST    = 8;
    int ALT     = 9;
    int CALL    = 10;

    // one bit per node type, positioned by the type number above
    int BIT_STR     = 1 << STR;
    int BIT_CCLASS  = 1 << CCLASS;
    int BIT_CTYPE   = 1 << CTYPE;
    int BIT_CANY    = 1 << CANY;
    int BIT_BREF    = 1 << BREF;
    int BIT_QTFR    = 1 << QTFR;
    int BIT_ENCLOSE = 1 << ENCLOSE;
    int BIT_ANCHOR  = 1 << ANCHOR;
    int BIT_LIST    = 1 << LIST;
    int BIT_ALT     = 1 << ALT;
    int BIT_CALL    = 1 << CALL;

    /** Allowed node types in look-behind: every type bit except {@link #BIT_BREF}. */
    int ALLOWED_IN_LB =
            BIT_STR | BIT_CCLASS | BIT_CTYPE | BIT_CANY
            | BIT_QTFR | BIT_ENCLOSE | BIT_ANCHOR
            | BIT_LIST | BIT_ALT | BIT_CALL;

    /** Union of the STR, CCLASS, CTYPE, CANY and BREF bits. */
    int SIMPLE = BIT_STR | BIT_CCLASS | BIT_CTYPE | BIT_CANY | BIT_BREF;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.constants;

import jdk.nashorn.internal.runtime.regexp.joni.Config;

/**
 * Opcode numbers (0 through 106) plus two debug tables.
 *
 * <p>{@link #OpCodeNames} and {@link #OpCodeArgTypes} each hold one entry per
 * opcode, listed in opcode-number order, so entry {@code i} belongs to the
 * opcode whose value is {@code i}. Both tables are only allocated when
 * {@code Config.DEBUG_COMPILE} is true; otherwise they are {@code null}.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/OPCode.java}.
 */
public interface OPCode {
    final int FINISH                        = 0;            /* matching process terminator (no more alternative) */
    final int END                           = 1;            /* pattern code terminator (success end) */

    final int EXACT1                        = 2;            /* single byte, N = 1 */
    final int EXACT2                        = 3;            /* single byte, N = 2 */
    final int EXACT3                        = 4;            /* single byte, N = 3 */
    final int EXACT4                        = 5;            /* single byte, N = 4 */
    final int EXACT5                        = 6;            /* single byte, N = 5 */
    final int EXACTN                        = 7;            /* single byte */
    final int EXACTMB2N1                    = 8;            /* mb-length = 2 N = 1 */
    final int EXACTMB2N2                    = 9;            /* mb-length = 2 N = 2 */
    final int EXACTMB2N3                    = 10;           /* mb-length = 2 N = 3 */
    final int EXACTMB2N                     = 11;           /* mb-length = 2 */
    final int EXACTMB3N                     = 12;           /* mb-length = 3 */
    final int EXACTMBN                      = 13;           /* other length */

    final int EXACT1_IC                     = 14;           /* single byte, N = 1, ignore case */
    final int EXACTN_IC                     = 15;           /* single byte, ignore case */

    final int CCLASS                        = 16;
    final int CCLASS_MB                     = 17;
    final int CCLASS_MIX                    = 18;
    final int CCLASS_NOT                    = 19;
    final int CCLASS_MB_NOT                 = 20;
    final int CCLASS_MIX_NOT                = 21;
    final int CCLASS_NODE                   = 22;           /* pointer to CClassNode node */

    final int ANYCHAR                       = 23;           /* "." */
    final int ANYCHAR_ML                    = 24;           /* "." multi-line */
    final int ANYCHAR_STAR                  = 25;           /* ".*" */
    final int ANYCHAR_ML_STAR               = 26;           /* ".*" multi-line */
    final int ANYCHAR_STAR_PEEK_NEXT        = 27;
    final int ANYCHAR_ML_STAR_PEEK_NEXT     = 28;

    final int WORD                          = 29;
    final int NOT_WORD                      = 30;
    final int WORD_BOUND                    = 31;
    final int NOT_WORD_BOUND                = 32;
    final int WORD_BEGIN                    = 33;
    final int WORD_END                      = 34;

    final int BEGIN_BUF                     = 35;
    final int END_BUF                       = 36;
    final int BEGIN_LINE                    = 37;
    final int END_LINE                      = 38;
    final int SEMI_END_BUF                  = 39;
    final int BEGIN_POSITION                = 40;

    final int BACKREF1                      = 41;
    final int BACKREF2                      = 42;
    final int BACKREFN                      = 43;
    final int BACKREFN_IC                   = 44;
    final int BACKREF_MULTI                 = 45;
    final int BACKREF_MULTI_IC              = 46;
    final int BACKREF_WITH_LEVEL            = 47;           /* \k<xxx+n>, \k<xxx-n> */

    final int MEMORY_START                  = 48;
    final int MEMORY_START_PUSH             = 49;           /* push back-tracker to stack */
    final int MEMORY_END_PUSH               = 50;           /* push back-tracker to stack */
    final int MEMORY_END_PUSH_REC           = 51;           /* push back-tracker to stack */
    final int MEMORY_END                    = 52;
    final int MEMORY_END_REC                = 53;           /* push marker to stack */

    final int FAIL                          = 54;           /* pop stack and move */
    final int JUMP                          = 55;
    final int PUSH                          = 56;
    final int POP                           = 57;
    final int PUSH_OR_JUMP_EXACT1           = 58;           /* if match exact then push, else jump. */
    final int PUSH_IF_PEEK_NEXT             = 59;           /* if match exact then push, else none. */

    final int REPEAT                        = 60;           /* {n,m} */
    final int REPEAT_NG                     = 61;           /* {n,m}? (non greedy) */
    final int REPEAT_INC                    = 62;
    final int REPEAT_INC_NG                 = 63;           /* non greedy */
    final int REPEAT_INC_SG                 = 64;           /* search and get in stack */
    final int REPEAT_INC_NG_SG              = 65;           /* search and get in stack (non greedy) */

    final int NULL_CHECK_START              = 66;           /* null loop checker start */
    final int NULL_CHECK_END                = 67;           /* null loop checker end */
    final int NULL_CHECK_END_MEMST          = 68;           /* null loop checker end (with capture status) */
    final int NULL_CHECK_END_MEMST_PUSH     = 69;           /* with capture status and push check-end */

    final int PUSH_POS                      = 70;           /* (?=...) start */
    final int POP_POS                       = 71;           /* (?=...) end */
    final int PUSH_POS_NOT                  = 72;           /* (?!...) start */
    final int FAIL_POS                      = 73;           /* (?!...) end */
    final int PUSH_STOP_BT                  = 74;           /* (?>...) start */
    final int POP_STOP_BT                   = 75;           /* (?>...) end */
    final int LOOK_BEHIND                   = 76;           /* (?<=...) start (no needs end opcode) */
    final int PUSH_LOOK_BEHIND_NOT          = 77;           /* (?<!...) start */
    final int FAIL_LOOK_BEHIND_NOT          = 78;           /* (?<!...) end */

    final int CALL                          = 79;           /* \g<name> */
    final int RETURN                        = 80;

    final int STATE_CHECK_PUSH              = 81;           /* combination explosion check and push */
    final int STATE_CHECK_PUSH_OR_JUMP      = 82;           /* check ok -> push, else jump */
    final int STATE_CHECK                   = 83;           /* check only */
    final int STATE_CHECK_ANYCHAR_STAR      = 84;
    final int STATE_CHECK_ANYCHAR_ML_STAR   = 85;

    /* no need: IS_DYNAMIC_OPTION() == 0 */
    final int SET_OPTION_PUSH               = 86;           /* set option and push recover option */
    final int SET_OPTION                    = 87;           /* set option */

    // single byte versions
    final int ANYCHAR_SB                    = 88;           /* "." */
    final int ANYCHAR_ML_SB                 = 89;           /* "." multi-line */
    final int ANYCHAR_STAR_SB               = 90;           /* ".*" */
    final int ANYCHAR_ML_STAR_SB            = 91;           /* ".*" multi-line */
    final int ANYCHAR_STAR_PEEK_NEXT_SB     = 92;
    final int ANYCHAR_ML_STAR_PEEK_NEXT_SB  = 93;
    final int STATE_CHECK_ANYCHAR_STAR_SB   = 94;
    final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;

    final int CCLASS_SB                     = 96;
    final int CCLASS_NOT_SB                 = 97;
    final int WORD_SB                       = 98;
    final int NOT_WORD_SB                   = 99;
    final int WORD_BOUND_SB                 = 100;
    final int NOT_WORD_BOUND_SB             = 101;
    final int WORD_BEGIN_SB                 = 102;
    final int WORD_END_SB                   = 103;

    final int LOOK_BEHIND_SB                = 104;

    final int EXACT1_IC_SB                  = 105;           /* single byte, N = 1, ignore case */
    final int EXACTN_IC_SB                  = 106;           /* single byte, ignore case */


    /**
     * Printable opcode names, one per opcode in opcode-number order.
     * {@code null} unless {@code Config.DEBUG_COMPILE} is true.
     */
    public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] {
        "finish", /*OP_FINISH*/
        "end", /*OP_END*/
        "exact1", /*OP_EXACT1*/
        "exact2", /*OP_EXACT2*/
        "exact3", /*OP_EXACT3*/
        "exact4", /*OP_EXACT4*/
        "exact5", /*OP_EXACT5*/
        "exactn", /*OP_EXACTN*/
        "exactmb2-n1", /*OP_EXACTMB2N1*/
        "exactmb2-n2", /*OP_EXACTMB2N2*/
        "exactmb2-n3", /*OP_EXACTMB2N3*/
        "exactmb2-n", /*OP_EXACTMB2N*/
        "exactmb3n", /*OP_EXACTMB3N*/
        "exactmbn", /*OP_EXACTMBN*/
        "exact1-ic", /*OP_EXACT1_IC*/
        "exactn-ic", /*OP_EXACTN_IC*/
        "cclass", /*OP_CCLASS*/
        "cclass-mb", /*OP_CCLASS_MB*/
        "cclass-mix", /*OP_CCLASS_MIX*/
        "cclass-not", /*OP_CCLASS_NOT*/
        "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
        "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
        "cclass-node", /*OP_CCLASS_NODE*/
        "anychar", /*OP_ANYCHAR*/
        "anychar-ml", /*OP_ANYCHAR_ML*/
        "anychar*", /*OP_ANYCHAR_STAR*/
        "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
        "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
        "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
        "word", /*OP_WORD*/
        "not-word", /*OP_NOT_WORD*/
        "word-bound", /*OP_WORD_BOUND*/
        "not-word-bound", /*OP_NOT_WORD_BOUND*/
        "word-begin", /*OP_WORD_BEGIN*/
        "word-end", /*OP_WORD_END*/
        "begin-buf", /*OP_BEGIN_BUF*/
        "end-buf", /*OP_END_BUF*/
        "begin-line", /*OP_BEGIN_LINE*/
        "end-line", /*OP_END_LINE*/
        "semi-end-buf", /*OP_SEMI_END_BUF*/
        "begin-position", /*OP_BEGIN_POSITION*/
        "backref1", /*OP_BACKREF1*/
        "backref2", /*OP_BACKREF2*/
        "backrefn", /*OP_BACKREFN*/
        "backrefn-ic", /*OP_BACKREFN_IC*/
        "backref_multi", /*OP_BACKREF_MULTI*/
        "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
        "backref_at_level", /*OP_BACKREF_WITH_LEVEL (the printed name says "at_level")*/
        "mem-start", /*OP_MEMORY_START*/
        "mem-start-push", /*OP_MEMORY_START_PUSH*/
        "mem-end-push", /*OP_MEMORY_END_PUSH*/
        "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
        "mem-end", /*OP_MEMORY_END*/
        "mem-end-rec", /*OP_MEMORY_END_REC*/
        "fail", /*OP_FAIL*/
        "jump", /*OP_JUMP*/
        "push", /*OP_PUSH*/
        "pop", /*OP_POP*/
        "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
        "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
        "repeat", /*OP_REPEAT*/
        "repeat-ng", /*OP_REPEAT_NG*/
        "repeat-inc", /*OP_REPEAT_INC*/
        "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
        "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
        "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
        "null-check-start", /*OP_NULL_CHECK_START*/
        "null-check-end", /*OP_NULL_CHECK_END*/
        "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
        "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
        "push-pos", /*OP_PUSH_POS*/
        "pop-pos", /*OP_POP_POS*/
        "push-pos-not", /*OP_PUSH_POS_NOT*/
        "fail-pos", /*OP_FAIL_POS*/
        "push-stop-bt", /*OP_PUSH_STOP_BT*/
        "pop-stop-bt", /*OP_POP_STOP_BT*/
        "look-behind", /*OP_LOOK_BEHIND*/
        "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
        "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
        "call", /*OP_CALL*/
        "return", /*OP_RETURN*/
        "state-check-push", /*OP_STATE_CHECK_PUSH*/
        "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
        "state-check", /*OP_STATE_CHECK*/
        "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
        "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
        "set-option-push", /*OP_SET_OPTION_PUSH*/
        "set-option", /*OP_SET_OPTION*/

        // single byte versions
        "anychar-sb", /*OP_ANYCHAR_SB*/
        "anychar-ml-sb", /*OP_ANYCHAR_ML_SB*/
        "anychar*-sb", /*OP_ANYCHAR_STAR_SB*/
        "anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR_SB*/
        "anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
        "anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
        "state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
        "state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/

        "cclass-sb", /*OP_CCLASS_SB*/
        "cclass-not-sb", /*OP_CCLASS_NOT_SB*/

        "word-sb", /*OP_WORD_SB*/
        "not-word-sb", /*OP_NOT_WORD_SB*/
        "word-bound-sb", /*OP_WORD_BOUND_SB*/
        "not-word-bound-sb", /*OP_NOT_WORD_BOUND_SB*/
        "word-begin-sb", /*OP_WORD_BEGIN_SB*/
        "word-end-sb", /*OP_WORD_END_SB*/

        "look-behind-sb", /*OP_LOOK_BEHIND_SB*/

        "exact1-ic-sb", /*OP_EXACT1_IC_SB*/
        "exactn-ic-sb", /*OP_EXACTN_IC_SB*/

    } : null;

    /**
     * Argument kind of each opcode (values come from {@code Arguments}), one
     * per opcode in opcode-number order.
     * {@code null} unless {@code Config.DEBUG_COMPILE} is true.
     */
    public final int OpCodeArgTypes[] = Config.DEBUG_COMPILE ? new int[] {
        Arguments.NON, /*OP_FINISH*/
        Arguments.NON, /*OP_END*/
        Arguments.SPECIAL, /*OP_EXACT1*/
        Arguments.SPECIAL, /*OP_EXACT2*/
        Arguments.SPECIAL, /*OP_EXACT3*/
        Arguments.SPECIAL, /*OP_EXACT4*/
        Arguments.SPECIAL, /*OP_EXACT5*/
        Arguments.SPECIAL, /*OP_EXACTN*/
        Arguments.SPECIAL, /*OP_EXACTMB2N1*/
        Arguments.SPECIAL, /*OP_EXACTMB2N2*/
        Arguments.SPECIAL, /*OP_EXACTMB2N3*/
        Arguments.SPECIAL, /*OP_EXACTMB2N*/
        Arguments.SPECIAL, /*OP_EXACTMB3N*/
        Arguments.SPECIAL, /*OP_EXACTMBN*/
        Arguments.SPECIAL, /*OP_EXACT1_IC*/
        Arguments.SPECIAL, /*OP_EXACTN_IC*/
        Arguments.SPECIAL, /*OP_CCLASS*/
        Arguments.SPECIAL, /*OP_CCLASS_MB*/
        Arguments.SPECIAL, /*OP_CCLASS_MIX*/
        Arguments.SPECIAL, /*OP_CCLASS_NOT*/
        Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
        Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
        Arguments.SPECIAL, /*OP_CCLASS_NODE*/
        Arguments.NON, /*OP_ANYCHAR*/
        Arguments.NON, /*OP_ANYCHAR_ML*/
        Arguments.NON, /*OP_ANYCHAR_STAR*/
        Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
        Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
        Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
        Arguments.NON, /*OP_WORD*/
        Arguments.NON, /*OP_NOT_WORD*/
        Arguments.NON, /*OP_WORD_BOUND*/
        Arguments.NON, /*OP_NOT_WORD_BOUND*/
        Arguments.NON, /*OP_WORD_BEGIN*/
        Arguments.NON, /*OP_WORD_END*/
        Arguments.NON, /*OP_BEGIN_BUF*/
        Arguments.NON, /*OP_END_BUF*/
        Arguments.NON, /*OP_BEGIN_LINE*/
        Arguments.NON, /*OP_END_LINE*/
        Arguments.NON, /*OP_SEMI_END_BUF*/
        Arguments.NON, /*OP_BEGIN_POSITION*/
        Arguments.NON, /*OP_BACKREF1*/
        Arguments.NON, /*OP_BACKREF2*/
        Arguments.MEMNUM, /*OP_BACKREFN*/
        Arguments.SPECIAL, /*OP_BACKREFN_IC*/
        Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
        Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
        Arguments.SPECIAL, /*OP_BACKREF_WITH_LEVEL*/
        Arguments.MEMNUM, /*OP_MEMORY_START*/
        Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
        Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
        Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
        Arguments.MEMNUM, /*OP_MEMORY_END*/
        Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
        Arguments.NON, /*OP_FAIL*/
        Arguments.RELADDR, /*OP_JUMP*/
        Arguments.RELADDR, /*OP_PUSH*/
        Arguments.NON, /*OP_POP*/
        Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
        Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
        Arguments.SPECIAL, /*OP_REPEAT*/
        Arguments.SPECIAL, /*OP_REPEAT_NG*/
        Arguments.MEMNUM, /*OP_REPEAT_INC*/
        Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
        Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
        Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
        Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
        Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
        Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
        Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
        Arguments.NON, /*OP_PUSH_POS*/
        Arguments.NON, /*OP_POP_POS*/
        Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
        Arguments.NON, /*OP_FAIL_POS*/
        Arguments.NON, /*OP_PUSH_STOP_BT*/
        Arguments.NON, /*OP_POP_STOP_BT*/
        Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
        Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
        Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
        Arguments.ABSADDR, /*OP_CALL*/
        Arguments.NON, /*OP_RETURN*/
        Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
        Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
        Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
        Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
        Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
        Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
        Arguments.OPTION, /*OP_SET_OPTION*/

        // single byte versions
        Arguments.NON, /*OP_ANYCHAR_SB*/
        Arguments.NON, /*OP_ANYCHAR_ML_SB*/
        Arguments.NON, /*OP_ANYCHAR_STAR_SB*/
        Arguments.NON, /*OP_ANYCHAR_ML_STAR_SB*/
        Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
        Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
        Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
        Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/

        Arguments.SPECIAL, /*OP_CCLASS_SB*/
        Arguments.SPECIAL, /*OP_CCLASS_NOT_SB*/

        Arguments.NON, /*OP_WORD_SB*/
        Arguments.NON, /*OP_NOT_WORD_SB*/
        Arguments.NON, /*OP_WORD_BOUND_SB*/
        Arguments.NON, /*OP_NOT_WORD_BOUND_SB*/
        Arguments.NON, /*OP_WORD_BEGIN_SB*/
        Arguments.NON, /*OP_WORD_END_SB*/

        Arguments.SPECIAL, /*OP_LOOK_BEHIND_SB*/

        Arguments.SPECIAL, /*OP_EXACT1_IC_SB*/
        Arguments.SPECIAL, /*OP_EXACTN_IC_SB*/
    } : null;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Sizes of opcode fields and of whole instructions (opcode plus arguments).
 *
 * <p>Every elementary field currently has size 1; instruction sizes are
 * written as sums of those fields rather than as literals. The original note
 * says this might be helpful for a potential {@code byte[]} migration.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/OPSize.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface OPSize {

    // elementary field sizes
    int OPCODE          = 1;
    int RELADDR         = 1;
    int ABSADDR         = 1;
    int LENGTH          = 1;
    int MEMNUM          = 1;
    int STATE_CHECK_NUM = 1;
    int REPEATNUM       = 1;
    int OPTION          = 1;
    int CODE_POINT      = 1;
    int POINTER         = 1;
    int INDEX           = 1;

    // op-code + arg size
    int ANYCHAR_STAR             = OPCODE;
    int ANYCHAR_STAR_PEEK_NEXT   = OPCODE + 1;
    int JUMP                     = OPCODE + RELADDR;
    int PUSH                     = OPCODE + RELADDR;
    int POP                      = OPCODE;
    int PUSH_OR_JUMP_EXACT1      = OPCODE + RELADDR + 1;
    int PUSH_IF_PEEK_NEXT        = OPCODE + RELADDR + 1;
    int REPEAT_INC               = OPCODE + MEMNUM;
    int REPEAT_INC_NG            = OPCODE + MEMNUM;
    int PUSH_POS                 = OPCODE;
    int PUSH_POS_NOT             = OPCODE + RELADDR;
    int POP_POS                  = OPCODE;
    int FAIL_POS                 = OPCODE;
    int SET_OPTION               = OPCODE + OPTION;
    int SET_OPTION_PUSH          = OPCODE + OPTION;
    int FAIL                     = OPCODE;
    int MEMORY_START             = OPCODE + MEMNUM;
    int MEMORY_START_PUSH        = OPCODE + MEMNUM;
    int MEMORY_END_PUSH          = OPCODE + MEMNUM;
    int MEMORY_END_PUSH_REC      = OPCODE + MEMNUM;
    int MEMORY_END               = OPCODE + MEMNUM;
    int MEMORY_END_REC           = OPCODE + MEMNUM;
    int PUSH_STOP_BT             = OPCODE;
    int POP_STOP_BT              = OPCODE;
    int NULL_CHECK_START         = OPCODE + MEMNUM;
    int NULL_CHECK_END           = OPCODE + MEMNUM;
    int LOOK_BEHIND              = OPCODE + LENGTH;
    int PUSH_LOOK_BEHIND_NOT     = OPCODE + RELADDR + LENGTH;
    int FAIL_LOOK_BEHIND_NOT     = OPCODE;
    int CALL                     = OPCODE + ABSADDR;
    int RETURN                   = OPCODE;

    // #ifdef USE_COMBINATION_EXPLOSION_CHECK
    int STATE_CHECK              = OPCODE + STATE_CHECK_NUM;
    int STATE_CHECK_PUSH         = OPCODE + STATE_CHECK_NUM + RELADDR;
    int STATE_CHECK_PUSH_OR_JUMP = OPCODE + STATE_CHECK_NUM + RELADDR;
    int STATE_CHECK_ANYCHAR_STAR = OPCODE + STATE_CHECK_NUM;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Reduction table for the six quantifier forms {@code ? * + ?? *? +?},
 * together with display strings.
 *
 * <p>The enum constants are referenced through {@link ReduceType} directly,
 * so no static imports are needed.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/Reduce.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface Reduce {

    /** Outcome of a reduction. */
    enum ReduceType {
        ASIS,       // as is
        DEL,        // delete parent
        A,          // to '*'
        AQ,         // to '*?'
        QQ,         // to '??'
        P_QQ,       // to '+)??'
        PQ_Q,       // to '+?)?'
    }

    /**
     * 6x6 table of reductions; each row is labelled with one quantifier form.
     * NOTE(review): columns presumably follow the same quantifier order as the
     * rows (the order of {@link #PopularQStr}) - confirm against the caller.
     */
    ReduceType[][] REDUCE_TABLE = {
        /* '?'  */ {ReduceType.DEL,  ReduceType.A,    ReduceType.A,   ReduceType.QQ,   ReduceType.AQ,   ReduceType.ASIS},
        /* '*'  */ {ReduceType.DEL,  ReduceType.DEL,  ReduceType.DEL, ReduceType.P_QQ, ReduceType.P_QQ, ReduceType.DEL},
        /* '+'  */ {ReduceType.A,    ReduceType.A,    ReduceType.DEL, ReduceType.ASIS, ReduceType.P_QQ, ReduceType.DEL},
        /* '??' */ {ReduceType.DEL,  ReduceType.AQ,   ReduceType.AQ,  ReduceType.DEL,  ReduceType.AQ,   ReduceType.AQ},
        /* '*?' */ {ReduceType.DEL,  ReduceType.DEL,  ReduceType.DEL, ReduceType.DEL,  ReduceType.DEL,  ReduceType.DEL},
        /* '+?' */ {ReduceType.ASIS, ReduceType.PQ_Q, ReduceType.DEL, ReduceType.AQ,   ReduceType.AQ,   ReduceType.DEL},
    };

    /** The six quantifier forms, in the same order as the table's row labels. */
    String[] PopularQStr = {"?", "*", "+", "??", "*?", "+?"};

    /**
     * Seven display strings, one per {@link ReduceType} constant.
     * NOTE(review): presumably indexed by {@code ReduceType.ordinal()} - confirm.
     */
    String[] ReduceQStr = {"", "", "*", "*?", "??", "+ and ??", "+? and ?"};
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Regex state codes. Per the original author's note, these are not needed at
 * the moment.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/RegexState.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface RegexState {
    int MODIFY    = -2;
    int COMPILING = -1;
    int NORMAL    = 0;
    int SEARCHING = 1;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Stack pop level codes: three consecutive values starting at zero.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/StackPopLevel.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface StackPopLevel {
    int FREE      = 0;
    int MEM_START = FREE + 1;
    int ALL       = MEM_START + 1;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Stack entry type codes and the masks used to test them.
 *
 * <p>The grouping comments ("used by", "handled by", "avoided by" normal-POP)
 * are carried over from the original source.
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/StackType.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface StackType {
    // stack
    int INVALID_STACK_INDEX  = -1;

    // stack type

    // used by normal-POP
    int ALT                  = 0x0001;
    int LOOK_BEHIND_NOT      = 0x0002;
    int POS_NOT              = 0x0003;

    // handled by normal-POP
    int MEM_START            = 0x0100;
    int MEM_END              = 0x8200;
    int REPEAT_INC           = 0x0300;
    int STATE_CHECK_MARK     = 0x1000;

    // avoided by normal-POP
    int NULL_CHECK_START     = 0x3000;
    int NULL_CHECK_END       = 0x5000; // for recursive call
    int MEM_END_MARK         = 0x8400;
    int POS                  = 0x0500; // used when POP-POS
    int STOP_BT              = 0x0600; // mark for "(?>...)"
    int REPEAT               = 0x0700;
    int CALL_FRAME           = 0x0800;
    int RETURN               = 0x0900;
    int VOID                 = 0x0a00; // for fill a blank

    // stack type check mask
    int MASK_POP_USED        = 0x00ff;
    int MASK_TO_VOID_TARGET  = 0x10ff;
    int MASK_MEM_END_OR_MARK = 0x8000; // MEM_END or MEM_END_MARK
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * String flags; each constant is a distinct single bit (bits 0 through 3).
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/StringType.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface StringType {
    int NSTR_RAW               = 0x1;
    int NSTR_AMBIG             = 0x2;
    int NSTR_DONT_GET_OPT_INFO = 0x4;
    int NSTR_SHARED            = 0x8;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Syntax flag bits in three numbering spaces ({@code OP_*}, {@code OP2_*}, and
 * the unprefixed behavior flags), plus three predefined combinations.
 *
 * <p>Bit positions restart at 0 in each space, so a flag from one space has
 * the same numeric value as some flag from another (for example
 * {@code OP_VARIABLE_META_CHARACTERS}, {@code OP2_ESC_CAPITAL_Q_QUOTE} and
 * {@code CONTEXT_INDEP_REPEAT_OPS} are all {@code 1 << 0}).
 *
 * <p>Source file:
 * {@code src/jdk/nashorn/internal/runtime/regexp/joni/constants/SyntaxProperties.java}
 * (package {@code jdk.nashorn.internal.runtime.regexp.joni.constants}).
 */
public interface SyntaxProperties {
    // syntax (operators)
    int OP_VARIABLE_META_CHARACTERS     = 1 << 0;
    int OP_DOT_ANYCHAR                  = 1 << 1;   // .
    int OP_ASTERISK_ZERO_INF            = 1 << 2;   // *
    int OP_ESC_ASTERISK_ZERO_INF        = 1 << 3;
    int OP_PLUS_ONE_INF                 = 1 << 4;   // +
    int OP_ESC_PLUS_ONE_INF             = 1 << 5;
    int OP_QMARK_ZERO_ONE               = 1 << 6;   // ?
    int OP_ESC_QMARK_ZERO_ONE           = 1 << 7;
    int OP_BRACE_INTERVAL               = 1 << 8;   // {lower,upper}
    int OP_ESC_BRACE_INTERVAL           = 1 << 9;   // \{lower,upper\}
    int OP_VBAR_ALT                     = 1 << 10;  // |
    int OP_ESC_VBAR_ALT                 = 1 << 11;  // \|
    int OP_LPAREN_SUBEXP                = 1 << 12;  // (...)
    int OP_ESC_LPAREN_SUBEXP            = 1 << 13;  // \(...\)
    int OP_ESC_AZ_BUF_ANCHOR            = 1 << 14;  // \A, \Z, \z
    int OP_ESC_CAPITAL_G_BEGIN_ANCHOR   = 1 << 15;  // \G
    int OP_DECIMAL_BACKREF              = 1 << 16;  // \num
    int OP_BRACKET_CC                   = 1 << 17;  // [...]
    int OP_ESC_W_WORD                   = 1 << 18;  // \w, \W
    int OP_ESC_LTGT_WORD_BEGIN_END      = 1 << 19;  // \<, \>
    int OP_ESC_B_WORD_BOUND             = 1 << 20;  // \b, \B
    int OP_ESC_S_WHITE_SPACE            = 1 << 21;  // \s, \S
    int OP_ESC_D_DIGIT                  = 1 << 22;  // \d, \D
    int OP_LINE_ANCHOR                  = 1 << 23;  // ^, $
    int OP_POSIX_BRACKET                = 1 << 24;  // [:xxxx:]
    int OP_QMARK_NON_GREEDY             = 1 << 25;  // ??,*?,+?,{n,m}?
    int OP_ESC_CONTROL_CHARS            = 1 << 26;  // \n,\r,\t,\a ...
    int OP_ESC_C_CONTROL                = 1 << 27;  // \cx
    int OP_ESC_OCTAL3                   = 1 << 28;  // \OOO
    int OP_ESC_X_HEX2                   = 1 << 29;  // \xHH
    int OP_ESC_X_BRACE_HEX8             = 1 << 30;  // \x{7HHHHHHH}

    // syntax (operators), second set
    int OP2_ESC_CAPITAL_Q_QUOTE         = 1 << 0;   // \Q...\E
    int OP2_QMARK_GROUP_EFFECT          = 1 << 1;   // (?...)
    int OP2_OPTION_PERL                 = 1 << 2;   // (?imsx), (?-imsx)
    int OP2_OPTION_RUBY                 = 1 << 3;   // (?imx), (?-imx)
    int OP2_PLUS_POSSESSIVE_REPEAT      = 1 << 4;   // ?+,*+,++
    int OP2_PLUS_POSSESSIVE_INTERVAL    = 1 << 5;   // {n,m}+
    int OP2_CCLASS_SET_OP               = 1 << 6;   // [...&&..[..]..]
    int OP2_QMARK_LT_NAMED_GROUP        = 1 << 7;   // (?<name>...)
    int OP2_ESC_K_NAMED_BACKREF         = 1 << 8;   // \k<name>
    int OP2_ESC_G_SUBEXP_CALL           = 1 << 9;   // \g<name>, \g<n>
    int OP2_ATMARK_CAPTURE_HISTORY      = 1 << 10;  // (?@..), (?@<x>..)
    int OP2_ESC_CAPITAL_C_BAR_CONTROL   = 1 << 11;  // \C-x
    int OP2_ESC_CAPITAL_M_BAR_META      = 1 << 12;  // \M-x
    int OP2_ESC_V_VTAB                  = 1 << 13;  // \v as VTAB
    int OP2_ESC_U_HEX4                  = 1 << 14;  // backslash, 'u', then four hex digits
    int OP2_ESC_GNU_BUF_ANCHOR          = 1 << 15;  // \`, \'
    int OP2_ESC_P_BRACE_CHAR_PROPERTY   = 1 << 16;  // \p{...}, \P{...}
    int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT  = 1 << 17;  // \p{^..}, \P{^..}
    // bit 18 is unused here; the original keeps OP2_CHAR_PROPERTY_PREFIX_IS commented out
    int OP2_ESC_H_XDIGIT                = 1 << 19;  // \h, \H
    int OP2_INEFFECTIVE_ESCAPE          = 1 << 20;  // a lone backslash

    // syntax (behavior)
    int CONTEXT_INDEP_ANCHORS           = 1 << 31;  // not implemented
    int CONTEXT_INDEP_REPEAT_OPS        = 1 << 0;   // ?, *, +, {n,m}
    int CONTEXT_INVALID_REPEAT_OPS      = 1 << 1;   // error or ignore
    int ALLOW_UNMATCHED_CLOSE_SUBEXP    = 1 << 2;   // ...)...
    int ALLOW_INVALID_INTERVAL          = 1 << 3;   // {???
    int ALLOW_INTERVAL_LOW_ABBREV       = 1 << 4;   // {,n} => {0,n}
    int STRICT_CHECK_BACKREF            = 1 << 5;   // /(\1)/, /\1()/ ..
    int DIFFERENT_LEN_ALT_LOOK_BEHIND   = 1 << 6;   // (?<=a|bc)
    int CAPTURE_ONLY_NAMED_GROUP        = 1 << 7;   // see doc/RE
    int ALLOW_MULTIPLEX_DEFINITION_NAME = 1 << 8;   // (?<x>)(?<x>)
    int FIXED_INTERVAL_IS_GREEDY_ONLY   = 1 << 9;   // a{n}?=(?:a{n})?

    // syntax (behavior) in char class [...]
    int NOT_NEWLINE_IN_NEGATIVE_CC      = 1 << 20;  // [^...]
    int BACKSLASH_ESCAPE_IN_CC          = 1 << 21;  // [..\w..] etc..
    int ALLOW_EMPTY_RANGE_IN_CC         = 1 << 22;
    int ALLOW_DOUBLE_RANGE_OP_IN_CC     = 1 << 23;  // [0-9-a]=[0-9\-a]

    // syntax (behavior) warning
    int WARN_CC_OP_NOT_ESCAPED          = 1 << 24;  // [,-,]
    int WARN_REDUNDANT_NESTED_REPEAT    = 1 << 25;  // (?:a*)+

    /** Combination of {@code OP_*} flags named POSIX common. */
    int POSIX_COMMON_OP =
            OP_DOT_ANYCHAR
            | OP_POSIX_BRACKET
            | OP_DECIMAL_BACKREF
            | OP_BRACKET_CC
            | OP_ASTERISK_ZERO_INF
            | OP_LINE_ANCHOR
            | OP_ESC_CONTROL_CHARS;

    /** Combination of {@code OP_*} flags named GNU regex. */
    int GNU_REGEX_OP =
            OP_DOT_ANYCHAR
            | OP_BRACKET_CC
            | OP_POSIX_BRACKET
            | OP_DECIMAL_BACKREF
            | OP_BRACE_INTERVAL
            | OP_LPAREN_SUBEXP
            | OP_VBAR_ALT
            | OP_ASTERISK_ZERO_INF
            | OP_PLUS_ONE_INF
            | OP_QMARK_ZERO_ONE
            | OP_ESC_AZ_BUF_ANCHOR
            | OP_ESC_CAPITAL_G_BEGIN_ANCHOR
            | OP_ESC_W_WORD
            | OP_ESC_B_WORD_BOUND
            | OP_ESC_LTGT_WORD_BEGIN_END
            | OP_ESC_S_WHITE_SPACE
            | OP_ESC_D_DIGIT
            | OP_LINE_ANCHOR;

    /** Combination of behavior flags named GNU regex. */
    int GNU_REGEX_BV =
            CONTEXT_INDEP_ANCHORS
            | CONTEXT_INDEP_REPEAT_OPS
            | CONTEXT_INVALID_REPEAT_OPS
            | ALLOW_INVALID_INTERVAL
            | BACKSLASH_ESCAPE_IN_CC
            | ALLOW_DOUBLE_RANGE_OP_IN_CC;
}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.constants;

/**
 * Four consecutive integer codes (0 to 3) classifying a target's emptiness.
 * Interface fields are implicitly {@code public static final}.
 */
public interface TargetInfo {
    /** Code 0. */
    int ISNOT_EMPTY  = 0;
    /** Code 1. */
    int IS_EMPTY     = 1;
    /** Code 2. */
    int IS_EMPTY_MEM = 2;
    /** Code 3. */
    int IS_EMPTY_REC = 3;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/constants/TokenType.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,48 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.constants; + +public enum TokenType { + EOT, /* end of token */ + RAW_BYTE, + CHAR, + STRING, + CODE_POINT, + ANYCHAR, + CHAR_TYPE, + BACKREF, + CALL, + ANCHOR, + OP_REPEAT, + INTERVAL, + ANYCHAR_ANYTIME, /* SQL '%' == .* */ + ALT, + SUBEXP_OPEN, + SUBEXP_CLOSE, + CC_OPEN, + QUOTE_OPEN, + CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + CC_CLOSE, + CC_RANGE, + POSIX_BRACKET_OPEN, + CC_AND, /* && */ + CC_CC_OPEN /* [ */ +}
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.constants;

/**
 * Two independent bit flags selecting a traversal callback position, and
 * their union.
 */
public interface Traverse {
    /** Bit 0 (value 1). */
    int TRAVERSE_CALLBACK_AT_FIRST = 1 << 0;

    /** Bit 1 (value 2). */
    int TRAVERSE_CALLBACK_AT_LAST = 1 << 1;

    /** Both bits set (value 3). */
    int TRAVERSE_CALLBACK_AT_BOTH =
            TRAVERSE_CALLBACK_AT_FIRST | TRAVERSE_CALLBACK_AT_LAST;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/AsciiTables.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,157 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.encoding; + +public class AsciiTables { + + public static final short AsciiCtypeTable[] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; + + public static final byte ToLowerCaseTable[] = { + (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007', + (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017', + (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027', + (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037', + (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047', + (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057', + (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067', + (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077', + (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147', + (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157', + (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167', + (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137', + (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147', + (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157', + (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', 
(byte)'\166', (byte)'\167', + (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177', + (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207', + (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217', + (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227', + (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237', + (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247', + (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257', + (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267', + (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277', + (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307', + (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317', + (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327', + (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337', + (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347', + (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357', + (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367', + (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377', + }; + + public static final byte 
ToUpperCaseTable[] = { + (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007', + (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017', + (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027', + (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037', + (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047', + (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057', + (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067', + (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077', + (byte)'\100', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107', + (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117', + (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127', + (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137', + (byte)'\140', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107', + (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117', + (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127', + (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177', + (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207', + (byte)'\210', (byte)'\211', 
(byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217', + (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227', + (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237', + (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247', + (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257', + (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267', + (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277', + (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307', + (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317', + (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327', + (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337', + (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347', + (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357', + (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367', + (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377', + }; + + public static final int LowerMap[][] = { + {0x41, 0x61}, + {0x42, 0x62}, + {0x43, 0x63}, + {0x44, 0x64}, + {0x45, 0x65}, + {0x46, 0x66}, + {0x47, 0x67}, + {0x48, 0x68}, + {0x49, 0x69}, + {0x4a, 0x6a}, + {0x4b, 0x6b}, + {0x4c, 0x6c}, + {0x4d, 0x6d}, + {0x4e, 0x6e}, + {0x4f, 0x6f}, + {0x50, 0x70}, + {0x51, 
0x71}, + {0x52, 0x72}, + {0x53, 0x73}, + {0x54, 0x74}, + {0x55, 0x75}, + {0x56, 0x76}, + {0x57, 0x77}, + {0x58, 0x78}, + {0x59, 0x79}, + {0x5a, 0x7a} + }; +} \ No newline at end of file
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.encoding;

/**
 * Character-type identifiers (0 to 14), three "special" identifiers formed by
 * OR-ing {@link #SPECIAL_MASK} onto an identifier, and bit sets over the
 * general-category constants of {@link Character}.
 *
 * <p>In every {@code *_MASK} set, bit {@code n} stands for the
 * {@code Character} general category whose constant value is {@code n}.
 */
public interface CharacterType {

    int NEWLINE = 0;
    int ALPHA   = 1;
    int BLANK   = 2;
    int CNTRL   = 3;
    int DIGIT   = 4;
    int GRAPH   = 5;
    int LOWER   = 6;
    int PRINT   = 7;
    int PUNCT   = 8;
    int SPACE   = 9;
    int UPPER   = 10;
    int XDIGIT  = 11;
    int WORD    = 12;
    int ALNUM   = 13;  /* alpha || digit */
    int ASCII   = 14;

    /** Bit 8; lies above every identifier declared above. */
    int SPECIAL_MASK = 256;
    int S = SPECIAL_MASK | SPACE;
    int D = SPECIAL_MASK | DIGIT;
    int W = SPECIAL_MASK | WORD;

    int LETTER_MASK =
            (1 << Character.UPPERCASE_LETTER)
            | (1 << Character.LOWERCASE_LETTER)
            | (1 << Character.TITLECASE_LETTER)
            | (1 << Character.MODIFIER_LETTER)
            | (1 << Character.OTHER_LETTER);

    /** Letters plus the three mark categories. */
    int ALPHA_MASK =
            LETTER_MASK
            | (1 << Character.COMBINING_SPACING_MARK)
            | (1 << Character.NON_SPACING_MARK)
            | (1 << Character.ENCLOSING_MARK);

    /** {@link #ALPHA_MASK} plus decimal digits. */
    int ALNUM_MASK =
            ALPHA_MASK
            | (1 << Character.DECIMAL_DIGIT_NUMBER);

    /** {@link #ALNUM_MASK} plus connector punctuation. */
    int WORD_MASK =
            ALNUM_MASK
            | (1 << Character.CONNECTOR_PUNCTUATION);

    int PUNCT_MASK =
            (1 << Character.CONNECTOR_PUNCTUATION)
            | (1 << Character.DASH_PUNCTUATION)
            | (1 << Character.END_PUNCTUATION)
            | (1 << Character.FINAL_QUOTE_PUNCTUATION)
            | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
            | (1 << Character.OTHER_PUNCTUATION)
            | (1 << Character.START_PUNCTUATION);

    int CNTRL_MASK =
            (1 << Character.CONTROL)
            | (1 << Character.FORMAT)
            | (1 << Character.PRIVATE_USE)
            | (1 << Character.SURROGATE);

    int SPACE_MASK =
            (1 << Character.SPACE_SEPARATOR)
            | (1 << Character.LINE_SEPARATOR)        // 0x2028
            | (1 << Character.PARAGRAPH_SEPARATOR);  // 0x2029

    /*
     * NOTE(review): GRAPH_MASK and PRINT_MASK contain only separator, control
     * and surrogate categories, which reads like the set of categories to
     * exclude rather than to accept - confirm against the code that tests
     * these masks.
     */
    int GRAPH_MASK =
            SPACE_MASK
            | (1 << Character.CONTROL)
            | (1 << Character.SURROGATE);

    int PRINT_MASK =
            (1 << Character.CONTROL)
            | (1 << Character.SURROGATE);
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/IntHolder.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,24 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.encoding; + +public class IntHolder { + public int value; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/ObjPtr.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,35 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.encoding; + +public final class ObjPtr<T> { + public ObjPtr() { + this(null); + } + + public ObjPtr(T p) { + this.p = p; + } + + public T p; + + static final ObjPtr<Void> NULL = new ObjPtr<Void>(); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/PosixBracket.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,77 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS".toCharArray(), WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.encoding; + +import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; +import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException; + +import java.util.HashMap; + +public class PosixBracket { + + public static final char[][] PBSNamesLower = { + "alnum".toCharArray(), + "alpha".toCharArray(), + "blank".toCharArray(), + "cntrl".toCharArray(), + "digit".toCharArray(), + "graph".toCharArray(), + "lower".toCharArray(), + "print".toCharArray(), + "punct".toCharArray(), + "space".toCharArray(), + "upper".toCharArray(), + "xdigit".toCharArray(), + "ascii".toCharArray(), + "word".toCharArray() + }; + + public static final int PBSValues[] = { + CharacterType.ALNUM, + CharacterType.ALPHA, + CharacterType.BLANK, + CharacterType.CNTRL, + CharacterType.DIGIT, + CharacterType.GRAPH, + CharacterType.LOWER, + CharacterType.PRINT, + CharacterType.PUNCT, + CharacterType.SPACE, + CharacterType.UPPER, + CharacterType.XDIGIT, + CharacterType.ASCII, + CharacterType.WORD, + }; + + public static int propertyNameToCType(String name) { + name = name.toLowerCase(); + if (!PBSTableUpper.containsKey(name)) { + throw new JOniException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replaceAll("%n", name)); + } + return PBSTableUpper.get(name); + } + + private static final HashMap<String,Integer> PBSTableUpper = new HashMap<String,Integer>(); + + static { + for (int i=0; i<PBSValues.length; i++) PBSTableUpper.put(new String(PBSNamesLower[i]), PBSValues[i]); + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/Ptr.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,35 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.encoding; + +public final class Ptr { + public Ptr() { + this(0); + } + + public Ptr(int p) { + this.p = p; + } + + public int p; + + public static final Ptr NULL = new Ptr(0); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/exception/ErrorMessages.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,98 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package jdk.nashorn.internal.runtime.regexp.joni.exception; + +import jdk.nashorn.internal.runtime.regexp.joni.Config; + +public interface ErrorMessages { + final String MISMATCH = "mismatch"; + final String NO_SUPPORT_CONFIG = "no support in this configuration"; + + /* from jcodings */ + final String ERR_INVALID_CHAR_PROPERTY_NAME = "invalid character property name <%n>"; + final String ERR_INVALID_CODE_POINT_VALUE = "invalid code point value"; + final String ERR_TOO_BIG_WIDE_CHAR_VALUE = "too big wide-char value"; + final String ERR_TOO_LONG_WIDE_CHAR_VALUE = "too long wide-char value"; + + /* internal error */ + final String ERR_MEMORY = "fail to memory allocation"; + final String ERR_MATCH_STACK_LIMIT_OVER = "match-stack limit over"; + final String ERR_TYPE_BUG = "undefined type (bug)"; + final String ERR_PARSER_BUG = "internal parser error (bug)"; + final String ERR_STACK_BUG = "stack error (bug)"; + final String ERR_UNDEFINED_BYTECODE = "undefined bytecode (bug)"; + final String ERR_UNEXPECTED_BYTECODE = "unexpected bytecode (bug)"; + final String ERR_DEFAULT_ENCODING_IS_NOT_SETTED = "default multibyte-encoding is not setted"; + final String ERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = "can't convert to wide-char on specified multibyte-encoding"; + + /* general error */ + final String ERR_INVALID_ARGUMENT = "invalid argument"; + + /* syntax error */ + final String ERR_END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace"; + final String ERR_END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket"; + final String ERR_EMPTY_CHAR_CLASS = "empty char-class"; + final String ERR_PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class"; + final String ERR_END_PATTERN_AT_ESCAPE = "end pattern at escape"; + final String ERR_END_PATTERN_AT_META = "end pattern at meta"; + final String ERR_END_PATTERN_AT_CONTROL = "end pattern at control"; + final String ERR_META_CODE_SYNTAX = "invalid meta-code syntax"; + final String ERR_CONTROL_CODE_SYNTAX = 
"invalid control-code syntax"; + final String ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range"; + final String ERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range"; + final String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class"; + final String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified"; + final String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid"; + final String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator"; + final String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis"; + final String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis"; + final String ERR_END_PATTERN_IN_GROUP = "end pattern in group"; + final String ERR_UNDEFINED_GROUP_OPTION = "undefined group option"; + final String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type"; + final String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind"; + final String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}"; + + /* values error (syntax error) */ + final String ERR_TOO_BIG_NUMBER = "too big number"; + final String ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range"; + final String ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range"; + final String ERR_EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class"; + final String ERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range"; + final String ERR_TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified"; + final String ERR_TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string"; + final String ERR_TOO_BIG_BACKREF_NUMBER = "too big backref number"; + final String ERR_INVALID_BACKREF = Config.USE_NAMED_GROUP ? 
"invalid backref number/name" : "invalid backref number"; + final String ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)"; + final String ERR_INVALID_WIDE_CHAR_VALUE = "invalid wide-char value"; + final String ERR_EMPTY_GROUP_NAME = "group name is empty"; + final String ERR_INVALID_GROUP_NAME = "invalid group name <%n>"; + final String ERR_INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP ? "invalid char in group name <%n>" : "invalid char in group number <%n>"; + final String ERR_UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference"; + final String ERR_UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference"; + final String ERR_MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>"; + final String ERR_MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call"; + final String ERR_NEVER_ENDING_RECURSION = "never ending recursion"; + final String ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history"; + final String ERR_NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination"; + final String ERR_INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options"; + final String ERR_OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count"; + final String ERR_TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value"; + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/exception/InternalException.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,28 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.exception; + +public class InternalException extends JOniException{ + private static final long serialVersionUID = -3871816465397927992L; + + public InternalException(String message) { + super(message); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/exception/JOniException.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,28 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.exception; + +public class JOniException extends RuntimeException{ + private static final long serialVersionUID = -6027192180014164667L; + + public JOniException(String message) { + super(message); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/exception/SyntaxException.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,28 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.exception; + +public class SyntaxException extends JOniException{ + private static final long serialVersionUID = 7862720128961874288L; + + public SyntaxException(String message) { + super(message); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/exception/ValueException.java Fri Feb 22 17:00:22 2013 +0100 @@ -0,0 +1,37 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package jdk.nashorn.internal.runtime.regexp.joni.exception; + +public class ValueException extends SyntaxException{ + private static final long serialVersionUID = -196013852479929134L; + + public ValueException(String message) { + super(message); + } + + public ValueException(String message, String str) { + super(message.replaceAll("%n", str)); + } + + public ValueException(String message, byte[]bytes, int p, int end) { + this(message, new String(bytes, p, end - p)); + } + +}