view src/jdk/nashorn/internal/runtime/regexp/joni/ast/CClassNode.java @ 1079:e1e27c4262be

8060204: Fix warnings in Joni and tests Reviewed-by: hannesw, sundar, attila
author lagergren
date Mon, 03 Nov 2014 11:47:41 +0100
parents ac62e33a99b0
children
line wrap: on
line source

/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni.ast;

import jdk.nashorn.internal.runtime.regexp.joni.BitSet;
import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer;
import jdk.nashorn.internal.runtime.regexp.joni.Config;
import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;

@SuppressWarnings("javadoc")
public final class CClassNode extends Node {
    private static final int FLAG_NCCLASS_NOT = 1<<0;
    private static final int FLAG_NCCLASS_SHARE = 1<<1;

    int flags;
    public final BitSet bs = new BitSet();  // conditional creation ?
    public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */

    private int ctype;                      // for hashing purposes

    private final static short AsciiCtypeTable[] = {
            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
            0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
            0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
            0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
            0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
            0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
            0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
            0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
            0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
            0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
            0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
            0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
            0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
            0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
    };

    // node_new_cclass
    public CClassNode() {}

    public void clear() {
        bs.clear();
        flags = 0;
        mbuf = null;
    }

    @Override
    public int getType() {
        return CCLASS;
    }

    @Override
    public String getName() {
        return "Character Class";
    }

    @Override
    public boolean equals(final Object other) {
        if (!(other instanceof CClassNode)) {
            return false;
        }
        final CClassNode cc = (CClassNode)other;
        return ctype == cc.ctype && isNot() == cc.isNot();
    }

    @Override
    public int hashCode() {
        if (Config.USE_SHARED_CCLASS_TABLE) {
            int hash = 0;
            hash += ctype;
            if (isNot()) {
                hash++;
            }
            return hash + (hash >> 5);
        }
        return super.hashCode();
    }

    @Override
    public String toString(final int level) {
        final StringBuilder value = new StringBuilder();
        value.append("\n  flags: " + flagsToString());
        value.append("\n  bs: " + pad(bs, level + 1));
        value.append("\n  mbuf: " + pad(mbuf, level + 1));

        return value.toString();
    }

    public String flagsToString() {
        final StringBuilder f = new StringBuilder();
        if (isNot()) {
            f.append("NOT ");
        }
        if (isShare()) {
            f.append("SHARE ");
        }
        return f.toString();
    }

    public boolean isEmpty() {
        return mbuf == null && bs.isEmpty();
    }

    public void addCodeRangeToBuf(final int from, final int to) {
        mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
    }

    public void addCodeRange(final ScanEnvironment env, final int from, final int to) {
        mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
    }

    public void addAllMultiByteRange() {
        mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
    }

    public void clearNotFlag() {
        if (isNot()) {
            bs.invert();

            mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
            clearNot();
        }
    }

    // and_cclass
    public void and(final CClassNode other) {
        final boolean not1 = isNot();
        BitSet bsr1 = bs;
        final CodeRangeBuffer buf1 = mbuf;
        final boolean not2 = other.isNot();
        BitSet bsr2 = other.bs;
        final CodeRangeBuffer buf2 = other.mbuf;

        if (not1) {
            final BitSet bs1 = new BitSet();
            bsr1.invertTo(bs1);
            bsr1 = bs1;
        }

        if (not2) {
            final BitSet bs2 = new BitSet();
            bsr2.invertTo(bs2);
            bsr2 = bs2;
        }

        bsr1.and(bsr2);

        if (bsr1 != bs) {
            bs.copy(bsr1);
            bsr1 = bs;
        }

        if (not1) {
            bs.invert();
        }

        CodeRangeBuffer pbuf = null;

        if (not1 && not2) {
            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
        } else {
            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);

            if (not1) {
                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
            }
        }
        mbuf = pbuf;

    }

    // or_cclass
    public void or(final CClassNode other) {
        final boolean not1 = isNot();
        BitSet bsr1 = bs;
        final CodeRangeBuffer buf1 = mbuf;
        final boolean not2 = other.isNot();
        BitSet bsr2 = other.bs;
        final CodeRangeBuffer buf2 = other.mbuf;

        if (not1) {
            final BitSet bs1 = new BitSet();
            bsr1.invertTo(bs1);
            bsr1 = bs1;
        }

        if (not2) {
            final BitSet bs2 = new BitSet();
            bsr2.invertTo(bs2);
            bsr2 = bs2;
        }

        bsr1.or(bsr2);

        if (bsr1 != bs) {
            bs.copy(bsr1);
            bsr1 = bs;
        }

        if (not1) {
            bs.invert();
        }

        CodeRangeBuffer pbuf = null;
        if (not1 && not2) {
            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
        } else {
            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
            if (not1) {
                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
            }
        }
        mbuf = pbuf;
    }

    // add_ctype_to_cc_by_range // Encoding out!
    public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) {
        final int n = mbr[0];

        if (!not) {
            for (int i=0; i<n; i++) {
                for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
                    if (j >= sbOut) {
                        if (Config.VANILLA) {
                            if (j == mbr[i * 2 + 2]) {
                                i++;
                            } else if (j > mbr[i * 2 + 1]) {
                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
                                i++;
                            }
                        } else {
                            if (j >= mbr[i * 2 + 1]) {
                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
                                i++;
                            }
                        }
                        // !goto sb_end!, remove duplication!
                        for (; i<n; i++) {
                            addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
                        }
                        return;
                    }
                    bs.set(j);
                }
            }
            // !sb_end:!
            for (int i=0; i<n; i++) {
                addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
            }

        } else {
            int prev = 0;

            for (int i=0; i<n; i++) {
                for (int j=prev; j < mbr[2 * i + 1]; j++) {
                    if (j >= sbOut) {
                        // !goto sb_end2!, remove duplication
                        prev = sbOut;
                        for (i=0; i<n; i++) {
                            if (prev < mbr[2 * i + 1]) {
                                addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
                            }
                            prev = mbr[i * 2 + 2] + 1;
                        }
                        if (prev < 0x7fffffff/*!!!*/) {
                            addCodeRangeToBuf(prev, 0x7fffffff);
                        }
                        return;
                    }
                    bs.set(j);
                }
                prev = mbr[2 * i + 2] + 1;
            }

            for (int j=prev; j<sbOut; j++) {
                bs.set(j);
            }

            // !sb_end2:!
            prev = sbOut;
            for (int i=0; i<n; i++) {
                if (prev < mbr[2 * i + 1]) {
                    addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
                }
                prev = mbr[i * 2 + 2] + 1;
            }
            if (prev < 0x7fffffff/*!!!*/) {
                addCodeRangeToBuf(prev, 0x7fffffff);
            }
        }
    }

    public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) {
        int ct = ctp;
        if (Config.NON_UNICODE_SDW) {
            switch (ct) {
            case CharacterType.D:
            case CharacterType.S:
            case CharacterType.W:
                ct ^= CharacterType.SPECIAL_MASK;

                if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) {
                    // \s in JavaScript includes unicode characters.
                    break;
                }

                if (not) {
                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
                        // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
                        if ((AsciiCtypeTable[c] & (1 << ct)) == 0) {
                            bs.set(c);
                        }
                    }
                    addAllMultiByteRange();
                } else {
                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
                        // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
                        if ((AsciiCtypeTable[c] & (1 << ct)) != 0) {
                            bs.set(c);
                        }
                    }
                }
                return;
            default:
                break;
            }
        }

        final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut);
        if (ranges != null) {
            addCTypeByRange(ct, not, sbOut.value, ranges);
            return;
        }

        switch(ct) {
        case CharacterType.ALPHA:
        case CharacterType.BLANK:
        case CharacterType.CNTRL:
        case CharacterType.DIGIT:
        case CharacterType.LOWER:
        case CharacterType.PUNCT:
        case CharacterType.SPACE:
        case CharacterType.UPPER:
        case CharacterType.XDIGIT:
        case CharacterType.ASCII:
        case CharacterType.ALNUM:
            if (not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isCodeCType(c, ct)) {
                        bs.set(c);
                    }
                }
                addAllMultiByteRange();
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isCodeCType(c, ct)) {
                        bs.set(c);
                    }
                }
            }
            break;

        case CharacterType.GRAPH:
        case CharacterType.PRINT:
            if (not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isCodeCType(c, ct)) {
                        bs.set(c);
                    }
                }
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isCodeCType(c, ct)) {
                        bs.set(c);
                    }
                }
                addAllMultiByteRange();
            }
            break;

        case CharacterType.WORD:
            if (!not) {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (EncodingHelper.isWord(c)) {
                        bs.set(c);
                    }
                }

                addAllMultiByteRange();
            } else {
                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
                    if (!EncodingHelper.isWord(c)) {
                        bs.set(c);
                    }
                }
            }
            break;

        default:
            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
        } // switch
    }

    public static final class CCStateArg {
        public int v;
        public int vs;
        public boolean vsIsRaw;
        public boolean vIsRaw;
        public CCVALTYPE inType;
        public CCVALTYPE type;
        public CCSTATE state;
    }

    public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) {
        if (arg.state == CCSTATE.RANGE) {
            throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
        }

        if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
            if (arg.type == CCVALTYPE.SB) {
                bs.set(arg.vs);
            } else if (arg.type == CCVALTYPE.CODE_POINT) {
                addCodeRange(env, arg.vs, arg.vs);
            }
        }
        arg.state = CCSTATE.VALUE;
        arg.type = CCVALTYPE.CLASS;
    }

    public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) {

        switch(arg.state) {
        case VALUE:
            if (arg.type == CCVALTYPE.SB) {
                if (arg.vs > 0xff) {
                    throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
                }
                bs.set(arg.vs);
            } else if (arg.type == CCVALTYPE.CODE_POINT) {
                addCodeRange(env, arg.vs, arg.vs);
            }
            break;

        case RANGE:
            if (arg.inType == arg.type) {
                if (arg.inType == CCVALTYPE.SB) {
                    if (arg.vs > 0xff || arg.v > 0xff) {
                        throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
                    }

                    if (arg.vs > arg.v) {
                        if (env.syntax.allowEmptyRangeInCC()) {
                            // goto ccs_range_end
                            arg.state = CCSTATE.COMPLETE;
                            break;
                        }
                        throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                    }
                    bs.setRange(arg.vs, arg.v);
                } else {
                    addCodeRange(env, arg.vs, arg.v);
                }
            } else {
                if (arg.vs > arg.v) {
                    if (env.syntax.allowEmptyRangeInCC()) {
                        // goto ccs_range_end
                        arg.state = CCSTATE.COMPLETE;
                        break;
                    }
                    throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                }
                bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
                addCodeRange(env, arg.vs, arg.v);
            }
            // ccs_range_end:
            arg.state = CCSTATE.COMPLETE;
            break;

        case COMPLETE:
        case START:
            arg.state = CCSTATE.VALUE;
            break;

        default:
            break;

        } // switch

        arg.vsIsRaw = arg.vIsRaw;
        arg.vs = arg.v;
        arg.type = arg.inType;
    }

    // onig_is_code_in_cc_len
    public boolean isCodeInCCLength(final int code) {
        boolean found;

        if (code > 0xff) {
            found = mbuf != null && mbuf.isInCodeRange(code);
        } else {
            found = bs.at(code);
        }

        if (isNot()) {
            return !found;
        }
        return found;
    }

    // onig_is_code_in_cc
    public boolean isCodeInCC(final int code) {
         return isCodeInCCLength(code);
    }

    public void setNot() {
        flags |= FLAG_NCCLASS_NOT;
    }

    public void clearNot() {
        flags &= ~FLAG_NCCLASS_NOT;
    }

    public boolean isNot() {
        return (flags & FLAG_NCCLASS_NOT) != 0;
    }

    public void setShare() {
        flags |= FLAG_NCCLASS_SHARE;
    }

    public void clearShare() {
        flags &= ~FLAG_NCCLASS_SHARE;
    }

    public boolean isShare() {
        return (flags & FLAG_NCCLASS_SHARE) != 0;
    }

}