view src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java @ 1079:e1e27c4262be

8060204: Fix warnings in Joni and tests Reviewed-by: hannesw, sundar, attila
author lagergren
date Mon, 03 Nov 2014 11:47:41 +0100
parents ac62e33a99b0
children
line wrap: on
line source

/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package jdk.nashorn.internal.runtime.regexp.joni;

import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;

@SuppressWarnings("javadoc")
public abstract class Matcher extends IntHolder {
    protected final Regex regex;

    protected final char[] chars;
    protected final int str;
    protected final int end;

    protected int msaStart;
    protected int msaOptions;
    protected final Region msaRegion;
    protected int msaBestLen;
    protected int msaBestS;

    protected int msaBegin;
    protected int msaEnd;

    public Matcher(final Regex regex, final char[] chars) {
        this(regex, chars, 0, chars.length);
    }

    public Matcher(final Regex regex, final char[] chars, final int p, final int end) {
        this.regex = regex;

        this.chars = chars;
        this.str = p;
        this.end = end;

        this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
    }

    // main matching method
    protected abstract int matchAt(int range, int sstart, int sprev);

    public final Region getRegion() {
        return msaRegion;
    }

    public final int getBegin() {
        return msaBegin;
    }

    public final int getEnd() {
        return msaEnd;
    }

    protected final void msaInit(final int option, final int start) {
        msaOptions = option;
        msaStart = start;
        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
            msaBestLen = -1;
        }
    }

    public final int match(final int at, final int range, final int option) {
        msaInit(option, at);

        final int prev = EncodingHelper.prevCharHead(str, at);

        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
            return matchAt(end /*range*/, at, prev);
        }
        return matchAt(range /*range*/, at, prev);
    }

    int low, high; // these are the return values
    private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) {
        int pprev = -1;
        int p = s;

        if (Config.DEBUG_SEARCH) {
            Config.log.println("forward_search_range: "+
                                "str: " + string +
                                ", end: " + e +
                                ", s: " + s +
                                ", range: " + range);
        }

        if (regex.dMin > 0) {
            p += regex.dMin;
        }

        retry:while (true) {
            p = regex.searchAlgorithm.search(regex, ch, p, e, range);

            if (p != -1 && p < range) {
                if (p - regex.dMin < s) {
                    // retry_gate:
                    pprev = p;
                    p++;
                    continue retry;
                }

                if (regex.subAnchor != 0) {
                    switch (regex.subAnchor) {
                    case AnchorType.BEGIN_LINE:
                        if (p != string) {
                            final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
                            if (!EncodingHelper.isNewLine(ch, prev, e)) {
                                // goto retry_gate;
                                pprev = p;
                                p++;
                                continue retry;
                            }
                        }
                        break;

                    case AnchorType.END_LINE:
                        if (p == e) {
                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
                                final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
                                if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) {
                                    // goto retry_gate;
                                    pprev = p;
                                    p++;
                                    continue retry;
                                }
                            }
                        } else if (!EncodingHelper.isNewLine(ch, p, e)) {
                            //if () break;
                            // goto retry_gate;
                            pprev = p;
                            p++;
                            continue retry;
                        }
                        break;

                    default:
                        break;
                    } // switch
                }

                if (regex.dMax == 0) {
                    low = p;
                    if (lowPrev != null) { // ??? // remove null checks
                        if (low > s) {
                            lowPrev.value = EncodingHelper.prevCharHead(s, p);
                        } else {
                            lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
                        }
                    }
                } else {
                    if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
                        low = p - regex.dMax;

                        if (low > s) {
                            low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
                            if (lowPrev != null && lowPrev.value == -1) {
                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
                            }
                        } else {
                            if (lowPrev != null) {
                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low);
                            }
                        }
                    }
                }
                /* no needs to adjust *high, *high is used as range check only */
                high = p - regex.dMin;

                if (Config.DEBUG_SEARCH) {
                    Config.log.println("forward_search_range success: "+
                                        "low: " + (low - string) +
                                        ", high: " + (high - string) +
                                        ", dmin: " + regex.dMin +
                                        ", dmax: " + regex.dMax);
                }

                return true;    /* success */
            }

            return false;   /* fail */
        } //while
    }

    // low, high
    private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) {
        int r = range;
        r += regex.dMin;
        int p = s;

        retry:while (true) {
            p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r);

            if (p != -1) {
                if (regex.subAnchor != 0) {
                    switch (regex.subAnchor) {
                    case AnchorType.BEGIN_LINE:
                        if (p != string) {
                            final int prev = EncodingHelper.prevCharHead(string, p);
                            if (!EncodingHelper.isNewLine(ch, prev, e)) {
                                p = prev;
                                continue retry;
                            }
                        }
                        break;

                    case AnchorType.END_LINE:
                        if (p == e) {
                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
                                final int prev = EncodingHelper.prevCharHead(adjrange, p);
                                if (prev == -1) {
                                    return false;
                                }
                                if (EncodingHelper.isNewLine(ch, prev, e)) {
                                    p = prev;
                                    continue retry;
                                }
                            }
                        } else if (!EncodingHelper.isNewLine(ch, p, e)) {
                            p = EncodingHelper.prevCharHead(adjrange, p);
                            if (p == -1) {
                                return false;
                            }
                            continue retry;
                        }
                        break;

                    default:
                        break;
                    } // switch
                }

                /* no needs to adjust *high, *high is used as range check only */
                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
                    low = p - regex.dMax;
                    high = p - regex.dMin;
                }

                if (Config.DEBUG_SEARCH) {
                    Config.log.println("backward_search_range: "+
                                        "low: " + (low - string) +
                                        ", high: " + (high - string));
                }

                return true;
            }

            if (Config.DEBUG_SEARCH) {
                Config.log.println("backward_search_range: fail.");
            }
            return false;
        } // while
    }

    // MATCH_AND_RETURN_CHECK
    private boolean matchCheck(final int upperRange, final int s, final int prev) {
        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
                //range = upperRange;
                if (matchAt(upperRange, s, prev) != -1) {
                    if (!isFindLongest(regex.options)) {
                        return true;
                    }
                }
            } else {
                //range = upperRange;
                if (matchAt(upperRange, s, prev) != -1) {
                    return true;
                }
            }
        } else {
            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
                if (matchAt(end, s, prev) != -1) {
                    //range = upperRange;
                    if (!isFindLongest(regex.options)) {
                        return true;
                    }
                }
            } else {
                //range = upperRange;
                if (matchAt(end, s, prev) != -1) {
                    return true;
                }
            }
        }
        return false;
    }

    public final int search(final int startp, final int rangep, final int option) {
        int start = startp, range = rangep;
        int s, prev;
        int origStart = start;
        final int origRange = range;

        if (Config.DEBUG_SEARCH) {
            Config.log.println("onig_search (entry point): "+
                    "str: " + str +
                    ", end: " + (end - str) +
                    ", start: " + (start - str) +
                    ", range " + (range - str));
        }

        if (start > end || start < str) {
            return -1;
        }

        /* anchor optimize: resume search range */
        if (regex.anchor != 0 && str < end) {
            int minSemiEnd, maxSemiEnd;

            if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
                /* search start-position only */
                // !begin_position:!
                if (range > start) {
                    range = start + 1;
                } else {
                    range = start;
                }
            } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
                /* search str-position only */
                if (range > start) {
                    if (start != str)
                     {
                        return -1; // mismatch_no_msa;
                    }
                    range = str + 1;
                } else {
                    if (range <= str) {
                        start = str;
                        range = str;
                    } else {
                        return -1; // mismatch_no_msa;
                    }
                }
            } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
                minSemiEnd = maxSemiEnd = end;
                // !end_buf:!
                if (endBuf(start, range, minSemiEnd, maxSemiEnd))
                 {
                    return -1; // mismatch_no_msa;
                }
            } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
                final int preEnd = EncodingHelper.stepBack(str, end, 1);
                maxSemiEnd = end;
                if (EncodingHelper.isNewLine(chars, preEnd, end)) {
                    minSemiEnd = preEnd;
                    if (minSemiEnd > str && start <= minSemiEnd) {
                        // !goto end_buf;!
                        if (endBuf(start, range, minSemiEnd, maxSemiEnd))
                         {
                            return -1; // mismatch_no_msa;
                        }
                    }
                } else {
                    minSemiEnd = end;
                    // !goto end_buf;!
                    if (endBuf(start, range, minSemiEnd, maxSemiEnd))
                     {
                        return -1; // mismatch_no_msa;
                    }
                }
            } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
                // goto !begin_position;!
                if (range > start) {
                    range = start + 1;
                } else {
                    range = start;
                }
            }

        } else if (str == end) { /* empty string */
            // empty address ?
            if (Config.DEBUG_SEARCH) {
                Config.log.println("onig_search: empty string.");
            }

            if (regex.thresholdLength == 0) {
                s = start = str;
                prev = -1;
                msaInit(option, start);

                if (matchCheck(end, s, prev)) {
                    return match(s);
                }
                return mismatch();
            }
            return -1; // goto mismatch_no_msa;
        }

        if (Config.DEBUG_SEARCH) {
            Config.log.println("onig_search(apply anchor): " +
                                "end: " + (end - str) +
                                ", start " + (start - str) +
                                ", range " + (range - str));
        }

        msaInit(option, origStart);

        s = start;
        if (range > start) {    /* forward search */
            if (s > str) {
                prev = EncodingHelper.prevCharHead(str, s);
            } else {
                prev = 0; // -1
            }

            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
                int schRange = range;
                if (regex.dMax != 0) {
                    if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
                        schRange = end;
                    } else {
                        schRange += regex.dMax;
                        if (schRange > end) {
                            schRange = end;
                        }
                    }
                }
                if ((end - start) < regex.thresholdLength) {
                    return mismatch();
                }

                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
                    do {
                        if (!forwardSearchRange(chars, str, end, s, schRange, this)) {
                            return mismatch(); // low, high, lowPrev
                        }
                        if (s < low) {
                            s = low;
                            prev = value;
                        }
                        while (s <= high) {
                            if (matchCheck(origRange, s, prev)) {
                                return match(s); // ???
                            }
                            prev = s;
                            s++;
                        }
                    } while (s < range);
                }
                /* check only. */
                if (!forwardSearchRange(chars, str, end, s, schRange, null)) {
                    return mismatch();
                }

                if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
                    do {
                        if (matchCheck(origRange, s, prev)) {
                            return match(s);
                        }
                        prev = s;
                        s++;
                    } while (s < range);
                    return mismatch();
                }
            }

            do {
                if (matchCheck(origRange, s, prev)) {
                    return match(s);
                }
                prev = s;
                s++;
            } while (s < range);

            if (s == range) { /* because empty match with /$/. */
                if (matchCheck(origRange, s, prev)) {
                    return match(s);
                }
            }
        } else { /* backward search */
            if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
                if (origStart < end) {
                    origStart++; // /* is upper range */
                }
            }

            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
                int adjrange;
                if (range < end) {
                    adjrange = range;
                } else {
                    adjrange = end;
                }
                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
                    do {
                        int schStart = s + regex.dMax;
                        if (schStart > end) {
                            schStart = end;
                        }
                        if (!backwardSearchRange(chars, str, end, schStart, range, adjrange))
                         {
                            return mismatch(); // low, high
                        }
                        if (s > high) {
                            s = high;
                        }
                        while (s != -1 && s >= low) {
                            prev = EncodingHelper.prevCharHead(str, s);
                            if (matchCheck(origStart, s, prev)) {
                                return match(s);
                            }
                            s = prev;
                        }
                    } while (s >= range);
                    return mismatch();
                }
                if ((end - range) < regex.thresholdLength) {
                    return mismatch();
                }

                int schStart = s;
                if (regex.dMax != 0) {
                    if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
                        schStart = end;
                    } else {
                        schStart += regex.dMax;
                        if (schStart > end) {
                            schStart = end;
                        }
                    }
                }
                if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) {
                    return mismatch();
                }
            }

            do {
                prev = EncodingHelper.prevCharHead(str, s);
                if (matchCheck(origStart, s, prev)) {
                    return match(s);
                }
                s = prev;
            } while (s >= range);

        }
        return mismatch();
    }

    private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) {
        int start = startp;
        int range = rangep;

        if ((maxSemiEnd - str) < regex.anchorDmin) {
            return true; // mismatch_no_msa;
        }

        if (range > start) {
            if ((minSemiEnd - start) > regex.anchorDmax) {
                start = minSemiEnd - regex.anchorDmax;
                if (start >= end) {
                    /* match with empty at end */
                    start = EncodingHelper.prevCharHead(str, end);
                }
            }
            if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
                range = maxSemiEnd - regex.anchorDmin + 1;
            }
            if (start >= range)
             {
                return true; // mismatch_no_msa;
            }
        } else {
            if ((minSemiEnd - range) > regex.anchorDmax) {
                range = minSemiEnd - regex.anchorDmax;
            }
            if ((maxSemiEnd - start) < regex.anchorDmin) {
                start = maxSemiEnd - regex.anchorDmin;
            }
            if (range > start)
             {
                return true; // mismatch_no_msa;
            }
        }
        return false;
    }

    private int match(final int s) {
        return s - str; // sstart ???
    }

    private int mismatch() {
        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
            if (msaBestLen >= 0) {
                final int s = msaBestS;
                return match(s);
            }
        }
        // falls through finish:
        return -1;
    }
}