Mercurial > hg > openjdk7.svn

/*
 * Copyright 1999-2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package com.sun.tools.javac.parser;

import java.io.*;
import java.nio.*;
import java.nio.ByteBuffer;
import java.nio.charset.*;
import java.nio.channels.*;
import java.util.regex.*;

import com.sun.tools.javac.util.*;

import com.sun.tools.javac.code.Source;

import static com.sun.tools.javac.parser.Token.*;
import static com.sun.tools.javac.util.LayoutCharacters.*;

/** The lexical analyzer maps an input stream consisting of
 *  ASCII characters and Unicode escapes into a token sequence.
 *
 *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
 *  you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */
public class Scanner implements Lexer {

    private static boolean scannerDebug = false;

    /** A factory for creating scanners. */
    public static class Factory {
	/** The context key for the scanner factory. */
	public static final Context.Key<Scanner.Factory> scannerFactoryKey =
	    new Context.Key<Scanner.Factory>();

	/** Get the Factory instance for this context. */
	public static Factory instance(Context context) {
	    Factory instance = context.get(scannerFactoryKey);
	    if (instance == null)
		instance = new Factory(context);
	    return instance;
	}

	final Log log;
	final Name.Table names;
	final Source source;
	final Keywords keywords;

	/** Create a new scanner factory. */
	protected Factory(Context context) {
	    context.put(scannerFactoryKey, this);
	    this.log = Log.instance(context);
	    this.names = Name.Table.instance(context);
	    this.source = Source.instance(context);
	    this.keywords = Keywords.instance(context);
	}

        public Scanner newScanner(CharSequence input) {
            if (input instanceof CharBuffer) {
                return new Scanner(this, (CharBuffer)input);
            } else {
                char[] array = input.toString().toCharArray();
                return newScanner(array, array.length);
            }
        }

        public Scanner newScanner(char[] input, int inputLength) {
            return new Scanner(this, input, inputLength);
        }
    }

    /* Output variables; set by nextToken():
     */

    /** The token, set by nextToken().
     */
    private Token token;

    /** Allow hex floating-point literals.
     */
    private boolean allowHexFloats;

    /** The token's position, 0-based offset from beginning of text.
     */
    private int pos;

    /** Character position just after the last character of the token.
     */
    private int endPos;

    /** The last character position of the previous token.
     */
    private int prevEndPos;

    /** The position where a lexical error occurred;
     */
    private int errPos = Position.NOPOS;

    /** The name of an identifier or token:
     */
    private Name name;

    /** The radix of a numeric literal token.
     */
    private int radix;

    /** Has a @deprecated been encountered in last doc comment?
     *  this needs to be reset by client.
     */
    protected boolean deprecatedFlag = false;

    /** A character buffer for literals.
     */
    private char[] sbuf = new char[128];
    private int sp;

    /** The input buffer, index of next chacter to be read,
     *  index of one past last character in buffer.
     */
    private char[] buf;
    private int bp;
    private int buflen;
    private int eofPos;

    /** The current character.
     */
    private char ch;

    /** The buffer index of the last converted unicode character
     */
    private int unicodeConversionBp = -1;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The name table. */
    private final Name.Table names;

    /** The keyword table. */
    private final Keywords keywords;

    /** Common code for constructors. */
    private Scanner(Factory fac) {
	this.log = fac.log;
	this.names = fac.names;
	this.keywords = fac.keywords;
	this.allowHexFloats = fac.source.allowHexFloats();
    }

    private static final boolean hexFloatsWork = hexFloatsWork();
    private static boolean hexFloatsWork() {
        try {
            Float.valueOf("0x1.0p1");
            return true;
        } catch (NumberFormatException ex) {
            return false;
        }
    }

    /** Create a scanner from the input buffer.  buffer must implement
     *  array() and compact(), and remaining() must be less than limit().
     */
    protected Scanner(Factory fac, CharBuffer buffer) {
	this(fac, JavacFileManager.toArray(buffer), buffer.limit());
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param input the input, might be modified
     * @param inputLength the size of the input.
     * Must be positive and less than or equal to input.length.
     */
    protected Scanner(Factory fac, char[] input, int inputLength) {
	this(fac);
        eofPos = inputLength;
	if (inputLength == input.length) {
            if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
                inputLength--;
            } else {
                char[] newInput = new char[inputLength + 1];
                System.arraycopy(input, 0, newInput, 0, input.length);
                input = newInput;
            }
        }
	buf = input;
	buflen = inputLength;
	buf[buflen] = EOI;
        bp = -1;
	scanChar();
    }

    /** Report an error at the given position using the provided arguments.
     */
    private void lexError(int pos, String key, Object... args) {
        log.error(pos, key, args);
        token = ERROR;
        errPos = pos;
    }

    /** Report an error at the current token position using the provided
     *  arguments.
     */
    private void lexError(String key, Object... args) {
	lexError(pos, key, args);
    }

    /** Convert an ASCII digit from its base (8, 10, or 16)
     *  to its value.
     */
    private int digit(int base) {
	char c = ch;
	int result = Character.digit(c, base);
	if (result >= 0 && c > 0x7f) {
	    lexError(pos+1, "illegal.nonascii.digit");
	    ch = "0123456789abcdef".charAt(result);
	}
	return result;
    }

    /** Convert unicode escape; bp points to initial '\' character
     *  (Spec 3.3).
     */
    private void convertUnicode() {
	if (ch == '\\' && unicodeConversionBp != bp) {
	    bp++; ch = buf[bp];
	    if (ch == 'u') {
		do {
		    bp++; ch = buf[bp];
		} while (ch == 'u');
		int limit = bp + 3;
		if (limit < buflen) {
		    int d = digit(16);
		    int code = d;
		    while (bp < limit && d >= 0) {
			bp++; ch = buf[bp];
			d = digit(16);
			code = (code << 4) + d;
		    }
		    if (d >= 0) {
			ch = (char)code;
			unicodeConversionBp = bp;
			return;
		    }
		}
		lexError(bp, "illegal.unicode.esc");
	    } else {
		bp--;
		ch = '\\';
	    }
	}
    }

    /** Read next character.
     */
    private void scanChar() {
	ch = buf[++bp];
	if (ch == '\\') {
	    convertUnicode();
	}
    }

    /** Read next character in comment, skipping over double '\' characters.
     */
    private void scanCommentChar() {
	scanChar();
	if (ch == '\\') {
	    if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
		bp++;
	    } else {
		convertUnicode();
	    }
	}
    }

    /** Append a character to sbuf.
     */
    private void putChar(char ch) {
	if (sp == sbuf.length) {
	    char[] newsbuf = new char[sbuf.length * 2];
	    System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
	    sbuf = newsbuf;
	}
	sbuf[sp++] = ch;
    }

    /** For debugging purposes: print character.
     */
    private void dch() {
        System.err.print(ch); System.out.flush();
    }

    /** Read next character in character or string literal and copy into sbuf.
     */
    private void scanLitChar() {
        if (ch == '\\') {
	    if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
		bp++;
		putChar('\\');
		scanChar();
	    } else {
		scanChar();
		switch (ch) {
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
		    char leadch = ch;
		    int oct = digit(8);
		    scanChar();
		    if ('0' <= ch && ch <= '7') {
			oct = oct * 8 + digit(8);
			scanChar();
			if (leadch <= '3' && '0' <= ch && ch <= '7') {
			    oct = oct * 8 + digit(8);
			    scanChar();
			}
		    }
		    putChar((char)oct);
		    break;
		case 'b':
		    putChar('\b'); scanChar(); break;
		case 't':
		    putChar('\t'); scanChar(); break;
		case 'n':
		    putChar('\n'); scanChar(); break;
		case 'f':
		    putChar('\f'); scanChar(); break;
		case 'r':
		    putChar('\r'); scanChar(); break;
		case '\'':
		    putChar('\''); scanChar(); break;
		case '\"':
		    putChar('\"'); scanChar(); break;
		case '\\':
		    putChar('\\'); scanChar(); break;
		default:
 		    lexError(bp, "illegal.esc.char");
		}
	    }
	} else if (bp != buflen) {
            putChar(ch); scanChar();
        }
    }

    /** Read fractional part of hexadecimal floating point number.
     */
    private void scanHexExponentAndSuffix() {
        if (ch == 'p' || ch == 'P') {
	    putChar(ch);
            scanChar();
            if (ch == '+' || ch == '-') {
		putChar(ch);
                scanChar();
	    }
	    if ('0' <= ch && ch <= '9') {
		do {
		    putChar(ch);
		    scanChar();
		} while ('0' <= ch && ch <= '9');
		if (!allowHexFloats) {
		    lexError("unsupported.fp.lit");
                    allowHexFloats = true;
                }
                else if (!hexFloatsWork)
		    lexError("unsupported.cross.fp.lit");
	    } else
		lexError("malformed.fp.lit");
	} else {
	    lexError("malformed.fp.lit");
	}
	if (ch == 'f' || ch == 'F') {
	    putChar(ch);
	    scanChar();
            token = FLOATLITERAL;
	} else {
	    if (ch == 'd' || ch == 'D') {
		putChar(ch);
		scanChar();
	    }
	    token = DOUBLELITERAL;
	}
    }

    /** Read fractional part of floating point number.
     */
    private void scanFraction() {
        while (digit(10) >= 0) {
	    putChar(ch);
            scanChar();
        }
	int sp1 = sp;
        if (ch == 'e' || ch == 'E') {
	    putChar(ch);
            scanChar();
            if (ch == '+' || ch == '-') {
		putChar(ch);
                scanChar();
	    }
	    if ('0' <= ch && ch <= '9') {
		do {
		    putChar(ch);
		    scanChar();
		} while ('0' <= ch && ch <= '9');
		return;
	    }
	    lexError("malformed.fp.lit");
	    sp = sp1;
	}
    }

    /** Read fractional part and 'd' or 'f' suffix of floating point number.
     */
    private void scanFractionAndSuffix() {
	this.radix = 10;
	scanFraction();
	if (ch == 'f' || ch == 'F') {
	    putChar(ch);
	    scanChar();
            token = FLOATLITERAL;
	} else {
	    if (ch == 'd' || ch == 'D') {
		putChar(ch);
		scanChar();
	    }
	    token = DOUBLELITERAL;
	}
    }

    /** Read fractional part and 'd' or 'f' suffix of floating point number.
     */
    private void scanHexFractionAndSuffix(boolean seendigit) {
	this.radix = 16;
	assert ch == '.';
	putChar(ch);
	scanChar();
        while (digit(16) >= 0) {
	    seendigit = true;
	    putChar(ch);
            scanChar();
        }
	if (!seendigit)
	    lexError("invalid.hex.number");
	else
	    scanHexExponentAndSuffix();
    }

    /** Read a number.
     *  @param radix  The radix of the number; one of 8, 10, 16.
     */
    private void scanNumber(int radix) {
	this.radix = radix;
	// for octal, allow base-10 digit in case it's a float literal
	int digitRadix = (radix <= 10) ? 10 : 16;
	boolean seendigit = false;
	while (digit(digitRadix) >= 0) {
	    seendigit = true;
	    putChar(ch);
	    scanChar();
	}
	if (radix == 16 && ch == '.') {
	    scanHexFractionAndSuffix(seendigit);
	} else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
	    scanHexExponentAndSuffix();
	} else if (radix <= 10 && ch == '.') {
	    putChar(ch);
	    scanChar();
	    scanFractionAndSuffix();
	} else if (radix <= 10 &&
		   (ch == 'e' || ch == 'E' ||
		    ch == 'f' || ch == 'F' ||
		    ch == 'd' || ch == 'D')) {
	    scanFractionAndSuffix();
	} else {
	    if (ch == 'l' || ch == 'L') {
		scanChar();
		token = LONGLITERAL;
	    } else {
		token = INTLITERAL;
	    }
	}
    }

    /** Read an identifier.
     */
    private void scanIdent() {
	boolean isJavaIdentifierPart;
	char high;
	do {
	    if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
	    // optimization, was: putChar(ch);

	    scanChar();
	    switch (ch) {
	    case 'A': case 'B': case 'C': case 'D': case 'E':
	    case 'F': case 'G': case 'H': case 'I': case 'J':
	    case 'K': case 'L': case 'M': case 'N': case 'O':
	    case 'P': case 'Q': case 'R': case 'S': case 'T':
	    case 'U': case 'V': case 'W': case 'X': case 'Y':
	    case 'Z':
	    case 'a': case 'b': case 'c': case 'd': case 'e':
	    case 'f': case 'g': case 'h': case 'i': case 'j':
	    case 'k': case 'l': case 'm': case 'n': case 'o':
	    case 'p': case 'q': case 'r': case 's': case 't':
	    case 'u': case 'v': case 'w': case 'x': case 'y':
	    case 'z':
	    case '$': case '_':
	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
		break;
            case '\u001A': // EOI is also a legal identifier part
                if (bp >= buflen) {
                    name = names.fromChars(sbuf, 0, sp);
                    token = keywords.key(name);
                    return;
                }
                break;
	    default:
                if (ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
		    high = scanSurrogates();
                    if (high != 0) {
	                if (sp == sbuf.length) {
                            putChar(high);
                        } else {
                            sbuf[sp++] = high;
                        }
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(
                            Character.toCodePoint(high, ch));
                    } else {
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
                    }
                }
		if (!isJavaIdentifierPart) {
		    name = names.fromChars(sbuf, 0, sp);
		    token = keywords.key(name);
		    return;
		}
	    }
	} while (true);
    }

    /** Are surrogates supported?
     */
    final static boolean surrogatesSupported = surrogatesSupported();
    private static boolean surrogatesSupported() {
        try {
            Character.isHighSurrogate('a');
            return true;
        } catch (NoSuchMethodError ex) {
            return false;
        }
    }

    /** Scan surrogate pairs.  If 'ch' is a high surrogate and
     *  the next character is a low surrogate, then put the low
     *  surrogate in 'ch', and return the high surrogate.
     *  otherwise, just return 0.
     */
    private char scanSurrogates() {
        if (surrogatesSupported && Character.isHighSurrogate(ch)) {
            char high = ch;

            scanChar();

            if (Character.isLowSurrogate(ch)) {
                return high;
            }

            ch = high;
        }

        return 0;
    }

    /** Return true if ch can be part of an operator.
     */
    private boolean isSpecial(char ch) {
        switch (ch) {
        case '!': case '%': case '&': case '*': case '?':
        case '+': case '-': case ':': case '<': case '=':
        case '>': case '^': case '|': case '~':
	case '@':
            return true;
        default:
            return false;
        }
    }

    /** Read longest possible sequence of special characters and convert
     *  to token.
     */
    private void scanOperator() {
	while (true) {
	    putChar(ch);
	    Name newname = names.fromChars(sbuf, 0, sp);
            if (keywords.key(newname) == IDENTIFIER) {
		sp--;
		break;
	    }
            name = newname;
            token = keywords.key(newname);
	    scanChar();
	    if (!isSpecial(ch)) break;
	}
    }

    /**
     * Scan a documention comment; determine if a deprecated tag is present.
     * Called once the initial /, * have been skipped, positioned at the second *
     * (which is treated as the beginning of the first line).
     * Stops positioned at the closing '/'.
     */
    @SuppressWarnings("fallthrough")
    private void scanDocComment() {
	boolean deprecatedPrefix = false;

	forEachLine:
	while (bp < buflen) {

	    // Skip optional WhiteSpace at beginning of line
	    while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
		scanCommentChar();
	    }

	    // Skip optional consecutive Stars
	    while (bp < buflen && ch == '*') {
		scanCommentChar();
		if (ch == '/') {
		    return;
		}
	    }

	    // Skip optional WhiteSpace after Stars
	    while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
		scanCommentChar();
	    }

	    deprecatedPrefix = false;
	    // At beginning of line in the JavaDoc sense.
	    if (bp < buflen && ch == '@' && !deprecatedFlag) {
		scanCommentChar();
		if (bp < buflen && ch == 'd') {
		    scanCommentChar();
		    if (bp < buflen && ch == 'e') {
			scanCommentChar();
			if (bp < buflen && ch == 'p') {
			    scanCommentChar();
			    if (bp < buflen && ch == 'r') {
				scanCommentChar();
				if (bp < buflen && ch == 'e') {
				    scanCommentChar();
				    if (bp < buflen && ch == 'c') {
					scanCommentChar();
					if (bp < buflen && ch == 'a') {
					    scanCommentChar();
					    if (bp < buflen && ch == 't') {
						scanCommentChar();
						if (bp < buflen && ch == 'e') {
						    scanCommentChar();
						    if (bp < buflen && ch == 'd') {
							deprecatedPrefix = true;
							scanCommentChar();
						    }}}}}}}}}}}
	    if (deprecatedPrefix && bp < buflen) {
		if (Character.isWhitespace(ch)) {
		    deprecatedFlag = true;
		} else if (ch == '*') {
		    scanCommentChar();
		    if (ch == '/') {
			deprecatedFlag = true;
			return;
		    }
		}
	    }

	    // Skip rest of line
	    while (bp < buflen) {
		switch (ch) {
		case '*':
		    scanCommentChar();
		    if (ch == '/') {
			return;
		    }
		    break;
		case CR: // (Spec 3.4)
		    scanCommentChar();
		    if (ch != LF) {
			continue forEachLine;
		    }
		    /* fall through to LF case */
		case LF: // (Spec 3.4)
		    scanCommentChar();
		    continue forEachLine;
		default:
		    scanCommentChar();
		}
	    } // rest of line
	} // forEachLine
	return;
    }

    /** The value of a literal token, recorded as a string.
     *  For integers, leading 0x and 'l' suffixes are suppressed.
     */
    public String stringVal() {
	return new String(sbuf, 0, sp);
    }

    /** Read token.
     */
    public void nextToken() {

	try {
	    prevEndPos = endPos;
	    sp = 0;

	    while (true) {
		pos = bp;
		switch (ch) {
		case ' ': // (Spec 3.6)
		case '\t': // (Spec 3.6)
		case FF: // (Spec 3.6)
		    do {
			scanChar();
		    } while (ch == ' ' || ch == '\t' || ch == FF);
		    endPos = bp;
		    processWhiteSpace();
		    break;
		case LF: // (Spec 3.4)
		    scanChar();
		    endPos = bp;
		    processLineTerminator();
		    break;
		case CR: // (Spec 3.4)
		    scanChar();
		    if (ch == LF) {
			scanChar();
		    }
		    endPos = bp;
		    processLineTerminator();
		    break;
		case 'A': case 'B': case 'C': case 'D': case 'E':
		case 'F': case 'G': case 'H': case 'I': case 'J':
		case 'K': case 'L': case 'M': case 'N': case 'O':
		case 'P': case 'Q': case 'R': case 'S': case 'T':
		case 'U': case 'V': case 'W': case 'X': case 'Y':
		case 'Z':
		case 'a': case 'b': case 'c': case 'd': case 'e':
		case 'f': case 'g': case 'h': case 'i': case 'j':
		case 'k': case 'l': case 'm': case 'n': case 'o':
		case 'p': case 'q': case 'r': case 's': case 't':
		case 'u': case 'v': case 'w': case 'x': case 'y':
		case 'z':
		case '$': case '_':
		    scanIdent();
		    return;
		case '0':
		    scanChar();
		    if (ch == 'x' || ch == 'X') {
			scanChar();
			if (ch == '.') {
			    scanHexFractionAndSuffix(false);
			} else if (digit(16) < 0) {
			    lexError("invalid.hex.number");
			} else {
			    scanNumber(16);
			}
		    } else {
			putChar('0');
			scanNumber(8);
		    }
		    return;
		case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		    scanNumber(10);
		    return;
		case '.':
		    scanChar();
		    if ('0' <= ch && ch <= '9') {
			putChar('.');
			scanFractionAndSuffix();
		    } else if (ch == '.') {
			putChar('.'); putChar('.');
			scanChar();
			if (ch == '.') {
			    scanChar();
			    putChar('.');
			    token = ELLIPSIS;
			} else {
			    lexError("malformed.fp.lit");
			}
		    } else {
			token = DOT;
		    }
		    return;
		case ',':
		    scanChar(); token = COMMA; return;
		case ';':
		    scanChar(); token = SEMI; return;
		case '(':
		    scanChar(); token = LPAREN; return;
		case ')':
		    scanChar(); token = RPAREN; return;
		case '[':
		    scanChar(); token = LBRACKET; return;
		case ']':
		    scanChar(); token = RBRACKET; return;
		case '{':
		    scanChar(); token = LBRACE; return;
		case '}':
		    scanChar(); token = RBRACE; return;
		case '/':
		    scanChar();
		    if (ch == '/') {
			do {
			    scanCommentChar();
			} while (ch != CR && ch != LF && bp < buflen);
			if (bp < buflen) {
			    endPos = bp;
			    processComment(CommentStyle.LINE);
			}
			break;
		    } else if (ch == '*') {
			scanChar();
                        CommentStyle style;
			if (ch == '*') {
                            style = CommentStyle.JAVADOC;
			    scanDocComment();
			} else {
                            style = CommentStyle.BLOCK;
			    while (bp < buflen) {
				if (ch == '*') {
				    scanChar();
				    if (ch == '/') break;
				} else {
				    scanCommentChar();
				}
			    }
			}
			if (ch == '/') {
			    scanChar();
			    endPos = bp;
			    processComment(style);
			    break;
			} else {
			    lexError("unclosed.comment");
			    return;
			}
		    } else if (ch == '=') {
			name = names.slashequals;
			token = SLASHEQ;
			scanChar();
		    } else {
			name = names.slash;
			token = SLASH;
		    }
		    return;
		case '\'':
		    scanChar();
		    if (ch == '\'') {
			lexError("empty.char.lit");
		    } else {
			if (ch == CR || ch == LF)
			    lexError(pos, "illegal.line.end.in.char.lit");
			scanLitChar();
			if (ch == '\'') {
			    scanChar();
			    token = CHARLITERAL;
			} else {
			    lexError(pos, "unclosed.char.lit");
			}
		    }
		    return;
		case '\"':
		    scanChar();
		    while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
			scanLitChar();
		    if (ch == '\"') {
			token = STRINGLITERAL;
			scanChar();
		    } else {
			lexError(pos, "unclosed.str.lit");
		    }
		    return;
		default:
		    if (isSpecial(ch)) {
			scanOperator();
		    } else {
                        boolean isJavaIdentifierStart;
                        if (ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            char high = scanSurrogates();
                            if (high != 0) {
	                        if (sp == sbuf.length) {
                                    putChar(high);
                                } else {
                                    sbuf[sp++] = high;
                                }

                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                    Character.toCodePoint(high, ch));
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
			    scanIdent();
		        } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
			    token = EOF;
                            pos = bp = eofPos;
		        } else {
                            lexError("illegal.char", String.valueOf((int)ch));
			    scanChar();
		        }
		    }
		    return;
		}
	    }
	} finally {
	    endPos = bp;
	    if (scannerDebug)
		System.out.println("nextToken(" + pos
				   + "," + endPos + ")=|" +
				   new String(getRawCharacters(pos, endPos))
				   + "|");
	}
    }

    /** Return the current token, set by nextToken().
     */
    public Token token() {
        return token;
    }

    /** Sets the current token.
     */
    public void token(Token token) {
        this.token = token;
    }

    /** Return the current token's position: a 0-based
     *  offset from beginning of the raw input stream
     *  (before unicode translation)
     */
    public int pos() {
        return pos;
    }

    /** Return the last character position of the current token.
     */
    public int endPos() {
        return endPos;
    }

    /** Return the last character position of the previous token.
     */
    public int prevEndPos() {
        return prevEndPos;
    }

    /** Return the position where a lexical error occurred;
     */
    public int errPos() {
        return errPos;
    }

    /** Set the position where a lexical error occurred;
     */
    public void errPos(int pos) {
        errPos = pos;
    }

    /** Return the name of an identifier or token for the current token.
     */
    public Name name() {
        return name;
    }

    /** Return the radix of a numeric literal token.
     */
    public int radix() {
        return radix;
    }

    /** Has a @deprecated been encountered in last doc comment?
     *  This needs to be reset by client with resetDeprecatedFlag.
     */
    public boolean deprecatedFlag() {
        return deprecatedFlag;
    }

    public void resetDeprecatedFlag() {
        deprecatedFlag = false;
    }

    /**
     * Returns the documentation string of the current token.
     */
    public String docComment() {
        return null;
    }

    /**
     * Returns a copy of the input buffer, up to its inputLength.
     * Unicode escape sequences are not translated.
     */
    public char[] getRawCharacters() {
        char[] chars = new char[buflen];
        System.arraycopy(buf, 0, chars, 0, buflen);
        return chars;
    }

    /**
     * Returns a copy of a character array subset of the input buffer.
     * The returned array begins at the <code>beginIndex</code> and
     * extends to the character at index <code>endIndex - 1</code>.
     * Thus the length of the substring is <code>endIndex-beginIndex</code>.
     * This behavior is like
     * <code>String.substring(beginIndex, endIndex)</code>.
     * Unicode escape sequences are not translated.
     *
     * @param beginIndex the beginning index, inclusive.
     * @param endIndex the ending index, exclusive.
     * @throws IndexOutOfBounds if either offset is outside of the
     *         array bounds
     */
    public char[] getRawCharacters(int beginIndex, int endIndex) {
        int length = endIndex - beginIndex;
        char[] chars = new char[length];
        System.arraycopy(buf, beginIndex, chars, 0, length);
        return chars;
    }

    public enum CommentStyle {
        LINE,
        BLOCK,
        JAVADOC,
    }

    /**
     * Called when a complete comment has been scanned. pos and endPos
     * will mark the comment boundary.
     */
    protected void processComment(CommentStyle style) {
	if (scannerDebug)
	    System.out.println("processComment(" + pos
			       + "," + endPos + "," + style + ")=|"
                               + new String(getRawCharacters(pos, endPos))
			       + "|");
    }

    /**
     * Called when a complete whitespace run has been scanned. pos and endPos
     * will mark the whitespace boundary.
     */
    protected void processWhiteSpace() {
	if (scannerDebug)
	    System.out.println("processWhitespace(" + pos
			       + "," + endPos + ")=|" +
			       new String(getRawCharacters(pos, endPos))
			       + "|");
    }

    /**
     * Called when a line terminator has been processed.
     */
    protected void processLineTerminator() {
	if (scannerDebug)
	    System.out.println("processTerminator(" + pos
			       + "," + endPos + ")=|" +
			       new String(getRawCharacters(pos, endPos))
			       + "|");
    }

    /** Build a map for translating between line numbers and
     * positions in the input.
     *
     * @return a LineMap */
    public Position.LineMap getLineMap() {
	return Position.makeLineMap(buf, buflen, false);
    }

}
author	xiomara
date	Fri, 31 Aug 2007 00:44:13 +0000
parents	a4ed3fb96592
children