# HG changeset patch # User andrew # Date 1340979601 -3600 # Node ID 411912b74c9c33ce9361d7b8b38ab7018081807e # Parent 0dd012bd32dd1f5db77060c2dc4f1cbbcd90b5ff# Parent 5eb867cdd08ca299fe03b31760acd57aac2b5673 Merge jdk7u6-b16 diff -r 0dd012bd32dd -r 411912b74c9c .hgtags --- a/.hgtags Thu Jun 28 00:42:08 2012 +0100 +++ b/.hgtags Fri Jun 29 15:20:01 2012 +0100 @@ -165,12 +165,21 @@ c51876b27811ba0f6ea3409ba19d357b7400908a icedtea-2.2-branchpoint 7d18bccaec3781f3d4f2d71879f91e257db2f0f7 jdk7u4-b13 82c5b3166b3194e7348b2a9d146b6760c9a77128 jdk7u4-b14 +36490d49683f7be9d8fbbe1f8eefa1fe9fe550fa jdk7u5-b01 36490d49683f7be9d8fbbe1f8eefa1fe9fe550fa jdk7u4-b15 5c881231f1161fbc4fe86383ce59dd6a1bbe4f8e jdk7u4-b16 77a453ae863fee408035c8986f7e3bb22b7252cb jdk7u4-b17 02400887d8c82eab88933df7167d1f8ab826113f jdk7u4-b18 eb1f8bea2e93a498a9b9f42d33efad564d960407 jdk7u4-b19 b08fa5f665726f578674c2d93b21b37a4330b16f jdk7u4-b20 +501dd924118687733a875bdbebfba3f98ca38a6c jdk7u4-b30 +a90108a5e1612782c4ab49a3e7de422cce7280d8 jdk7u4-b21 +9de44cdaafe7d5fcc0e8f0bc7c7b4df782e8293e jdk7u4-b22 +75ffe94cf0fbbe1a3ed7f613fb60dc2b76c89cb5 jdk7u4-b31 +aa6df713daa9dbddd7fa31db29c6c493687feaa9 jdk7u5-b02 +661bf134f96513c06f009cb4c38e21d661fb2ed5 jdk7u5-b04 +bfd88f182bbeb52d27574efc97e7b1e7c417d90b jdk7u5-b05 +00bf886e24075477b8df5ddff31542eaa5f2fce0 jdk7u5-b30 82c5b3166b3194e7348b2a9d146b6760c9a77128 jdk7u6-b01 6211e6e5c90efab0d033a99d9797a60aaad1f203 jdk7u6-b02 2b8fd45212c5846f7c60e0b64fec9f0fba77b372 jdk7u6-b03 @@ -184,3 +193,6 @@ 5078a73b3448849f3328af5e0323b3e1b8d2d26c jdk7u6-b11 c378e596fb5b2ebeb60b89da7ad33f329d407e2d jdk7u6-b12 15b71daf5e69c169fcbd383c0251cfc99e558d8a jdk7u6-b13 +da79c0fdf9a8b5403904e6ffdd8f5dc335d489d0 jdk7u6-b14 +94474d6f28284a1ef492984dd6d6f66f8787de80 jdk7u6-b15 +0b329a8d325b6a58d89c6042dac62ce5852380ab jdk7u6-b16 diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java --- a/src/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java Fri Jun 29 15:20:01 2012 +0100 @@ -286,12 +286,13 @@ //STAX related properties //defaultValues. + protected boolean fSupportDTD = true; protected boolean fReplaceEntityReferences = true; protected boolean fSupportExternalEntities = false; protected boolean fReportCdataEvent = false ; protected boolean fIsCoalesce = false ; protected String fDeclaredEncoding = null; - /** Disallow doctype declaration. */ + /** Xerces Feature: Disallow doctype declaration. */ protected boolean fDisallowDoctype = false; // drivers @@ -1847,7 +1848,7 @@ // start general entity if (!fEntityStore.isDeclaredEntity(name)) { //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception - if (fDisallowDoctype && fReplaceEntityReferences) { + if (!fSupportDTD && fReplaceEntityReferences) { reportFatalError("EntityNotDeclared", new Object[]{name}); return; } diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java --- a/src/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java Fri Jun 29 15:20:01 2012 +0100 @@ -278,7 +278,7 @@ fDoctypeSystemId = null; fSeenDoctypeDecl = false; fNamespaceContext.reset(); - fDisallowDoctype = !((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); + fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); // xerces features fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); @@ -628,7 +628,7 @@ // scanning methods /** Scans a doctype declaration. */ - protected boolean scanDoctypeDecl(boolean ignore) throws IOException, XNIException { + protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { // spaces if (!fEntityScanner.skipSpaces()) { @@ -653,7 +653,7 @@ fHasExternalDTD = fDoctypeSystemId != null; // Attempt to locate an external subset with an external subset resolver. - if (!ignore && !fHasExternalDTD && fExternalSubsetResolver != null) { + if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); fDTDDescription.setRootName(fDoctypeName); fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); @@ -661,7 +661,7 @@ } // call handler - if (!ignore && fDocumentHandler != null) { + if (supportDTD && fDocumentHandler != null) { // NOTE: I don't like calling the doctypeDecl callback until // end of the *full* doctype line (including internal // subset) is parsed correctly but SAX2 requires that @@ -916,6 +916,10 @@ } case SCANNER_STATE_DOCTYPE: { + if (fDisallowDoctype) { + reportFatalError("DoctypeNotAllowed", null); + } + if (fSeenDoctypeDecl) { reportFatalError("AlreadySeenDoctype", null); @@ -924,7 +928,7 @@ // scanDoctypeDecl() sends XNI doctypeDecl event that // in SAX is converted to startDTD() event. - if (scanDoctypeDecl(fDisallowDoctype)) { + if (scanDoctypeDecl(fSupportDTD)) { //allow parsing of entity decls to continue in order to stay well-formed setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS); fSeenInternalSubset = true; @@ -934,8 +938,6 @@ setDriver(fContentDriver); //always return DTD event, the event however, will not contain any entities return fDTDDriver.next(); - // If no DTD support, ignore and continue parsing - //return fDisallowDoctype ? next() : dtdEvent; } if(fSeenDoctypeDecl){ @@ -950,7 +952,7 @@ if (fDoctypeSystemId != null) { if (((fValidation || fLoadExternalDTD) && (fValidationManager == null || !fValidationManager.isCachedDTD()))) { - if (!fDisallowDoctype) + if (fSupportDTD) setScannerState(SCANNER_STATE_DTD_EXTERNAL); else setScannerState(SCANNER_STATE_PROLOG); @@ -967,7 +969,7 @@ // This handles the case of a DOCTYPE that had neither an internal subset or an external subset. fDTDScanner.setInputSource(fExternalSubsetSource); fExternalSubsetSource = null; - if (!fDisallowDoctype) + if (fSupportDTD) setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); else setScannerState(SCANNER_STATE_PROLOG); @@ -1113,7 +1115,7 @@ } fMarkupDepth--; - if (fDisallowDoctype) { + if (!fSupportDTD) { //simply reset the entity store without having to mess around //with the DTD Scanner code fEntityStore = fEntityManager.getEntityStore(); diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java --- a/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Fri Jun 29 15:20:01 2012 +0100 @@ -182,7 +182,8 @@ EXTERNAL_GENERAL_ENTITIES, EXTERNAL_PARAMETER_ENTITIES, ALLOW_JAVA_ENCODINGS, - WARN_ON_DUPLICATE_ENTITYDEF + WARN_ON_DUPLICATE_ENTITYDEF, + STANDARD_URI_CONFORMANT }; /** Feature defaults. */ @@ -192,6 +193,7 @@ Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, + Boolean.FALSE }; /** Recognized properties. */ diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xerces/internal/impl/XMLScanner.java diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xml/internal/serializer/CharInfo.java --- a/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java Fri Jun 29 15:20:01 2012 +0100 @@ -55,7 +55,7 @@ final class CharInfo { /** Given a character, lookup a String to output (e.g. a decorated entity reference). */ - private HashMap m_charToString; + private HashMap m_charToString = new HashMap(); /** * The name of the HTML entities file. @@ -72,50 +72,42 @@ "com.sun.org.apache.xml.internal.serializer.XMLEntities"; /** The horizontal tab character, which the parser should always normalize. */ - static final char S_HORIZONAL_TAB = 0x09; + public static final char S_HORIZONAL_TAB = 0x09; /** The linefeed character, which the parser should always normalize. */ - static final char S_LINEFEED = 0x0A; + public static final char S_LINEFEED = 0x0A; /** The carriage return character, which the parser should always normalize. */ - static final char S_CARRIAGERETURN = 0x0D; - static final char S_SPACE = 0x20; - static final char S_QUOTE = 0x22; - static final char S_LT = 0x3C; - static final char S_GT = 0x3E; - static final char S_NEL = 0x85; - static final char S_LINE_SEPARATOR = 0x2028; + public static final char S_CARRIAGERETURN = 0x0D; /** This flag is an optimization for HTML entities. It false if entities * other than quot (34), amp (38), lt (60) and gt (62) are defined * in the range 0 to 127. * @xsl.usage internal */ - boolean onlyQuotAmpLtGt; + final boolean onlyQuotAmpLtGt; /** Copy the first 0,1 ... ASCII_MAX values into an array */ - static final int ASCII_MAX = 128; + private static final int ASCII_MAX = 128; /** Array of values is faster access than a set of bits - * to quickly check ASCII characters in attribute values, - * the value is true if the character in an attribute value - * should be mapped to a String. + * to quickly check ASCII characters in attribute values. */ - private final boolean[] shouldMapAttrChar_ASCII; + private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX]; /** Array of values is faster access than a set of bits - * to quickly check ASCII characters in text nodes, - * the value is true if the character in a text node - * should be mapped to a String. + * to quickly check ASCII characters in text nodes. */ - private final boolean[] shouldMapTextChar_ASCII; + private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX]; + + private boolean[] isCleanTextASCII = new boolean[ASCII_MAX]; /** An array of bits to record if the character is in the set. * Although information in this array is complete, the * isSpecialAttrASCII array is used first because access to its values * is common and faster. */ - private final int array_of_bits[]; + private int array_of_bits[] = createEmptySetOfIntegers(65535); // 5 for 32 bit words, 6 for 64 bit words ... @@ -146,38 +138,33 @@ /** - * A base constructor just to explicitly create the fields, - * with the exception of m_charToString which is handled - * by the constructor that delegates base construction to this one. - *

- * m_charToString is not created here only for performance reasons, - * to avoid creating a Hashtable that will be replaced when - * making a mutable copy, {@link #mutableCopyOf(CharInfo)}. + * Constructor that reads in a resource file that describes the mapping of + * characters to entity references. + * This constructor is private, just to force the use + * of the getCharInfo(entitiesResource) factory + * + * Resource files must be encoded in UTF-8 and can either be properties + * files with a .properties extension assumed. Alternatively, they can + * have the following form, with no particular extension assumed: * + *

+     * # First char # is a comment
+     * Entity numericValue
+     * quot 34
+     * amp 38
+     * 
+ * + * @param entitiesResource Name of properties or resource file that should + * be loaded, which describes that mapping of characters to entity + * references. */ - private CharInfo() + private CharInfo(String entitiesResource, String method) { - this.array_of_bits = createEmptySetOfIntegers(65535); - this.firstWordNotUsed = 0; - this.shouldMapAttrChar_ASCII = new boolean[ASCII_MAX]; - this.shouldMapTextChar_ASCII = new boolean[ASCII_MAX]; - this.m_charKey = new CharKey(); - - // Not set here, but in a constructor that uses this one - // this.m_charToString = new Hashtable(); - - this.onlyQuotAmpLtGt = true; - - - return; + this(entitiesResource, method, false); } private CharInfo(String entitiesResource, String method, boolean internal) { - // call the default constructor to create the fields - this(); - m_charToString = new HashMap(); - ResourceBundle entities = null; boolean noExtraEntities = true; @@ -203,10 +190,12 @@ String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); - boolean extra = defineEntity(name, (char) code); - if (extra) + defineEntity(name, (char) code); + if (extraEntity(code)) noExtraEntities = false; } + set(S_LINEFEED); + set(S_CARRIAGERETURN); } else { InputStream is = null; @@ -290,8 +279,8 @@ int code = Integer.parseInt(value); - boolean extra = defineEntity(name, (char) code); - if (extra) + defineEntity(name, (char) code); + if (extraEntity(code)) noExtraEntities = false; } } @@ -300,6 +289,8 @@ } is.close(); + set(S_LINEFEED); + set(S_CARRIAGERETURN); } catch (Exception e) { throw new RuntimeException( Utils.messages.createMessage( @@ -317,8 +308,31 @@ } } + /* initialize the array isCleanTextASCII[] with a cache of values + * for use by ToStream.character(char[], int , int) + * and the array isSpecialTextASCII[] with the opposite values + * (all in the name of performance!) + */ + for (int ch = 0; ch Unlike internal entities, character references are a string to single * character mapping. They are used to map non-ASCII characters both on - * parsing and printing, primarily for HTML documents. '&lt;' is an + * parsing and printing, primarily for HTML documents. '<amp;' is an * example of a character reference.

* * @param name The entity's name * @param value The entity's value - * @return true if the mapping is not one of: - * */ - private boolean defineEntity(String name, char value) + private void defineEntity(String name, char value) { StringBuilder sb = new StringBuilder("&"); sb.append(name); sb.append(';'); String entityString = sb.toString(); - boolean extra = defineChar2StringMapping(entityString, value); - return extra; + defineChar2StringMapping(entityString, value); } /** - * A utility object, just used to map characters to output Strings, - * needed because a HashMap needs to map an object as a key, not a - * Java primitive type, like a char, so this object gets around that - * and it is reusable. - */ - private final CharKey m_charKey; - - /** * Map a character to a String. For example given * the character '>' this method would return the fully decorated * entity name "<". @@ -413,21 +400,21 @@ /** * Tell if the character argument that is from - * an attribute value has a mapping to a String. + * an attribute value should have special treatment. * * @param value the value of a character that is in an attribute value * @return true if the character should have any special treatment, * such as when writing out attribute values, - * such as when writing out entity references. + * or entity references. * @xsl.usage internal */ - final boolean shouldMapAttrChar(int value) + final boolean isSpecialAttrChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) - return shouldMapAttrChar_ASCII[value]; + return isSpecialAttrASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). @@ -436,27 +423,48 @@ /** * Tell if the character argument that is from a - * text node has a mapping to a String, for example - * to map '<' to "<". + * text node should have special treatment. * * @param value the value of a character that is in a text node - * @return true if the character has a mapping to a String, - * such as when writing out entity references. + * @return true if the character should have any special treatment, + * such as when writing out attribute values, + * or entity references. * @xsl.usage internal */ - final boolean shouldMapTextChar(int value) + final boolean isSpecialTextChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) - return shouldMapTextChar_ASCII[value]; + return isSpecialTextASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } + /** + * This method is used to determine if an ASCII character in + * a text node (not an attribute value) is "clean". + * @param value the character to check (0 to 127). + * @return true if the character can go to the writer as-is + * @xsl.usage internal + */ + final boolean isTextASCIIClean(int value) + { + return isCleanTextASCII[value]; + } + +// In the future one might want to use the array directly and avoid +// the method call, but I think the JIT alreay inlines this well enough +// so don't do it (for now) - bjm +// public final boolean[] getASCIIClean() +// { +// return isCleanTextASCII; +// } + + private static CharInfo getCharInfoBasedOnPrivilege( final String entitiesFileName, final String method, final boolean internal){ @@ -491,17 +499,15 @@ { CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName); if (charInfo != null) { - return mutableCopyOf(charInfo); + return charInfo; } // try to load it internally - cache try { charInfo = getCharInfoBasedOnPrivilege(entitiesFileName, method, true); - // Put the common copy of charInfo in the cache, but return - // a copy of it. m_getCharInfoCache.put(entitiesFileName, charInfo); - return mutableCopyOf(charInfo); + return charInfo; } catch (Exception e) {} // try to load it externally - do not cache @@ -528,41 +534,7 @@ method, false); } - /** - * Create a mutable copy of the cached one. - * @param charInfo The cached one. - * @return - */ - private static CharInfo mutableCopyOf(CharInfo charInfo) { - CharInfo copy = new CharInfo(); - - int max = charInfo.array_of_bits.length; - System.arraycopy(charInfo.array_of_bits,0,copy.array_of_bits,0,max); - - copy.firstWordNotUsed = charInfo.firstWordNotUsed; - - max = charInfo.shouldMapAttrChar_ASCII.length; - System.arraycopy(charInfo.shouldMapAttrChar_ASCII,0,copy.shouldMapAttrChar_ASCII,0,max); - - max = charInfo.shouldMapTextChar_ASCII.length; - System.arraycopy(charInfo.shouldMapTextChar_ASCII,0,copy.shouldMapTextChar_ASCII,0,max); - - // utility field copy.m_charKey is already created in the default constructor - - copy.m_charToString = (HashMap) charInfo.m_charToString.clone(); - - copy.onlyQuotAmpLtGt = charInfo.onlyQuotAmpLtGt; - - return copy; - } - - /** - * Table of user-specified char infos. - * The table maps entify file names (the name of the - * property file without the .properties extension) - * to CharInfo objects populated with entities defined in - * corresponding property file. - */ + /** Table of user-specified char infos. */ private static HashMap m_getCharInfoCache = new HashMap(); /** @@ -604,8 +576,7 @@ * the creation of the set. */ private final void set(int i) { - setASCIItextDirty(i); - setASCIIattrDirty(i); + setASCIIdirty(i); int j = (i >> SHIFT_PER_WORD); // this word is used int k = j + 1; @@ -640,43 +611,24 @@ return in_the_set; } + // record if there are any entities other than + // quot, amp, lt, gt (probably user defined) /** - * This method returns true if there are some non-standard mappings to - * entities other than quot, amp, lt, gt, and its only purpose is for - * performance. - * @param charToMap The value of the character that is mapped to a String - * @param outputString The String to which the character is mapped, usually - * an entity reference such as "<". - * @return true if the mapping is not one of: - * + * @return true if the entity + * @param code The value of the character that has an entity defined + * for it. */ - private boolean extraEntity(String outputString, int charToMap) + private boolean extraEntity(int entityValue) { boolean extra = false; - if (charToMap < ASCII_MAX) + if (entityValue < 128) { - switch (charToMap) + switch (entityValue) { - case '"' : // quot - if (!outputString.equals(""")) - extra = true; - break; - case '&' : // amp - if (!outputString.equals("&")) - extra = true; - break; - case '<' : // lt - if (!outputString.equals("<")) - extra = true; - break; - case '>' : // gt - if (!outputString.equals(">")) - extra = true; + case 34 : // quot + case 38 : // amp + case 60 : // lt + case 62 : // gt break; default : // other entity in range 0 to 127 extra = true; @@ -686,61 +638,49 @@ } /** - * If the character is in the ASCII range then - * mark it as needing replacement with - * a String on output if it occurs in a text node. + * If the character is a printable ASCII character then + * mark it as not clean and needing replacement with + * a String on output. * @param ch */ - private void setASCIItextDirty(int j) + private void setASCIIdirty(int j) { if (0 <= j && j < ASCII_MAX) { - shouldMapTextChar_ASCII[j] = true; + isCleanTextASCII[j] = false; + isSpecialTextASCII[j] = true; } } /** - * If the character is in the ASCII range then - * mark it as needing replacement with - * a String on output if it occurs in a attribute value. + * If the character is a printable ASCII character then + * mark it as and not needing replacement with + * a String on output. * @param ch */ - private void setASCIIattrDirty(int j) + private void setASCIIclean(int j) { if (0 <= j && j < ASCII_MAX) { - shouldMapAttrChar_ASCII[j] = true; + isCleanTextASCII[j] = true; + isSpecialTextASCII[j] = false; } } - /** - * Call this method to register a char to String mapping, for example - * to map '<' to "<". - * @param outputString The String to map to. - * @param inputChar The char to map from. - * @return true if the mapping is not one of: - * - */ - boolean defineChar2StringMapping(String outputString, char inputChar) + private void defineChar2StringMapping(String outputString, char inputChar) { CharKey character = new CharKey(inputChar); m_charToString.put(character, outputString); - set(inputChar); // mark the character has having a mapping to a String - - boolean extraMapping = extraEntity(outputString, inputChar); - return extraMapping; - + set(inputChar); } /** * Simple class for fast lookup of char values, when used with * hashtables. You can set the char, then use it as a key. * + * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. + * It exists to cut the serializers dependancy on that package. + * * @xsl.usage internal */ private static class CharKey extends Object diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java --- a/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java Fri Jun 29 15:20:01 2012 +0100 @@ -58,7 +58,7 @@ * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ - private final CharInfo m_htmlcharInfo = + private static final CharInfo m_htmlcharInfo = // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); @@ -1369,7 +1369,7 @@ // System.out.println("ch: "+(int)ch); // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); - if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch))) + if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) { cleanLength++; } diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xml/internal/serializer/ToStream.java --- a/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Fri Jun 29 15:20:01 2012 +0100 @@ -919,8 +919,7 @@ { // This is the old/fast code here, but is this // correct for all encodings? - if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch || - CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch)) + if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch)) ret= true; else ret = false; @@ -1029,7 +1028,7 @@ * * @throws java.io.IOException */ - int accumDefaultEntity( + protected int accumDefaultEntity( java.io.Writer writer, char ch, int i, @@ -1048,7 +1047,7 @@ { // if this is text node character and a special one of those, // or if this is a character from attribute value and a special one of those - if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))) + if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))) { String outputStringForChar = m_charInfo.getOutputStringForChar(ch); @@ -1399,6 +1398,7 @@ if (m_cdataTagOpen) closeCDATA(); + // the check with _escaping is a bit of a hack for XLSTC if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping)) { @@ -1421,173 +1421,82 @@ try { int i; + char ch1; int startClean; // skip any leading whitspace // don't go off the end and use a hand inlined version // of isWhitespace(ch) final int end = start + length; - int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed - // that was processed - final Writer writer = m_writer; - boolean isAllWhitespace = true; - - // process any leading whitspace - i = start; - while (i < end && isAllWhitespace) { - char ch1 = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch1)) { - // The character is supposed to be replaced by a String - // so write out the clean whitespace characters accumulated - // so far - // then the String. - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo - .getOutputStringForChar(ch1); - writer.write(outputStringForChar); - // We can't say that everything we are writing out is - // all whitespace, we just wrote out a String. - isAllWhitespace = false; - lastDirtyCharProcessed = i; // mark the last non-clean - // character processed - i++; - } else { - // The character is clean, but is it a whitespace ? - switch (ch1) { - // TODO: Any other whitespace to consider? - case CharInfo.S_SPACE: - // Just accumulate the clean whitespace - i++; - break; - case CharInfo.S_LINEFEED: - lastDirtyCharProcessed = processLineFeed(chars, i, - lastDirtyCharProcessed, writer); - i++; - break; - case CharInfo.S_CARRIAGERETURN: - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - writer.write(" "); - lastDirtyCharProcessed = i; - i++; - break; - case CharInfo.S_HORIZONAL_TAB: - // Just accumulate the clean whitespace - i++; - break; - default: - // The character was clean, but not a whitespace - // so break the loop to continue with this character - // (we don't increment index i !!) - isAllWhitespace = false; - break; + int lastDirty = start - 1; // last character that needed processing + for (i = start; + ((i < end) + && ((ch1 = chars[i]) == 0x20 + || (ch1 == 0xA && m_lineSepUse) + || ch1 == 0xD + || ch1 == 0x09)); + i++) + { + /* + * We are processing leading whitespace, but are doing the same + * processing for dirty characters here as for non-whitespace. + * + */ + if (!m_charInfo.isTextASCIIClean(ch1)) + { + lastDirty = processDirty(chars,end, i,ch1, lastDirty, true); + i = lastDirty; } } - } /* If there is some non-whitespace, mark that we may need * to preserve this. This is only important if we have indentation on. */ - if (i < end || !isAllWhitespace) + if (i < end) m_ispreserve = true; + +// int lengthClean; // number of clean characters in a row +// final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); + + final boolean isXML10 = XMLVERSION10.equals(getVersion()); + // we've skipped the leading whitespace, now deal with the rest for (; i < end; i++) { - char ch = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch)) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo.getOutputStringForChar(ch); - writer.write(outputStringForChar); - lastDirtyCharProcessed = i; - } - else { - if (ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - // Leave whitespace TAB as a real character + { + // A tight loop to skip over common clean chars + // This tight loop makes it easier for the JIT + // to optimize. + char ch2; + while (i startClean) { int lengthClean = i - startClean; @@ -1606,32 +1515,6 @@ if (m_tracer != null) super.fireCharEvent(chars, start, length); } - - private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException { - if (!m_lineSepUse - || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){ - // We are leaving the new-line alone, and it is just - // being added to the 'clean' characters, - // so the last dirty character processed remains unchanged - } - else { - writeOutCleanChars(chars, i, lastProcessed); - writer.write(m_lineSep, 0, m_lineSepLen); - lastProcessed = i; - } - return lastProcessed; - } - - private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException { - int startClean; - startClean = lastProcessed + 1; - if (startClean < i) - { - int lengthClean = i - startClean; - m_writer.write(chars, startClean, lengthClean); - } - } - /** * This method checks if a given character is between C0 or C1 range * of Control characters. @@ -1751,7 +1634,7 @@ * * @throws org.xml.sax.SAXException */ - private int accumDefaultEscape( + protected int accumDefaultEscape( Writer writer, char ch, int i, @@ -1815,15 +1698,16 @@ * to write it out as Numeric Character Reference(NCR) regardless of XML Version * being used for output document. */ - if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch)) + if (isCharacterInC0orC1Range(ch) || + (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch))) { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } else if ((!escapingNotNeeded(ch) || - ( (fromTextNode && m_charInfo.shouldMapTextChar(ch)) - || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))) + ( (fromTextNode && m_charInfo.isSpecialTextChar(ch)) + || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))) && m_elemContext.m_currentElemDepth > 0) { writer.write("&#"); @@ -2087,86 +1971,28 @@ string.getChars(0,len, m_attrBuff, 0); final char[] stringChars = m_attrBuff; - for (int i = 0; i < len;) + for (int i = 0; i < len; ) { char ch = stringChars[i]; - - if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" + if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) + { + writer.write(ch); + i++; + } + else + { // I guess the parser doesn't normalize cr/lf in attributes. -sb +// if ((CharInfo.S_CARRIAGERETURN == ch) +// && ((i + 1) < len) +// && (CharInfo.S_LINEFEED == stringChars[i + 1])) +// { +// i++; +// ch = CharInfo.S_LINEFEED; +// } + i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true); } - else { - i++; - if (0x0 <= ch && ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - writer.write(" "); - break; - case CharInfo.S_LINEFEED: - writer.write(" "); - break; - case CharInfo.S_CARRIAGERETURN: - writer.write(" "); - break; - default: - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - break; - } - } - else if (ch < 0x7F) { - // Range 0x20 through 0x7E inclusive - // Normal ASCII chars - writer.write(ch); - } - else if (ch <= 0x9F){ - // Range 0x7F through 0x9F inclusive - // More control characters - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - else if (ch == CharInfo.S_LINE_SEPARATOR) { - // LINE SEPARATOR - writer.write("
"); - } - else if (m_encodingInfo.isInEncoding(ch)) { - // If the character is in the encoding, and - // not in the normal ASCII range, we also - // just write it out - writer.write(ch); - } - else { - // This is a fallback plan, we should never get here - // but if the character wasn't previously handled - // (i.e. isn't in the encoding, etc.) then what - // should we do? We choose to write out a character ref - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - - } - } + } /** @@ -2936,14 +2762,6 @@ closeCDATA(); m_cdataTagOpen = false; } - if (m_writer != null) { - try { - m_writer.flush(); - } - catch(IOException e) { - // what? me worry? - } - } } public void setContentHandler(ContentHandler ch) diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java --- a/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java Fri Jun 29 15:20:01 2012 +0100 @@ -56,7 +56,7 @@ * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ - private CharInfo m_xmlcharInfo = + private static CharInfo m_xmlcharInfo = // new CharInfo(CharInfo.XML_ENTITIES_RESOURCE); CharInfo.getCharInfo(CharInfo.XML_ENTITIES_RESOURCE, Method.XML); @@ -329,11 +329,12 @@ /** * Before Xalan 1497, a newline char was printed out if not inside of an - * element. The whitespace is not significant if the output is standalone + * element. The whitespace is not significant is the output is standalone */ if (m_elemContext.m_currentElemDepth <= 0 && m_isStandalone) writer.write(m_lineSep, 0, m_lineSepLen); + /* * Don't write out any indentation whitespace now, * because there may be non-whitespace text after this. diff -r 0dd012bd32dd -r 411912b74c9c src/com/sun/xml/internal/stream/XMLEventReaderImpl.java --- a/src/com/sun/xml/internal/stream/XMLEventReaderImpl.java Thu Jun 28 00:42:08 2012 +0100 +++ b/src/com/sun/xml/internal/stream/XMLEventReaderImpl.java Fri Jun 29 15:20:01 2012 +0100 @@ -248,8 +248,10 @@ object = nextEvent(); }catch(XMLStreamException streamException){ fLastEvent = null ; - //xxx: what should be done in this case ? - throw new NoSuchElementException(); + //don't swallow the cause + NoSuchElementException e = new NoSuchElementException(streamException.getMessage()); + e.initCause(streamException.getCause()); + throw e; } return object; }