changeset 368:6e444b892c99

7183760: DocumentBuilder.parse(String uri) is not IPv6 enabled Summary: removing the hack of using escapeNonUSAscii. this is the same patch as 7166896 for 7u8. Reviewed-by: psandoz, lancea
author joehw
date Thu, 12 Jul 2012 21:06:52 -0700
parents 9cb8be5e6119
children df4092828362
files src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java
diffstat 1 files changed, 1 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- a/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java	Tue Jul 03 18:24:03 2012 -0700
+++ b/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java	Thu Jul 12 21:06:52 2012 -0700
@@ -602,7 +602,7 @@
         if (reader == null) {
             stream = xmlInputSource.getByteStream();
             if (stream == null) {
-                URL location = new URL(escapeNonUSAscii(expandedSystemId));
+                URL location = new URL(expandedSystemId);
                 URLConnection connect = location.openConnection();
                 if (!(connect instanceof HttpURLConnection)) {
                     stream = connect.getInputStream();
@@ -2586,76 +2586,6 @@
 
     } // fixURI(String):String
 
-    /**
-     * Escape invalid URI characters.
-     *
-     * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like),
-     * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of
-     * %-encoded UTF-8 octets).
-     *
-     * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that
-     * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case,
-     * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter,
-     * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment
-     * identifier or it might be an invalid '#'.
-     *
-     * Given that the former is vastly more likely than the latter in each case (most users are familiar with
-     * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses
-     * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit.
-     *
-     * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI.
-     */
-    protected static String escapeNonUSAscii(String str) {
-        if (str == null) {
-            return str;
-        }
-        int len = str.length(), i=0, ch;
-        for (; i < len; i++) {
-            ch = str.charAt(i);
-            // if it's not an ASCII 7 character, break here, and use UTF-8 encoding
-            if (ch >= 128)
-                break;
-        }
-
-        // we saw no non-ascii-7 character
-        if (i == len) {
-            return str;
-        }
-
-        // get UTF-8 bytes for the string
-        StringBuffer buffer = new StringBuffer();
-        byte[] bytes = null;
-        byte b;
-        try {
-            bytes = str.getBytes("UTF-8");
-        } catch (java.io.UnsupportedEncodingException e) {
-            // should never happen
-            return str;
-        }
-
-        len = bytes.length;
-
-        // for each byte
-        for (i = 0; i < len; i++) {
-            b = bytes[i];
-            // for non-ascii character: make it positive, then escape
-            if (b < 0) {
-                ch = b + 256;
-                buffer.append('%');
-                buffer.append(gHexChs[ch >> 4]);
-                buffer.append(gHexChs[ch & 0xf]);
-            }
-            else if (b != '%' && b != '#' && gNeedEscaping[b]) {
-                buffer.append('%');
-                buffer.append(gAfterEscaping1[b]);
-                buffer.append(gAfterEscaping2[b]);
-            }
-            else {
-                buffer.append((char)b);
-            }
-        }
-        return buffer.toString();
-    }
 
     //
     // Package visible methods