# HG changeset patch # User joehw # Date 1342152412 25200 # Node ID 6e444b892c99a4d76ef111a648903f7f7d29c8a3 # Parent 9cb8be5e6119fc97bd74cf7d78c0d789f698524a 7183760: DocumentBuilder.parse(String uri) is not IPv6 enabled Summary: removing the hack of using escapeNonUSAscii. this is the same patch as 7166896 for 7u8. Reviewed-by: psandoz, lancea diff -r 9cb8be5e6119 -r 6e444b892c99 src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java --- a/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Tue Jul 03 18:24:03 2012 -0700 +++ b/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Thu Jul 12 21:06:52 2012 -0700 @@ -602,7 +602,7 @@ if (reader == null) { stream = xmlInputSource.getByteStream(); if (stream == null) { - URL location = new URL(escapeNonUSAscii(expandedSystemId)); + URL location = new URL(expandedSystemId); URLConnection connect = location.openConnection(); if (!(connect instanceof HttpURLConnection)) { stream = connect.getInputStream(); @@ -2586,76 +2586,6 @@ } // fixURI(String):String - /** - * Escape invalid URI characters. - * - * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like), - * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of - * %-encoded UTF-8 octets). - * - * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that - * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case, - * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter, - * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment - * identifier or it might be an invalid '#'. - * - * Given that the former is vastly more likely than the latter in each case (most users are familiar with - * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses - * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit. - * - * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI. - */ - protected static String escapeNonUSAscii(String str) { - if (str == null) { - return str; - } - int len = str.length(), i=0, ch; - for (; i < len; i++) { - ch = str.charAt(i); - // if it's not an ASCII 7 character, break here, and use UTF-8 encoding - if (ch >= 128) - break; - } - - // we saw no non-ascii-7 character - if (i == len) { - return str; - } - - // get UTF-8 bytes for the string - StringBuffer buffer = new StringBuffer(); - byte[] bytes = null; - byte b; - try { - bytes = str.getBytes("UTF-8"); - } catch (java.io.UnsupportedEncodingException e) { - // should never happen - return str; - } - - len = bytes.length; - - // for each byte - for (i = 0; i < len; i++) { - b = bytes[i]; - // for non-ascii character: make it positive, then escape - if (b < 0) { - ch = b + 256; - buffer.append('%'); - buffer.append(gHexChs[ch >> 4]); - buffer.append(gHexChs[ch & 0xf]); - } - else if (b != '%' && b != '#' && gNeedEscaping[b]) { - buffer.append('%'); - buffer.append(gAfterEscaping1[b]); - buffer.append(gAfterEscaping2[b]); - } - else { - buffer.append((char)b); - } - } - return buffer.toString(); - } // // Package visible methods