Mercurial > hg > openjdk > aarch64-port > jdk
changeset 8012:cdf68747b0fb
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
Reviewed-by: michaelm
author | xuelei |
---|---|
date | Thu, 29 Aug 2013 18:58:18 -0700 |
parents | 5bf4f2eeee85 |
children | 2d51653d9b4b |
files | src/share/classes/java/net/IDN.java test/java/net/IDN/UseSTD3ASCIIRules.java |
diffstat | 2 files changed, 99 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/src/share/classes/java/net/IDN.java Thu Aug 29 10:43:46 2013 -0700 +++ b/src/share/classes/java/net/IDN.java Thu Aug 29 18:58:18 2013 -0700 @@ -292,13 +292,17 @@ if (useSTD3ASCIIRules) { for (int i = 0; i < dest.length(); i++) { int c = dest.charAt(i); - if (!isLDHChar(c)) { - throw new IllegalArgumentException("Contains non-LDH characters"); + if (isNonLDHAsciiCodePoint(c)) { + throw new IllegalArgumentException( + "Contains non-LDH ASCII characters"); } } - if (dest.charAt(0) == '-' || dest.charAt(dest.length() - 1) == '-') { - throw new IllegalArgumentException("Has leading or trailing hyphen"); + if (dest.charAt(0) == '-' || + dest.charAt(dest.length() - 1) == '-') { + + throw new IllegalArgumentException( + "Has leading or trailing hyphen"); } } @@ -401,26 +405,20 @@ // // LDH stands for "letter/digit/hyphen", with characters restricted to the // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen - // <-> - // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x56..0x60, 0x7B..0x7F + // <->. + // Non LDH refers to characters in the ASCII range, but which are not + // letters, digits or the hyphen. + // + // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F // - private static boolean isLDHChar(int ch){ - // high runner case - if(ch > 0x007A){ - return false; - } - //['-' '0'..'9' 'A'..'Z' 'a'..'z'] - if((ch == 0x002D) || - (0x0030 <= ch && ch <= 0x0039) || - (0x0041 <= ch && ch <= 0x005A) || - (0x0061 <= ch && ch <= 0x007A) - ){ - return true; - } - return false; + private static boolean isNonLDHAsciiCodePoint(int ch){ + return (0x0000 <= ch && ch <= 0x002C) || + (0x002E <= ch && ch <= 0x002F) || + (0x003A <= ch && ch <= 0x0040) || + (0x005B <= ch && ch <= 0x0060) || + (0x007B <= ch && ch <= 0x007F); } - // // search dots in a string and return the index of that character; // or if there is no dots, return the length of input string
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/java/net/IDN/UseSTD3ASCIIRules.java Thu Aug 29 18:58:18 2013 -0700 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8023881 + * @summary IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode + * in IDN.toASCII + */ + +import java.net.*; + +public class UseSTD3ASCIIRules { + + public static void main(String[] args) throws Exception { + // Per Section 4.1, RFC 3490, if the UseSTD3ASCIIRules flag is set, + // then perform these checks: + // + // (a) Verify the absence of non-LDH ASCII code points; that is, the + // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. + // + // (b) Verify the absence of leading and trailing hyphen-minus; that + // is, the absence of U+002D at the beginning and end of the + // sequence. 
+ String[] illegalNames = { + "www.example.com-", + "-www.example.com", + "-www.example.com-", + "www.ex\u002Cmple.com", + "www.ex\u007Bmple.com", + "www.ex\u007Fmple.com" + }; + + String[] legalNames = { + "www.ex-ample.com", + "www.ex\u002Dmple.com", // www.ex-mple.com + "www.ex\u007Ample.com", // www.exzmple.com + "www.ex\u3042mple.com", // www.xn--exmple-j43e.com + "www.\u3042\u3044\u3046.com", // www.xn--l8jeg.com + "www.\u793A\u4F8B.com" // www.xn--fsq092h.com + }; + + for (String name : illegalNames) { + try { + System.out.println("Convering illegal IDN: " + name); + IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES); + throw new Exception( + "Expected to get IllegalArgumentException for " + name); + } catch (IllegalArgumentException iae) { + // That's the right behavior. + } + } + + for (String name : legalNames) { + System.out.println("Convering legal IDN: " + name); + System.out.println("\tThe ACE form is: " + + IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES)); + } + } +}