changeset 7901:b25397be1e6a

8041791: String.toLowerCase regression - violates Unicode standard
Reviewed-by: peytoia
Contributed-by: jeremymanson@google.com
author naoto
date Fri, 15 Aug 2014 12:46:02 -0700
parents aca7751320fc
children b64f4a90cee7
files src/share/classes/java/lang/ConditionalSpecialCasing.java src/share/classes/java/lang/String.java test/java/lang/String/ToLowerCase.java
diffstat 3 files changed, 19 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/classes/java/lang/ConditionalSpecialCasing.java	Tue Aug 12 19:02:51 2014 +0400
+++ b/src/share/classes/java/lang/ConditionalSpecialCasing.java	Fri Aug 15 12:46:02 2014 -0700
@@ -62,6 +62,7 @@
         //# Conditional mappings
         //# ================================================================================
         new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
+        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
 
         //# ================================================================================
         //# Locale-sensitive mappings
@@ -77,8 +78,8 @@
 
         //# ================================================================================
         //# Turkish and Azeri
-//      new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
-//      new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
+        new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
+        new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
         new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
         new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
         new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
@@ -148,21 +149,25 @@
 
     private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
         HashSet set = (HashSet)entryTable.get(new Integer(src.codePointAt(index)));
+        char[] ret = null;
 
         if (set != null) {
             Iterator iter = set.iterator();
             String currentLang = locale.getLanguage();
             while (iter.hasNext()) {
                 Entry entry = (Entry)iter.next();
-                String conditionLang= entry.getLanguage();
+                String conditionLang = entry.getLanguage();
                 if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
                         isConditionMet(src, index, locale, entry.getCondition())) {
-                    return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase());
+                    ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
+                    if (conditionLang != null) {
+                        break;
+                    }
                 }
             }
         }
 
-        return null;
+        return ret;
     }
 
     private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
--- a/src/share/classes/java/lang/String.java	Tue Aug 12 19:02:51 2014 +0400
+++ b/src/share/classes/java/lang/String.java	Fri Aug 15 12:46:02 2014 -0700
@@ -2459,7 +2459,9 @@
             } else {
                 srcCount = 1;
             }
-            if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
+            if (localeDependent ||
+                srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA
+                srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
             } else {
                 lowerChar = Character.toLowerCase(srcChar);
--- a/test/java/lang/String/ToLowerCase.java	Tue Aug 12 19:02:51 2014 +0400
+++ b/test/java/lang/String/ToLowerCase.java	Fri Aug 15 12:46:02 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /*
     @test
-    @bug 4217441 4533872 4900935 8020037
+    @bug 4217441 4533872 4900935 8020037 8041791
     @summary toLowerCase should lower-case Greek Sigma correctly depending
              on the context (final/non-final).  Also it should handle
              Locale specific (lt, tr, and az) lowercasings and supplementary
@@ -72,8 +72,10 @@
         // I-dot tests
         test("\u0130", turkish, "i");
         test("\u0130", az, "i");
-        test("\u0130", lt, "i");
-        test("\u0130", Locale.US, "i");
+        test("\u0130", lt, "\u0069\u0307");
+        test("\u0130", Locale.US, "\u0069\u0307");
+        test("\u0130", Locale.JAPAN, "\u0069\u0307");
+        test("\u0130", Locale.ROOT, "\u0069\u0307");
 
         // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
         test("I\u0307", turkish, "i");