You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2018/03/26 22:29:12 UTC
lucene-solr:branch_7x: LUCENE-8175: un-revert "LUCENE-8122: Upgrade analysis/icu to ICU 60.2"
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x e32e78db3 -> 2dcf263b5
LUCENE-8175: un-revert "LUCENE-8122: Upgrade analysis/icu to ICU 60.2"
A new ICU version has been released that fixes the concurrency issue.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2dcf263b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2dcf263b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2dcf263b
Branch: refs/heads/branch_7x
Commit: 2dcf263b5207243f6854c0e48d2496036f678eee
Parents: e32e78d
Author: Robert Muir <rm...@apache.org>
Authored: Mon Mar 26 18:27:12 2018 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Mon Mar 26 18:28:58 2018 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
lucene/analysis/icu/src/data/uax29/Default.rbbi | 96 +++++++++++++++----
.../icu/src/data/utr30/DiacriticFolding.txt | 11 ++-
.../icu/src/data/utr30/NativeDigitFolding.txt | 10 ++
lucene/analysis/icu/src/data/utr30/nfc.txt | 13 ++-
lucene/analysis/icu/src/data/utr30/nfkc.txt | 4 +-
lucene/analysis/icu/src/data/utr30/nfkc_cf.txt | 10 +-
.../analysis/icu/segmentation/ICUTokenizer.java | 8 +-
lucene/analysis/icu/src/java/overview.html | 2 +-
.../analysis/icu/segmentation/Default.brk | Bin 36768 -> 54488 bytes
.../icu/segmentation/MyanmarSyllable.brk | Bin 20744 -> 21976 bytes
.../org/apache/lucene/analysis/icu/utr30.nrm | Bin 55184 -> 59056 bytes
.../analysis/icu/GenerateUTR30DataFiles.java | 6 +-
lucene/ivy-versions.properties | 2 +-
lucene/licenses/icu4j-59.1.jar.sha1 | 1 -
lucene/licenses/icu4j-60.2.jar.sha1 | 1 +
solr/licenses/icu4j-59.1.jar.sha1 | 1 -
solr/licenses/icu4j-60.2.jar.sha1 | 1 +
18 files changed, 129 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 740a862..4768b98 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -30,6 +30,9 @@ Other
* SOLR-10912: Add automatic patch validation. (Mano Kovacs, Steve Rowe)
+* LUCENE-8122: Upgrade analysis/icu to ICU 60.2. (Robert Muir)
+
+
======================= Lucene 7.3.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/uax29/Default.rbbi
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/uax29/Default.rbbi b/lucene/analysis/icu/src/data/uax29/Default.rbbi
index 6c6d1f9..afda68f 100644
--- a/lucene/analysis/icu/src/data/uax29/Default.rbbi
+++ b/lucene/analysis/icu/src/data/uax29/Default.rbbi
@@ -14,16 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-# This file is from ICU (with some small modifications, to avoid CJK dictionary break)
+# This file is from ICU (with some small modifications, to avoid CJK dictionary break,
+# and status code change related to that)
#
-# Copyright (C) 2002-2013, International Business Machines Corporation
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Copyright (C) 2002-2016, International Business Machines Corporation
# and others. All Rights Reserved.
#
# file: word.txt
#
# ICU Word Break Rules
# See Unicode Standard Annex #29.
-# These rules are based on UAX #29 Revision 22 for Unicode Version 6.3
+# These rules are based on UAX #29 Revision 29 for Unicode Version 9.0
+# with additions for Emoji Sequences from https://goo.gl/cluFCn
+# Plus additional characters introduced with Emoji 5, http://www.unicode.org/reports/tr51/proposed.html
#
# Note: Updates to word.txt will usually need to be merged into
# word_POSIX.txt also.
@@ -35,6 +40,7 @@
##############################################################################
!!chain;
+!!quoted_literals_only;
#
@@ -43,8 +49,9 @@
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
-$Newline = [\p{Word_Break = Newline}];
+$Newline = [\p{Word_Break = Newline} ];
$Extend = [\p{Word_Break = Extend}];
+$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
$Katakana = [\p{Word_Break = Katakana}];
@@ -57,6 +64,13 @@ $MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
+$E_Base = [\p{Word_Break = EB}];
+$E_Modifier = [\p{Word_Break = EM}];
+
+# Data for Extended Pictographic scraped from CLDR common/properties/ExtendedPictographic.txt, r13267
+$Extended_Pict = [\U0001F774-\U0001F77F\U00002700-\U00002701\U00002703-\U00002704\U0000270E\U00002710-\U00002711\U00002765-\U00002767\U0001F030-\U0001F093\U0001F094-\U0001F09F\U0001F10D-\U0001F10F\U0001F12F\U0001F16C-\U0001F16F\U0001F1AD-\U0001F1E5\U0001F260-\U0001F265\U0001F203-\U0001F20F\U0001F23C-\U0001F23F\U0001F249-\U0001F24F\U0001F252-\U0001F25F\U0001F266-\U0001F2FF\U0001F7D5-\U0001F7FF\U0001F000-\U0001F003\U0001F005-\U0001F02B\U0001F02C-\U0001F02F\U0001F322-\U0001F323\U0001F394-\U0001F395\U0001F398\U0001F39C-\U0001F39D\U0001F3F1-\U0001F3F2\U0001F3F6\U0001F4FE\U0001F53E-\U0001F548\U0001F54F\U0001F568-\U0001F56E\U0001F571-\U0001F572\U0001F57B-\U0001F586\U0001F588-\U0001F589\U0001F58E-\U0001F58F\U0001F591-\U0001F594\U0001F597-\U0001F5A3\U0001F5A6-\U0001F5A7\U0001F5A9-\U0001F5B0\U0001F5B3-\U0001F5BB\U0001F5BD-\U0001F5C1\U0001F5C5-\U0001F5D0\U0001F5D4-\U0001F5DB\U0001F5DF-\U0001F5E0\U0001F5E2\U0001F5E4-\U0001F5E7\U0001F5E9-\U0001F5EE\U0001F5F0-\U0001F5F2\U0001F5F4-\U0001F5F9\U00002605\U00002607-\U0000260D\U0000260F-\U00002610\U00002612\U00002616-\U00002617\U00002619-\U0000261C\U0000261E-\U0000261F\U00002621\U00002624-\U00002625\U00002627-\U00002629\U0000262B-\U0000262D\U00002630-\U00002637\U0000263B-\U00002647\U00002654-\U0000265F\U00002661-\U00002662\U00002664\U00002667\U00002669-\U0000267A\U0000267C-\U0000267E\U00002680-\U00002691\U00002695\U00002698\U0000269A\U0000269D-\U0000269F\U000026A2-\U000026A9\U000026AC-\U000026AF\U000026B2-\U000026BC\U000026BF-\U000026C3\U000026C6-\U000026C7\U000026C9-\U000026CD\U000026D0\U000026D2\U000026D5-\U000026E8\U000026EB-\U000026EF\U000026F6\U000026FB-\U000026FC\U000026FE-\U000026FF\U00002388\U0001FA00-\U0001FFFD\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F0AF-\U0001F0B0\U0001F0C0\U0001F0D0\U0001F0F6-\U0001F0FF\U0001F80C-\U0001F80F\U0001F848-\U0001F84F\U0001F85A-\U0001F85F\U0001F888-\U0001F88F\U0001F8AE-\U0001F8FF\U0001F900-\U0001F90B\U0001F91F\U0001F928-\U0001F92F\U0001F931-\U0001F932\U0001F94C\U0001F95F-\U0001F96B\U0001F992-\U0001F997\U0001F9D0-\U0001F9E6\U0001F90C-\U0001F90F\U0001F93F\U0001F94D-\U0001F94F\U0001F96C-\U0001F97F\U0001F998-\U0001F9BF\U0001F9C1-\U0001F9CF\U0001F9E7-\U0001F9FF\U0001F6C6-\U0001F6CA\U0001F6D3-\U0001F6D4\U0001F6E6-\U0001F6E8\U0001F6EA\U0001F6F1-\U0001F6F2\U0001F6F7-\U0001F6F8\U0001F6D5-\U0001F6DF\U0001F6ED-\U0001F6EF\U0001F6F9-\U0001F6FF];
+$EBG = [\p{Word_Break = EBG}];
+$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
@@ -83,21 +97,21 @@ $ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
# except when they appear at the beginning of a region of text.
#
# TODO: check if handling of katakana in dictionary makes rules incorrect/void
-$KatakanaEx = $Katakana ($Extend | $Format)*;
-$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format)*;
-$ALetterEx = $ALetterPlus ($Extend | $Format)*;
-$Single_QuoteEx = $Single_Quote ($Extend | $Format)*;
-$Double_QuoteEx = $Double_Quote ($Extend | $Format)*;
-$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
-$MidLetterEx = $MidLetter ($Extend | $Format)*;
-$MidNumEx = $MidNum ($Extend | $Format)*;
-$NumericEx = $Numeric ($Extend | $Format)*;
-$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
-$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format)*;
+$KatakanaEx = $Katakana ($Extend | $Format | $ZWJ)*;
+$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format | $ZWJ)*;
+$ALetterEx = $ALetterPlus ($Extend | $Format | $ZWJ)*;
+$Single_QuoteEx = $Single_Quote ($Extend | $Format | $ZWJ)*;
+$Double_QuoteEx = $Double_Quote ($Extend | $Format | $ZWJ)*;
+$MidNumLetEx = $MidNumLet ($Extend | $Format | $ZWJ)*;
+$MidLetterEx = $MidLetter ($Extend | $Format | $ZWJ)*;
+$MidNumEx = $MidNum ($Extend | $Format | $ZWJ)*;
+$NumericEx = $Numeric ($Extend | $Format | $ZWJ)*;
+$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format | $ZWJ)*;
+$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format | $ZWJ)*;
$Ideographic = [\p{Ideographic}];
-$HiraganaEx = $Hiragana ($Extend | $Format)*;
-$IdeographicEx = $Ideographic ($Extend | $Format)*;
+$HiraganaEx = $Hiragana ($Extend | $Format | $ZWJ)*;
+$IdeographicEx = $Ideographic ($Extend | $Format | $ZWJ)*;
## -------------------------------------------------
@@ -108,12 +122,17 @@ $IdeographicEx = $Ideographic ($Extend | $Format)*;
#
$CR $LF;
+# Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed.
+#
+$ZWJ ($Extended_Pict | $EmojiNRK);
+
+
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
# of a region of Text. The rule here comes into play when the start of text
# begins with a group of Format chars, or with a "word" consisting of a single
# char that is not in any of the listed word break categories followed by
# format char(s), or is not a CJK dictionary character.
-[^$CR $LF $Newline]? ($Extend | $Format)+;
+[^$CR $LF $Newline]? ($Extend | $Format | $ZWJ)+;
$NumericEx {100};
$ALetterEx {200};
@@ -123,6 +142,10 @@ $KatakanaEx {300}; # note: these status values override those from rule 5
$HiraganaEx {300}; # by virtue of being numerically larger.
$IdeographicEx {400}; #
+$E_Base ($Extend | $Format | $ZWJ)*;
+$E_Modifier ($Extend | $Format | $ZWJ)*;
+$Extended_Pict ($Extend | $Format | $ZWJ)*;
+
#
# rule 5
# Do not break between most letters.
@@ -170,9 +193,42 @@ $ExtendNumLetEx $Hebrew_Letter {200}; # (13b)
$ExtendNumLetEx $NumericEx {100}; # (13b)
$ExtendNumLetEx $KatakanaEx {300}; # (13b)
-# rule 13c
+# rule 14
+# Do not break within emoji modifier sequences
+
+($E_Base | $EBG) ($Format | $Extend | $ZWJ)* $E_Modifier;
-$Regional_IndicatorEx $Regional_IndicatorEx;
+# rules 15 - 17
+# Pairs of Regional Indicators stay together.
+# With rule chaining disabled by ^, this rule will match exactly two of them.
+# No other rule begins with a Regional_Indicator, so chaining cannot extend the match.
+#
+^$Regional_IndicatorEx $Regional_IndicatorEx;
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable {200};
+
+# Rule 999
+# Match a single code point if no other rule applies.
+.;
+
+
+## -------------------------------------------------
+
+!!safe_reverse;
+
+# rule 3
+($Extend | $Format | $ZWJ)+ .?;
+
+# rule 6
+($MidLetter | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* ($Hebrew_Letter | $ALetterPlus);
+
+# rule 7b
+$Double_Quote ($Format | $Extend | $ZWJ)* $Hebrew_Letter;
+
+
+# rule 11
+($MidNum | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* $Numeric;
+
+# rule 13c
+$Regional_Indicator ($Format | $Extend | $ZWJ)* $Regional_Indicator;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
index eb5b78e..806a4f9 100644
--- a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
+++ b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
@@ -73,12 +73,14 @@
0A4D>
0ABC>
0ACD>
+0AFD..0AFF>
0B3C>
0B4D>
0BCD>
0C4D>
0CBC>
0CCD>
+0D3B..0D3C>
0D4D>
0DCA>
0E47..0E4C>
@@ -112,10 +114,10 @@
1CD0..1CE8>
1CED>
1CF4>
-1CF8..1CF9>
+1CF7..1CF9>
1D2C..1D6A>
1DC4..1DCF>
-1DF5>
+1DF5..1DF9>
1DFD..1DFF>
1FBD>
1FBF..1FC1>
@@ -175,7 +177,12 @@ FFE3>
1163F>
116B6..116B7>
1172B>
+11A34>
+11A47>
+11A99>
11C3F>
+11D42>
+11D44..11D45>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
index fb8cf1a..707674e 100644
--- a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
+++ b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
@@ -580,6 +580,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
11C57>0037 # BHAIKSUKI DIGIT SEVEN
11C58>0038 # BHAIKSUKI DIGIT EIGHT
11C59>0039 # BHAIKSUKI DIGIT NINE
+11D50>0030 # MASARAM GONDI DIGIT ZERO
+11D51>0031 # MASARAM GONDI DIGIT ONE
+11D52>0032 # MASARAM GONDI DIGIT TWO
+11D53>0033 # MASARAM GONDI DIGIT THREE
+11D54>0034 # MASARAM GONDI DIGIT FOUR
+11D55>0035 # MASARAM GONDI DIGIT FIVE
+11D56>0036 # MASARAM GONDI DIGIT SIX
+11D57>0037 # MASARAM GONDI DIGIT SEVEN
+11D58>0038 # MASARAM GONDI DIGIT EIGHT
+11D59>0039 # MASARAM GONDI DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/utr30/nfc.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt
index 5f9b182..b41056d 100644
--- a/lucene/analysis/icu/src/data/utr30/nfc.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfc.txt
@@ -1,3 +1,5 @@
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
@@ -7,7 +9,7 @@
#
# Complete data for Unicode NFC normalization.
-* Unicode 9.0.0
+* Unicode 10.0.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
@@ -164,6 +166,7 @@
0C56:91
0CBC:7
0CCD:9
+0D3B..0D3C:9
0D4D:9
0DCA:9
0E38..0E39:103
@@ -234,6 +237,9 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
+1DF6:232
+1DF7..1DF8:228
+1DF9:220
1DFB:230
1DFC:233
1DFD:220
@@ -322,7 +328,12 @@ FE2E..FE2F:230
116B6:9
116B7:7
1172B:9
+11A34:9
+11A47:9
+11A99:9
11C3F:9
+11D42:7
+11D44..11D45:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/utr30/nfkc.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt
index f51fa5d..8b71727 100644
--- a/lucene/analysis/icu/src/data/utr30/nfkc.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt
@@ -1,3 +1,5 @@
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
@@ -11,7 +13,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
-* Unicode 9.0.0
+* Unicode 10.0.0
00A0>0020
00A8>0020 0308
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
index 7f33df5..726c5b5 100644
--- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
@@ -1,7 +1,7 @@
-# Unicode Character Database
-# Copyright (c) 1991-2016 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-# For documentation, see http://www.unicode.org/reports/tr44/
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Copyright (C) 1999-2016, International Business Machines
+# Corporation and others. All Rights Reserved.
#
# file name: nfkc_cf.txt
#
@@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
-* Unicode 9.0.0
+* Unicode 10.0.0
0041>0061
0042>0062
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
index 0941551..8b62ddb 100644
--- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
+++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
@@ -200,18 +200,18 @@ public final class ICUTokenizer extends Tokenizer {
*/
private boolean incrementTokenBuffer() {
int start = breaker.current();
- if (start == BreakIterator.DONE)
- return false; // BreakIterator exhausted
+ assert start != BreakIterator.DONE;
// find the next set of boundaries, skipping over non-tokens (rule status 0)
int end = breaker.next();
- while (start != BreakIterator.DONE && breaker.getRuleStatus() == 0) {
+ while (end != BreakIterator.DONE && breaker.getRuleStatus() == 0) {
start = end;
end = breaker.next();
}
- if (start == BreakIterator.DONE)
+ if (end == BreakIterator.DONE) {
return false; // BreakIterator exhausted
+ }
termAtt.copyBuffer(buffer, start, end - start);
offsetAtt.setOffset(correctOffset(offset + start), correctOffset(offset + end));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html
index bdace97..6fa5821 100644
--- a/lucene/analysis/icu/src/java/overview.html
+++ b/lucene/analysis/icu/src/java/overview.html
@@ -353,7 +353,7 @@ and
<h1><a name="backcompat">Backwards Compatibility</a></h1>
<p>
This module exists to provide up-to-date Unicode functionality that supports
-the most recent version of Unicode (currently 8.0). However, some users who wish
+the most recent version of Unicode (currently 10.0). However, some users who wish
for stronger backwards compatibility can restrict
{@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only
a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
index c94a023..4a9df15 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk
index c3357ef..a9d0673 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
index 1a16f3e..1c3de12 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
index 0f2bffe..042fa37 100644
--- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
+++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
@@ -62,9 +62,9 @@ import java.util.regex.Pattern;
*/
public class GenerateUTR30DataFiles {
private static final String ICU_SVN_TAG_URL
- = "http://source.icu-project.org/repos/icu/icu/tags";
- private static final String ICU_RELEASE_TAG = "release-58-1";
- private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
+ = "http://source.icu-project.org/repos/icu/tags";
+ private static final String ICU_RELEASE_TAG = "release-60-2";
+ private static final String ICU_DATA_NORM2_PATH = "icu4c/source/data/unidata/norm2";
private static final String NFC_TXT = "nfc.txt";
private static final String NFKC_TXT = "nfkc.txt";
private static final String NFKC_CF_TXT = "nfkc_cf.txt";
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 58d21d0..edd6135 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -31,7 +31,7 @@ com.fasterxml.jackson.core.version = 2.5.4
/com.googlecode.mp4parser/isoparser = 1.1.18
/com.healthmarketscience.jackcess/jackcess = 2.1.8
/com.healthmarketscience.jackcess/jackcess-encrypt = 2.1.4
-/com.ibm.icu/icu4j = 59.1
+/com.ibm.icu/icu4j = 60.2
/com.lmax/disruptor = 3.4.0
/com.pff/java-libpst = 0.8.1
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/licenses/icu4j-59.1.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/icu4j-59.1.jar.sha1 b/lucene/licenses/icu4j-59.1.jar.sha1
deleted file mode 100644
index f3f0018..0000000
--- a/lucene/licenses/icu4j-59.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-6f06e820cf4c8968bbbaae66ae0b33f6a256b57f
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/lucene/licenses/icu4j-60.2.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/icu4j-60.2.jar.sha1 b/lucene/licenses/icu4j-60.2.jar.sha1
new file mode 100644
index 0000000..e613111
--- /dev/null
+++ b/lucene/licenses/icu4j-60.2.jar.sha1
@@ -0,0 +1 @@
+e452cba3caaf93b997ff543c7246a6da74ed70f1
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/solr/licenses/icu4j-59.1.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/icu4j-59.1.jar.sha1 b/solr/licenses/icu4j-59.1.jar.sha1
deleted file mode 100644
index f3f0018..0000000
--- a/solr/licenses/icu4j-59.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-6f06e820cf4c8968bbbaae66ae0b33f6a256b57f
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2dcf263b/solr/licenses/icu4j-60.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/icu4j-60.2.jar.sha1 b/solr/licenses/icu4j-60.2.jar.sha1
new file mode 100644
index 0000000..e613111
--- /dev/null
+++ b/solr/licenses/icu4j-60.2.jar.sha1
@@ -0,0 +1 @@
+e452cba3caaf93b997ff543c7246a6da74ed70f1