You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/05/25 17:55:44 UTC

[35/44] lucene-solr git commit: LUCENE-7841: Normalize ґ to г in Ukrainian analyzer

LUCENE-7841: Normalize ґ to г in Ukrainian analyzer


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/effd3766
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/effd3766
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/effd3766

Branch: refs/heads/jira/solr-8668
Commit: effd3766e7fc501a04a6cea08a4c803fa7197768
Parents: 10d3aa2
Author: Dawid Weiss <dw...@apache.org>
Authored: Thu May 25 14:04:28 2017 +0200
Committer: Dawid Weiss <dw...@apache.org>
Committed: Thu May 25 14:09:36 2017 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                                          | 4 ++++
 .../lucene/analysis/uk/UkrainianMorfologikAnalyzer.java     | 2 ++
 .../apache/lucene/analysis/uk/TestUkrainianAnalyzer.java    | 9 ++++++++-
 lucene/ivy-versions.properties                              | 2 +-
 lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1  | 1 -
 lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1  | 1 +
 6 files changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fe1be6e..a45c11a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -109,6 +109,10 @@ Other
   from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
   Uwe Schindler, Dawid Weiss)
 
+Improvements
+
+* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
+
 ======================= Lucene 6.6.0 =======================
 
 New Features

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
index 6955fe3..cd502fd 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
@@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
     // ignored characters
     builder.add("\u0301", "");
     builder.add("\u00AD", "");
+    builder.add("ґ", "г");
+    builder.add("Ґ", "Г");
 
     NormalizeCharMap normMap = builder.build();
     reader = new MappingCharFilter(normMap, reader);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
index 15b247d..e9a0102 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
@@ -52,10 +52,17 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
   public void testCapsTokenStream() throws Exception {
     Analyzer a = new UkrainianMorfologikAnalyzer();
     assertAnalyzesTo(a, "Цих Чайковського і Ґете.",
-                     new String[] { "Чайковське", "Чайковський", "Ґете" });
+                     new String[] { "Чайковське", "Чайковський", "Гете" });
     a.close();
   }
 
+  public void testCharNormalization() throws Exception {
+    Analyzer a = new UkrainianMorfologikAnalyzer();
+    assertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
+                     new String[] { "Гюмрі", "Гюмрі" });
+    a.close();
+  }
+  
   public void testSampleSentence() throws Exception {
     Analyzer a = new UkrainianMorfologikAnalyzer();
     assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 3c45b2c..3318c21 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -276,7 +276,7 @@ org.slf4j.version = 1.7.7
 /org.tukaani/xz = 1.5
 /rome/rome = 1.0
 
-ua.net.nlp.morfologik-ukrainian-search.version = 3.7.5
+ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6
 /ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
 
 /xerces/xercesImpl = 2.9.1

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1
deleted file mode 100644
index 8794e71..0000000
--- a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2b8c8fbd740164d220ca7d18605b8b2092e163e9

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/effd3766/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1
new file mode 100644
index 0000000..6f0b86c
--- /dev/null
+++ b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1
@@ -0,0 +1 @@
+8d2c4bf006f59227bcba8885b4602b3a8b5bd799