Posted to pylucene-commits@lucene.apache.org by va...@apache.org on 2016/08/25 14:47:20 UTC

svn commit: r1757696 - in /lucene/pylucene/trunk/test: test_ICUFoldingFilter.py test_ICUNormalizer2Filter.py test_ICUTransformFilter.py

Author: vajda
Date: Thu Aug 25 14:47:20 2016
New Revision: 1757696

URL: http://svn.apache.org/viewvc?rev=1757696&view=rev
Log:
Test_ICU*.py passes

Modified:
    lucene/pylucene/trunk/test/test_ICUFoldingFilter.py
    lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py
    lucene/pylucene/trunk/test/test_ICUTransformFilter.py

Modified: lucene/pylucene/trunk/test/test_ICUFoldingFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUFoldingFilter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUFoldingFilter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUFoldingFilter.py Thu Aug 25 14:47:20 2016
@@ -25,7 +25,6 @@ import sys, lucene, unittest
 from BaseTokenStreamTestCase import BaseTokenStreamTestCase
 
 from org.apache.lucene.analysis import Analyzer
-from org.apache.lucene.util import Version
 from org.apache.lucene.analysis.core import WhitespaceTokenizer
 from org.apache.pylucene.analysis import PythonAnalyzer
 
@@ -37,8 +36,8 @@ class TestICUFoldingFilter(BaseTokenStre
         from lucene.ICUFoldingFilter import ICUFoldingFilter
 
         class _analyzer(PythonAnalyzer):
-            def createComponents(_self, fieldName, reader):
-                source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
+            def createComponents(_self, fieldName):
+                source = WhitespaceTokenizer()
                 return Analyzer.TokenStreamComponents(source, ICUFoldingFilter(source))
 
         a = _analyzer()
@@ -49,29 +48,29 @@ class TestICUFoldingFilter(BaseTokenStre
 
         # case folding
         self._assertAnalyzesTo(a, u"Ruß", [ "russ" ])
-    
+
         # case folding with accent removal
         self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μαιοσ" ])
         self._assertAnalyzesTo(a, u"Μάϊος", [ u"μαιοσ" ])
 
         # supplementary case folding
         self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
-    
+
         # normalization
         self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
 
         # removal of default ignorables
         self._assertAnalyzesTo(a, u"क्‍ष", [ u"कष" ])
-    
+
         # removal of latin accents (composed)
         self._assertAnalyzesTo(a, u"résumé", [ "resume" ])
-    
+
         # removal of latin accents (decomposed)
         self._assertAnalyzesTo(a, u"re\u0301sume\u0301", [ u"resume" ])
-    
+
         # fold native digits
         self._assertAnalyzesTo(a, u"৭০৬", [ "706" ])
-    
+
         # ascii-folding-filter type stuff
         self._assertAnalyzesTo(a, u"đis is cræzy", [ "dis", "is", "craezy" ])
 

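For context, these hunks migrate to Lucene's reader-less analyzer API:
createComponents() no longer receives a Reader argument, and tokenizers are
constructed without a Version constant. A minimal standalone sketch of the
resulting analyzer, assuming an initialized PyLucene VM with the ICU extras
built (the class name FoldingAnalyzer is illustrative):

    import lucene
    lucene.initVM()

    from org.apache.lucene.analysis import Analyzer
    from org.apache.lucene.analysis.core import WhitespaceTokenizer
    from org.apache.pylucene.analysis import PythonAnalyzer
    from lucene.ICUFoldingFilter import ICUFoldingFilter

    class FoldingAnalyzer(PythonAnalyzer):
        # Newer Lucene drops the Version and Reader constructor arguments;
        # the indexing framework attaches input via Tokenizer.setReader().
        def createComponents(_self, fieldName):
            source = WhitespaceTokenizer()
            return Analyzer.TokenStreamComponents(
                source, ICUFoldingFilter(source))
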
Modified: lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py Thu Aug 25 14:47:20 2016
@@ -26,7 +26,6 @@ from BaseTokenStreamTestCase import Base
 
 from org.apache.lucene.analysis import Analyzer
 from org.apache.lucene.analysis.core import WhitespaceTokenizer
-from org.apache.lucene.util import Version
 from org.apache.pylucene.analysis import PythonAnalyzer
 
 
@@ -37,8 +36,8 @@ class TestICUNormalizer2Filter(BaseToken
         from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
 
         class _analyzer(PythonAnalyzer):
-            def createComponents(_self, fieldName, reader):
-                source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
+            def createComponents(_self, fieldName):
+                source = WhitespaceTokenizer()
                 return Analyzer.TokenStreamComponents(source, ICUNormalizer2Filter(source))
 
         a = _analyzer()
@@ -49,29 +48,32 @@ class TestICUNormalizer2Filter(BaseToken
 
         # case folding
         self._assertAnalyzesTo(a, "Ruß", [ "russ" ])
-    
+
         # case folding
         self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μάϊοσ" ])
         self._assertAnalyzesTo(a, u"Μάϊος", [ u"μάϊοσ" ])
 
         # supplementary case folding
         self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
-    
+
         # normalization
         self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
 
         # removal of default ignorables
         self._assertAnalyzesTo(a, u"क्‍ष", [ u"क्ष" ])
-  
+
     def testAlternate(self):
 
         from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
 
         class analyzer(PythonAnalyzer):
             # specify nfc with decompose to get nfd
-            def tokenStream(_self, fieldName, reader):
-                return ICUNormalizer2Filter(WhitespaceTokenizer(Version.LUCENE_CURRENT, reader),
-                                            Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE))
+            def createComponents(_self, fieldName):
+                source = WhitespaceTokenizer()
+                return Analyzer.TokenStreamComponents(
+                    source, ICUNormalizer2Filter(
+                        source,
+                        Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE)))
 
         a = analyzer()
         # decompose EAcute into E + combining Acute

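The testAlternate() change above replaces the legacy tokenStream() override
with createComponents(), threading a custom Normalizer2 instance through
ICUNormalizer2Filter. A sketch of just the normalizer construction, assuming
PyICU's icu module provides Normalizer2 and UNormalizationMode2 (the test's
imports fall outside this diff):

    from icu import Normalizer2, UNormalizationMode2

    # Requesting the "nfc" data set in DECOMPOSE mode yields an NFD
    # normalizer: a composed e-acute decomposes to 'e' + U+0301.
    nfd = Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE)
    assert nfd.normalize(u"\u00e9") == u"e\u0301"
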
Modified: lucene/pylucene/trunk/test/test_ICUTransformFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUTransformFilter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUTransformFilter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUTransformFilter.py Thu Aug 25 14:47:20 2016
@@ -31,12 +31,13 @@ from org.apache.pylucene.analysis import
 
 
 class TestICUTransformFilter(BaseTokenStreamTestCase):
-  
+
     def _checkToken(self, transform, input, expected):
 
         from lucene.ICUTransformFilter import ICUTransformFilter
-        ts = ICUTransformFilter(KeywordTokenizer(StringReader(input)),
-                                transform)
+        tokenizer = KeywordTokenizer()
+        tokenizer.setReader(StringReader(input))
+        ts = ICUTransformFilter(tokenizer, transform)
         self._assertTokenStreamContents(ts, [ expected ])
 
     def _getTransliterator(self, name):
@@ -45,31 +46,31 @@ class TestICUTransformFilter(BaseTokenSt
 
     def testBasicFunctionality(self):
 
-        self._checkToken(self._getTransliterator("Traditional-Simplified"), 
+        self._checkToken(self._getTransliterator("Traditional-Simplified"),
                          u"簡化字", u"简化字")
         self._checkToken(self._getTransliterator("Katakana-Hiragana"),
                          u"ヒラガナ", u"ひらがな")
-        self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"), 
+        self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"),
                          u"アルアノリウ", u"アルアノリウ")
-        self._checkToken(self._getTransliterator("Any-Latin"), 
+        self._checkToken(self._getTransliterator("Any-Latin"),
                          u"Αλφαβητικός Κατάλογος", u"Alphabētikós Katálogos")
-        self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"), 
+        self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"),
                          u"Alphabētikós Katálogos", u"Alphabetikos Katalogos")
         self._checkToken(self._getTransliterator("Han-Latin"),
                          u"中国", u"zhōng guó")
-  
+
     def testCustomFunctionality(self):
 
-        # convert a's to b's and b's to c's        
+        # convert a's to b's and b's to c's
         rules = "a > b; b > c;"
         self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "abacadaba", "bcbcbdbcb")
-  
+
     def testCustomFunctionality2(self):
-        
-        # convert a's to b's and b's to c's        
+
+        # convert a's to b's when preceded by c, otherwise a's to d's
         rules = "c { a > b; a > d;"
         self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "caa", "cbd")
-  
+
     def testOptimizer2(self):
 
         self._checkToken(self._getTransliterator("Traditional-Simplified; Lower"),
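
The _checkToken() hunk at the top of this file shows the other half of the
reader-less API: a tokenizer now receives its input after construction via
setReader() rather than through its constructor. A standalone sketch of that
pattern, assuming an initialized PyLucene VM:

    from java.io import StringReader
    from org.apache.lucene.analysis.core import KeywordTokenizer

    tokenizer = KeywordTokenizer()
    # Input is attached post-construction; older Lucene passed the
    # Reader directly to the tokenizer constructor.
    tokenizer.setReader(StringReader(u"簡化字"))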