Posted to pylucene-commits@lucene.apache.org by va...@apache.org on 2016/08/25 14:47:20 UTC
svn commit: r1757696 - in /lucene/pylucene/trunk/test: test_ICUFoldingFilter.py test_ICUNormalizer2Filter.py test_ICUTransformFilter.py
Author: vajda
Date: Thu Aug 25 14:47:20 2016
New Revision: 1757696
URL: http://svn.apache.org/viewvc?rev=1757696&view=rev
Log:
test_ICU*.py tests pass
Modified:
lucene/pylucene/trunk/test/test_ICUFoldingFilter.py
lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py
lucene/pylucene/trunk/test/test_ICUTransformFilter.py
Modified: lucene/pylucene/trunk/test/test_ICUFoldingFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUFoldingFilter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUFoldingFilter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUFoldingFilter.py Thu Aug 25 14:47:20 2016
@@ -25,7 +25,6 @@ import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from org.apache.lucene.analysis import Analyzer
-from org.apache.lucene.util import Version
from org.apache.lucene.analysis.core import WhitespaceTokenizer
from org.apache.pylucene.analysis import PythonAnalyzer
@@ -37,8 +36,8 @@ class TestICUFoldingFilter(BaseTokenStre
from lucene.ICUFoldingFilter import ICUFoldingFilter
class _analyzer(PythonAnalyzer):
- def createComponents(_self, fieldName, reader):
- source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
+ def createComponents(_self, fieldName):
+ source = WhitespaceTokenizer()
return Analyzer.TokenStreamComponents(source, ICUFoldingFilter(source))
a = _analyzer()
@@ -49,29 +48,29 @@ class TestICUFoldingFilter(BaseTokenStre
# case folding
self._assertAnalyzesTo(a, u"Ruß", [ "russ" ])
-
+
# case folding with accent removal
self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μαιοσ" ])
self._assertAnalyzesTo(a, u"Μάϊος", [ u"μαιοσ" ])
# supplementary case folding
self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
-
+
# normalization
self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
# removal of default ignorables
self._assertAnalyzesTo(a, u"क्ष", [ u"कष" ])
-
+
# removal of latin accents (composed)
self._assertAnalyzesTo(a, u"résumé", [ "resume" ])
-
+
# removal of latin accents (decomposed)
self._assertAnalyzesTo(a, u"re\u0301sume\u0301", [ u"resume" ])
-
+
# fold native digits
self._assertAnalyzesTo(a, u"৭০৬", [ "706" ])
-
+
# ascii-folding-filter type stuff
self._assertAnalyzesTo(a, u"đis is cræzy", [ "dis", "is", "craezy" ])
Modified: lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUNormalizer2Filter.py Thu Aug 25 14:47:20 2016
@@ -26,7 +26,6 @@ from BaseTokenStreamTestCase import Base
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import WhitespaceTokenizer
-from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import PythonAnalyzer
@@ -37,8 +36,8 @@ class TestICUNormalizer2Filter(BaseToken
from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
class _analyzer(PythonAnalyzer):
- def createComponents(_self, fieldName, reader):
- source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
+ def createComponents(_self, fieldName):
+ source = WhitespaceTokenizer()
return Analyzer.TokenStreamComponents(source, ICUNormalizer2Filter(source))
a = _analyzer()
@@ -49,29 +48,32 @@ class TestICUNormalizer2Filter(BaseToken
# case folding
self._assertAnalyzesTo(a, "Ruß", [ "russ" ])
-
+
# case folding
self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μάϊοσ" ])
self._assertAnalyzesTo(a, u"Μάϊος", [ u"μάϊοσ" ])
# supplementary case folding
self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
-
+
# normalization
self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
# removal of default ignorables
self._assertAnalyzesTo(a, u"क्ष", [ u"क्ष" ])
-
+
def testAlternate(self):
from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
class analyzer(PythonAnalyzer):
# specify nfc with decompose to get nfd
- def tokenStream(_self, fieldName, reader):
- return ICUNormalizer2Filter(WhitespaceTokenizer(Version.LUCENE_CURRENT, reader),
- Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE))
+ def createComponents(_self, fieldName):
+ source = WhitespaceTokenizer()
+ return Analyzer.TokenStreamComponents(
+ source, ICUNormalizer2Filter(
+ source,
+ Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE)))
a = analyzer()
# decompose EAcute into E + combining Acute
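The testAlternate() hunk retires the long-gone tokenStream() override in favor of the same createComponents() contract, wiring the custom Normalizer2 instance into TokenStreamComponents. A hedged end-to-end sketch of that decomposing analyzer, assuming Normalizer2 and UNormalizationMode2 come from PyICU's icu module as in these tests; NFDAnalyzer is an illustrative name:

    from icu import Normalizer2, UNormalizationMode2
    from org.apache.lucene.analysis import Analyzer
    from org.apache.lucene.analysis.core import WhitespaceTokenizer
    from org.apache.lucene.analysis.tokenattributes import CharTermAttribute
    from org.apache.pylucene.analysis import PythonAnalyzer
    from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter

    class NFDAnalyzer(PythonAnalyzer):
        def createComponents(self, fieldName):
            source = WhitespaceTokenizer()
            # "nfc" data opened in DECOMPOSE mode is how ICU spells NFD
            nfd = Normalizer2.getInstance(
                None, "nfc", UNormalizationMode2.DECOMPOSE)
            return Analyzer.TokenStreamComponents(
                source, ICUNormalizer2Filter(source, nfd))

    stream = NFDAnalyzer().tokenStream("f", u"\u00e9")  # precomposed e-acute
    term = stream.addAttribute(CharTermAttribute.class_)
    stream.reset()
    while stream.incrementToken():
        assert term.toString() == u"e\u0301"            # e + combining acute
    stream.end()
    stream.close()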
Modified: lucene/pylucene/trunk/test/test_ICUTransformFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ICUTransformFilter.py?rev=1757696&r1=1757695&r2=1757696&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ICUTransformFilter.py (original)
+++ lucene/pylucene/trunk/test/test_ICUTransformFilter.py Thu Aug 25 14:47:20 2016
@@ -31,12 +31,13 @@ from org.apache.pylucene.analysis import
class TestICUTransformFilter(BaseTokenStreamTestCase):
-
+
def _checkToken(self, transform, input, expected):
from lucene.ICUTransformFilter import ICUTransformFilter
- ts = ICUTransformFilter(KeywordTokenizer(StringReader(input)),
- transform)
+ tokenizer = KeywordTokenizer()
+ tokenizer.setReader(StringReader(input))
+ ts = ICUTransformFilter(tokenizer, transform)
self._assertTokenStreamContents(ts, [ expected ])
def _getTransliterator(self, name):
@@ -45,31 +46,31 @@ class TestICUTransformFilter(BaseTokenSt
def testBasicFunctionality(self):
- self._checkToken(self._getTransliterator("Traditional-Simplified"),
+ self._checkToken(self._getTransliterator("Traditional-Simplified"),
u"簡化字", u"简化字")
self._checkToken(self._getTransliterator("Katakana-Hiragana"),
u"ヒラガナ", u"ひらがな")
- self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"),
+ self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"),
u"アルアノリウ", u"アルアノリウ")
- self._checkToken(self._getTransliterator("Any-Latin"),
+ self._checkToken(self._getTransliterator("Any-Latin"),
u"Αλφαβητικός Κατάλογος", u"Alphabētikós Katálogos")
- self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"),
+ self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"),
u"Alphabētikós Katálogos", u"Alphabetikos Katalogos")
self._checkToken(self._getTransliterator("Han-Latin"),
u"中国", u"zhōng guó")
-
+
def testCustomFunctionality(self):
- # convert a's to b's and b's to c's
+ # convert a's to b's and b's to c's
rules = "a > b; b > c;"
self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "abacadaba", "bcbcbdbcb")
-
+
def testCustomFunctionality2(self):
-
- # convert a's to b's and b's to c's
+
+ # convert a's to b's and b's to c's
rules = "c { a > b; a > d;"
self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "caa", "cbd")
-
+
def testOptimizer2(self):
self._checkToken(self._getTransliterator("Traditional-Simplified; Lower"),