You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2009/01/09 04:28:41 UTC
svn commit: r732916 [13/14] - in /lucene/pylucene/trunk: ./ java/ java/org/
java/org/osafoundation/ java/org/osafoundation/lucene/
java/org/osafoundation/lucene/analysis/
java/org/osafoundation/lucene/queryParser/
java/org/osafoundation/lucene/search/ ...
Added: lucene/pylucene/trunk/test/test_Analyzers.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Analyzers.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_Analyzers.py (added)
+++ lucene/pylucene/trunk/test/test_Analyzers.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,98 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class AnalyzersTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def _assertAnalyzesTo(self, analyzer, text, expected):
        # Tokenize `text` with `analyzer` and verify the produced terms
        # match `expected`, in order, with nothing left over.
        stream = analyzer.tokenStream("dummy", StringReader(text))
        for expectedTerm in expected:
            token = stream.next()
            self.assert_(token is not None)
            self.assertEqual(token.termText(), expectedTerm)

        # the stream must be exhausted once all expected terms are consumed
        self.assert_(not list(stream))
        stream.close()

    def testSimple(self):
        # SimpleAnalyzer lower-cases and splits on non-letter characters.
        analyzer = SimpleAnalyzer()
        for text, terms in [
            ("foo bar FOO BAR", ["foo", "bar", "foo", "bar"]),
            ("foo bar . FOO <> BAR", ["foo", "bar", "foo", "bar"]),
            ("foo.bar.FOO.BAR", ["foo", "bar", "foo", "bar"]),
            ("U.S.A.", ["u", "s", "a"]),
            ("C++", ["c"]),
            ("B2B", ["b", "b"]),
            ("2B", ["b"]),
            ("\"QUOTED\" word", ["quoted", "word"]),
        ]:
            self._assertAnalyzesTo(analyzer, text, terms)

    def testNull(self):
        # WhitespaceAnalyzer splits only on whitespace, preserving case
        # and punctuation.
        analyzer = WhitespaceAnalyzer()
        for text, terms in [
            ("foo bar FOO BAR", ["foo", "bar", "FOO", "BAR"]),
            ("foo bar . FOO <> BAR", ["foo", "bar", ".", "FOO", "<>", "BAR"]),
            ("foo.bar.FOO.BAR", ["foo.bar.FOO.BAR"]),
            ("U.S.A.", ["U.S.A."]),
            ("C++", ["C++"]),
            ("B2B", ["B2B"]),
            ("2B", ["2B"]),
            ("\"QUOTED\" word", ["\"QUOTED\"", "word"]),
        ]:
            self._assertAnalyzesTo(analyzer, text, terms)

    def testStop(self):
        # StopAnalyzer behaves like SimpleAnalyzer but also drops
        # English stop words ("a", "such", "THESE", ...).
        analyzer = StopAnalyzer()
        for text, terms in [
            ("foo bar FOO BAR", ["foo", "bar", "foo", "bar"]),
            ("foo a bar such FOO THESE BAR", ["foo", "bar", "foo", "bar"]),
        ]:
            self._assertAnalyzesTo(analyzer, text, terms)
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_Analyzers.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_Analyzers.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_Binary.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Binary.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_Binary.py (added)
+++ lucene/pylucene/trunk/test/test_Binary.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,50 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+# test PyLucene binary field
+
+from unittest import TestCase, main
+from lucene import Field
+
class BinaryTestCase(TestCase):
    """
    Test storing binary data in a PyLucene Field via an 8-bit string.
    """

    def binary(self, b):
        """
        Round-trip the byte values in `b` through a stored field and
        verify binaryValue() returns them unchanged.
        """
        c = ''.join([chr(a) for a in b])
        field = Field("bin", c, Field.Store.YES)
        v = field.binaryValue()
        # use TestCase assertions instead of a bare `assert`, which is
        # silently stripped when Python runs with -O
        self.assertEqual(c, v)
        self.assertEqual(b, [ord(a) for a in v])

    def testBinary(self):
        """Exercise a typical payload, the empty case, and NUL bytes."""
        self.binary([66, 90, 104, 57, 49, 65, 89, 38,
                     83, 89, 105, 56, 95, 75, 0, 0, 14, 215, 128])
        self.binary([])
        self.binary([0, 0, 0])
+
+
if __name__ == '__main__':
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_Binary.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_Binary.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_BooleanOr.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_BooleanOr.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_BooleanOr.py (added)
+++ lucene/pylucene/trunk/test/test_BooleanOr.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,128 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class BooleanOrTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def __init__(self, *args):

        super(BooleanOrTestCase, self).__init__(*args)

        self.FIELD_T = "T"
        self.FIELD_C = "C"

        # four term queries, all matching the single indexed document
        self.t1 = TermQuery(Term(self.FIELD_T, "files"))
        self.t2 = TermQuery(Term(self.FIELD_T, "deleting"))
        self.c1 = TermQuery(Term(self.FIELD_C, "production"))
        self.c2 = TermQuery(Term(self.FIELD_C, "optimize"))

        self.searcher = None

    def setUp(self):
        # Index one document whose T and C fields satisfy every term
        # query built in __init__.
        directory = RAMDirectory()
        writer = IndexWriter(directory, StandardAnalyzer(), True)

        doc = Document()
        doc.add(Field(self.FIELD_T,
                      "Optimize not deleting all files",
                      Field.Store.YES, Field.Index.TOKENIZED))
        doc.add(Field(self.FIELD_C,
                      "Deleted When I run an optimize in our production environment.",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)
        writer.close()

        self.searcher = IndexSearcher(directory)

    def search(self, q):
        # Hit count for `q` against the single-document index.
        return self.searcher.search(q).length()

    def testElements(self):
        # Each individual term query matches the one document.
        for query in (self.t1, self.t2, self.c1, self.c2):
            self.assertEqual(1, self.search(query))

    def testFlat(self):
        # All four terms OR'ed together in one flat boolean query.
        query = BooleanQuery()
        for sub in (self.t1, self.t2, self.c1, self.c2):
            query.add(BooleanClause(sub, BooleanClause.Occur.SHOULD))
        self.assertEqual(1, self.search(query))

    def testParenthesisMust(self):
        # (t1 OR t2) OR (+c1 +c2)
        left = BooleanQuery()
        left.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
        left.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))

        right = BooleanQuery()
        right.add(BooleanClause(self.c1, BooleanClause.Occur.MUST))
        right.add(BooleanClause(self.c2, BooleanClause.Occur.MUST))

        query = BooleanQuery()
        query.add(left, BooleanClause.Occur.SHOULD)
        query.add(right, BooleanClause.Occur.SHOULD)
        self.assertEqual(1, self.search(query))

    def testParenthesisMust2(self):
        # (t1 OR t2) +(c1 OR c2)
        left = BooleanQuery()
        left.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
        left.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))

        right = BooleanQuery()
        right.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
        right.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))

        query = BooleanQuery()
        query.add(left, BooleanClause.Occur.SHOULD)
        query.add(right, BooleanClause.Occur.MUST)
        self.assertEqual(1, self.search(query))

    def testParenthesisShould(self):
        # (t1 OR t2) OR (c1 OR c2)
        left = BooleanQuery()
        left.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
        left.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))

        right = BooleanQuery()
        right.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
        right.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))

        query = BooleanQuery()
        query.add(left, BooleanClause.Occur.SHOULD)
        query.add(right, BooleanClause.Occur.SHOULD)
        self.assertEqual(1, self.search(query))
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_BooleanOr.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_BooleanOr.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py (added)
+++ lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,83 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class BooleanPrefixQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def _asBooleanQuery(self, query):
        # Downcast the JCC-wrapped `query` to BooleanQuery, failing the
        # test if the rewrite did not produce one.
        if BooleanQuery.instance_(query):
            return BooleanQuery.cast_(query)
        self.fail('rewrite')

    def testMethod(self):
        # A rewritten PrefixQuery and the rewrite of the same query
        # wrapped in a MUST clause must expand to the same clause count.
        directory = RAMDirectory()
        categories = ["food", "foodanddrink",
                      "foodanddrinkandgoodtimes", "food and drink"]

        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        for category in categories:
            doc = Document()
            doc.add(Field("category", category,
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            writer.addDocument(doc)
        writer.close()

        reader = IndexReader.open(directory)
        query = PrefixQuery(Term("category", "foo"))
        rw1 = query.rewrite(reader)

        bq = BooleanQuery()
        bq.add(query, BooleanClause.Occur.MUST)
        rw2 = bq.rewrite(reader)

        bq1 = self._asBooleanQuery(rw1)
        bq2 = self._asBooleanQuery(rw2)

        self.assertEqual(len(bq1.getClauses()), len(bq2.getClauses()),
                         "Number of Clauses Mismatch")
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_BooleanPrefixQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_BooleanQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_BooleanQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_BooleanQuery.py (added)
+++ lucene/pylucene/trunk/test/test_BooleanQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,60 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class TestBooleanQuery(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def _buildQuery(self):
        # Two top-level SHOULD term clauses followed by one nested
        # boolean sub-query with two SHOULD term clauses of its own.
        query = BooleanQuery()
        for text in ("value1", "value2"):
            query.add(TermQuery(Term("field", text)),
                      BooleanClause.Occur.SHOULD)

        nested = BooleanQuery()
        for text in ("nestedvalue1", "nestedvalue2"):
            nested.add(TermQuery(Term("field", text)),
                       BooleanClause.Occur.SHOULD)
        query.add(nested, BooleanClause.Occur.SHOULD)

        return query

    def testEquality(self):
        # Two structurally identical queries must compare equal.
        self.assert_(self._buildQuery().equals(self._buildQuery()))
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_BooleanQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_BooleanQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_CachingWrapperFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_CachingWrapperFilter.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_CachingWrapperFilter.py (added)
+++ lucene/pylucene/trunk/test/test_CachingWrapperFilter.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,72 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class CachingWrapperFilterTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testCachingWorks(self):
        # The wrapped filter must be consulted once per reader; any
        # further bits() call on the same reader comes from the cache.
        directory = RAMDirectory()
        writer = IndexWriter(directory, StandardAnalyzer(), True)
        writer.close()

        reader = IndexReader.open(directory)

        class mockFilter(PythonFilter):
            # Records whether bits() was invoked since the last clear().
            def __init__(self):
                super(mockFilter, self).__init__()
                self._wasCalled = False

            def bits(self, reader):
                self._wasCalled = True
                return BitSet()

            def clear(self):
                self._wasCalled = False

            def wasCalled(self):
                return self._wasCalled

        mock = mockFilter()
        cacher = CachingWrapperFilter(mock)

        # first access populates the cache via the nested filter
        cacher.bits(reader)
        self.assert_(mock.wasCalled(), "first time")

        # second access must be served from the cache
        mock.clear()
        cacher.bits(reader)
        self.assert_(not mock.wasCalled(), "second time")

        reader.close()
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_CachingWrapperFilter.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_CachingWrapperFilter.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_Compress.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Compress.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_Compress.py (added)
+++ lucene/pylucene/trunk/test/test_Compress.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,68 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import Document, Field, StandardAnalyzer, IndexModifier
+
+MinSizeForCompress = 50
+
class CompressTestCase(TestCase):
    """
    Test writing fields that are stored compressed once large enough.
    """

    def addField(self, doc, name, value):
        """
        Add `value` to `doc` under `name`, compressing the stored value
        when it exceeds MinSizeForCompress characters.
        """
        if len(value) > MinSizeForCompress:
            storeFlag = Field.Store.COMPRESS
        else:
            storeFlag = Field.Store.YES

        doc.add(Field(name, value, storeFlag, Field.Index.TOKENIZED))

    def indexData(self, idx, data):
        """Index one document holding every key/value pair of `data`."""
        doc = Document()
        for key, val in data.iteritems():
            self.addField(doc, key, val)
        idx.addDocument(doc)

    def writeData(self, indexdir, data):
        """Create a fresh compound-file index in `indexdir` from `data`."""
        idx = IndexModifier(indexdir, StandardAnalyzer(), True)
        idx.setUseCompoundFile(True)
        self.indexData(idx, data)
        idx.close()

    def testCompress(self):
        # Write into a throw-away directory instead of leaving a
        # hard-coded 't' index behind in the working directory.
        import tempfile, shutil

        indexdir = tempfile.mkdtemp(prefix='test_Compress')
        try:
            data = {'uri': "/testing/dict/index/",
                    'title': "dict index example",
                    'contents': "This index uses PyLucene, and writes dict data in the index."}
            self.writeData(indexdir, data)
        finally:
            shutil.rmtree(indexdir, ignore_errors=True)
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_Compress.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_Compress.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_DocBoost.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_DocBoost.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_DocBoost.py (added)
+++ lucene/pylucene/trunk/test/test_DocBoost.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,80 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class DocBoostTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testDocBoost(self):
        # Four documents with increasing effective boost (field boost
        # times document boost) must score strictly increasingly.
        store = RAMDirectory()
        writer = IndexWriter(store, SimpleAnalyzer(), True)

        plain = Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED)
        boosted = Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED)
        boosted.setBoost(2.0)

        docs = [Document() for i in range(4)]
        docs[2].setBoost(3.0)
        docs[3].setBoost(2.0)

        docs[0].add(plain)    # effective boost = 1
        docs[1].add(boosted)  # effective boost = 2
        docs[2].add(plain)    # effective boost = 3 (doc boost)
        docs[3].add(boosted)  # effective boost = 4 (2 doc * 2 field)

        for doc in docs:
            writer.addDocument(doc)
        writer.optimize()
        writer.close()

        scores = [0.0] * 4

        class hitCollector(PythonHitCollector):
            # record each hit's score by document id
            def collect(self, doc, score):
                scores[doc] = score

        IndexSearcher(store).search(TermQuery(Term("field", "word")),
                                    hitCollector())

        # scores must strictly increase with the boost
        previous = 0.0
        for score in scores:
            self.assert_(score > previous)
            previous = score
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_DocBoost.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_DocBoost.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_FilteredQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_FilteredQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_FilteredQuery.py (added)
+++ lucene/pylucene/trunk/test/test_FilteredQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,132 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class FilteredQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def setUp(self):
        # Index four documents; the filter below only admits docs 1 and 3.
        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True)

        for text, sorter in [("one two three four five", "b"),
                             ("one two three four", "d"),
                             ("one two three y", "a"),
                             ("one two x", "c")]:
            doc = Document()
            doc.add(Field("field", text,
                          Field.Store.YES, Field.Index.TOKENIZED))
            doc.add(Field("sorter", sorter,
                          Field.Store.YES, Field.Index.TOKENIZED))
            writer.addDocument(doc)

        writer.optimize()
        writer.close()

        self.searcher = IndexSearcher(self.directory)
        self.query = TermQuery(Term("field", "three"))

        class staticFilter(PythonFilter):
            # fixed bit set: only documents 1 and 3 pass
            def bits(self, reader):
                bitset = BitSet(5)
                bitset.set(1)
                bitset.set(3)
                return bitset

        self.filter = staticFilter()

    def tearDown(self):
        # release the searcher and its backing directory
        self.searcher.close()
        self.directory.close()

    def testFilteredQuery(self):
        # "three" matches docs 0-2; the filter narrows that to doc 1.
        filteredquery = FilteredQuery(self.query, self.filter)
        hits = self.searcher.search(filteredquery)
        self.assertEqual(1, hits.length())
        self.assertEqual(1, hits.id(0))

        # same result when an explicit sort is applied
        hits = self.searcher.search(filteredquery, Sort("sorter"))
        self.assertEqual(1, hits.length())
        self.assertEqual(1, hits.id(0))

        # "one" matches all four docs; filter keeps docs 1 and 3
        filteredquery = FilteredQuery(TermQuery(Term("field", "one")),
                                      self.filter)
        hits = self.searcher.search(filteredquery)
        self.assertEqual(2, hits.length())

        # "x" matches only doc 3, which the filter admits
        filteredquery = FilteredQuery(TermQuery(Term("field", "x")),
                                      self.filter)
        hits = self.searcher.search(filteredquery)
        self.assertEqual(1, hits.length())
        self.assertEqual(3, hits.id(0))

        # "y" matches only doc 2, which the filter rejects
        filteredquery = FilteredQuery(TermQuery(Term("field", "y")),
                                      self.filter)
        hits = self.searcher.search(filteredquery)
        self.assertEqual(0, hits.length())

    def testRangeQuery(self):
        """
        This tests FilteredQuery's rewrite correctness
        """

        rq = RangeQuery(Term("sorter", "b"), Term("sorter", "d"), True)
        filteredquery = FilteredQuery(rq, self.filter)
        hits = self.searcher.search(filteredquery)
        self.assertEqual(2, hits.length())
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_FilteredQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_FilteredQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_FuzzyQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_FuzzyQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_FuzzyQuery.py (added)
+++ lucene/pylucene/trunk/test/test_FuzzyQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,141 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class FuzzyQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def _addDoc(self, text, writer):
        # Index one document with a single tokenized "field" value.
        doc = Document()
        doc.add(Field("field", text,
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)

    def testDefaultFuzziness(self):

        directory = RAMDirectory()
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        for word in ("aaaaa", "aaaab", "aaabb", "aabbb",
                     "abbbb", "bbbbb", "ddddd"):
            self._addDoc(word, writer)
        writer.optimize()
        writer.close()

        searcher = IndexSearcher(directory)

        hits = searcher.search(FuzzyQuery(Term("field", "aaaaa")))
        self.assertEqual(3, hits.length())

        # not similar enough:
        hits = searcher.search(FuzzyQuery(Term("field", "xxxxx")))
        self.assertEqual(0, hits.length())
        # edit distance to "aaaaa" = 3
        hits = searcher.search(FuzzyQuery(Term("field", "aaccc")))
        self.assertEqual(0, hits.length())

        # query identical to a word in the index:
        hits = searcher.search(FuzzyQuery(Term("field", "aaaaa")))
        self.assertEqual(3, hits.length())
        self.assertEqual(hits.doc(0).get("field"), "aaaaa")
        # default allows for up to two edits:
        self.assertEqual(hits.doc(1).get("field"), "aaaab")
        self.assertEqual(hits.doc(2).get("field"), "aaabb")

        # query similar to a word in the index:
        hits = searcher.search(FuzzyQuery(Term("field", "aaaac")))
        self.assertEqual(3, hits.length())
        self.assertEqual(hits.doc(0).get("field"), "aaaaa")
        self.assertEqual(hits.doc(1).get("field"), "aaaab")
        self.assertEqual(hits.doc(2).get("field"), "aaabb")

        hits = searcher.search(FuzzyQuery(Term("field", "ddddX")))
        self.assertEqual(1, hits.length())
        self.assertEqual(hits.doc(0).get("field"), "ddddd")

        # different field = no match:
        hits = searcher.search(FuzzyQuery(Term("anotherfield", "ddddX")))
        self.assertEqual(0, hits.length())

        searcher.close()
        directory.close()

    def testDefaultFuzzinessLong(self):

        directory = RAMDirectory()
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        for word in ("aaaaaaa", "segment"):
            self._addDoc(word, writer)
        writer.optimize()
        writer.close()

        searcher = IndexSearcher(directory)

        # not similar enough:
        hits = searcher.search(FuzzyQuery(Term("field", "xxxxx")))
        self.assertEqual(0, hits.length())
        # edit distance to "aaaaaaa" = 3: this matches because longer
        # terms are allowed a bigger difference than in
        # testDefaultFuzziness
        hits = searcher.search(FuzzyQuery(Term("field", "aaaaccc")))
        self.assertEqual(1, hits.length())
        self.assertEqual(hits.doc(0).get("field"), "aaaaaaa")

        # no match, more than half of the characters is wrong:
        hits = searcher.search(FuzzyQuery(Term("field", "aaacccc")))
        self.assertEqual(0, hits.length())

        # "student" and "stellent" are indeed similar to "segment" by default:
        hits = searcher.search(FuzzyQuery(Term("field", "student")))
        self.assertEqual(1, hits.length())
        hits = searcher.search(FuzzyQuery(Term("field", "stellent")))
        self.assertEqual(1, hits.length())

        searcher.close()
        directory.close()
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except KeyboardInterrupt:
                # let ctrl-C break out of the loop instead of being
                # swallowed by the catch-all below
                raise
            except:
                # keep looping past test failures and the SystemExit
                # raised by unittest.main()
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_FuzzyQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_FuzzyQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_Highlighter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Highlighter.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_Highlighter.py (added)
+++ lucene/pylucene/trunk/test/test_Highlighter.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,158 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase, main
+from lucene import *
+
+
class TestFormatter(PythonFormatter):
    """
    Highlight formatter that wraps scoring terms in <b> tags and reports
    every highlighted term back to the owning test case.
    """

    def __init__(self, testCase):
        super(TestFormatter, self).__init__()
        self.testCase = testCase

    def highlightTerm(self, originalText, group):
        """
        Return originalText wrapped in <b>...</b> when the token group
        scored, otherwise return it untouched.
        """
        if group.getTotalScore() > 0:
            # let the test case keep count of highlighted terms
            self.testCase.countHighlightTerm()
            return "<b>%s</b>" % originalText

        return originalText
+
+
class HighlighterTestCase(TestCase):
    """
    Unit tests ported from Java Lucene.
    2004 by Yura Smolsky ;)
    """

    # Field that is indexed, searched and highlighted throughout the tests.
    FIELD_NAME = "contents"
    # NOTE(review): there are no commas between the adjacent string literals
    # below, so Python's implicit string concatenation turns this into a
    # ONE-element list (a single indexed document, not five).  Confirm
    # against the Java original whether separate documents were intended;
    # the numHighlights == 3 assertion below may depend on the current
    # single-document behaviour, so it is flagged here rather than changed.
    texts = [ "A wicked problem is one for which each attempt to create a solution changes the understanding of the problem. Wicked problems cannot be solved in a traditional linear fashion, because the problem definition evolves as new possible solutions are considered and/or implemented."
              "Wicked problems always occur in a social context -- the wickedness of the problem reflects the diversity among the stakeholders in the problem."
              "From http://cognexus.org/id42.htm"
              "Most projects in organizations -- and virtually all technology-related projects these days -- are about wicked problems. Indeed, it is the social complexity of these problems, not their technical complexity, that overwhelms most current problem solving and project management approaches."
              "This text has a typo in referring to whicked problems" ];

    def __init__(self, *args):
        # One shared parser over the highlighted field for all tests.
        super(HighlighterTestCase, self).__init__(*args)
        self.parser = QueryParser(self.FIELD_NAME, StandardAnalyzer())

    def testSimpleHighlighter(self):
        # Smoke test with the default formatter: highlight up to two
        # 40-char fragments per hit and print them.
        self.doSearching("Wicked")
        highlighter = Highlighter(QueryScorer(self.query))
        highlighter.setTextFragmenter(SimpleFragmenter(40))
        maxNumFragmentsRequired = 2

        for i in range(0, self.hits.length()):
            text = self.hits.doc(i).get(self.FIELD_NAME)
            tokenStream = self.analyzer.tokenStream(self.FIELD_NAME,
                                                    StringReader(text))

            result = highlighter.getBestFragments(tokenStream, text,
                                                  maxNumFragmentsRequired,
                                                  "...")
            print "\t", result

        # Not sure we can assert anything here - just running to check we don't
        # throw any exceptions

    def testGetBestFragmentsSimpleQuery(self):
        # The counting TestFormatter must report exactly 3 highlighted terms.
        self.doSearching("Wicked")
        self.doStandardHighlights()
        self.assert_(self.numHighlights == 3,
                     ("Failed to find correct number of highlights, %d found"
                      %(self.numHighlights)))

    def doSearching(self, queryString):
        """
        Parse queryString, rewrite it and run the search, storing the
        query and hits on self and resetting the highlight counter.
        """
        searcher = IndexSearcher(self.ramDir)
        self.query = self.parser.parse(queryString)
        # for any multi-term queries to work (prefix, wildcard, range,
        # fuzzy etc) you must use a rewritten query!
        self.query = self.query.rewrite(self.reader)

        print "Searching for:", self.query.toString(self.FIELD_NAME)
        self.hits = searcher.search(self.query)
        self.numHighlights = 0

    def doStandardHighlights(self):
        """
        Highlight every current hit with the counting TestFormatter
        (20-char fragments, at most two per document).
        """
        formatter = TestFormatter(self)

        highlighter = Highlighter(formatter, QueryScorer(self.query))
        highlighter.setTextFragmenter(SimpleFragmenter(20))
        for i in range(0, self.hits.length()):
            text = self.hits.doc(i).get(self.FIELD_NAME)
            maxNumFragmentsRequired = 2
            fragmentSeparator = "..."
            tokenStream = self.analyzer.tokenStream(self.FIELD_NAME,
                                                    StringReader(text))

            result = highlighter.getBestFragments(tokenStream,
                                                  text,
                                                  maxNumFragmentsRequired,
                                                  fragmentSeparator)
            print "\t", result

    def countHighlightTerm(self):
        # Callback invoked by TestFormatter for each highlighted term.
        self.numHighlights += 1 # update stats used in assertions

    def setUp(self):
        # Index self.texts into a fresh in-memory directory and open a
        # reader on it for query rewriting.
        self.analyzer=StandardAnalyzer()
        self.ramDir = RAMDirectory()
        writer = IndexWriter(self.ramDir, self.analyzer, True)
        for text in self.texts:
            self.addDoc(writer, text)

        writer.optimize()
        writer.close()
        self.reader = IndexReader.open(self.ramDir)
        self.numHighlights = 0;

    def addDoc(self, writer, text):
        # Add one stored, tokenized document carrying FIELD_NAME=text
        # (with term vectors, as the highlighter tests expect).
        d = Document()
        f = Field(self.FIELD_NAME, text,
                  Field.Store.YES, Field.Index.TOKENIZED,
                  Field.TermVector.YES)
        d.add(f)
        writer.addDocument(d)
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_Highlighter.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_Highlighter.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_Not.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Not.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_Not.py (added)
+++ lucene/pylucene/trunk/test/test_Not.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,56 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class NotTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testNot(self):
        """A prohibited-only clause must exclude its matching document."""

        # index a single document that contains both terms
        directory = RAMDirectory()
        writer = IndexWriter(directory, SimpleAnalyzer(), True)

        doc = Document()
        doc.add(Field("field", "a b", Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)
        writer.optimize()
        writer.close()

        # "a NOT b" must not match: the only document also contains "b"
        searcher = IndexSearcher(directory)
        parser = QueryParser("field", SimpleAnalyzer())
        query = parser.parse("a NOT b")

        self.assertEqual(0, searcher.search(query).length())
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_Not.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_Not.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py (added)
+++ lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,54 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class PerFieldAnalyzerTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testPerField(self):
        """The wrapper must dispatch to the per-field analyzer."""

        # default analyzer keeps case; "special" is mapped to the
        # lowercasing SimpleAnalyzer
        text = "Qwerty"
        wrapper = PerFieldAnalyzerWrapper(WhitespaceAnalyzer())
        wrapper.addAnalyzer("special", SimpleAnalyzer())

        stream = wrapper.tokenStream("field", StringReader(text))
        self.assertEqual("Qwerty", stream.next().termText(),
                         "WhitespaceAnalyzer does not lowercase")

        stream = wrapper.tokenStream("special", StringReader(text))
        self.assertEqual("qwerty", stream.next().termText(),
                         "SimpleAnalyzer lowercases")
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PerFieldAnalyzerWrapper.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PhraseQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PhraseQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PhraseQuery.py (added)
+++ lucene/pylucene/trunk/test/test_PhraseQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,271 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class PhraseQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene

    Most tests run against a single document, "one two three four five",
    and exercise PhraseQuery's slop and term-order behaviour.
    """

    def setUp(self):
        # One-document index over a whitespace-tokenized "field".
        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True)

        doc = Document()
        doc.add(Field("field", "one two three four five",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)

        writer.optimize()
        writer.close()

        self.searcher = IndexSearcher(self.directory)
        # fresh query per test; slop defaults to 0 (exact phrase)
        self.query = PhraseQuery()

    def tearDown(self):
        # release the searcher and its backing RAM directory
        self.searcher.close()
        self.directory.close()

    def testNotCloseEnough(self):
        # "one" and "five" are three positions apart; slop 2 is not enough.
        self.query.setSlop(2)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "five"))
        hits = self.searcher.search(self.query)
        self.assertEqual(0, hits.length())

    def testBarelyCloseEnough(self):
        # Slop 3 exactly covers the three intervening moves.
        self.query.setSlop(3)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "five"))
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length())

    def testExact(self):
        """
        Ensures slop of 0 works for exact matches, but not reversed
        """

        # slop is zero by default
        self.query.add(Term("field", "four"))
        self.query.add(Term("field", "five"))
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length(), "exact match")

        self.query = PhraseQuery()
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        hits = self.searcher.search(self.query)
        self.assertEqual(0, hits.length(), "reverse not exact")

    def testSlop1(self):
        """Slop of 1 allows in-order matches but not reversed terms."""

        # Ensures slop of 1 works with terms in order.
        self.query.setSlop(1)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "two"))
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length(), "in order")

        # Ensures slop of 1 does not work for phrases out of order
        # must be at least 2.
        self.query = PhraseQuery()
        self.query.setSlop(1)
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        hits = self.searcher.search(self.query)
        self.assertEqual(0, hits.length(), "reversed, slop not 2 or more")

    def testOrderDoesntMatter(self):
        """
        As long as slop is at least 2, terms can be reversed
        """

        self.query.setSlop(2) # must be at least two for reverse order match
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length(), "just sloppy enough")

        self.query = PhraseQuery()
        self.query.setSlop(2)
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "one"))
        hits = self.searcher.search(self.query)
        self.assertEqual(0, hits.length(), "not sloppy enough")

    # NOTE(review): method name has a typo ("Muliple"); left unchanged
    # since unittest discovers it by its "test" prefix regardless.
    def testMulipleTerms(self):
        """
        slop is the total number of positional moves allowed
        to line up a phrase
        """

        self.query.setSlop(2)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "five"))
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length(), "two total moves")

        self.query = PhraseQuery()
        self.query.setSlop(5) # it takes six moves to match this phrase
        self.query.add(Term("field", "five"))
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "one"))
        hits = self.searcher.search(self.query)
        self.assertEqual(0, hits.length(), "slop of 5 not close enough")

        self.query.setSlop(6)
        hits = self.searcher.search(self.query)
        self.assertEqual(1, hits.length(), "slop of 6 just right")

    def testPhraseQueryWithStopAnalyzer(self):
        # Uses its own index so StopAnalyzer can drop stop words ("the",
        # "are") at indexing time.
        directory = RAMDirectory()
        stopAnalyzer = StopAnalyzer()
        writer = IndexWriter(directory, stopAnalyzer, True)
        doc = Document()
        doc.add(Field("field", "the stop words are here",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)
        writer.close()

        searcher = IndexSearcher(directory)

        # valid exact phrase query
        query = PhraseQuery()
        query.add(Term("field","stop"))
        query.add(Term("field","words"))
        hits = searcher.search(query)
        self.assertEqual(1, hits.length())

        # currently StopAnalyzer does not leave "holes", so this matches.
        query = PhraseQuery()
        query.add(Term("field", "words"))
        query.add(Term("field", "here"))
        hits = searcher.search(query)
        self.assertEqual(1, hits.length())

        searcher.close()

    def testPhraseQueryInConjunctionScorer(self):
        # PhraseQuery combined with a TermQuery inside a BooleanQuery to
        # exercise the conjunction scorer; two separate indexes are built.
        directory = RAMDirectory()
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)

        doc = Document()
        doc.add(Field("source", "marketing info",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("contents", "foobar",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        doc.add(Field("source", "marketing info",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        writer.optimize()
        writer.close()

        searcher = IndexSearcher(directory)

        # the phrase alone matches both documents ...
        phraseQuery = PhraseQuery()
        phraseQuery.add(Term("source", "marketing"))
        phraseQuery.add(Term("source", "info"))
        hits = searcher.search(phraseQuery)
        self.assertEqual(2, hits.length())

        # ... but only one of them also contains "foobar"
        termQuery = TermQuery(Term("contents","foobar"))
        booleanQuery = BooleanQuery()
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        hits = searcher.search(booleanQuery)
        self.assertEqual(1, hits.length())

        searcher.close()

        # second index: re-created from scratch (create=True) with three
        # "contents" documents
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        doc = Document()
        doc.add(Field("contents", "map entry woo",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("contents", "woo map entry",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("contents", "map foobarword entry woo",
                      Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        writer.optimize()
        writer.close()

        searcher = IndexSearcher(directory)

        termQuery = TermQuery(Term("contents", "woo"))
        phraseQuery = PhraseQuery()
        phraseQuery.add(Term("contents", "map"))
        phraseQuery.add(Term("contents", "entry"))

        # "woo" occurs in all three docs; the exact phrase "map entry"
        # only in the two where the terms are adjacent
        hits = searcher.search(termQuery)
        self.assertEqual(3, hits.length())
        hits = searcher.search(phraseQuery)
        self.assertEqual(2, hits.length())

        # the conjunction result is the same regardless of clause order
        booleanQuery = BooleanQuery()
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        hits = searcher.search(booleanQuery)
        self.assertEqual(2, hits.length())

        booleanQuery = BooleanQuery()
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        hits = searcher.search(booleanQuery)
        self.assertEqual(2, hits.length())

        searcher.close()
        directory.close()
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PhraseQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PhraseQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PositionIncrement.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PositionIncrement.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PositionIncrement.py (added)
+++ lucene/pylucene/trunk/test/test_PositionIncrement.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,130 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class PositionIncrementTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testSetPosition(self):
        """
        Index tokens with hand-set position increments and check which
        exact phrase queries match across the resulting gap and overlap.
        """

        class _analyzer(PythonAnalyzer):
            # Analyzer whose stream emits the fixed TOKENS below with the
            # matching INCREMENTS, ignoring the reader contents entirely
            # (the indexed field text "bogus" is never tokenized).
            def tokenStream(self, fieldName, reader):
                class _tokenStream(PythonTokenStream):
                    def __init__(self):
                        super(_tokenStream, self).__init__()
                        # Accumulating the increments yields positions
                        # 0, 2, 3, 3, 4: a hole before "3" (increment 2)
                        # and "4" stacked on the same position as "3"
                        # (increment 0).
                        self.TOKENS = ["1", "2", "3", "4", "5"]
                        self.INCREMENTS = [1, 2, 1, 0, 1]
                        self.i = 0
                    def next(self):
                        # one token per call; None signals end of stream
                        if self.i == len(self.TOKENS):
                            return None
                        # start/end offsets are just the token index here
                        t = Token(self.TOKENS[self.i], self.i, self.i)
                        t.setPositionIncrement(self.INCREMENTS[self.i])
                        self.i += 1
                        return t
                    def reset(self):
                        pass
                    def close(self):
                        pass
                return _tokenStream()

        analyzer = _analyzer()

        # Index one document through the custom analyzer.
        store = RAMDirectory()
        writer = IndexWriter(store, analyzer, True)
        d = Document()
        d.add(Field("field", "bogus",
                    Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(d)
        writer.optimize()
        writer.close()

        searcher = IndexSearcher(store)

        # "1 2": positions 0 and 2 -- the hole breaks the exact phrase
        q = PhraseQuery()
        q.add(Term("field", "1"))
        q.add(Term("field", "2"))
        hits = searcher.search(q)
        self.assertEqual(0, hits.length())

        # "2 3": positions 2 and 3 are adjacent
        q = PhraseQuery()
        q.add(Term("field", "2"))
        q.add(Term("field", "3"))
        hits = searcher.search(q)
        self.assertEqual(1, hits.length())

        # "3 4": both at position 3 -- "4" is not one past "3"
        q = PhraseQuery()
        q.add(Term("field", "3"))
        q.add(Term("field", "4"))
        hits = searcher.search(q)
        self.assertEqual(0, hits.length())

        # "2 4": positions 2 and 3 are adjacent thanks to the overlap
        q = PhraseQuery()
        q.add(Term("field", "2"))
        q.add(Term("field", "4"))
        hits = searcher.search(q)
        self.assertEqual(1, hits.length())

        # "3 5": positions 3 and 4 are adjacent
        q = PhraseQuery()
        q.add(Term("field", "3"))
        q.add(Term("field", "5"))
        hits = searcher.search(q)
        self.assertEqual(1, hits.length())

        # "4 5": positions 3 and 4 are adjacent
        q = PhraseQuery()
        q.add(Term("field", "4"))
        q.add(Term("field", "5"))
        hits = searcher.search(q)
        self.assertEqual(1, hits.length())

        # "2 5": positions 2 and 4 -- too far apart for slop 0
        q = PhraseQuery()
        q.add(Term("field", "2"))
        q.add(Term("field", "5"))
        hits = searcher.search(q)
        self.assertEqual(0, hits.length())

    def testIncrementingPositions(self):
        """
        Basic analyzer behavior should be to keep sequential terms in one
        increment from one another.
        """

        analyzer = WhitespaceAnalyzer()
        ts = analyzer.tokenStream("field",
                                  StringReader(u"one two three four five"))

        for token in ts:
            self.assertEqual(1, token.getPositionIncrement(),
                             token.termText())
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PositionIncrement.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PositionIncrement.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PrefixFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PrefixFilter.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PrefixFilter.py (added)
+++ lucene/pylucene/trunk/test/test_PrefixFilter.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,113 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class PrefixFilterTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testPrefixFilter(self):
        """PrefixFilter + ConstantScoreQuery over a small category index."""

        directory = RAMDirectory()
        categories = ["/Computers/Linux",
                      "/Computers/Mac/One",
                      "/Computers/Mac/Two",
                      "/Computers/Windows"]

        # index one untokenized document per category path
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        for category in categories:
            doc = Document()
            doc.add(Field("category", category,
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            writer.addDocument(doc)
        writer.close()

        searcher = IndexSearcher(directory)

        # (field, prefix, expected hit count, failure message) -- covers
        # the middle, start and end of the indexed values, non-existent
        # prefixes sorting before/inside/after the range, the zero-length
        # prefix, and a field that was never indexed.
        cases = [
            ("category", "/Computers", 4,
             "All documents in /Computers category and below"),
            ("category", "/Computers/Mac", 2, "Two in /Computers/Mac"),
            ("category", "/Computers/Linux", 1, "One in /Computers/Linux"),
            ("category", "/Computers/Windows", 1,
             "One in /Computers/Windows"),
            ("category", "/Computers/ObsoleteOS", 0, "no documents"),
            ("category", "/Computers/AAA", 0, "no documents"),
            ("category", "/Computers/ZZZ", 0, "no documents"),
            ("category", "", 4, "all documents"),
            ("nonexistantfield", "/Computers", 0, "no documents"),
        ]

        for field, prefix, expected, message in cases:
            query = ConstantScoreQuery(PrefixFilter(Term(field, prefix)))
            self.assertEqual(expected, searcher.search(query).length(),
                             message)
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PrefixFilter.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PrefixFilter.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PrefixQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PrefixQuery.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PrefixQuery.py (added)
+++ lucene/pylucene/trunk/test/test_PrefixQuery.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,62 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+from unittest import TestCase, main
+from lucene import *
+
+
class PrefixQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testPrefixQuery(self):
        """PrefixQuery over untokenized hierarchical category values."""

        directory = RAMDirectory()

        # index one untokenized document per category path
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
        for category in ["/Computers", "/Computers/Mac", "/Computers/Windows"]:
            doc = Document()
            doc.add(Field("category", category,
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            writer.addDocument(doc)
        writer.close()

        searcher = IndexSearcher(directory)

        query = PrefixQuery(Term("category", "/Computers"))
        self.assertEqual(3, searcher.search(query).length(),
                         "All documents in /Computers category and below")

        query = PrefixQuery(Term("category", "/Computers/Mac"))
        self.assertEqual(1, searcher.search(query).length(),
                         "One in /Computers/Mac")
+
+
if __name__ == "__main__":
    # Command-line entry point.  The JVM must be initialized before any
    # lucene class is used, hence the import and initVM call here.
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        # '-loop' re-runs the suite forever (e.g. to watch for JVM/memory
        # leaks).  The bare except is deliberate: it also swallows the
        # SystemExit that unittest's main() raises after every run, so the
        # loop keeps going.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PrefixQuery.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PrefixQuery.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/test/test_PyLucene.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PyLucene.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/test/test_PyLucene.py (added)
+++ lucene/pylucene/trunk/test/test_PyLucene.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,334 @@
+# ====================================================================
+# Copyright (c) 2004-2008 Open Source Applications Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+import os, shutil
+
+from unittest import TestCase, main
+from lucene import *
+
+
class Test_PyLuceneBase(object):
    """
    Shared indexing/search test battery.

    Subclasses supply the storage backend by overriding openStore()
    (and usually closeStore()); this base class is deliberately not a
    TestCase so it only runs through concrete subclasses.
    """

    def getAnalyzer(self):
        # Default analyzer; subclasses may override.
        return StandardAnalyzer()

    def openStore(self):
        # Subclasses must return an opened Directory instance.
        # Bug fix: `raise NotImplemented` raised a non-exception object,
        # which itself fails with a TypeError; NotImplementedError is the
        # correct signal for an abstract method.
        raise NotImplementedError

    def closeStore(self, store, *args):
        # Default is a no-op; subclasses owning real resources override.
        pass

    def getWriter(self, store, analyzer, create=False):
        writer = IndexWriter(store, analyzer, create)
        #writer.setUseCompoundFile(False)
        return writer

    def getReader(self, store, analyzer):
        pass

    def test_indexDocument(self):
        # Index one document with a mix of stored/unstored and
        # tokenized/untokenized fields.
        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.TOKENIZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.UN_TOKENIZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.TOKENIZED))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

    def test_indexDocumentWithText(self):
        # Same as test_indexDocument, plus a Reader-valued body field.
        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.TOKENIZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.UN_TOKENIZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.TOKENIZED))

            body_text = "hello world" * 20
            body_reader = StringReader(body_text)
            doc.add(Field("content", body_reader))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

    def test_indexDocumentWithUnicodeText(self):
        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.TOKENIZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.UN_TOKENIZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.TOKENIZED))

            # NOTE: using a unicode body caused problems at some point,
            # which seems very odd since the python type is the same
            # regardless after doing the encode
            body_text = u"hello world" * 20
            body_reader = StringReader(body_text)
            doc.add(Field("content", body_reader))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

    def test_searchDocuments(self):
        # Index first, then verify the document is findable by title.
        self.test_indexDocument()

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store)
            query = QueryParser("title", self.getAnalyzer()).parse("value")
            hits = searcher.search(query)
            self.assertEqual(hits.length(), 1)
        finally:
            self.closeStore(store, searcher)

    def test_searchDocumentsWithMultiField(self):
        """
        Tests searching with MultiFieldQueryParser
        """

        self.test_indexDocument()
        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store)
            SHOULD = BooleanClause.Occur.SHOULD
            query = MultiFieldQueryParser.parse("value",
                                                ["title", "docid"],
                                                [SHOULD, SHOULD],
                                                self.getAnalyzer())
            hits = searcher.search(query)
            # Consistency: use assertEqual like the rest of the class
            # (assertEquals is a deprecated alias).
            self.assertEqual(1, hits.length())
        finally:
            self.closeStore(store, searcher)

    def test_removeDocument(self):
        # Delete a single document by its ephemeral internal id.
        self.test_indexDocument()

        store = self.openStore()
        searcher = None
        reader = None

        try:
            searcher = IndexSearcher(store)
            query = TermQuery(Term("docid", str(1)))
            hits = searcher.search(query)
            self.assertEqual(hits.length(), 1)
            # be careful with ids, they are ephemeral
            docid = hits.id(0)

            reader = IndexReader.open(store)
            reader.deleteDocument(docid)
        finally:
            self.closeStore(store, searcher, reader)

        # Re-open and confirm the document is gone.
        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store)
            query = TermQuery(Term("docid", str(1)))
            hits = searcher.search(query)
            self.assertEqual(hits.length(), 0)
        finally:
            self.closeStore(store, searcher)

    def test_removeDocuments(self):
        # Delete by term instead of by internal document id.
        self.test_indexDocument()

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store)
            reader.deleteDocuments(Term('docid', str(1)))
        finally:
            self.closeStore(store, reader)

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store)
            query = QueryParser("title", self.getAnalyzer()).parse("value")
            hits = searcher.search(query)
            self.assertEqual(hits.length(), 0)
        finally:
            self.closeStore(store, searcher)

    def test_FieldEnumeration(self):
        # Add two more documents, then walk the 'docid' term enumeration.
        self.test_indexDocument()

        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()

            writer = self.getWriter(store, analyzer, False)
            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.TOKENIZED))
            doc.add(Field("docid", str(2),
                          Field.Store.NO, Field.Index.UN_TOKENIZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.TOKENIZED))

            writer.addDocument(doc)

            # Second document intentionally has no 'docid' field.
            doc = Document()
            doc.add(Field("owner", "unittester",
                          Field.Store.NO, Field.Index.UN_TOKENIZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.TOKENIZED))
            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store)
            term_enum = reader.terms(Term("docid", ''))
            docids = []

            while term_enum.term().field() == 'docid':
                docids.append(term_enum.term().text())
                term_enum.next()
            self.assertEqual(len(docids), 2)
        finally:
            # NOTE(review): closing the store/reader here appears to have
            # been disabled deliberately — confirm why before re-enabling.
            pass
            #self.closeStore(store, reader)

    def test_getFieldNames(self):
        # Verify the sets of field names reported for each FieldOption.
        self.test_indexDocument()

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store)
            fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL)
            for fieldName in fieldNames:
                self.assert_(fieldName in ['owner', 'search_name', 'meta_words',
                                           'docid', 'title'])

            # 'search_name' is Field.Index.NO, so it must not show up in
            # the indexed field sets below.
            fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED)
            for fieldName in fieldNames:
                self.assert_(fieldName in ['owner', 'meta_words',
                                           'docid', 'title'])

            fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
            for fieldName in fieldNames:
                self.assert_(fieldName in ['owner', 'meta_words',
                                           'docid', 'title'])
        finally:
            store = self.closeStore(store, reader)
+
+
class Test_PyLuceneWithFSStore(TestCase, Test_PyLuceneBase):
    """
    Runs the shared Test_PyLuceneBase battery against a filesystem-backed
    FSDirectory rooted at STORE_DIR.
    """

    # Index directory, created fresh in setUp and removed in tearDown.
    STORE_DIR = "testrepo"

    def setUp(self):

        if not os.path.exists(self.STORE_DIR):
            os.mkdir(self.STORE_DIR)

    def tearDown(self):

        if os.path.exists(self.STORE_DIR):
            shutil.rmtree(self.STORE_DIR)

    def openStore(self):

        return FSDirectory.getDirectory(self.STORE_DIR, False)

    def closeStore(self, store, *args):
        # Close any helpers (writer/reader/searcher) first, then the
        # store itself.
        for arg in args:
            if arg is not None:
                arg.close()

        store.close()
+
+
class Test_PyLuceneWithMMapStore(Test_PyLuceneWithFSStore):
    """
    Same battery as the FSDirectory variant, but using a memory-mapped
    MMapDirectory over the same STORE_DIR.
    """

    def openStore(self):

        return MMapDirectory.getDirectory(self.STORE_DIR, False)
+
+
+
if __name__ == "__main__":
    import sys, lucene
    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except:
                # Deliberate: in -loop mode keep re-running the suite even
                # after a failing run (presumably used to shake out
                # JVM/binding instability — confirm before tightening).
                pass
    else:
        main()
Propchange: lucene/pylucene/trunk/test/test_PyLucene.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/test/test_PyLucene.py
------------------------------------------------------------------------------
svn:mime-type = text/plain