You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2009/10/13 09:38:11 UTC
svn commit: r824626 [2/2] - in /lucene/pylucene/trunk: ./
java/org/apache/pylucene/analysis/ java/org/apache/pylucene/search/
java/org/apache/pylucene/store/ test/
Modified: lucene/pylucene/trunk/test/test_PyLucene.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PyLucene.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_PyLucene.py (original)
+++ lucene/pylucene/trunk/test/test_PyLucene.py Tue Oct 13 07:38:10 2009
@@ -30,7 +30,8 @@
pass
def getWriter(self, store, analyzer, create=False):
- writer = IndexWriter(store, analyzer, create)
+ writer = IndexWriter(store, analyzer, create,
+ IndexWriter.MaxFieldLength.LIMITED)
#writer.setUseCompoundFile(False)
return writer
@@ -47,15 +48,15 @@
doc = Document()
doc.add(Field("title", "value of testing",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc.add(Field("docid", str(1),
- Field.Store.NO, Field.Index.UN_TOKENIZED))
+ Field.Store.NO, Field.Index.NOT_ANALYZED))
doc.add(Field("owner", "unittester",
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("search_name", "wisdom",
Field.Store.YES, Field.Index.NO))
doc.add(Field("meta_words", "rabbits are beautiful",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
writer.addDocument(doc)
finally:
@@ -71,15 +72,15 @@
doc = Document()
doc.add(Field("title", "value of testing",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc.add(Field("docid", str(1),
- Field.Store.NO, Field.Index.UN_TOKENIZED))
+ Field.Store.NO, Field.Index.NOT_ANALYZED))
doc.add(Field("owner", "unittester",
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("search_name", "wisdom",
Field.Store.YES, Field.Index.NO))
doc.add(Field("meta_words", "rabbits are beautiful",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
body_text = "hello world" * 20
body_reader = StringReader(body_text)
@@ -99,15 +100,15 @@
doc = Document()
doc.add(Field("title", "value of testing",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc.add(Field("docid", str(1),
- Field.Store.NO, Field.Index.UN_TOKENIZED))
+ Field.Store.NO, Field.Index.NOT_ANALYZED))
doc.add(Field("owner", "unittester",
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("search_name", "wisdom",
Field.Store.YES, Field.Index.NO))
doc.add(Field("meta_words", "rabbits are beautiful",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
# using a unicode body cause problems, which seems very odd
# since the python type is the same regardless affter doing
@@ -127,10 +128,10 @@
store = self.openStore()
searcher = None
try:
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
query = QueryParser("title", self.getAnalyzer()).parse("value")
- hits = searcher.search(query)
- self.assertEqual(hits.length(), 1)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, 1)
finally:
self.closeStore(store, searcher)
@@ -143,14 +144,14 @@
store = self.openStore()
searcher = None
try:
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
SHOULD = BooleanClause.Occur.SHOULD
query = MultiFieldQueryParser.parse("value",
["title", "docid"],
[SHOULD, SHOULD],
self.getAnalyzer())
- hits = searcher.search(query)
- self.assertEquals(1, hits.length())
+ topDocs = searcher.search(query, 50)
+ self.assertEquals(1, topDocs.totalHits)
finally:
self.closeStore(store, searcher)
@@ -163,14 +164,14 @@
reader = None
try:
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
query = TermQuery(Term("docid", str(1)))
- hits = searcher.search(query)
- self.assertEqual(hits.length(), 1)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, 1)
# be careful with ids they are ephemeral
- docid = hits.id(0)
+ docid = topDocs.scoreDocs[0].doc
- reader = IndexReader.open(store)
+ reader = IndexReader.open(store, False)
reader.deleteDocument(docid)
finally:
self.closeStore(store, searcher, reader)
@@ -178,10 +179,10 @@
store = self.openStore()
searcher = None
try:
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
query = TermQuery(Term("docid", str(1)))
- hits = searcher.search(query)
- self.assertEqual(hits.length(), 0)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, 0)
finally:
self.closeStore(store, searcher)
@@ -192,7 +193,7 @@
store = self.openStore()
reader = None
try:
- reader = IndexReader.open(store)
+ reader = IndexReader.open(store, False)
reader.deleteDocuments(Term('docid', str(1)))
finally:
self.closeStore(store, reader)
@@ -200,10 +201,10 @@
store = self.openStore()
searcher = None
try:
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
query = QueryParser("title", self.getAnalyzer()).parse("value")
- hits = searcher.search(query)
- self.assertEqual(hits.length(), 0)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, 0)
finally:
self.closeStore(store, searcher)
@@ -219,25 +220,25 @@
writer = self.getWriter(store, analyzer, False)
doc = Document()
doc.add(Field("title", "value of testing",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc.add(Field("docid", str(2),
- Field.Store.NO, Field.Index.UN_TOKENIZED))
+ Field.Store.NO, Field.Index.NOT_ANALYZED))
doc.add(Field("owner", "unittester",
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("search_name", "wisdom",
Field.Store.YES, Field.Index.NO))
doc.add(Field("meta_words", "rabbits are beautiful",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("owner", "unittester",
- Field.Store.NO, Field.Index.UN_TOKENIZED))
+ Field.Store.NO, Field.Index.NOT_ANALYZED))
doc.add(Field("search_name", "wisdom",
Field.Store.YES, Field.Index.NO))
doc.add(Field("meta_words", "rabbits are beautiful",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
writer.addDocument(doc)
finally:
self.closeStore(store, writer)
@@ -245,7 +246,7 @@
store = self.openStore()
reader = None
try:
- reader = IndexReader.open(store)
+ reader = IndexReader.open(store, True)
term_enum = reader.terms(Term("docid", ''))
docids = []
@@ -264,7 +265,7 @@
store = self.openStore()
reader = None
try:
- reader = IndexReader.open(store)
+ reader = IndexReader.open(store, True)
fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL)
for fieldName in fieldNames:
self.assert_(fieldName in ['owner', 'search_name', 'meta_words',
@@ -299,7 +300,7 @@
def openStore(self):
- return FSDirectory.getDirectory(self.STORE_DIR, False)
+ return SimpleFSDirectory(File(self.STORE_DIR))
def closeStore(self, store, *args):
@@ -314,13 +315,13 @@
def openStore(self):
- return MMapDirectory.getDirectory(self.STORE_DIR, False)
+ return MMapDirectory(File(self.STORE_DIR))
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_PyLuceneThread.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PyLuceneThread.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_PyLuceneThread.py (original)
+++ lucene/pylucene/trunk/test/test_PyLuceneThread.py Tue Oct 13 07:38:10 2009
@@ -27,20 +27,21 @@
self.classLoader = Thread.currentThread().getContextClassLoader()
self.directory = RAMDirectory()
- writer = IndexWriter(self.directory, StandardAnalyzer(), True)
+ writer = IndexWriter(self.directory, StandardAnalyzer(), True,
+ IndexWriter.MaxFieldLength.LIMITED)
doc1 = Document()
doc2 = Document()
doc3 = Document()
doc4 = Document()
doc1.add(Field("field", "one",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc2.add(Field("field", "two",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc3.add(Field("field", "three",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
doc4.add(Field("field", "one",
- Field.Store.YES, Field.Index.TOKENIZED))
+ Field.Store.YES, Field.Index.ANALYZED))
writer.addDocument(doc1)
writer.addDocument(doc2)
@@ -97,13 +98,13 @@
getVMEnv().attachCurrentThread()
time.sleep(0.5)
- searcher = IndexSearcher(self.directory)
+ searcher = IndexSearcher(self.directory, True)
try:
self.query = PhraseQuery()
for word, count in self.testData[0:runCount]:
query = TermQuery(Term("field", word))
- result = searcher.search(query)
- self.assertEqual(result.length(), count)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, count)
self.lock.acquire()
self.totalQueries += 1
@@ -114,7 +115,7 @@
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_PythonDirectory.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PythonDirectory.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_PythonDirectory.py (original)
+++ lucene/pylucene/trunk/test/test_PythonDirectory.py Tue Oct 13 07:38:10 2009
@@ -231,7 +231,7 @@
if __name__ == "__main__":
import sys, lucene
- env = lucene.initVM(lucene.CLASSPATH)
+ env = lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_PythonQueryParser.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PythonQueryParser.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_PythonQueryParser.py (original)
+++ lucene/pylucene/trunk/test/test_PythonQueryParser.py Tue Oct 13 07:38:10 2009
@@ -56,7 +56,7 @@
if __name__ == "__main__":
import sys
- initVM(CLASSPATH)
+ initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_RangeFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_RangeFilter.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_RangeFilter.py (original)
+++ lucene/pylucene/trunk/test/test_RangeFilter.py Tue Oct 13 07:38:10 2009
@@ -51,22 +51,23 @@
def build(self):
# build an index
- writer = IndexWriter(self.index, SimpleAnalyzer(), True)
+ writer = IndexWriter(self.index, SimpleAnalyzer(), True,
+ IndexWriter.MaxFieldLength.LIMITED)
seed(101)
for d in xrange(self.minId, self.maxId + 1):
doc = Document()
doc.add(Field("id", self.pad(d),
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
r = randint(~self.MAX_INT, self.MAX_INT)
if self.maxR < r:
self.maxR = r
if r < self.minR:
self.minR = r
doc.add(Field("rand", self.pad(r),
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("body", "body",
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
writer.addDocument(doc)
writer.optimize()
@@ -99,7 +100,7 @@
def testRangeFilterId(self):
- reader = IndexReader.open(self.index);
+ reader = IndexReader.open(self.index, True);
search = IndexSearcher(reader)
medId = ((self.maxId - self.minId) / 2)
@@ -117,89 +118,89 @@
# test id, bounded on both ends
result = search.search(q, RangeFilter("id", minIP, maxIP,
- True, True))
- self.assertEqual(numDocs, result.length(), "find all")
+ True, True), 50)
+ self.assertEqual(numDocs, result.totalHits, "find all")
result = search.search(q, RangeFilter("id", minIP, maxIP,
- True, False))
- self.assertEqual(numDocs - 1, result.length(), "all but last")
+ True, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "all but last")
result = search.search(q, RangeFilter("id", minIP, maxIP,
- False, True))
- self.assertEqual(numDocs - 1, result.length(), "all but first")
+ False, True), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "all but first")
result = search.search(q, RangeFilter("id", minIP, maxIP,
- False, False))
- self.assertEqual(numDocs - 2, result.length(), "all but ends")
+ False, False), 50)
+ self.assertEqual(numDocs - 2, result.totalHits, "all but ends")
result = search.search(q, RangeFilter("id", medIP, maxIP,
- True, True))
- self.assertEqual(1 + self.maxId - medId, result.length(), "med and up")
+ True, True), 50)
+ self.assertEqual(1 + self.maxId - medId, result.totalHits, "med and up")
result = search.search(q, RangeFilter("id", minIP, medIP,
- True, True))
- self.assertEqual(1 + medId - self.minId, result.length(), "up to med")
+ True, True), 50)
+ self.assertEqual(1 + medId - self.minId, result.totalHits, "up to med")
# unbounded id
result = search.search(q, RangeFilter("id", minIP, None,
- True, False))
- self.assertEqual(numDocs, result.length(), "min and up")
+ True, False), 50)
+ self.assertEqual(numDocs, result.totalHits, "min and up")
result = search.search(q, RangeFilter("id", None, maxIP,
- False, True))
- self.assertEqual(numDocs, result.length(), "max and down")
+ False, True), 50)
+ self.assertEqual(numDocs, result.totalHits, "max and down")
result = search.search(q, RangeFilter("id", minIP, None,
- False, False))
- self.assertEqual(numDocs - 1, result.length(), "not min, but up")
+ False, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "not min, but up")
result = search.search(q, RangeFilter("id", None, maxIP,
- False, False))
- self.assertEqual(numDocs - 1, result.length(), "not max, but down")
+ False, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "not max, but down")
result = search.search(q, RangeFilter("id",medIP, maxIP,
- True, False))
- self.assertEqual(self.maxId - medId, result.length(), "med and up, not max")
+ True, False), 50)
+ self.assertEqual(self.maxId - medId, result.totalHits, "med and up, not max")
result = search.search(q, RangeFilter("id", minIP, medIP,
- False, True))
- self.assertEqual(medId - self.minId, result.length(), "not min, up to med")
+ False, True), 50)
+ self.assertEqual(medId - self.minId, result.totalHits, "not min, up to med")
# very small sets
result = search.search(q, RangeFilter("id", minIP, minIP,
- False, False))
- self.assertEqual(0, result.length(), "min, min, False, False")
+ False, False), 50)
+ self.assertEqual(0, result.totalHits, "min, min, False, False")
result = search.search(q, RangeFilter("id", medIP, medIP,
- False, False))
- self.assertEqual(0, result.length(), "med, med, False, False")
+ False, False), 50)
+ self.assertEqual(0, result.totalHits, "med, med, False, False")
result = search.search(q, RangeFilter("id", maxIP, maxIP,
- False, False))
- self.assertEqual(0, result.length(), "max, max, False, False")
+ False, False), 50)
+ self.assertEqual(0, result.totalHits, "max, max, False, False")
result = search.search(q, RangeFilter("id", minIP, minIP,
- True, True))
- self.assertEqual(1, result.length(), "min, min, True, True")
+ True, True), 50)
+ self.assertEqual(1, result.totalHits, "min, min, True, True")
result = search.search(q, RangeFilter("id", None, minIP,
- False, True))
- self.assertEqual(1, result.length(), "nul, min, False, True")
+ False, True), 50)
+ self.assertEqual(1, result.totalHits, "nul, min, False, True")
result = search.search(q, RangeFilter("id", maxIP, maxIP,
- True, True))
- self.assertEqual(1, result.length(), "max, max, True, True")
+ True, True), 50)
+ self.assertEqual(1, result.totalHits, "max, max, True, True")
result = search.search(q, RangeFilter("id", maxIP, None,
- True, False))
- self.assertEqual(1, result.length(), "max, nul, True, True")
+ True, False), 50)
+ self.assertEqual(1, result.totalHits, "max, nul, True, True")
result = search.search(q, RangeFilter("id", medIP, medIP,
- True, True))
- self.assertEqual(1, result.length(), "med, med, True, True")
+ True, True), 50)
+ self.assertEqual(1, result.totalHits, "med, med, True, True")
def testRangeFilterRand(self):
- reader = IndexReader.open(self.index)
+ reader = IndexReader.open(self.index, True)
search = IndexSearcher(reader)
minRP = self.pad(self.minR)
@@ -214,69 +215,69 @@
# test extremes, bounded on both ends
result = search.search(q, RangeFilter("rand", minRP, maxRP,
- True, True))
- self.assertEqual(numDocs, result.length(), "find all")
+ True, True), 50)
+ self.assertEqual(numDocs, result.totalHits, "find all")
result = search.search(q, RangeFilter("rand", minRP, maxRP,
- True, False))
- self.assertEqual(numDocs - 1, result.length(), "all but biggest")
+ True, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "all but biggest")
result = search.search(q, RangeFilter("rand", minRP, maxRP,
- False, True))
- self.assertEqual(numDocs - 1, result.length(), "all but smallest")
+ False, True), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "all but smallest")
result = search.search(q, RangeFilter("rand", minRP, maxRP,
- False, False))
- self.assertEqual(numDocs - 2, result.length(), "all but extremes")
+ False, False), 50)
+ self.assertEqual(numDocs - 2, result.totalHits, "all but extremes")
# unbounded
result = search.search(q, RangeFilter("rand", minRP, None,
- True, False))
- self.assertEqual(numDocs, result.length(), "smallest and up")
+ True, False), 50)
+ self.assertEqual(numDocs, result.totalHits, "smallest and up")
result = search.search(q, RangeFilter("rand", None, maxRP,
- False, True))
- self.assertEqual(numDocs, result.length(), "biggest and down")
+ False, True), 50)
+ self.assertEqual(numDocs, result.totalHits, "biggest and down")
result = search.search(q, RangeFilter("rand", minRP, None,
- False, False))
- self.assertEqual(numDocs - 1, result.length(), "not smallest, but up")
+ False, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "not smallest, but up")
result = search.search(q, RangeFilter("rand", None, maxRP,
- False, False))
- self.assertEqual(numDocs - 1, result.length(), "not biggest, but down")
+ False, False), 50)
+ self.assertEqual(numDocs - 1, result.totalHits, "not biggest, but down")
# very small sets
result = search.search(q, RangeFilter("rand", minRP, minRP,
- False, False))
- self.assertEqual(0, result.length(), "min, min, False, False")
+ False, False), 50)
+ self.assertEqual(0, result.totalHits, "min, min, False, False")
result = search.search(q, RangeFilter("rand", maxRP, maxRP,
- False, False))
- self.assertEqual(0, result.length(), "max, max, False, False")
+ False, False), 50)
+ self.assertEqual(0, result.totalHits, "max, max, False, False")
result = search.search(q, RangeFilter("rand", minRP, minRP,
- True, True))
- self.assertEqual(1, result.length(), "min, min, True, True")
+ True, True), 50)
+ self.assertEqual(1, result.totalHits, "min, min, True, True")
result = search.search(q, RangeFilter("rand", None, minRP,
- False, True))
- self.assertEqual(1, result.length(), "nul, min, False, True")
+ False, True), 50)
+ self.assertEqual(1, result.totalHits, "nul, min, False, True")
result = search.search(q, RangeFilter("rand", maxRP, maxRP,
- True, True))
- self.assertEqual(1, result.length(), "max, max, True, True")
+ True, True), 50)
+ self.assertEqual(1, result.totalHits, "max, max, True, True")
result = search.search(q, RangeFilter("rand", maxRP, None,
- True, False))
- self.assertEqual(1, result.length(), "max, nul, True, True")
+ True, False), 50)
+ self.assertEqual(1, result.totalHits, "max, nul, True, True")
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_RangeQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_RangeQuery.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_RangeQuery.py (original)
+++ lucene/pylucene/trunk/test/test_RangeQuery.py Tue Oct 13 07:38:10 2009
@@ -16,14 +16,15 @@
from lucene import *
-class PrefixQueryTestCase(TestCase):
+class RangeQueryTestCase(TestCase):
"""
Unit tests ported from Java Lucene
"""
def _initializeIndex(self, values):
- writer = IndexWriter(self.dir, WhitespaceAnalyzer(), True)
+ writer = IndexWriter(self.dir, WhitespaceAnalyzer(), True,
+ IndexWriter.MaxFieldLength.LIMITED)
for value in values:
self._insertDoc(writer, value)
writer.close()
@@ -33,16 +34,17 @@
doc = Document()
doc.add(Field("id", "id" + str(self.docCount),
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
doc.add(Field("content", content,
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
writer.addDocument(doc)
self.docCount += 1
def _addDoc(self, content):
- writer = IndexWriter(self.dir, WhitespaceAnalyzer(), False)
+ writer = IndexWriter(self.dir, WhitespaceAnalyzer(), False,
+ IndexWriter.MaxFieldLength.LIMITED)
self._insertDoc(writer, content)
writer.close()
@@ -57,23 +59,23 @@
Term("content", "C"),
False)
self._initializeIndex(["A", "B", "C", "D"])
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(1, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(1, topDocs.totalHits,
"A,B,C,D, only B in range")
searcher.close()
self._initializeIndex(["A", "B", "D"])
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(1, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(1, topDocs.totalHits,
"A,B,D, only B in range")
searcher.close()
self._addDoc("C")
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(1, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(1, topDocs.totalHits,
"C added, still only B in range")
searcher.close()
@@ -84,30 +86,30 @@
True)
self._initializeIndex(["A", "B", "C", "D"])
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(3, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(3, topDocs.totalHits,
"A,B,C,D - A,B,C in range")
searcher.close()
self._initializeIndex(["A", "B", "D"])
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(2, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(2, topDocs.totalHits,
"A,B,D - A and B in range")
searcher.close()
self._addDoc("C")
- searcher = IndexSearcher(self.dir)
- hits = searcher.search(query)
- self.assertEqual(3, hits.length(),
+ searcher = IndexSearcher(self.dir, True)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(3, topDocs.totalHits,
"C added - A, B, C in range")
searcher.close()
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_RegexQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_RegexQuery.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_RegexQuery.py (original)
+++ lucene/pylucene/trunk/test/test_RegexQuery.py Tue Oct 13 07:38:10 2009
@@ -24,13 +24,14 @@
directory = RAMDirectory()
- writer = IndexWriter(directory, SimpleAnalyzer(), True)
+ writer = IndexWriter(directory, SimpleAnalyzer(), True,
+ IndexWriter.MaxFieldLength.LIMITED)
doc = Document()
- doc.add(Field(self.FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.TOKENIZED))
+ doc.add(Field(self.FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED))
writer.addDocument(doc)
writer.optimize()
writer.close()
- self.searcher = IndexSearcher(directory)
+ self.searcher = IndexSearcher(directory, True)
def tearDown(self):
@@ -44,7 +45,7 @@
query = RegexQuery(self.newTerm(regex))
- return len(self.searcher.search(query))
+ return self.searcher.search(query, 50).totalHits
def spanRegexQueryNrHits(self, regex1, regex2, slop, ordered):
@@ -52,7 +53,7 @@
srq2 = SpanRegexQuery(self.newTerm(regex2))
query = SpanNearQuery([srq1, srq2], slop, ordered)
- return len(self.searcher.search(query))
+ return self.searcher.search(query, 50).totalHits
def testRegex1(self):
@@ -79,7 +80,7 @@
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_RewriteQuery.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_RewriteQuery.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_RewriteQuery.py (original)
+++ lucene/pylucene/trunk/test/test_RewriteQuery.py Tue Oct 13 07:38:10 2009
@@ -24,9 +24,10 @@
def setUp(self):
store = lucene.RAMDirectory()
- writer = lucene.IndexWriter(store, lucene.StandardAnalyzer(), True)
+ writer = lucene.IndexWriter(store, lucene.StandardAnalyzer(), True,
+ lucene.IndexWriter.MaxFieldLength.LIMITED)
writer.close()
- self.reader = lucene.IndexSearcher(store).getIndexReader()
+ self.reader = lucene.IndexSearcher(store, True).getIndexReader()
self.term = lucene.Term('all', 'foo')
def testQuery(self):
@@ -38,7 +39,7 @@
if __name__ == "__main__":
- env = lucene.initVM(lucene.CLASSPATH)
+ env = lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_RuntimeException.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_RuntimeException.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_RuntimeException.py (original)
+++ lucene/pylucene/trunk/test/test_RuntimeException.py Tue Oct 13 07:38:10 2009
@@ -27,5 +27,5 @@
if __name__ == '__main__':
import lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
unittest.main()
Modified: lucene/pylucene/trunk/test/test_Similarity.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Similarity.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_Similarity.py (original)
+++ lucene/pylucene/trunk/test/test_Similarity.py Tue Oct 13 07:38:10 2009
@@ -51,23 +51,22 @@
def testSimilarity(self):
store = RAMDirectory()
- writer = IndexWriter(store, SimpleAnalyzer(), True)
+ writer = IndexWriter(store, SimpleAnalyzer(), True,
+ IndexWriter.MaxFieldLength.LIMITED)
writer.setSimilarity(SimpleSimilarity())
d1 = Document()
- d1.add(Field("field", "a c",
- Field.Store.YES, Field.Index.TOKENIZED))
+ d1.add(Field("field", "a c", Field.Store.YES, Field.Index.ANALYZED))
d2 = Document()
- d2.add(Field("field", "a b c",
- Field.Store.YES, Field.Index.TOKENIZED))
+ d2.add(Field("field", "a b c", Field.Store.YES, Field.Index.ANALYZED))
writer.addDocument(d1)
writer.addDocument(d2)
writer.optimize()
writer.close()
- searcher = IndexSearcher(store)
+ searcher = IndexSearcher(store, True)
searcher.setSimilarity(SimpleSimilarity())
a = Term("field", "a")
@@ -105,7 +104,7 @@
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_Sort.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Sort.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_Sort.py (original)
+++ lucene/pylucene/trunk/test/test_Sort.py Tue Oct 13 07:38:10 2009
@@ -99,7 +99,7 @@
writer.addDocument(doc)
# writer.optimize()
writer.close()
- s = IndexSearcher(indexStore)
+ s = IndexSearcher(indexStore, True)
s.setDefaultFieldSortScoring(True, True)
return s
@@ -135,7 +135,7 @@
# print writer.getSegmentCount()
writer.close()
- return IndexSearcher(indexStore)
+ return IndexSearcher(indexStore, True)
def getRandomNumberString(self, num, low, high):
@@ -586,7 +586,7 @@
sort.setSort(SortField("i18n", Locale("da", "dk")))
self._assertMatches(multiSearcher, self.queryY, sort, "BJDHF")
- def testCustomSorts(self):
+ def _testCustomSorts(self):
"""
test a custom sort function
"""
@@ -721,11 +721,11 @@
# a filter that only allows through the first hit
class filter(PythonFilter):
- def bits(_self, reader):
+ def getDocIdSet(_self, reader):
bs = BitSet(reader.maxDoc())
bs.set(0, reader.maxDoc())
bs.set(docs1.scoreDocs[0].doc)
- return bs
+ return DocIdBitSet(bs)
filt = filter()
@@ -1168,7 +1168,7 @@
if __name__ == "__main__":
import sys, lucene
- env = lucene.initVM(lucene.CLASSPATH)
+ env = lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_StopAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_StopAnalyzer.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_StopAnalyzer.py (original)
+++ lucene/pylucene/trunk/test/test_StopAnalyzer.py Tue Oct 13 07:38:10 2009
@@ -33,35 +33,59 @@
stream = self.stop.tokenStream("test", reader)
self.assert_(stream is not None)
- try:
- for token in stream:
- self.assert_(token.termText() not in self.inValidTokens)
- except Exception, e:
- self.fail(str(e))
+ termAtt = stream.getAttribute(TermAttribute.class_)
+
+ while stream.incrementToken():
+ self.assert_(termAtt.term() not in self.inValidTokens)
def testStopList(self):
- stopWordsSet = []
- stopWordsSet.append("good")
- stopWordsSet.append("test")
- stopWordsSet.append("analyzer")
+ stopWordsSet = ["good", "test", "analyzer"]
newStop = StopAnalyzer(stopWordsSet)
reader = StringReader("This is a good test of the english stop analyzer")
stream = newStop.tokenStream("test", reader)
self.assert_(stream is not None)
+ termAtt = stream.getAttribute(TermAttribute.class_)
+ posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)
+
+ while stream.incrementToken():
+ text = termAtt.term()
+ self.assert_(text not in stopWordsSet)
+ # by default stop tokenizer does not apply increments.
+ self.assertEqual(1, posIncrAtt.getPositionIncrement())
+
+ def testStopListPositions(self):
+
+ defaultEnable = StopFilter.getEnablePositionIncrementsDefault()
+ StopFilter.setEnablePositionIncrementsDefault(True)
+
try:
- for token in stream:
- text = token.termText()
+ stopWordsSet = ["good", "test", "analyzer"]
+ newStop = StopAnalyzer(stopWordsSet)
+ reader = StringReader("This is a good test of the english stop analyzer with positions")
+ expectedIncr = [ 1, 1, 1, 3, 1, 1, 1, 2, 1]
+ stream = newStop.tokenStream("test", reader)
+ self.assert_(stream is not None)
+
+ i = 0
+ termAtt = stream.getAttribute(TermAttribute.class_)
+ posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)
+
+ while stream.incrementToken():
+ text = termAtt.term()
self.assert_(text not in stopWordsSet)
- except Exception, e:
- self.fail(str(e))
+ self.assertEqual(expectedIncr[i],
+ posIncrAtt.getPositionIncrement())
+ i += 1
+ finally:
+ StopFilter.setEnablePositionIncrementsDefault(defaultEnable)
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_StopWords.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_StopWords.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_StopWords.py (original)
+++ lucene/pylucene/trunk/test/test_StopWords.py Tue Oct 13 07:38:10 2009
@@ -36,7 +36,7 @@
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_ThaiAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_ThaiAnalyzer.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_ThaiAnalyzer.py (original)
+++ lucene/pylucene/trunk/test/test_ThaiAnalyzer.py Tue Oct 13 07:38:10 2009
@@ -15,47 +15,36 @@
from unittest import TestCase, main
from lucene import ThaiAnalyzer, StringReader
+from BaseTokenStreamTestCase import BaseTokenStreamTestCase
-class ThaiAnalyzerTestCase(TestCase):
-
- def assertAnalyzesTo(self, analyzer, input, output):
-
- tokenStream = analyzer.tokenStream("dummy", StringReader(input))
-
- for termText in output:
- token = tokenStream.next()
- self.assert_(token is not None)
- self.assertEqual(token.termText(), termText)
-
- self.assert_(not list(tokenStream))
- tokenStream.close()
+class ThaiAnalyzerTestCase(BaseTokenStreamTestCase):
def testAnalyzer(self):
analyzer = ThaiAnalyzer()
- self.assertAnalyzesTo(analyzer, u"", [])
+ self._assertAnalyzesTo(analyzer, u"", [])
- self.assertAnalyzesTo(analyzer,
- u"การที่ได้ต้องแสดงว่างานดี",
- [ u"การ", u"ที่", u"ได้", u"ต้อง",
- u"แสดง", u"ว่า", u"งาน", u"ดี" ])
-
- self.assertAnalyzesTo(analyzer,
- u"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
- [ u"บริษัท", u"ชื่อ", u"xy&z", u"คุย", u"กับ", u"xyz@demo.com" ])
+ self._assertAnalyzesTo(analyzer,
+ u"การที่ได้ต้องแสดงว่างานดี",
+ [ u"การ", u"ที่", u"ได้", u"ต้อง",
+ u"แสดง", u"ว่า", u"งาน", u"ดี" ])
+
+ self._assertAnalyzesTo(analyzer,
+ u"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
+ [ u"บริษัท", u"ชื่อ", u"xy&z", u"คุย", u"กับ", u"xyz@demo.com" ])
# English stop words
- self.assertAnalyzesTo(analyzer,
- u"ประโยคว่า The quick brown fox jumped over the lazy dogs",
- [ u"ประโยค", u"ว่า", u"quick", u"brown", u"fox",
- u"jumped", u"over", u"lazy", u"dogs" ])
+ self._assertAnalyzesTo(analyzer,
+ u"ประโยคว่า The quick brown fox jumped over the lazy dogs",
+ [ u"ประโยค", u"ว่า", u"quick", u"brown", u"fox",
+ u"jumped", u"over", u"lazy", u"dogs" ])
if __name__ == "__main__":
import sys, lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
Modified: lucene/pylucene/trunk/test/test_bug1564.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_bug1564.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_bug1564.py (original)
+++ lucene/pylucene/trunk/test/test_bug1564.py Tue Oct 13 07:38:10 2009
@@ -22,13 +22,14 @@
self.analyzer = StandardAnalyzer()
self.store = RAMDirectory()
- writer = IndexWriter(self.store, self.analyzer, True)
+ writer = IndexWriter(self.store, self.analyzer, True,
+ IndexWriter.MaxFieldLength.LIMITED)
doc = Document()
doc.add(Field('all', u'windowpane beplaster rapacious \
catatonia gauntlet wynn depressible swede pick dressmake supreme \
jeremy plumb theoretic bureaucracy causation chartres equipoise \
dispersible careen heard',
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
doc.add(Field('id', '1', Field.Store.YES, Field.Index.NO))
writer.addDocument(doc)
writer.optimize()
@@ -39,13 +40,13 @@
def test_bug1564(self):
- searcher = IndexSearcher(self.store)
+ searcher = IndexSearcher(self.store, True)
query = QueryParser('all', self.analyzer).parse('supreme')
- hits = searcher.search(query)
- self.assertEqual(hits.length(), 1)
+ topDocs = searcher.search(query, 50)
+ self.assertEqual(topDocs.totalHits, 1)
if __name__ == '__main__':
import lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
unittest.main()
Modified: lucene/pylucene/trunk/test/test_bug1763.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_bug1763.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_bug1763.py (original)
+++ lucene/pylucene/trunk/test/test_bug1763.py Tue Oct 13 07:38:10 2009
@@ -23,15 +23,16 @@
self.d1 = RAMDirectory()
self.d2 = RAMDirectory()
- w1, w2 = [IndexWriter(d, self.analyzer, True)
+ w1, w2 = [IndexWriter(d, self.analyzer, True,
+ IndexWriter.MaxFieldLength.LIMITED)
for d in [self.d1, self.d2]]
doc1 = Document()
doc2 = Document()
doc1.add(Field("all", "blah blah double blah Gesundheit",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
doc1.add(Field('id', '1', Field.Store.YES, Field.Index.NO))
doc2.add(Field("all", "a quick brown test ran over the lazy data",
- Field.Store.NO, Field.Index.TOKENIZED))
+ Field.Store.NO, Field.Index.ANALYZED))
doc2.add(Field('id', '2',
Field.Store.YES, Field.Index.NO))
w1.addDocument(doc1)
@@ -45,18 +46,19 @@
def test_bug1763(self):
- w1 = IndexWriter(self.d1, self.analyzer, True)
+ w1 = IndexWriter(self.d1, self.analyzer, True,
+ IndexWriter.MaxFieldLength.LIMITED)
w1.addIndexes([self.d2])
w1.optimize()
w1.close()
- searcher = IndexSearcher(self.d1)
+ searcher = IndexSearcher(self.d1, True)
q = QueryParser('all', self.analyzer).parse('brown')
- hits = searcher.search(q)
- self.assertEqual(hits.doc(0).get('id'), '2')
+ topDocs = searcher.search(q, 50)
+ self.assertEqual(searcher.doc(topDocs.scoreDocs[0].doc).get('id'), '2')
if __name__ == '__main__':
import lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
unittest.main()
Modified: lucene/pylucene/trunk/test/test_bug1842.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_bug1842.py?rev=824626&r1=824625&r2=824626&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_bug1842.py (original)
+++ lucene/pylucene/trunk/test/test_bug1842.py Tue Oct 13 07:38:10 2009
@@ -22,13 +22,14 @@
self.analyzer = StandardAnalyzer()
self.d1 = RAMDirectory()
- w1 = IndexWriter(self.d1, self.analyzer, True)
+ w1 = IndexWriter(self.d1, self.analyzer, True,
+ IndexWriter.MaxFieldLength.LIMITED)
doc1 = Document()
doc1.add(Field("all", "blah blah blah Gesundheit",
- Field.Store.NO, Field.Index.TOKENIZED,
+ Field.Store.NO, Field.Index.ANALYZED,
Field.TermVector.YES))
doc1.add(Field('id', '1',
- Field.Store.YES, Field.Index.UN_TOKENIZED))
+ Field.Store.YES, Field.Index.NOT_ANALYZED))
w1.addDocument(doc1)
w1.optimize()
w1.close()
@@ -37,11 +38,12 @@
pass
def test_bug1842(self):
- reader = IndexReader.open(self.d1)
- searcher = IndexSearcher(self.d1)
+
+ reader = IndexReader.open(self.d1, True)
+ searcher = IndexSearcher(self.d1, True)
q = TermQuery(Term("id", '1'))
- hits = searcher.search(q)
- freqvec = reader.getTermFreqVector(hits.id(0), "all")
+ topDocs = searcher.search(q, 50)
+ freqvec = reader.getTermFreqVector(topDocs.scoreDocs[0].doc, "all")
terms = list(freqvec.getTerms())
terms.sort()
self.assert_(terms == ['blah', 'gesundheit'])
@@ -51,5 +53,5 @@
if __name__ == '__main__':
import lucene
- lucene.initVM(lucene.CLASSPATH)
+ lucene.initVM()
unittest.main()