You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2009/01/09 04:28:41 UTC
svn commit: r732916 [9/14] - in /lucene/pylucene/trunk: ./ java/ java/org/
java/org/osafoundation/ java/org/osafoundation/lucene/
java/org/osafoundation/lucene/analysis/
java/org/osafoundation/lucene/queryParser/
java/org/osafoundation/lucene/search/ j...
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,98 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from lucene import \
+ Document, IndexReader, Term, BooleanQuery, IndexSearcher, TermQuery, \
+ FSDirectory, System, BooleanClause, Hit
+
+
+class BooksLikeThis(object):
+    """Sample: for every book in the index, list other, similar books.
+
+    "Similar" means sharing an author (boosted) or sharing terms from the
+    book's "subject" term-frequency vector; the book itself is excluded
+    from its own results via a MUST_NOT clause on its ISBN.
+    """
+
+    def main(cls, argv):
+        # the index location comes from the "index.dir" system property
+        # set by the sample runner
+        indexDir = System.getProperty("index.dir")
+        directory = FSDirectory.getDirectory(indexDir, False)
+
+        reader = IndexReader.open(directory)
+        blt = BooksLikeThis(reader)
+
+        # doc ids run from 0 to maxDoc() - 1 and may include deleted
+        # slots, hence the isDeleted() check before fetching
+        for id in xrange(reader.maxDoc()):
+            if reader.isDeleted(id):
+                continue
+            doc = reader.document(id)
+            print ''
+            print doc.get("title").encode('utf-8')
+
+            docs = blt.docsLike(id, doc, 10)
+            if not docs:
+                print " None like this"
+            else:
+                for doc in docs:
+                    print " ->", doc.get("title").encode('utf-8')
+
+    def __init__(self, reader):
+        # keep the reader for term-vector access and open a searcher on it
+        self.reader = reader
+        self.searcher = IndexSearcher(reader)
+
+    def docsLike(self, id, doc, max):
+        """Return up to `max` documents similar to document `id` (`doc`).
+
+        Builds a boolean query of (boosted) author terms OR'ed with the
+        terms of the document's "subject" vector, minus the document
+        itself, and collects at most `max` hits in score order.
+        """
+        # books by any of the same authors are favored; the boost makes
+        # the author clause count double relative to subject matches
+        authors = doc.getValues("author")
+        authorQuery = BooleanQuery()
+        for author in authors:
+            authorQuery.add(TermQuery(Term("author", author)),
+                            BooleanClause.Occur.SHOULD)
+        authorQuery.setBoost(2.0)
+
+        # each term of the document's "subject" vector becomes an
+        # optional clause
+        vector = self.reader.getTermFreqVector(id, "subject")
+
+        subjectQuery = BooleanQuery()
+        for term in vector.getTerms():
+            tq = TermQuery(Term("subject", term))
+            subjectQuery.add(tq, BooleanClause.Occur.SHOULD)
+
+        likeThisQuery = BooleanQuery()
+        likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD)
+        likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD)
+
+        # exclude myself
+        likeThisQuery.add(TermQuery(Term("isbn", doc.get("isbn"))),
+                          BooleanClause.Occur.MUST_NOT)
+
+        print " Query:", likeThisQuery.toString("contents")
+        hits = self.searcher.search(likeThisQuery)
+
+        # keep only the first `max` hits (Hits iterates in score order)
+        docs = []
+        for hit in hits:
+            hit = Hit.cast_(hit)
+            doc = hit.getDocument()
+            if len(docs) < max:
+                docs.append(doc)
+            else:
+                break
+
+        return docs
+
+    main = classmethod(main)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,123 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from math import pi, sqrt, acos
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import Document, IndexReader
+
+
+class CategorizerTest(LiaTestCase):
+
+ def setUp(self):
+
+ super(CategorizerTest, self).setUp()
+ self.categoryMap = {}
+
+ self.buildCategoryVectors()
+ self.dumpCategoryVectors()
+
+ def testCategorization(self):
+
+ self.assertEqual("/technology/computers/programming/methodology",
+ self.getCategory("extreme agile methodology"))
+ self.assertEqual("/education/pedagogy",
+ self.getCategory("montessori education philosophy"))
+
+ def dumpCategoryVectors(self):
+
+ for category, vectorMap in self.categoryMap.iteritems():
+ print "Category", category
+ for term, freq in vectorMap.iteritems():
+ print " ", term, "=", freq
+
+ def buildCategoryVectors(self):
+
+ reader = IndexReader.open(self.directory)
+
+ for id in xrange(reader.maxDoc()):
+ doc = reader.document(id)
+ category = doc.get("category")
+ vectorMap = self.categoryMap.get(category, None)
+ if vectorMap is None:
+ vectorMap = self.categoryMap[category] = {}
+
+ termFreqVector = reader.getTermFreqVector(id, "subject")
+ self.addTermFreqToMap(vectorMap, termFreqVector)
+
+ def addTermFreqToMap(self, vectorMap, termFreqVector):
+
+ terms = termFreqVector.getTerms()
+ freqs = termFreqVector.getTermFrequencies()
+
+ i = 0
+ for term in terms:
+ if term in vectorMap:
+ vectorMap[term] += freqs[i]
+ else:
+ vectorMap[term] = freqs[i]
+ i += 1
+
+ def getCategory(self, subject):
+
+ words = subject.split(' ')
+
+ bestAngle = 2 * pi
+ bestCategory = None
+
+ for category, vectorMap in self.categoryMap.iteritems():
+ angle = self.computeAngle(words, category, vectorMap)
+ if angle != 'nan' and angle < bestAngle:
+ bestAngle = angle
+ bestCategory = category
+
+ return bestCategory
+
+ def computeAngle(self, words, category, vectorMap):
+
+ # assume words are unique and only occur once
+
+ dotProduct = 0
+ sumOfSquares = 0
+
+ for word in words:
+ categoryWordFreq = 0
+
+ if word in vectorMap:
+ categoryWordFreq = vectorMap[word]
+
+ # optimized because we assume frequency in words is 1
+ dotProduct += categoryWordFreq
+ sumOfSquares += categoryWordFreq ** 2
+
+ if sumOfSquares == 0:
+ return 'nan'
+
+ if sumOfSquares == len(words):
+ # avoid precision issues for special case
+ # sqrt x * sqrt x = x
+ denominator = sumOfSquares
+ else:
+ denominator = sqrt(sumOfSquares) * sqrt(len(words))
+
+ return acos(dotProduct / denominator)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,100 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+ Term, BooleanQuery, IndexSearcher, TermQuery, DateField, \
+ CachingWrapperFilter, DateFilter, RangeQuery, QueryFilter, BooleanClause
+
+
+class FilterTest(LiaTestCase):
+
+ def setUp(self):
+
+ super(FilterTest, self).setUp()
+
+ self.allBooks = RangeQuery(Term("pubmonth", "190001"),
+ Term("pubmonth", "200512"), True)
+ self.searcher = IndexSearcher(self.directory)
+ hits = self.searcher.search(self.allBooks)
+ self.numAllBooks = len(hits)
+
+ def testDateFilter(self):
+
+ jan1 = self.parseDate("2004-01-01")
+ jan31 = self.parseDate("2004-01-31")
+ dec31 = self.parseDate("2004-12-31")
+
+ filter = DateFilter("modified", jan1, dec31)
+
+ hits = self.searcher.search(self.allBooks, filter)
+ self.assertEqual(self.numAllBooks, len(hits), "all modified in 2004")
+
+ filter = DateFilter("modified", jan1, jan31)
+ hits = self.searcher.search(self.allBooks, filter)
+ self.assertEqual(0, len(hits), "none modified in January")
+
+ def testQueryFilter(self):
+
+ categoryQuery = TermQuery(Term("category", "/philosophy/eastern"))
+ categoryFilter = QueryFilter(categoryQuery)
+
+ hits = self.searcher.search(self.allBooks, categoryFilter)
+ self.assertEqual(1, len(hits), "only tao te ching")
+
+ def testFilterAlternative(self):
+
+ categoryQuery = TermQuery(Term("category", "/philosophy/eastern"))
+
+ constrainedQuery = BooleanQuery()
+ constrainedQuery.add(self.allBooks, BooleanClause.Occur.MUST)
+ constrainedQuery.add(categoryQuery, BooleanClause.Occur.MUST)
+
+ hits = self.searcher.search(constrainedQuery)
+ self.assertEqual(1, len(hits), "only tao te ching")
+
+ def testQueryFilterWithRangeQuery(self):
+
+ jan1 = self.parseDate("2004-01-01")
+ dec31 = self.parseDate("2004-12-31")
+
+ start = Term("modified", DateField.dateToString(jan1))
+ end = Term("modified", DateField.dateToString(dec31))
+
+ rangeQuery = RangeQuery(start, end, True)
+
+ filter = QueryFilter(rangeQuery)
+ hits = self.searcher.search(self.allBooks, filter)
+ self.assertEqual(self.numAllBooks, len(hits), "all of 'em")
+
+ def testCachingWrapper(self):
+
+ jan1 = self.parseDate("2004-01-01")
+ dec31 = self.parseDate("2004-12-31")
+
+ dateFilter = DateFilter("modified", jan1, dec31)
+ cachingFilter = CachingWrapperFilter(dateFilter)
+
+ hits = self.searcher.search(self.allBooks, cachingFilter)
+ self.assertEqual(self.numAllBooks, len(hits), "all of 'em")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,60 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+ SimpleAnalyzer, MultiFieldQueryParser, IndexSearcher, BooleanClause
+
+
+class MultiFieldQueryParserTest(LiaTestCase):
+
+ def testDefaultOperator(self):
+
+ SHOULD = BooleanClause.Occur.SHOULD
+ query = MultiFieldQueryParser.parse("development",
+ ["title", "subject"],
+ [SHOULD, SHOULD],
+ SimpleAnalyzer())
+
+ searcher = IndexSearcher(self.directory)
+ hits = searcher.search(query)
+
+ self.assertHitsIncludeTitle(hits, "Java Development with Ant")
+
+ # has "development" in the subject field
+ self.assertHitsIncludeTitle(hits, "Extreme Programming Explained")
+
+ def testSpecifiedOperator(self):
+
+ MUST = BooleanClause.Occur.MUST
+ query = MultiFieldQueryParser.parse("development",
+ ["title", "subject"],
+ [MUST, MUST],
+ SimpleAnalyzer())
+
+ searcher = IndexSearcher(self.directory)
+ hits = searcher.search(query)
+
+ self.assertHitsIncludeTitle(hits, "Java Development with Ant")
+ self.assertEqual(1, hits.length(), "one and only one")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,74 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+ WhitespaceAnalyzer, Document, Field, IndexWriter, Term, MultiSearcher, \
+ RangeQuery, RAMDirectory, IndexSearcher
+
+
+class MultiSearcherTest(TestCase):
+
+ def setUp(self):
+
+ animals = [ "aardvark", "beaver", "coati",
+ "dog", "elephant", "frog", "gila monster",
+ "horse", "iguana", "javelina", "kangaroo",
+ "lemur", "moose", "nematode", "orca",
+ "python", "quokka", "rat", "scorpion",
+ "tarantula", "uromastyx", "vicuna",
+ "walrus", "xiphias", "yak", "zebra" ]
+
+ analyzer = WhitespaceAnalyzer()
+
+ aTOmDirectory = RAMDirectory()
+ nTOzDirectory = RAMDirectory()
+
+ aTOmWriter = IndexWriter(aTOmDirectory, analyzer, True)
+ nTOzWriter = IndexWriter(nTOzDirectory, analyzer, True)
+
+ for animal in animals:
+ doc = Document()
+ doc.add(Field("animal", animal,
+ Field.Store.YES, Field.Index.UN_TOKENIZED))
+
+ if animal[0].lower() < "n":
+ aTOmWriter.addDocument(doc)
+ else:
+ nTOzWriter.addDocument(doc)
+
+ aTOmWriter.close()
+ nTOzWriter.close()
+
+ self.searchers = [ IndexSearcher(aTOmDirectory),
+ IndexSearcher(nTOzDirectory) ]
+
+ def testMulti(self):
+
+ searcher = MultiSearcher(self.searchers)
+
+ # range spans documents across both indexes
+ query = RangeQuery(Term("animal", "h"), Term("animal", "t"), True)
+
+ hits = searcher.search(query)
+ self.assertEqual(12, hits.length(), "tarantula not included")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,84 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+ WhitespaceAnalyzer, Document, Field, IndexWriter, Term, BooleanQuery, \
+ IndexSearcher, PhrasePrefixQuery, PhraseQuery, RAMDirectory, BooleanClause
+
+
+class PhrasePrefixQueryTest(TestCase):
+
+ def setUp(self):
+
+ directory = RAMDirectory()
+ writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
+
+ doc1 = Document()
+ doc1.add(Field("field", "the quick brown fox jumped over the lazy dog",
+ Field.Store.YES, Field.Index.TOKENIZED))
+ writer.addDocument(doc1)
+
+ doc2 = Document()
+ doc2.add(Field("field", "the fast fox hopped over the hound",
+ Field.Store.YES, Field.Index.TOKENIZED))
+ writer.addDocument(doc2)
+ writer.close()
+
+ self.searcher = IndexSearcher(directory)
+
+ def testBasic(self):
+
+ query = PhrasePrefixQuery()
+ query.add([Term("field", "quick"), Term("field", "fast")])
+ query.add(Term("field", "fox"))
+ print query
+
+ hits = self.searcher.search(query)
+ self.assertEqual(1, len(hits), "fast fox match")
+
+ query.setSlop(1)
+ hits = self.searcher.search(query)
+ self.assertEqual(2, len(hits), "both match")
+
+ def testAgainstOR(self):
+
+ quickFox = PhraseQuery()
+ quickFox.setSlop(1)
+ quickFox.add(Term("field", "quick"))
+ quickFox.add(Term("field", "fox"))
+
+ fastFox = PhraseQuery()
+ fastFox.add(Term("field", "fast"))
+ fastFox.add(Term("field", "fox"))
+
+ query = BooleanQuery()
+ query.add(quickFox, BooleanClause.Occur.SHOULD)
+ query.add(fastFox, BooleanClause.Occur.SHOULD)
+ hits = self.searcher.search(query)
+ self.assertEqual(2, len(hits))
+
+ def debug(self, hits):
+
+ for i, doc in hits:
+ print "%s: %s" %(hits.score(i), doc['field'])
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,68 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+ WhitespaceAnalyzer, Document, Field, IndexWriter, Term, MultiSearcher, \
+ QueryFilter, RAMDirectory, IndexSearcher, TermQuery
+
+
+class SecurityFilterTest(TestCase):
+
+ def setUp(self):
+
+ self.directory = RAMDirectory()
+ writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True)
+
+ # Elwood
+ document = Document()
+ document.add(Field("owner", "elwood",
+ Field.Store.YES, Field.Index.UN_TOKENIZED))
+ document.add(Field("keywords", "elwoods sensitive info",
+ Field.Store.YES, Field.Index.TOKENIZED))
+ writer.addDocument(document)
+
+ # Jake
+ document = Document()
+ document.add(Field("owner", "jake",
+ Field.Store.YES, Field.Index.UN_TOKENIZED))
+ document.add(Field("keywords", "jakes sensitive info",
+ Field.Store.YES, Field.Index.TOKENIZED))
+ writer.addDocument(document)
+
+ writer.close()
+
+ def testSecurityFilter(self):
+
+ query = TermQuery(Term("keywords", "info"))
+
+ searcher = IndexSearcher(self.directory)
+ hits = searcher.search(query)
+ self.assertEqual(2, len(hits), "Both documents match")
+
+ jakeFilter = QueryFilter(TermQuery(Term("owner", "jake")))
+
+ hits = searcher.search(query, jakeFilter)
+ self.assertEqual(1, len(hits))
+ self.assertEqual("jakes sensitive info", hits[0].get("keywords"),
+ "elwood is safe")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,84 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from lucene import \
+ FSDirectory, Document, Field, IndexSearcher, SimpleAnalyzer, \
+ RangeQuery, Sort, SortField, DecimalFormat, System, Term
+
+
+class SortingExample(object):
+    """Sample: run one query under several Sort configurations and print
+    the hits as an aligned table."""
+
+    def __init__(self, directory):
+        # the index to search; opened per displayHits() call
+        self.directory = directory
+
+    def displayHits(self, query, sort):
+        """Search `query` ordered by `sort` and print title, pubmonth,
+        doc id and formatted score for every hit."""
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(query, sort)
+
+        print "\nResults for:", query, "sorted by", sort
+        print "Title".rjust(30), "pubmonth".rjust(10), \
+              "id".center(4), "score".center(15)
+
+        # DecimalFormat renders the score with up to six decimals
+        scoreFormatter = DecimalFormat("0.######")
+        for i, doc in hits:
+            title = doc["title"]
+            if len(title) > 30:
+                # truncate long titles so the columns stay aligned
+                title = title[:30]
+            print title.encode('ascii', 'replace').rjust(30), \
+                  doc["pubmonth"].rjust(10), \
+                  str(hits.id(i)).center(4), \
+                  scoreFormatter.format(hits.score(i)).ljust(12)
+            print " ", doc["category"]
+            # print searcher.explain(query, hits.id(i))
+
+        searcher.close()
+
+    def main(cls, argv):
+        # a term range matching every book published 1900-2010
+        earliest = Term("pubmonth", "190001")
+        latest = Term("pubmonth", "201012")
+        allBooks = RangeQuery(earliest, latest, True)
+
+        # the index location comes from the "index.dir" system property
+        indexDir = System.getProperty("index.dir")
+        directory = FSDirectory.getDirectory(indexDir, False)
+        example = SortingExample(directory)
+
+        # relevance, index order, single-field, and composite sorts
+        example.displayHits(allBooks, Sort.RELEVANCE)
+        example.displayHits(allBooks, Sort.INDEXORDER)
+        example.displayHits(allBooks, Sort("category"))
+        example.displayHits(allBooks, Sort("pubmonth", True))
+
+        example.displayHits(allBooks,
+                            Sort([SortField("category"),
+                                  SortField.FIELD_SCORE,
+                                  SortField("pubmonth", SortField.INT, True)]))
+
+        example.displayHits(allBooks,
+                            Sort([SortField.FIELD_SCORE,
+                                  SortField("category")]))
+
+    main = classmethod(main)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,221 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from cStringIO import StringIO
+
+from lucene import \
+ WhitespaceAnalyzer, Document, Field, IndexReader, IndexWriter, Term, \
+ IndexSearcher, PhraseQuery, SpanFirstQuery, SpanNearQuery, SpanNotQuery, \
+ SpanOrQuery, SpanTermQuery, RAMDirectory, Hit
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+
+
class SpanQueryTest(TestCase):
    """Exercises Lucene's span-query family (SpanTermQuery, SpanFirstQuery,
    SpanNearQuery, SpanNotQuery, SpanOrQuery) against two one-sentence
    documents; ported from Lucene in Action.

    Doc 0: "the quick brown fox jumps over the lazy dog"
    Doc 1: "the quick red fox jumps over the sleepy cat"
    """

    def setUp(self):
        # Index the two sentences into an in-memory directory; the
        # whitespace analyzer maps token positions 1:1 onto words.
        self.directory = RAMDirectory()
        self.analyzer = WhitespaceAnalyzer()

        writer = IndexWriter(self.directory, self.analyzer, True)

        doc = Document()
        doc.add(Field("f", "the quick brown fox jumps over the lazy dog",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("f", "the quick red fox jumps over the sleepy cat",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)

        writer.close()

        self.searcher = IndexSearcher(self.directory)
        self.reader = IndexReader.open(self.directory)

        # One SpanTermQuery per word of interest, shared by the tests.
        self.quick = SpanTermQuery(Term("f", "quick"))
        self.brown = SpanTermQuery(Term("f", "brown"))
        self.red = SpanTermQuery(Term("f", "red"))
        self.fox = SpanTermQuery(Term("f", "fox"))
        self.lazy = SpanTermQuery(Term("f", "lazy"))
        self.sleepy = SpanTermQuery(Term("f", "sleepy"))
        self.dog = SpanTermQuery(Term("f", "dog"))
        self.cat = SpanTermQuery(Term("f", "cat"))

    def assertOnlyBrownFox(self, query):
        # Expect exactly one hit: doc 0, the "brown fox" sentence.
        hits = self.searcher.search(query)
        self.assertEqual(1, len(hits))
        self.assertEqual(0, hits.id(0), "wrong doc")

    def assertBothFoxes(self, query):
        # Expect both documents to match.
        hits = self.searcher.search(query)
        self.assertEqual(2, len(hits))

    def assertNoMatches(self, query):
        # Expect no document to match.
        hits = self.searcher.search(query)
        self.assertEquals(0, len(hits))

    def testSpanTermQuery(self):
        self.assertOnlyBrownFox(self.brown)
        self.dumpSpans(self.brown)

    def testSpanFirstQuery(self):
        # "brown" sits at position 2 (0-based): outside the first two
        # positions, inside the first three.
        sfq = SpanFirstQuery(self.brown, 2)
        self.assertNoMatches(sfq)

        self.dumpSpans(sfq)

        sfq = SpanFirstQuery(self.brown, 3)
        self.dumpSpans(sfq)
        self.assertOnlyBrownFox(sfq)

    def testSpanNearQuery(self):
        # In-order slop of 5 is the minimum that lets quick/brown/dog
        # match doc 0.
        quick_brown_dog = [self.quick, self.brown, self.dog]
        snq = SpanNearQuery(quick_brown_dog, 0, True)
        self.assertNoMatches(snq)
        self.dumpSpans(snq)

        snq = SpanNearQuery(quick_brown_dog, 4, True)
        self.assertNoMatches(snq)
        self.dumpSpans(snq)

        snq = SpanNearQuery(quick_brown_dog, 5, True)
        self.assertOnlyBrownFox(snq)
        self.dumpSpans(snq)

        # interesting - even a sloppy phrase query would require
        # more slop to match
        snq = SpanNearQuery([self.lazy, self.fox], 3, False)
        self.assertOnlyBrownFox(snq)
        self.dumpSpans(snq)

        pq = PhraseQuery()
        pq.add(Term("f", "lazy"))
        pq.add(Term("f", "fox"))
        pq.setSlop(4)
        self.assertNoMatches(pq)

        pq.setSlop(5)
        self.assertOnlyBrownFox(pq)

    def testSpanNotQuery(self):
        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
        self.assertBothFoxes(quick_fox)
        self.dumpSpans(quick_fox)

        # Excluding "dog" changes nothing: it lies outside the
        # quick...fox span in both documents.
        quick_fox_dog = SpanNotQuery(quick_fox, self.dog)
        self.assertBothFoxes(quick_fox_dog)
        self.dumpSpans(quick_fox_dog)

        # "red" overlaps the quick...fox span of doc 1, removing it.
        no_quick_red_fox = SpanNotQuery(quick_fox, self.red)
        self.assertOnlyBrownFox(no_quick_red_fox)
        self.dumpSpans(no_quick_red_fox)

    def testSpanOrQuery(self):
        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
        lazy_dog = SpanNearQuery([self.lazy, self.dog], 0, True)
        sleepy_cat = SpanNearQuery([self.sleepy, self.cat], 0, True)
        qf_near_ld = SpanNearQuery([quick_fox, lazy_dog], 3, True)

        self.assertOnlyBrownFox(qf_near_ld)
        self.dumpSpans(qf_near_ld)

        qf_near_sc = SpanNearQuery([quick_fox, sleepy_cat], 3, True)
        self.dumpSpans(qf_near_sc)

        # OR of the two near-queries matches both documents.
        orQ = SpanOrQuery([qf_near_ld, qf_near_sc])
        self.assertBothFoxes(orQ)
        self.dumpSpans(orQ)

    def testPlay(self):
        # Scratch test: only dumps the spans of a few query shapes;
        # makes no assertions.
        orQ = SpanOrQuery([self.quick, self.fox])
        self.dumpSpans(orQ)

        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
        sfq = SpanFirstQuery(quick_fox, 4)
        self.dumpSpans(sfq)

        self.dumpSpans(SpanTermQuery(Term("f", "the")))

        quick_brown = SpanNearQuery([self.quick, self.brown], 0, False)
        self.dumpSpans(quick_brown)

    def dumpSpans(self, query):
        """Print every span the query matches, marking the span's tokens
        with <...> inside the re-analyzed document text, followed by the
        matching document's score for the query."""

        spans = query.getSpans(self.reader)
        print "%s:" % query
        numSpans = 0

        # Pre-compute each doc's score so it can be shown per span.
        hits = self.searcher.search(query)
        scores = [0, 0]  # indexed by doc id; exactly two docs exist
        for hit in hits:
            hit = Hit.cast_(hit)
            scores[hit.getId()] = hit.getScore()

        while spans.next():
            numSpans += 1

            id = spans.doc()
            doc = self.reader.document(id)

            # for simplicity - assume tokens are in sequential
            # positions, starting from 0
            tokens = AnalyzerUtils.tokensFromAnalysis(self.analyzer, doc["f"])
            buffer = StringIO()
            buffer.write(" ")

            i = 0
            for token in tokens:
                if i == spans.start():
                    buffer.write("<")

                buffer.write(token.termText())
                if i + 1 == spans.end():
                    buffer.write(">")

                buffer.write(" ")
                i += 1

            buffer.write("(")
            buffer.write(str(scores[id]))
            buffer.write(") ")

            print buffer.getvalue()
            # print self.searcher.explain(query, id)

        if numSpans == 0:
            print " No spans"

        print ''
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1 @@
+# advsearching package
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,69 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+from lucene import \
+ StopAnalyzer, SimpleAnalyzer, WhitespaceAnalyzer, StandardAnalyzer
+
+
+class AnalyzerDemo(object):
+
+ examples = ["The quick brown fox jumped over the lazy dogs",
+ "XY&Z Corporation - xyz@example.com"]
+
+ analyzers = [WhitespaceAnalyzer(),
+ SimpleAnalyzer(),
+ StopAnalyzer(),
+ StandardAnalyzer()]
+
+ def main(cls, argv):
+
+ # Use the embedded example strings, unless
+ # command line arguments are specified, then use those.
+ strings = cls.examples
+
+ if len(argv) > 1:
+ strings = argv[1:]
+
+ for string in strings:
+ cls.analyze(string)
+
+ def analyze(cls, text):
+
+ print'"Analyzing "', text, '"'
+
+ for analyzer in cls.analyzers:
+ name = type(analyzer).__name__
+ print " %s:" %(name),
+ AnalyzerUtils.displayTokens(analyzer, text)
+ print ''
+ print ''
+
+ main = classmethod(main)
+ analyze = classmethod(analyze)
+
+
if __name__ == "__main__":
    # Command-line entry point: analyze sys.argv[1:] or the built-in
    # example strings.
    import sys
    AnalyzerDemo.main(sys.argv)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,94 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+ SimpleAnalyzer, Token, TokenStream, StandardAnalyzer, StringReader
+
+
+class AnalyzerUtils(object):
+
+ def main(cls, argv):
+
+ print "SimpleAnalyzer"
+ cls.displayTokensWithFullDetails(SimpleAnalyzer(),
+ "The quick brown fox....")
+
+ print "\n----"
+ print "StandardAnalyzer"
+ cls.displayTokensWithFullDetails(StandardAnalyzer(),
+ "I'll e-mail you at xyz@example.com")
+
+ def tokensFromAnalysis(cls, analyzer, text):
+ return [token for token in analyzer.tokenStream("contents", StringReader(text))]
+
+ def displayTokens(cls, analyzer, text):
+
+ for token in cls.tokensFromAnalysis(analyzer, text):
+ print "[%s]" %(token.termText()),
+
+ def displayTokensWithPositions(cls, analyzer, text):
+
+ position = 0
+ for token in cls.tokensFromAnalysis(analyzer, text):
+ increment = token.getPositionIncrement()
+ if increment > 0:
+ position += increment
+ print "\n%d:" %(position),
+
+ print "[%s]" %(token.termText()),
+
+ def displayTokensWithFullDetails(cls, analyzer, text):
+
+ position = 0
+ for token in cls.tokensFromAnalysis(analyzer, text):
+ increment = token.getPositionIncrement()
+
+ if increment > 0:
+ position += increment
+ print "\n%s:" %(position),
+
+ print "[%s:%d->%d:%s]" %(token.termText(),
+ token.startOffset(),
+ token.endOffset(),
+ token.type()),
+
+ def assertTokensEqual(cls, unittest, tokens, strings):
+
+ unittest.assertEqual(len(strings), len(tokens))
+
+ i = 0
+ for token in tokens:
+ unittest.assertEqual(strings[i], token.termText(), "index %d" %(i))
+ i += 1
+
+ main = classmethod(main)
+ tokensFromAnalysis = classmethod(tokensFromAnalysis)
+ displayTokens = classmethod(displayTokens)
+ displayTokensWithPositions = classmethod(displayTokensWithPositions)
+ displayTokensWithFullDetails = classmethod(displayTokensWithFullDetails)
+ assertTokensEqual = classmethod(assertTokensEqual)
+
+
if __name__ == "__main__":
    # Command-line entry point: run the token-detail demo.
    import sys
    AnalyzerUtils.main(sys.argv)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,53 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+ RAMDirectory, IndexWriter, StandardAnalyzer, Document, Field, \
+ QueryParser
+
class UsingAnalyzersExample(object):
    """Code snippets showing where Analyzers plug in, at indexing and at
    query-parsing time (from Lucene in Action)."""

    #
    # This method doesn't do anything, except compile correctly.
    # This is used to show snippets of how Analyzers are used.
    #
    def someMethod(self):

        directory = RAMDirectory()

        # Analyzer handed to the writer: used for every tokenized field.
        analyzer = StandardAnalyzer()
        writer = IndexWriter(directory, analyzer, True)

        doc = Document()
        doc.add(Field.Text("title", "This is the title"))
        doc.add(Field.UnStored("contents", "...document contents..."))
        writer.addDocument(doc)

        # Per-document override of the writer's default analyzer.
        writer.addDocument(doc, analyzer)

        expression = "some query"

        # Static query-parse form: the same analyzer must be used at
        # query time as at indexing time.
        query = QueryParser.parse(expression, "contents", analyzer)

        # Instance form of the query parser.
        # NOTE(review): stock Lucene QueryParser exposes parse(), not
        # parseQuery() — presumably parse() is meant; confirm against
        # the PyLucene version this sample targets.
        parser = QueryParser("contents", analyzer)
        query = parser.parseQuery(expression)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1 @@
+# analysis package
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+from lucene import Term, IndexSearcher, TermQuery
+
+
class ChineseTest(LiaTestCase):
    """Verifies that a Chinese term indexed in the test index is
    searchable (from Lucene in Action)."""

    def testChinese(self):

        searcher = IndexSearcher(self.directory)
        # Fixed: the term literal had been mangled to "é" by a bad
        # encoding round-trip.  The assertion message below ("tao") and
        # the file's utf-8 coding declaration indicate the intended
        # term is the character for 'tao'.
        hits = searcher.search(TermQuery(Term("contents", "道")))

        self.assertEqual(1, hits.length(), "tao")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1 @@
+# i18n package
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,48 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import Token, PythonAnalyzer, PythonTokenStream, JArray
+
+#
+# "Tokenizes" the entire stream as a single token.
+#
+
class KeywordAnalyzer(PythonAnalyzer):
    """Analyzer that "tokenizes" the entire stream as a single token."""

    def tokenStream(self, fieldName, reader):
        # Returns a one-shot stream closed over `reader`: the first call
        # to next() reads the input and yields it as one Token; every
        # later call yields None (end of stream).

        class _tokenStream(PythonTokenStream):

            def __init__(self):
                super(_tokenStream, self).__init__()
                # Set True once the single token has been emitted.
                self.done = False

            def next(self):
                if not self.done:
                    self.done = True
                    # NOTE(review): at most 1024 chars are read, so
                    # longer input is silently truncated — confirm this
                    # limit is acceptable for callers.
                    text = JArray('char')(1024)
                    size = reader.read(text, 0, 1024)
                    return Token(text, 0, size, 0, size)
                return None

        return _tokenStream()
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,89 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+
+from lucene import \
+ IndexWriter, Term, SimpleAnalyzer, PerFieldAnalyzerWrapper, \
+ RAMDirectory, Document, Field, IndexSearcher, TermQuery, \
+ QueryParser, Analyzer, StringReader, Token, JavaError
+
+from lia.analysis.keyword.KeywordAnalyzer import KeywordAnalyzer
+from lia.analysis.keyword.SimpleKeywordAnalyzer import SimpleKeywordAnalyzer
+
+
class KeywordAnalyzerTest(TestCase):
    """Shows why keyword fields need a non-tokenizing analyzer at query
    time: a plain SimpleAnalyzer lowercases "Q36" and then misses the
    UN_TOKENIZED partnum field (from Lucene in Action)."""

    def setUp(self):
        # One document: an untokenized part number plus a tokenized
        # description.
        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, SimpleAnalyzer(), True)

        doc = Document()
        doc.add(Field("partnum", "Q36",
                      Field.Store.YES, Field.Index.UN_TOKENIZED))
        doc.add(Field("description", "Illidium Space Modulator",
                      Field.Store.YES, Field.Index.TOKENIZED))
        writer.addDocument(doc)
        writer.close()

        self.searcher = IndexSearcher(self.directory)

    def testTermQuery(self):
        # A raw TermQuery bypasses analysis, so "Q36" matches as-is.
        query = TermQuery(Term("partnum", "Q36"))
        hits = self.searcher.search(query)
        self.assertEqual(1, hits.length())

    def testBasicQueryParser(self):
        # SimpleAnalyzer lowercases/splits "Q36" down to "q", which no
        # longer matches the untokenized field value.
        query = QueryParser("description",
                            SimpleAnalyzer()).parse("partnum:Q36 AND SPACE")

        hits = self.searcher.search(query)
        self.assertEqual("+partnum:q +space", query.toString("description"),
                         "note Q36 -> q")
        self.assertEqual(0, hits.length(), "doc not found :(")

    def testPerFieldAnalyzer(self):
        # Routing the partnum field through KeywordAnalyzer preserves
        # "Q36", so the document is found.
        analyzer = PerFieldAnalyzerWrapper(SimpleAnalyzer())
        analyzer.addAnalyzer("partnum", KeywordAnalyzer())

        query = QueryParser("description",
                            analyzer).parse("partnum:Q36 AND SPACE")
        hits = self.searcher.search(query)

        self.assertEqual("+partnum:Q36 +space", query.toString("description"),
                         "Q36 kept as-is")
        self.assertEqual(1, hits.length(), "doc found!")

    def testSimpleKeywordAnalyzer(self):

        analyzer = SimpleKeywordAnalyzer()

        input = "Hello World"
        ts = analyzer.tokenStream("dummy", StringReader(input))
        # The whole input comes back as a single token.
        self.assertEqual(ts.next().termText(), input)
        # NOTE(review): this assertion is vacuous — list(ts) is never
        # None, so `not list(ts) is None` is always True.  It only
        # drains the stream; presumably it was meant to assert the
        # stream is now empty — confirm and tighten if so.
        self.assert_(not list(ts) is None)
        ts.close()
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,44 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+#
+# CharTokenizer limits token width to 255 characters, though.
+# This implementation assumes keywords are 255 in length or less.
+#
+
+from lucene import PythonAnalyzer, PythonCharTokenizer
+
+
class SimpleKeywordAnalyzer(PythonAnalyzer):
    """Analyzer whose tokenizer treats every character as a token
    character, so the whole input comes back as a single keyword token
    (subject to CharTokenizer's 255-character token limit)."""

    def tokenStream(self, fieldName, reader):

        class _KeywordTokenizer(PythonCharTokenizer):

            def __init__(self, input):
                super(_KeywordTokenizer, self).__init__(input)

            def isTokenChar(self, c):
                # Never split: every character belongs to the token.
                return True

            def normalize(self, c):
                # Keep characters exactly as read.
                return c

        return _KeywordTokenizer(reader)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1 @@
+# keyword package
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,57 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import sys
+
+from lucene import \
+ LowerCaseTokenizer, PorterStemFilter, StopAnalyzer, StopFilter, \
+ TokenStream, PythonAnalyzer
+
+from lia.analysis.positional.PositionalStopFilter import PositionalStopFilter
+
# Compare numeric version tuples, not formatted strings: string comparison
# is lexicographic ('2.10' < '2.4'), which breaks for double-digit version
# components.
python_ver = '%d.%d.%d' % (sys.version_info[0:3])
if sys.version_info < (2, 4):
    # Python < 2.4 has no builtin set type
    from sets import Set as set
+
+
+#
+# An Analyzer extension
+#
+
class PositionalPorterStopAnalyzer(PythonAnalyzer):
    """Analyzer chain: lower-case tokens, drop stop words while keeping
    positional holes (via PositionalStopFilter), then Porter-stem."""

    def __init__(self, stopWords=None):

        super(PositionalPorterStopAnalyzer, self).__init__()

        # fall back to Lucene's English stop list when none is supplied
        if stopWords is None:
            words = StopAnalyzer.ENGLISH_STOP_WORDS
        else:
            words = stopWords
        self.stopWords = set(words)

    def tokenStream(self, fieldName, reader):

        # lower-case -> positional stop filtering -> Porter stemming
        return PorterStemFilter(
            PositionalStopFilter(LowerCaseTokenizer(reader), self.stopWords))
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,92 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+
+from lucene import \
+ IndexWriter, Term, RAMDirectory, Document, Field, \
+ IndexSearcher, QueryParser
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+from lia.analysis.positional.PositionalPorterStopAnalyzer import \
+ PositionalPorterStopAnalyzer
+
+
+class PositionalPorterStopAnalyzerTest(TestCase):
+
+ porterAnalyzer = PositionalPorterStopAnalyzer()
+
+ def setUp(self):
+
+ self.directory = RAMDirectory()
+ writer = IndexWriter(self.directory, self.porterAnalyzer, True)
+
+ doc = Document()
+ doc.add(Field("contents",
+ "The quick brown fox jumps over the lazy dogs",
+ Field.Store.YES, Field.Index.TOKENIZED))
+ writer.addDocument(doc)
+ writer.close()
+
+ def testStems(self):
+
+ searcher = IndexSearcher(self.directory)
+ query = QueryParser("contents", self.porterAnalyzer).parse("laziness")
+ hits = searcher.search(query)
+
+ self.assertEqual(1, hits.length(), "lazi")
+
+ query = QueryParser("contents",
+ self.porterAnalyzer).parse('"fox jumped"')
+ hits = searcher.search(query)
+
+ self.assertEqual(1, hits.length(), "jump jumps jumped jumping")
+
+ def testExactPhrase(self):
+
+ searcher = IndexSearcher(self.directory)
+ query = QueryParser("contents",
+ self.porterAnalyzer).parse('"over the lazy"')
+ hits = searcher.search(query)
+
+ self.assertEqual(0, hits.length(), "exact match not found!")
+
+ def testWithSlop(self):
+
+ searcher = IndexSearcher(self.directory)
+
+ parser = QueryParser("contents", self.porterAnalyzer)
+ parser.setPhraseSlop(1)
+
+ query = parser.parse('"over the lazy"')
+ hits = searcher.search(query)
+
+ self.assertEqual(1, hits.length(), "hole accounted for")
+
+ def main(cls):
+
+ text = "The quick brown fox jumps over the lazy dogs"
+ AnalyzerUtils.displayTokensWithPositions(cls.porterAnalyzer, text)
+ print ''
+
+ main = classmethod(main)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,51 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import PythonTokenFilter
+
+#
+# A TokenFilter extension
+#
+
class PositionalStopFilter(PythonTokenFilter):
    """TokenFilter that removes stop words while recording how many were
    skipped, so surviving tokens keep a positional gap."""

    def __init__(self, tokenStream, stopWords):

        super(PositionalStopFilter, self).__init__(tokenStream)

        self.input = tokenStream
        self.stopWords = stopWords

    def next(self):

        skipped = 0

        for token in self.input:
            # stop words are dropped but counted, so the next surviving
            # token's position increment reflects the hole they leave
            if token.termText() in self.stopWords:
                skipped += 1
                continue
            token.setPositionIncrement(token.getPositionIncrement() + skipped)
            return token

        # stream exhausted
        return None
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1 @@
+# positional package
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,48 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+ QueryParser, StandardAnalyzer, PerFieldAnalyzerWrapper, WhitespaceAnalyzer
+
+
class AnalysisParalysisTest(LiaTestCase):
    """Shows per-field analysis: StandardAnalyzer splits a path-like term,
    while a PerFieldAnalyzerWrapper can leave one field untouched."""

    def testAnalyzer(self):

        queryString = "category:/philosophy/eastern"
        standard = StandardAnalyzer()

        # StandardAnalyzer tokenizes the path into separate words
        parsed = QueryParser("contents", standard).parse(queryString)
        self.assertEqual("category:\"philosophy eastern\"",
                         parsed.toString("contents"), "path got split, yikes!")

        # whitespace analysis on "category" keeps the path intact
        perField = PerFieldAnalyzerWrapper(standard)
        perField.addAnalyzer("category", WhitespaceAnalyzer())
        parsed = QueryParser("contents", perField).parse(queryString)
        self.assertEqual("category:/philosophy/eastern",
                         parsed.toString("contents"),
                         "leave category field alone")
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,2 @@
+# queryparser package
+
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py Thu Jan 8 19:28:33 2009
@@ -0,0 +1,43 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+ LetterTokenizer, LowerCaseFilter, StopAnalyzer, StopFilter
+
+#
+# An Analyzer extension
+#
+
class StopAnalyzer2(object):
    """Analyzer built as LetterTokenizer -> LowerCaseFilter -> StopFilter."""

    def __init__(self, stopWords=None):

        # default to Lucene's English stop list when none is given
        if stopWords is not None:
            self.stopWords = stopWords
        else:
            self.stopWords = StopAnalyzer.ENGLISH_STOP_WORDS

    def tokenStream(self, fieldName, reader):

        tokenizer = LetterTokenizer(reader)
        lowerCased = LowerCaseFilter(tokenizer)
        return StopFilter(lowerCased, self.stopWords)
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
------------------------------------------------------------------------------
svn:mime-type = text/plain