You are viewing a plain text version of this content. The canonical link for it is here.
Posted to pylucene-commits@lucene.apache.org by va...@apache.org on 2016/09/09 09:20:26 UTC
svn commit: r1759963 - in /lucene/pylucene/trunk: CHANGES samples/IndexFiles.py samples/PorterStemmerAnalyzer.py
Author: vajda
Date: Fri Sep 9 09:20:26 2016
New Revision: 1759963
URL: http://svn.apache.org/viewvc?rev=1759963&view=rev
Log:
got IndexFiles.py and PorterStemmerAnalyzer.py to run (with Dirk Rothe's help)
Modified:
lucene/pylucene/trunk/CHANGES
lucene/pylucene/trunk/samples/IndexFiles.py
lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
Modified: lucene/pylucene/trunk/CHANGES
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/CHANGES?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/CHANGES (original)
+++ lucene/pylucene/trunk/CHANGES Fri Sep 9 09:20:26 2016
@@ -1,7 +1,12 @@
+Version 4.10.1 -> 6.2.0
+-----------------------
+ - using Lucene 6.2.0 sources
+ - PyLucene built with JCC 2.22
+
Version 4.9.0 -> 4.10.1
-----------------------
- using Lucene 4.10.1 sources
- - PyLucene built with JCC 2.20
+ - PyLucene built with JCC 2.21
Version 4.8.0 -> 4.9.0
----------------------
Modified: lucene/pylucene/trunk/samples/IndexFiles.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/IndexFiles.py?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/samples/IndexFiles.py (original)
+++ lucene/pylucene/trunk/samples/IndexFiles.py Fri Sep 9 09:20:26 2016
@@ -5,16 +5,16 @@ INDEX_DIR = "IndexFiles.index"
import sys, os, lucene, threading, time
from datetime import datetime
-from java.io import File
+from java.nio.file import Paths
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
-from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig
+from org.apache.lucene.index import \
+ FieldInfo, IndexWriter, IndexWriterConfig, IndexOptions
from org.apache.lucene.store import SimpleFSDirectory
-from org.apache.lucene.util import Version
"""
-This class is loosely based on the Lucene (java implementation) demo class
+This class is loosely based on the Lucene (java implementation) demo class
org.apache.lucene.demo.IndexFiles. It will take a directory as an argument
and will index all of the files in that directory and downward recursively.
It will index on the file path, the file name and the file contents. The
@@ -41,9 +41,9 @@ class IndexFiles(object):
if not os.path.exists(storeDir):
os.mkdir(storeDir)
- store = SimpleFSDirectory(File(storeDir))
+ store = SimpleFSDirectory(Paths.get(storeDir))
analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
- config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
+ config = IndexWriterConfig(analyzer)
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
writer = IndexWriter(store, config)
@@ -59,17 +59,15 @@ class IndexFiles(object):
def indexDocs(self, root, writer):
t1 = FieldType()
- t1.setIndexed(True)
t1.setStored(True)
t1.setTokenized(False)
- t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)
-
+ t1.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
+
t2 = FieldType()
- t2.setIndexed(True)
t2.setStored(False)
t2.setTokenized(True)
- t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
-
+ t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+
for root, dirnames, filenames in os.walk(root):
for filename in filenames:
if not filename.endswith('.txt'):
@@ -101,7 +99,7 @@ if __name__ == '__main__':
try:
base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR),
- StandardAnalyzer(Version.LUCENE_CURRENT))
+ StandardAnalyzer())
end = datetime.now()
print end - start
except Exception, e:
Modified: lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py (original)
+++ lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py Fri Sep 9 09:20:26 2016
@@ -13,7 +13,7 @@
# ====================================================================
# This sample illustrates how to write an Analyzer 'extension' in Python.
-#
+#
# What is happening behind the scenes ?
#
# The PorterStemmerAnalyzer python class does not in fact extend Analyzer,
@@ -30,12 +30,11 @@ import sys, os, lucene
from datetime import datetime
from IndexFiles import IndexFiles
-from org.apache.lucene.analysis.core import \
- LowerCaseFilter, StopFilter, StopAnalyzer
+from org.apache.lucene.analysis import LowerCaseFilter, StopFilter
+from org.apache.lucene.analysis.core import StopAnalyzer
from org.apache.lucene.analysis.en import PorterStemFilter
from org.apache.lucene.analysis.standard import \
StandardTokenizer, StandardFilter
-from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import PythonAnalyzer
@@ -43,12 +42,11 @@ class PorterStemmerAnalyzer(PythonAnalyz
def createComponents(self, fieldName, reader):
- source = StandardTokenizer(Version.LUCENE_CURRENT, reader)
- filter = StandardFilter(Version.LUCENE_CURRENT, source)
- filter = LowerCaseFilter(Version.LUCENE_CURRENT, filter)
+ source = StandardTokenizer(reader)
+ filter = StandardFilter(source)
+ filter = LowerCaseFilter(filter)
filter = PorterStemFilter(filter)
- filter = StopFilter(Version.LUCENE_CURRENT, filter,
- StopAnalyzer.ENGLISH_STOP_WORDS_SET)
+ filter = StopFilter(filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET)
return self.TokenStreamComponents(source, filter)