Posted to pylucene-commits@lucene.apache.org by va...@apache.org on 2016/09/09 09:20:26 UTC

svn commit: r1759963 - in /lucene/pylucene/trunk: CHANGES samples/IndexFiles.py samples/PorterStemmerAnalyzer.py

Author: vajda
Date: Fri Sep  9 09:20:26 2016
New Revision: 1759963

URL: http://svn.apache.org/viewvc?rev=1759963&view=rev
Log:
got IndexFiles.py and PorterStemmerAnalyzer.py to run (with Dirk Rothe's help)

Modified:
    lucene/pylucene/trunk/CHANGES
    lucene/pylucene/trunk/samples/IndexFiles.py
    lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py

Modified: lucene/pylucene/trunk/CHANGES
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/CHANGES?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/CHANGES (original)
+++ lucene/pylucene/trunk/CHANGES Fri Sep  9 09:20:26 2016
@@ -1,7 +1,12 @@
+Version 4.10.1 -> 6.2.0
+-----------------------
+ - using Lucene 6.2.0 sources
+ - PyLucene built with JCC 2.22
+
 Version 4.9.0 -> 4.10.1
 -----------------------
  - using Lucene 4.10.1 sources
- - PyLucene built with JCC 2.20
+ - PyLucene built with JCC 2.21
 
 Version 4.8.0 -> 4.9.0
 ----------------------

Modified: lucene/pylucene/trunk/samples/IndexFiles.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/IndexFiles.py?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/samples/IndexFiles.py (original)
+++ lucene/pylucene/trunk/samples/IndexFiles.py Fri Sep  9 09:20:26 2016
@@ -5,16 +5,16 @@ INDEX_DIR = "IndexFiles.index"
 import sys, os, lucene, threading, time
 from datetime import datetime
 
-from java.io import File
+from java.nio.file import Paths
 from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
 from org.apache.lucene.analysis.standard import StandardAnalyzer
 from org.apache.lucene.document import Document, Field, FieldType
-from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig
+from org.apache.lucene.index import \
+    FieldInfo, IndexWriter, IndexWriterConfig, IndexOptions
 from org.apache.lucene.store import SimpleFSDirectory
-from org.apache.lucene.util import Version
 
 """
-This class is loosely based on the Lucene (java implementation) demo class 
+This class is loosely based on the Lucene (java implementation) demo class
 org.apache.lucene.demo.IndexFiles.  It will take a directory as an argument
 and will index all of the files in that directory and downward recursively.
 It will index on the file path, the file name and the file contents.  The
@@ -41,9 +41,9 @@ class IndexFiles(object):
         if not os.path.exists(storeDir):
             os.mkdir(storeDir)
 
-        store = SimpleFSDirectory(File(storeDir))
+        store = SimpleFSDirectory(Paths.get(storeDir))
         analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
-        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
+        config = IndexWriterConfig(analyzer)
         config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
         writer = IndexWriter(store, config)
 
@@ -59,17 +59,15 @@ class IndexFiles(object):
     def indexDocs(self, root, writer):
 
         t1 = FieldType()
-        t1.setIndexed(True)
         t1.setStored(True)
         t1.setTokenized(False)
-        t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)
-        
+        t1.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
+
         t2 = FieldType()
-        t2.setIndexed(True)
         t2.setStored(False)
         t2.setTokenized(True)
-        t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
-        
+        t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+
         for root, dirnames, filenames in os.walk(root):
             for filename in filenames:
                 if not filename.endswith('.txt'):
@@ -101,7 +99,7 @@ if __name__ == '__main__':
     try:
         base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
         IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR),
-                   StandardAnalyzer(Version.LUCENE_CURRENT))
+                   StandardAnalyzer())
         end = datetime.now()
         print end - start
     except Exception, e:
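
Taken together, the IndexFiles.py hunks above capture the Lucene 4.x -> 6.x API moves: java.io.File is replaced by java.nio.file.Paths, IndexWriterConfig and StandardAnalyzer no longer take a Version argument, and index options now come from org.apache.lucene.index.IndexOptions rather than FieldInfo. The following is a minimal sketch of the updated writer setup, not part of the commit; it assumes lucene.initVM() has already been called, and the store path, token limit and sample document are illustrative only:

    from java.nio.file import Paths
    from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
    from org.apache.lucene.analysis.standard import StandardAnalyzer
    from org.apache.lucene.document import Document, Field, FieldType
    from org.apache.lucene.index import IndexWriter, IndexWriterConfig, IndexOptions
    from org.apache.lucene.store import SimpleFSDirectory

    # open the on-disk index; no Version constant is needed in 6.x
    store = SimpleFSDirectory(Paths.get("IndexFiles.index"))   # illustrative path
    analyzer = LimitTokenCountAnalyzer(StandardAnalyzer(), 1048576)
    config = IndexWriterConfig(analyzer)                       # was (Version, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    # stored, untokenized field type, as t1 in the sample
    t1 = FieldType()
    t1.setStored(True)
    t1.setTokenized(False)
    t1.setIndexOptions(IndexOptions.DOCS_AND_FREQS)            # was FieldInfo.IndexOptions

    doc = Document()
    doc.add(Field("name", "example.txt", t1))                  # illustrative document
    writer.addDocument(doc)
    writer.commit()
    writer.close()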

Modified: lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py?rev=1759963&r1=1759962&r2=1759963&view=diff
==============================================================================
--- lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py (original)
+++ lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py Fri Sep  9 09:20:26 2016
@@ -13,7 +13,7 @@
 # ====================================================================
 
 # This sample illustrates how to write an Analyzer 'extension' in Python.
-# 
+#
 #   What is happening behind the scenes ?
 #
 # The PorterStemmerAnalyzer python class does not in fact extend Analyzer,
@@ -30,12 +30,11 @@ import sys, os, lucene
 from datetime import datetime
 from IndexFiles import IndexFiles
 
-from org.apache.lucene.analysis.core import \
-    LowerCaseFilter, StopFilter, StopAnalyzer
+from org.apache.lucene.analysis import LowerCaseFilter, StopFilter
+from org.apache.lucene.analysis.core import StopAnalyzer
 from org.apache.lucene.analysis.en import PorterStemFilter
 from org.apache.lucene.analysis.standard import \
     StandardTokenizer, StandardFilter
-from org.apache.lucene.util import Version
 from org.apache.pylucene.analysis import PythonAnalyzer
 
 
@@ -43,12 +42,11 @@ class PorterStemmerAnalyzer(PythonAnalyz
 
     def createComponents(self, fieldName, reader):
 
-        source = StandardTokenizer(Version.LUCENE_CURRENT, reader)
-        filter = StandardFilter(Version.LUCENE_CURRENT, source)
-        filter = LowerCaseFilter(Version.LUCENE_CURRENT, filter)
+        source = StandardTokenizer(reader)
+        filter = StandardFilter(source)
+        filter = LowerCaseFilter(filter)
         filter = PorterStemFilter(filter)
-        filter = StopFilter(Version.LUCENE_CURRENT, filter,
-                            StopAnalyzer.ENGLISH_STOP_WORDS_SET)
+        filter = StopFilter(filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET)
 
         return self.TokenStreamComponents(source, filter)
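
As a usage note (not part of the commit), the rewritten filter chain simply drops the Version argument from each constructor and now takes LowerCaseFilter and StopFilter from org.apache.lucene.analysis instead of org.apache.lucene.analysis.core. A hedged sketch of driving IndexFiles.py with this analyzer, run from inside the sample module; the command-line wiring here mirrors IndexFiles.py's own __main__ block and the index directory name is illustrative:

    import sys, lucene
    from datetime import datetime
    from IndexFiles import IndexFiles

    if __name__ == '__main__':
        lucene.initVM()       # the JVM must be running before any Lucene class is touched
        start = datetime.now()
        # index the directory named on the command line with the
        # Python-side Porter-stemming analyzer defined above
        IndexFiles(sys.argv[1], "IndexFiles.index", PorterStemmerAnalyzer())
        print datetime.now() - start    # the samples are Python 2, as in 'print end - start' above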