Posted to commits@lucene.apache.org by rm...@apache.org on 2012/05/24 15:21:55 UTC

svn commit: r1342247 - /lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java

Author: rmuir
Date: Thu May 24 13:21:55 2012
New Revision: 1342247

URL: http://svn.apache.org/viewvc?rev=1342247&view=rev
Log:
LUCENE-4055: start cleaning up 3.x SIreader

Modified:
    lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java

Modified: lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java?rev=1342247&r1=1342246&r2=1342247&view=diff
==============================================================================
--- lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene4055/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java Thu May 24 13:21:55 2012
@@ -50,7 +50,7 @@ public class Lucene3xSegmentInfoReader e
     infos.counter = input.readInt(); // read counter
     Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
     for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
-      SegmentInfoPerCommit siPerCommit = reader.readSegmentInfo(null, directory, format, input);
+      SegmentInfoPerCommit siPerCommit = reader.readLegacySegmentInfo(directory, format, input);
       SegmentInfo si = siPerCommit.info;
 
       if (si.getVersion() == null) {
@@ -94,11 +94,6 @@ public class Lucene3xSegmentInfoReader e
 
   @Override
   public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException { 
-    return read(directory, segmentName, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE, context);
-  }
-
-  public SegmentInfo read(Directory directory, String segmentName, int format, IOContext context) throws IOException { 
-
     // NOTE: this is NOT how 3.x is really written...
     String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.SI_EXTENSION);
 
@@ -107,7 +102,8 @@ public class Lucene3xSegmentInfoReader e
     IndexInput input = directory.openInput(fileName, context);
 
     try {
-      SegmentInfo si = readSegmentInfo(segmentName, directory, format, input).info;
+      // nocommit: we need a version header
+      SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input);
       success = true;
       return si;
     } finally {
@@ -124,14 +120,16 @@ public class Lucene3xSegmentInfoReader e
       files.add(fileName);
     }
   }
-
-  private SegmentInfoPerCommit readSegmentInfo(String segmentName, Directory dir, int format, IndexInput input) throws IOException {
+  
+  /** reads from legacy 3.x segments_N */
+  private SegmentInfoPerCommit readLegacySegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
     // check that it is a format we can understand
+    assert format != Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE;
     if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) {
       throw new IndexFormatTooOldException(input, format,
                                            Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE);
     }
-    if (format < Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
+    if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) {
       throw new IndexFormatTooNewException(input, format,
                                            Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE);
     }
@@ -142,46 +140,26 @@ public class Lucene3xSegmentInfoReader e
       version = null;
     }
 
-    // NOTE: we ignore this and use the incoming arg
-    // instead, if it's non-null:
     final String name = input.readString();
-    if (segmentName == null) {
-      segmentName = name;
-    }
 
     final int docCount = input.readInt();
     final long delGen = input.readLong();
     
-    final int docStoreOffset;
+    final int docStoreOffset = input.readInt();
+    final Map<String,String> attributes = new HashMap<String,String>();
+    
+    // parse the docstore stuff and shove it into attributes
     final String docStoreSegment;
     final boolean docStoreIsCompoundFile;
-    final Map<String,String> attributes;
-    
-    if (format == Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
-      // we already upgraded to 4.x si format: so shared docstore stuff is in the attributes map.
-      attributes = input.readStringStringMap();
-      String v = attributes.get(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY);
-      docStoreOffset = v == null ? -1 : Integer.parseInt(v);
-      
-      v = attributes.get(Lucene3xSegmentInfoFormat.DS_NAME_KEY);
-      docStoreSegment = v == null ? segmentName : v;
-      
-      v = attributes.get(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY);
-      docStoreIsCompoundFile = v == null ? false : Boolean.parseBoolean(v);
+    if (docStoreOffset != -1) {
+      docStoreSegment = input.readString();
+      docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES;
+      attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset));
+      attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment);
+      attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
     } else {
-      // for older formats, parse the docstore stuff and shove it into attributes
-      attributes = new HashMap<String,String>();
-      docStoreOffset = input.readInt();
-      if (docStoreOffset != -1) {
-        docStoreSegment = input.readString();
-        docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES;
-        attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset));
-        attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment);
-        attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
-      } else {
-        docStoreSegment = name;
-        docStoreIsCompoundFile = false;
-      }
+      docStoreSegment = name;
+      docStoreIsCompoundFile = false;
     }
 
     // pre-4.0 indexes write a byte if there is a single norms file
@@ -214,63 +192,110 @@ public class Lucene3xSegmentInfoReader e
       final int hasVectors = input.readByte();
     }
 
-    final Set<String> files;
-    if (format == Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
-      files = input.readStringSet();
+    // Replicate logic from 3.x's SegmentInfo.files():
+    final Set<String> files = new HashSet<String>();
+    if (isCompoundFile) {
+      files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
     } else {
-      // Replicate logic from 3.x's SegmentInfo.files():
-      files = new HashSet<String>();
-      if (isCompoundFile) {
-        files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
+    }
+    
+    if (docStoreOffset != -1) {
+      if (docStoreIsCompoundFile) {
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
       } else {
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.PROX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xNormsProducer.NORMS_EXTENSION));
-      }
-
-      if (docStoreOffset != -1) {
-        if (docStoreIsCompoundFile) {
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
+      }
+    } else if (!isCompoundFile) {
+      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
+    }
+    
+    if (normGen != null) {
+      for(Map.Entry<Integer,Long> ent : normGen.entrySet()) {
+        long gen = ent.getValue();
+        if (gen >= SegmentInfo.YES) {
+          // Definitely a separate norm file, with generation:
+          files.add(IndexFileNames.fileNameFromGeneration(name, "s" + ent.getKey(), gen));
+        } else if (gen == SegmentInfo.NO) {
+          // No separate norm
         } else {
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
-        }
-      } else if (!isCompoundFile) {
-        files.add(IndexFileNames.segmentFileName(segmentName, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
-        files.add(IndexFileNames.segmentFileName(segmentName, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
-      }
-
-      if (normGen != null) {
-        for(Map.Entry<Integer,Long> ent : normGen.entrySet()) {
-          long gen = ent.getValue();
-          if (gen >= SegmentInfo.YES) {
-            // Definitely a separate norm file, with generation:
-            files.add(IndexFileNames.fileNameFromGeneration(segmentName, "s" + ent.getKey(), gen));
-          } else if (gen == SegmentInfo.NO) {
-            // No separate norm
-          } else {
-            // We should have already hit indexformat too old exception
-            assert false;
-          }
+          // We should have already hit indexformat too old exception
+          assert false;
         }
       }
     }
 
     // nocommit: convert normgen into attributes?
-    SegmentInfo info = new SegmentInfo(dir, version, segmentName, docCount, normGen, isCompoundFile,
+    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, normGen, isCompoundFile,
                                        null, diagnostics, Collections.unmodifiableMap(attributes));
     info.setFiles(files);
 
     SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen);
     return infoPerCommit;
   }
+
+  private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException {
+
+    final String version = input.readString();
+
+    // nocommit: we ignore this and use the incoming arg: don't write this
+    input.readString();
+
+    final int docCount = input.readInt();
+    // nocommit: dont write this
+    final long delGen = input.readLong();
+    
+    final Map<String,String> attributes = input.readStringStringMap();
+
+    // pre-4.0 indexes write a byte if there is a single norms file
+    byte b = input.readByte();
+
+    //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);
+
+    // nocommit: don't write this
+    assert 1 == b : "expected 1 but was: "+ b;
+    final int numNormGen = input.readInt();
+    final Map<Integer,Long> normGen;
+    if (numNormGen == SegmentInfo.NO) {
+      normGen = null;
+    } else {
+      normGen = new HashMap<Integer, Long>();
+      for(int j=0;j<numNormGen;j++) {
+        normGen.put(j, input.readLong());
+      }
+    }
+    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+    final int delCount = input.readInt();
+    assert delCount <= docCount;
+
+    // nocommit: unused, dont write this
+    final boolean hasProx = input.readByte() == 1;
+
+    final Map<String,String> diagnostics = input.readStringStringMap();
+
+    // nocommit: unused, dont write this
+    final int hasVectors = input.readByte();
+
+    final Set<String> files = input.readStringSet();
+
+    // nocommit: convert normgen into attributes?
+    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, normGen, isCompoundFile,
+                                       null, diagnostics, Collections.unmodifiableMap(attributes));
+    info.setFiles(files);
+    return info;
+  }
 }
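
For context on the docstore handling in the legacy path above: the shared-docstore fields read from a 3.x segments_N (offset, segment name, compound flag) are folded into the 4.x attributes map under the Lucene3xSegmentInfoFormat DS_* keys. A minimal standalone sketch of that mapping follows; it assumes illustrative key literals and helper names (they are not the actual constants or methods from the reader), and mirrors the "-1 means no shared docstore" convention and the defaulting shown in the removed FORMAT_4X_UPGRADE branch.

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative sketch only: folds legacy shared-docstore fields into an
    // attributes map and reads them back with the same defaults as the diff.
    // Key literals are assumed placeholders, not the real Lucene constants.
    public class DocStoreAttributesSketch {

      static final String DS_OFFSET_KEY = "ds.offset";      // assumed literal
      static final String DS_NAME_KEY = "ds.name";          // assumed literal
      static final String DS_COMPOUND_KEY = "ds.compound";  // assumed literal

      /** Fold legacy docstore fields into attributes; offset == -1 means no shared docstore. */
      static Map<String,String> toAttributes(int docStoreOffset,
                                             String docStoreSegment,
                                             boolean docStoreIsCompoundFile) {
        Map<String,String> attributes = new HashMap<String,String>();
        if (docStoreOffset != -1) {
          attributes.put(DS_OFFSET_KEY, Integer.toString(docStoreOffset));
          attributes.put(DS_NAME_KEY, docStoreSegment);
          attributes.put(DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
        }
        return attributes;
      }

      /** Read the offset back, defaulting to -1 when the key is absent. */
      static int offsetFromAttributes(Map<String,String> attributes) {
        String v = attributes.get(DS_OFFSET_KEY);
        return v == null ? -1 : Integer.parseInt(v);
      }

      public static void main(String[] args) {
        Map<String,String> attrs = toAttributes(42, "_0", true);
        System.out.println(attrs + " -> offset " + offsetFromAttributes(attrs));
      }
    }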