You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/10/04 16:12:59 UTC

svn commit: r1629397 - in /lucene/dev/branches/lucene5969/lucene: backward-codecs/src/java/org/apache/lucene/codecs/lucene40/ codecs/src/java/org/apache/lucene/codecs/simpletext/ codecs/src/test/org/apache/lucene/codecs/simpletext/ core/src/java/org/ap...

Author: rmuir
Date: Sat Oct  4 14:12:58 2014
New Revision: 1629397

URL: http://svn.apache.org/r1629397
Log:
LUCENE-5969: add SimpleText cfs

Added:
    lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java   (with props)
    lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java   (with props)
Modified:
    lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java
    lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
    lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java

Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java?rev=1629397&r1=1629396&r2=1629397&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java Sat Oct  4 14:12:58 2014
@@ -234,12 +234,15 @@ final class Lucene40CompoundReader exten
     throw new UnsupportedOperationException();
   }
   
-  /** Not implemented
-   * @throws UnsupportedOperationException always: not supported by CFS */
   @Override
   public Lock makeLock(String name) {
     throw new UnsupportedOperationException();
   }
+  
+  @Override
+  public void clearLock(String name) throws IOException {
+    throw new UnsupportedOperationException();
+  }
 
   @Override
   public String toString() {

Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java?rev=1629397&r1=1629396&r2=1629397&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java Sat Oct  4 14:12:58 2014
@@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValue
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
 
 /**
  * plain text index format.
@@ -44,8 +43,7 @@ public final class SimpleTextCodec exten
   private final NormsFormat normsFormat = new SimpleTextNormsFormat();
   private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
   private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
-  // nocommit
-  private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+  private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
   
   public SimpleTextCodec() {
     super("SimpleText");

Added: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java?rev=1629397&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java (added)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java Sat Oct  4 14:12:58 2014
@@ -0,0 +1,246 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.ParseException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Locale;
+
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.MergeState.CheckAbort;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.BaseDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Lock;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * plain text compound format.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextCompoundFormat extends CompoundFormat {
+
+  @Override
+  public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
+    String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
+    final IndexInput in = dir.openInput(dataFile, context);
+    
+    BytesRefBuilder scratch = new BytesRefBuilder();
+
+    // first get to TOC:
+    DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
+    long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1;
+    in.seek(pos);
+    SimpleTextUtil.readLine(in, scratch);
+    assert StringHelper.startsWith(scratch.get(), TABLEPOS);
+    long tablePos = -1; 
+    try {
+      tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue();
+    } catch (ParseException e) {
+      throw new CorruptIndexException("can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in);
+    }
+    
+    // seek to TOC and read it
+    in.seek(tablePos);
+    SimpleTextUtil.readLine(in, scratch);
+    assert StringHelper.startsWith(scratch.get(), TABLE);
+    int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE));
+    
+    final String fileNames[] = new String[numEntries];
+    final long startOffsets[] = new long[numEntries];
+    final long endOffsets[] = new long[numEntries];
+    
+    for (int i = 0; i < numEntries; i++) {
+      SimpleTextUtil.readLine(in, scratch);
+      assert StringHelper.startsWith(scratch.get(), TABLENAME);
+      fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME));
+      
+      if (i > 0) {
+        // files must be unique and in sorted order
+        assert fileNames[i].compareTo(fileNames[i-1]) > 0;
+      }
+      
+      SimpleTextUtil.readLine(in, scratch);
+      assert StringHelper.startsWith(scratch.get(), TABLESTART);
+      startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART));
+      
+      SimpleTextUtil.readLine(in, scratch);
+      assert StringHelper.startsWith(scratch.get(), TABLEEND);
+      endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND));
+    }
+    
+    return new BaseDirectory() {
+      
+      private int getIndex(String name) throws IOException {
+        int index = Arrays.binarySearch(fileNames, name);
+        if (index < 0) {
+          throw new FileNotFoundException("No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")");
+        }
+        return index;
+      }
+      
+      @Override
+      public String[] listAll() throws IOException {
+        ensureOpen();
+        return fileNames.clone();
+      }
+      
+      @Override
+      public long fileLength(String name) throws IOException {
+        ensureOpen();
+        int index = getIndex(name);
+        return endOffsets[index] - startOffsets[index];
+      }
+      
+      @Override
+      public IndexInput openInput(String name, IOContext context) throws IOException {
+        ensureOpen();
+        int index = getIndex(name);
+        return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]);
+      }
+      
+      @Override
+      public void close() throws IOException {
+        isOpen = false;
+        in.close();
+      }
+      
+      // write methods: disabled
+      
+      @Override
+      public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); }
+      
+      @Override
+      public void sync(Collection<String> names) { throw new UnsupportedOperationException(); }
+      
+      @Override
+      public void deleteFile(String name) { throw new UnsupportedOperationException(); }
+      
+      @Override
+      public void renameFile(String source, String dest) { throw new UnsupportedOperationException(); }
+      
+      @Override
+      public Lock makeLock(String name) { throw new UnsupportedOperationException(); }
+      
+      @Override
+      public void clearLock(String name) { throw new UnsupportedOperationException(); }
+    };
+  }
+
+  @Override
+  public void write(Directory dir, SegmentInfo si, Collection<String> files, CheckAbort checkAbort, IOContext context) throws IOException {
+    String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
+    
+    int numFiles = files.size();
+    String names[] = files.toArray(new String[numFiles]);
+    Arrays.sort(names);
+    long startOffsets[] = new long[numFiles];
+    long endOffsets[] = new long[numFiles];
+    
+    BytesRefBuilder scratch = new BytesRefBuilder();
+    
+    try (IndexOutput out = dir.createOutput(dataFile, context)) { 
+      for (int i = 0; i < names.length; i++) {
+        // write header for file
+        SimpleTextUtil.write(out, HEADER);
+        SimpleTextUtil.write(out, names[i], scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        // write bytes for file
+        startOffsets[i] = out.getFilePointer();
+        try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) {
+          out.copyBytes(in, in.length());
+        }
+        endOffsets[i] = out.getFilePointer();
+        
+        checkAbort.work(endOffsets[i] - startOffsets[i]);
+      }
+      
+      long tocPos = out.getFilePointer();
+      
+      // write CFS table
+      SimpleTextUtil.write(out, TABLE);
+      SimpleTextUtil.write(out, Integer.toString(numFiles), scratch);
+      SimpleTextUtil.writeNewline(out);
+     
+      for (int i = 0; i < names.length; i++) {
+        SimpleTextUtil.write(out, TABLENAME);
+        SimpleTextUtil.write(out, names[i], scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, TABLESTART);
+        SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch);
+        SimpleTextUtil.writeNewline(out);
+
+        SimpleTextUtil.write(out, TABLEEND);
+        SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch);
+        SimpleTextUtil.writeNewline(out);
+      }
+      
+      DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
+      SimpleTextUtil.write(out, TABLEPOS);
+      SimpleTextUtil.write(out, df.format(tocPos), scratch);
+      SimpleTextUtil.writeNewline(out);
+    }
+  }
+
+  @Override
+  public String[] files(SegmentInfo si) {
+    return new String[] { IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION) };
+  }
+  
+  // helper method to strip away 'prefix' from 'scratch' and return as String
+  private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) throws IOException {
+    return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
+  }
+  
+  /** Extension of compound file */
+  static final String DATA_EXTENSION = "scf";
+  
+  final static BytesRef HEADER  = new BytesRef("cfs entry for: ");
+  
+  final static BytesRef TABLE =      new BytesRef("table of contents, size: ");
+  final static BytesRef TABLENAME =  new BytesRef("  filename: ");
+  final static BytesRef TABLESTART = new BytesRef("    start: ");
+  final static BytesRef TABLEEND =   new BytesRef("    end: ");
+  
+  final static BytesRef TABLEPOS = new BytesRef("table of contents begins at offset: ");
+  
+  final static String OFFSETPATTERN;
+  static {
+    int numDigits = Long.toString(Long.MAX_VALUE).length();
+    char pattern[] = new char[numDigits];
+    Arrays.fill(pattern, '0');
+    OFFSETPATTERN = new String(pattern);
+  }
+}

Added: lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java?rev=1629397&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java (added)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java Sat Oct  4 14:12:58 2014
@@ -0,0 +1,30 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseCompoundFormatTestCase;
+
+public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
+  private final Codec codec = new SimpleTextCodec();
+
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
+}

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java?rev=1629397&r1=1629396&r2=1629397&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java Sat Oct  4 14:12:58 2014
@@ -60,6 +60,8 @@ final class Lucene50CompoundReader exten
   /**
    * Create a new CompoundFileDirectory.
    */
+  // TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
+  // this need not be a "general purpose" directory anymore (it only writes index files)
   public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
     this.directory = directory;
     this.segmentName = si.name;
@@ -179,14 +181,17 @@ final class Lucene50CompoundReader exten
     throw new UnsupportedOperationException();
   }
   
-  /** Not implemented
-   * @throws UnsupportedOperationException always: not supported by CFS */
   @Override
   public Lock makeLock(String name) {
     throw new UnsupportedOperationException();
   }
 
   @Override
+  public void clearLock(String name) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
   public String toString() {
     return "CompoundFileDirectory(segment=\"" + segmentName + "\" in dir=" + directory + ")";
   }

Modified: lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java?rev=1629397&r1=1629396&r2=1629397&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java (original)
+++ lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java Sat Oct  4 14:12:58 2014
@@ -316,6 +316,28 @@ public abstract class BaseCompoundFormat
     dir.close();
   }
   
+  // test that cfs reader is read-only
+  public void testClearLockDisabled() throws IOException {
+    final String testfile = "_123.test";
+
+    Directory dir = newDirectory();
+    IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
+    out.writeInt(3);
+    out.close();
+ 
+    SegmentInfo si = newSegmentInfo(dir, "_123");
+    si.getCodec().compoundFormat().write(dir, si, Collections.<String>emptyList(), MergeState.CheckAbort.NONE, IOContext.DEFAULT);
+    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
+    try {
+      cfs.clearLock("foobar");
+      fail("didn't get expected exception");
+    } catch (UnsupportedOperationException expected) {
+      // expected UOE
+    }
+    cfs.close();
+    dir.close();
+  }
+  
   /** 
    * This test creates a compound file based on a large number of files of
    * various length. The file content is generated randomly. The sizes range
@@ -379,7 +401,7 @@ public abstract class BaseCompoundFormat
     
     final IndexInput[] ins = new IndexInput[FILE_COUNT];
     for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
-      ins[fileIdx] = cfs.openInput("file." + fileIdx, newIOContext(random()));
+      ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
     }
     
     assertEquals(1, dir.getFileHandleCount());