You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2012/06/06 10:46:11 UTC

svn commit: r1346784 - in /lucene/dev/trunk/lucene: ./ facet/src/java/org/apache/lucene/facet/taxonomy/directory/ facet/src/java/org/apache/lucene/facet/taxonomy/writercache/ facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/ facet/src/j...

Author: shaie
Date: Wed Jun  6 08:46:10 2012
New Revision: 1346784

URL: http://svn.apache.org/viewvc?rev=1346784&view=rev
Log:
LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jun  6 08:46:10 2012
@@ -6,7 +6,10 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 5.0.0 =======================
 
-(No changes)
+New features
+
+* LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces
+  the taxonomy in place with the given one. (Shai Erera)
 
 ======================= Lucene 4.0.0 =======================
 

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Wed Jun  6 08:46:10 2012
@@ -93,6 +93,7 @@ public class DirectoryTaxonomyWriter imp
    */
   public static final String INDEX_CREATE_TIME = "index.create.time";
 
+  private Directory dir;
   private IndexWriter indexWriter;
   private int nextID;
   private char delimiter = Consts.DEFAULT_DELIMITER;
@@ -115,8 +116,8 @@ public class DirectoryTaxonomyWriter imp
   private DirectoryReader reader;
   private int cacheMisses;
 
-  /** Records the taxonomy index creation time. */
-  private final String createTime;
+  /** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
+  private String createTime;
   
   /** Reads the commit data from a Directory. */
   private static Map<String, String> readCommitData(Directory dir) throws IOException {
@@ -204,8 +205,9 @@ public class DirectoryTaxonomyWriter imp
       }
     }
     
+    dir = directory;
     IndexWriterConfig config = createIndexWriterConfig(openMode);
-    indexWriter = openIndexWriter(directory, config);
+    indexWriter = openIndexWriter(dir, config);
     
     // verify (to some extent) that merge policy in effect would preserve category docids 
     assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) : 
@@ -282,7 +284,7 @@ public class DirectoryTaxonomyWriter imp
   protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
     // Make sure we use a MergePolicy which always merges adjacent segments and thus
     // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
-    return new IndexWriterConfig(Version.LUCENE_40,
+    return new IndexWriterConfig(Version.LUCENE_50,
         new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
         new LogByteSizeMergePolicy());
   }
@@ -1021,4 +1023,32 @@ public class DirectoryTaxonomyWriter imp
     doClose();
   }
   
+  /**
+   * Replaces the current taxonomy with the given one. This method should
+   * generally be called in conjunction with
+   * {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy
+   * and the search index content.
+   */
+  public void replaceTaxonomy(Directory taxoDir) throws IOException {
+    // replace the taxonomy by doing IW optimized operations
+    indexWriter.deleteAll();
+    indexWriter.addIndexes(taxoDir);
+    refreshInternalReader();
+    nextID = indexWriter.maxDoc();
+    
+    // need to clear the cache, so that addCategory won't accidentally return
+    // old categories that are in the cache.
+    cache.clear();
+    cacheIsComplete = false;
+    alreadyCalledFillCache = false;
+    
+    // update createTime, since replacing the taxonomy is just like re-creating it
+    createTime = Long.toString(System.nanoTime());
+  }
+
+  /** Returns the {@link Directory} of this taxonomy writer. */
+  public Directory getDirectory() {
+    return dir;
+  }
+
 }

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java Wed Jun  6 08:46:10 2012
@@ -112,4 +112,10 @@ public interface TaxonomyWriterCache {
    */
   public boolean hasRoom(int numberOfEntries);
 
+  /**
+   * Clears the content of the cache. Unlike {@link #close()}, the caller can
+   * assume that the cache is still operable after this method returns.
+   */
+  public void clear();
+  
 }

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java Wed Jun  6 08:46:10 2012
@@ -34,13 +34,24 @@ import org.apache.lucene.facet.taxonomy.
 public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {  
 
   private final ReadWriteLock lock = new ReentrantReadWriteLock();
+  private final int initialCapcity, numHashArrays;
+  private final float loadFactor;
+  
   private CompactLabelToOrdinal cache;
 
   public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays) {
     this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
+    this.initialCapcity = initialCapcity;
+    this.numHashArrays = numHashArrays;
+    this.loadFactor = loadFactor;
   }
 
   @Override
+  public void clear() {
+    cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
+  }
+  
+  @Override
   public synchronized void close() {
     cache = null;
   }

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java Wed Jun  6 08:46:10 2012
@@ -66,6 +66,11 @@ public class LruTaxonomyWriterCache impl
   }
 
   @Override
+  public void clear() {
+    cache.clear();
+  }
+  
+  @Override
   public synchronized void close() {
     cache.clear();
     cache = null;

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1346784&r1=1346783&r2=1346784&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Wed Jun  6 08:46:10 2012
@@ -17,6 +17,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -59,6 +60,8 @@ public class TestDirectoryTaxonomyWriter
     public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
     @Override
     public boolean hasRoom(int numberOfEntries) { return false; }
+    @Override
+    public void clear() {}
     
   }
   
@@ -255,5 +258,43 @@ public class TestDirectoryTaxonomyWriter
     
     dir.close();
   }
+
+  private String getCreateTime(Directory taxoDir) throws IOException {
+    SegmentInfos infos = new SegmentInfos();
+    infos.read(taxoDir);
+    return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+  }
   
+  @Test
+  public void testReplaceTaxonomy() throws Exception {
+    Directory input = newDirectory();
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
+    taxoWriter.addCategory(new CategoryPath("a"));
+    taxoWriter.close();
+    
+    Directory dir = newDirectory();
+    taxoWriter = new DirectoryTaxonomyWriter(dir);
+    int ordinal = taxoWriter.addCategory(new CategoryPath("b"));
+    taxoWriter.addCategory(new CategoryPath("c"));
+    taxoWriter.commit();
+    
+    String origCreateTime = getCreateTime(dir);
+    
+    // replace the taxonomy with the input one
+    taxoWriter.replaceTaxonomy(input);
+    
+    // add the same category again -- it should not receive the same ordinal !
+    int newOrdinal = taxoWriter.addCategory(new CategoryPath("b"));
+    assertNotSame("new ordinal cannot be the original ordinal", ordinal, newOrdinal);
+    assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdinal);
+    
+    taxoWriter.close();
+    
+    String newCreateTime = getCreateTime(dir);
+    assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+    
+    dir.close();
+    input.close();
+  }
+
 }