You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by as...@apache.org on 2015/11/11 22:24:10 UTC

[38/50] [abbrv] incubator-geode git commit: Adding a microbenchmark of create and query performance.

Adding a microbenchmark of create and query performance.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/1f597bb7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/1f597bb7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/1f597bb7

Branch: refs/heads/develop
Commit: 1f597bb707fb8482a3808e085c86d3c6b9d25fcf
Parents: d623cf6
Author: Dan Smith <up...@apache.org>
Authored: Fri Oct 16 14:53:58 2015 -0700
Committer: Dan Smith <up...@apache.org>
Committed: Fri Oct 16 14:54:50 2015 -0700

----------------------------------------------------------------------
 ...IndexRepositoryImplJUnitPerformanceTest.java | 422 +++++++++++++++++++
 1 file changed, 422 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/1f597bb7/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
----------------------------------------------------------------------
diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
new file mode 100644
index 0000000..8641254
--- /dev/null
+++ b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
@@ -0,0 +1,422 @@
+package com.gemstone.gemfire.cache.lucene.internal.repository;
+
+import static org.junit.Assert.*;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import com.carrotsearch.randomizedtesting.generators.RandomStrings;
+import com.gemstone.gemfire.DataSerializable;
+import com.gemstone.gemfire.DataSerializer;
+import com.gemstone.gemfire.cache.Cache;
+import com.gemstone.gemfire.cache.CacheFactory;
+import com.gemstone.gemfire.cache.PartitionAttributesFactory;
+import com.gemstone.gemfire.cache.Region;
+import com.gemstone.gemfire.cache.RegionShortcut;
+import com.gemstone.gemfire.cache.asyncqueue.AsyncEventQueue;
+import com.gemstone.gemfire.cache.lucene.LuceneIndex;
+import com.gemstone.gemfire.cache.lucene.LuceneQuery;
+import com.gemstone.gemfire.cache.lucene.LuceneQueryProvider;
+import com.gemstone.gemfire.cache.lucene.LuceneQueryResults;
+import com.gemstone.gemfire.cache.lucene.LuceneService;
+import com.gemstone.gemfire.cache.lucene.LuceneServiceProvider;
+import com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImpl;
+import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory;
+import com.gemstone.gemfire.cache.lucene.internal.distributed.TopEntriesCollector;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.File;
+import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer;
+import com.gemstone.gemfire.cache.query.QueryException;
+import com.gemstone.gemfire.test.junit.categories.PerformanceTest;
+
+/**
+ * Microbenchmark of the IndexRepository to compare an
+ * IndexRepository built on top of cache with a 
+ * stock lucene IndexWriter with a RAMDirectory.
+ */
+@Category(PerformanceTest.class)
+public class IndexRepositoryImplJUnitPerformanceTest {
+  
+  private static final int NUM_WORDS = 1000;
+  private static int[] COMMIT_INTERVAL = new int[] {100, 1000, 5000};
+  private static int NUM_ENTRIES = 500_000;
+  private static int NUM_QUERIES = 500_000;
+
+  private StandardAnalyzer analyzer = new StandardAnalyzer();
+  
+  @Test
+  public  void testIndexRepository() throws Exception {
+    
+
+    doTest("IndexRepository", new TestCallbacks() {
+
+      private Cache cache;
+      private IndexRepositoryImpl repo;
+      private IndexWriter writer;
+
+      @Override
+      public void addObject(String key, String text) throws Exception {
+        repo.create(key, new TestObject(text));
+      }
+
+      @Override
+      public void commit()  throws Exception {
+        repo.commit();
+      }
+
+      @Override
+      public void init() throws Exception {
+        cache = new CacheFactory().set("mcast-port", "0")
+            .set("log-level", "error")
+            .create();
+        Region<String, File> fileRegion = cache.<String, File>createRegionFactory(RegionShortcut.REPLICATE).create("files");
+        Region<ChunkKey, byte[]> chunkRegion = cache.<ChunkKey, byte[]>createRegionFactory(RegionShortcut.REPLICATE).create("chunks");
+
+        RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);
+        
+        
+        IndexWriterConfig config = new IndexWriterConfig(analyzer);
+        writer = new IndexWriter(dir, config);
+        String[] indexedFields= new String[] {"text"};
+        HeterogenousLuceneSerializer mapper = new HeterogenousLuceneSerializer(indexedFields);
+        repo = new IndexRepositoryImpl(fileRegion, writer, mapper);
+      }
+
+      @Override
+      public void cleanup() throws IOException {
+        writer.close();
+        cache.close();
+      }
+
+      @Override
+      public void waitForAsync() throws Exception {
+        //do nothing
+      }
+
+      @Override
+      public int query(Query query) throws IOException {
+        TopEntriesCollector collector = new TopEntriesCollector();
+        repo.query(query, 100, collector);
+        return collector.size();
+      }
+    });
+  }
+  
+  /**
+   * Test our full lucene index implementation
+   * @throws Exception
+   */
+  @Test
+  public void testLuceneIndex() throws Exception {
+    
+
+    doTest("LuceneIndex", new TestCallbacks() {
+
+      private Cache cache;
+      private Region<String, TestObject> region;
+      private LuceneService service;
+
+      @Override
+      public void addObject(String key, String text) throws Exception {
+        region.create(key, new TestObject(text));
+      }
+
+      @Override
+      public void commit()  throws Exception {
+        //NA
+      }
+
+      @Override
+      public void init() throws Exception {
+        cache = new CacheFactory().set("mcast-port", "0")
+            .set("log-level", "warning")
+            .create();
+        service = LuceneServiceProvider.get(cache);
+        service.createIndex("index", "/region", "text");
+        region = cache.<String, TestObject>createRegionFactory(RegionShortcut.PARTITION)
+            .setPartitionAttributes(new PartitionAttributesFactory<>().setTotalNumBuckets(1).create())
+            .create("region");
+      }
+
+      @Override
+      public void cleanup() throws IOException {
+        cache.close();
+      }
+      
+      @Override
+      public void waitForAsync() throws Exception {
+        AsyncEventQueue aeq = cache.getAsyncEventQueue(LuceneServiceImpl.getUniqueIndexName("index", "/region"));
+        
+        //We will be at most 10 ms off
+        while(aeq.size() > 0) {
+          Thread.sleep(10);
+        }
+      }
+
+      @Override
+      public int query(final Query query) throws Exception {
+        LuceneQuery<Object, Object> luceneQuery = service.createLuceneQueryFactory().create("index", "/region", new LuceneQueryProvider() {
+          
+          @Override
+          public Query getQuery(LuceneIndex index) throws QueryException {
+            return query;
+          }
+        });
+        
+        LuceneQueryResults<Object, Object> results = luceneQuery.search();
+        return results.size();
+      }
+    });
+  }
+  
+  @Test
+  public  void testLuceneWithRegionDirectory() throws Exception {
+    doTest("RegionDirectory", new TestCallbacks() {
+
+      private IndexWriter writer;
+      private SearcherManager searcherManager;
+
+      @Override
+      public void init() throws Exception {
+        RegionDirectory dir = new RegionDirectory(new ConcurrentHashMap<String, File>(), new ConcurrentHashMap<ChunkKey, byte[]>());
+        IndexWriterConfig config = new IndexWriterConfig(analyzer);
+        writer = new IndexWriter(dir, config);
+        searcherManager = new SearcherManager(writer, true, null);
+      }
+
+      @Override
+      public void addObject(String key, String text) throws Exception {
+        Document doc = new Document();
+        doc.add(new TextField("key", key, Store.YES));
+        doc.add(new TextField("text", text, Store.NO));
+        writer.addDocument(doc);
+      }
+
+      @Override
+      public void commit() throws Exception {
+        writer.commit();
+        searcherManager.maybeRefresh();
+      }
+
+      @Override
+      public void cleanup() throws Exception {
+        writer.close();
+      }
+      
+      @Override
+      public void waitForAsync() throws Exception {
+        //do nothing
+      }
+
+      @Override
+      public int query(Query query) throws Exception {
+        IndexSearcher searcher = searcherManager.acquire();
+        try {
+          return searcher.count(query);
+        } finally {
+          searcherManager.release(searcher);
+        }
+      }
+      
+    });
+    
+  }
+  
+  @Test
+  public  void testLucene() throws Exception {
+    doTest("Lucene", new TestCallbacks() {
+
+      private IndexWriter writer;
+      private SearcherManager searcherManager;
+
+      @Override
+      public void init() throws Exception {
+        RAMDirectory dir = new RAMDirectory();
+        IndexWriterConfig config = new IndexWriterConfig(analyzer);
+        writer = new IndexWriter(dir, config);
+        searcherManager = new SearcherManager(writer, true, null);
+      }
+
+      @Override
+      public void addObject(String key, String text) throws Exception {
+        Document doc = new Document();
+        doc.add(new TextField("key", key, Store.YES));
+        doc.add(new TextField("text", text, Store.NO));
+        writer.addDocument(doc);
+      }
+
+      @Override
+      public void commit() throws Exception {
+        writer.commit();
+        searcherManager.maybeRefresh();
+      }
+
+      @Override
+      public void cleanup() throws Exception {
+        writer.close();
+      }
+      
+      @Override
+      public void waitForAsync() throws Exception {
+        //do nothing
+      }
+
+      @Override
+      public int query(Query query) throws Exception {
+        IndexSearcher searcher = searcherManager.acquire();
+        try {
+          return searcher.count(query);
+        } finally {
+          searcherManager.release(searcher);
+        }
+      }
+      
+    });
+    
+  }
+  
+  private void doTest(String testName, TestCallbacks callbacks) throws Exception {
+
+    //Create some random words. We need to be careful
+    //to make sure we get NUM_WORDS distinct words here
+    Set<String> wordSet = new HashSet<String>();
+    Random rand = new Random();
+    while(wordSet.size() < NUM_WORDS) {
+      int length = rand.nextInt(12) + 3;
+      char[] text = new char[length];
+      for(int i = 0; i < length; i++) {
+        text[i] = (char) (rand.nextInt(26) + 97);
+      }
+      wordSet.add(new String(text));
+    }
+    List<String> words = new ArrayList<String>(wordSet.size());
+    words.addAll(wordSet);
+    
+    
+    
+    //warm up
+    writeRandomWords(callbacks, words, rand, NUM_ENTRIES / 10, NUM_QUERIES / 10, COMMIT_INTERVAL[0]);
+    
+    //Do the actual test
+    
+    for(int i = 0; i < COMMIT_INTERVAL.length; i++) {
+      Results results = writeRandomWords(callbacks, words, rand, NUM_ENTRIES, NUM_QUERIES / 10, COMMIT_INTERVAL[i]);
+    
+      System.out.println(testName + " writes(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.writeTime));
+      System.out.println(testName + " queries(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.queryTime));
+    }
+  }
+
+  private Results writeRandomWords(TestCallbacks callbacks, List<String> words,
+      Random rand, int numEntries, int numQueries, int commitInterval) throws Exception {
+    Results results  = new Results();
+    callbacks.init();
+    int[] counts = new int[words.size()];
+    long start = System.nanoTime();
+    try {
+      for(int i =0; i < numEntries; i++) {
+        int word1 = rand.nextInt(words.size());
+        int word2 = rand.nextInt(words.size());
+        counts[word1]++;
+        counts[word2]++;
+        String value = words.get(word1) + " " + words.get(word2);
+        callbacks.addObject("key" + i, value);
+
+        if(i % commitInterval == 0 && i != 0) {
+          callbacks.commit();
+        }
+      }
+      callbacks.commit();
+      callbacks.waitForAsync();
+      long end = System.nanoTime();
+      results.writeTime = end - start;
+      
+      
+      start = System.nanoTime();
+      for(int i=0; i < numQueries; i++) {
+        int wordIndex = rand.nextInt(words.size());
+        String word = words.get(wordIndex);
+        Query query = new TermQuery(new Term("text", word));
+        int size  = callbacks.query(query);
+//        int size  = callbacks.query(parser.parse(word));
+        //All of my tests sometimes seem to be missing a couple of words, including the stock lucene
+//        assertEquals("Error on query " + i + " word=" + word, counts[wordIndex], size);
+      }
+      end = System.nanoTime();
+      results.queryTime = end - start;
+      
+      return results;
+    } finally {
+      callbacks.cleanup();
+    }
+  }
+
+  private static class TestObject implements DataSerializable {
+    private String text;
+
+    public TestObject() {
+      
+    }
+    
+    public TestObject(String text) {
+      super();
+      this.text = text;
+    }
+
+    @Override
+    public void toData(DataOutput out) throws IOException {
+      DataSerializer.writeString(text, out);
+    }
+
+    @Override
+    public void fromData(DataInput in)
+        throws IOException, ClassNotFoundException {
+      text = DataSerializer.readString(in);
+    }
+
+    @Override
+    public String toString() {
+      return text;
+    }
+    
+    
+  }
+  
+  private interface TestCallbacks {
+    public void init() throws Exception;
+    public int query(Query query) throws Exception;
+    public void addObject(String key, String text)  throws Exception;
+    public void commit() throws Exception;
+    public void waitForAsync() throws Exception;
+    public void cleanup() throws Exception;
+  }
+  
+  private static class Results {
+    long writeTime;
+    long queryTime;
+  }
+}