You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by up...@apache.org on 2015/10/16 23:55:09 UTC
[3/3] incubator-geode git commit: Adding a microbenchmark of create
and query performance.
Adding a microbenchmark of create and query performance.
Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/1f597bb7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/1f597bb7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/1f597bb7
Branch: refs/heads/feature/GEODE-11
Commit: 1f597bb707fb8482a3808e085c86d3c6b9d25fcf
Parents: d623cf6
Author: Dan Smith <up...@apache.org>
Authored: Fri Oct 16 14:53:58 2015 -0700
Committer: Dan Smith <up...@apache.org>
Committed: Fri Oct 16 14:54:50 2015 -0700
----------------------------------------------------------------------
...IndexRepositoryImplJUnitPerformanceTest.java | 422 +++++++++++++++++++
1 file changed, 422 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/1f597bb7/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
----------------------------------------------------------------------
diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
new file mode 100644
index 0000000..8641254
--- /dev/null
+++ b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java
@@ -0,0 +1,422 @@
+package com.gemstone.gemfire.cache.lucene.internal.repository;
+
+import static org.junit.Assert.*;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import com.carrotsearch.randomizedtesting.generators.RandomStrings;
+import com.gemstone.gemfire.DataSerializable;
+import com.gemstone.gemfire.DataSerializer;
+import com.gemstone.gemfire.cache.Cache;
+import com.gemstone.gemfire.cache.CacheFactory;
+import com.gemstone.gemfire.cache.PartitionAttributesFactory;
+import com.gemstone.gemfire.cache.Region;
+import com.gemstone.gemfire.cache.RegionShortcut;
+import com.gemstone.gemfire.cache.asyncqueue.AsyncEventQueue;
+import com.gemstone.gemfire.cache.lucene.LuceneIndex;
+import com.gemstone.gemfire.cache.lucene.LuceneQuery;
+import com.gemstone.gemfire.cache.lucene.LuceneQueryProvider;
+import com.gemstone.gemfire.cache.lucene.LuceneQueryResults;
+import com.gemstone.gemfire.cache.lucene.LuceneService;
+import com.gemstone.gemfire.cache.lucene.LuceneServiceProvider;
+import com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImpl;
+import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory;
+import com.gemstone.gemfire.cache.lucene.internal.distributed.TopEntriesCollector;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.File;
+import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer;
+import com.gemstone.gemfire.cache.query.QueryException;
+import com.gemstone.gemfire.test.junit.categories.PerformanceTest;
+
+/**
+ * Microbenchmark of the IndexRepository to compare an
+ * IndexRepository built on top of cache with a
+ * stock lucene IndexWriter with a RAMDirectory.
+ */
+@Category(PerformanceTest.class)
+public class IndexRepositoryImplJUnitPerformanceTest {
+
+ private static final int NUM_WORDS = 1000;
+ private static int[] COMMIT_INTERVAL = new int[] {100, 1000, 5000};
+ private static int NUM_ENTRIES = 500_000;
+ private static int NUM_QUERIES = 500_000;
+
+ private StandardAnalyzer analyzer = new StandardAnalyzer();
+
+ @Test
+ public void testIndexRepository() throws Exception {
+
+
+ doTest("IndexRepository", new TestCallbacks() {
+
+ private Cache cache;
+ private IndexRepositoryImpl repo;
+ private IndexWriter writer;
+
+ @Override
+ public void addObject(String key, String text) throws Exception {
+ repo.create(key, new TestObject(text));
+ }
+
+ @Override
+ public void commit() throws Exception {
+ repo.commit();
+ }
+
+ @Override
+ public void init() throws Exception {
+ cache = new CacheFactory().set("mcast-port", "0")
+ .set("log-level", "error")
+ .create();
+ Region<String, File> fileRegion = cache.<String, File>createRegionFactory(RegionShortcut.REPLICATE).create("files");
+ Region<ChunkKey, byte[]> chunkRegion = cache.<ChunkKey, byte[]>createRegionFactory(RegionShortcut.REPLICATE).create("chunks");
+
+ RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);
+
+
+ IndexWriterConfig config = new IndexWriterConfig(analyzer);
+ writer = new IndexWriter(dir, config);
+ String[] indexedFields= new String[] {"text"};
+ HeterogenousLuceneSerializer mapper = new HeterogenousLuceneSerializer(indexedFields);
+ repo = new IndexRepositoryImpl(fileRegion, writer, mapper);
+ }
+
+ @Override
+ public void cleanup() throws IOException {
+ writer.close();
+ cache.close();
+ }
+
+ @Override
+ public void waitForAsync() throws Exception {
+ //do nothing
+ }
+
+ @Override
+ public int query(Query query) throws IOException {
+ TopEntriesCollector collector = new TopEntriesCollector();
+ repo.query(query, 100, collector);
+ return collector.size();
+ }
+ });
+ }
+
+ /**
+ * Test our full lucene index implementation
+ * @throws Exception
+ */
+ @Test
+ public void testLuceneIndex() throws Exception {
+
+
+ doTest("LuceneIndex", new TestCallbacks() {
+
+ private Cache cache;
+ private Region<String, TestObject> region;
+ private LuceneService service;
+
+ @Override
+ public void addObject(String key, String text) throws Exception {
+ region.create(key, new TestObject(text));
+ }
+
+ @Override
+ public void commit() throws Exception {
+ //NA
+ }
+
+ @Override
+ public void init() throws Exception {
+ cache = new CacheFactory().set("mcast-port", "0")
+ .set("log-level", "warning")
+ .create();
+ service = LuceneServiceProvider.get(cache);
+ service.createIndex("index", "/region", "text");
+ region = cache.<String, TestObject>createRegionFactory(RegionShortcut.PARTITION)
+ .setPartitionAttributes(new PartitionAttributesFactory<>().setTotalNumBuckets(1).create())
+ .create("region");
+ }
+
+ @Override
+ public void cleanup() throws IOException {
+ cache.close();
+ }
+
+ @Override
+ public void waitForAsync() throws Exception {
+ AsyncEventQueue aeq = cache.getAsyncEventQueue(LuceneServiceImpl.getUniqueIndexName("index", "/region"));
+
+ //We will be at most 10 ms off
+ while(aeq.size() > 0) {
+ Thread.sleep(10);
+ }
+ }
+
+ @Override
+ public int query(final Query query) throws Exception {
+ LuceneQuery<Object, Object> luceneQuery = service.createLuceneQueryFactory().create("index", "/region", new LuceneQueryProvider() {
+
+ @Override
+ public Query getQuery(LuceneIndex index) throws QueryException {
+ return query;
+ }
+ });
+
+ LuceneQueryResults<Object, Object> results = luceneQuery.search();
+ return results.size();
+ }
+ });
+ }
+
+ @Test
+ public void testLuceneWithRegionDirectory() throws Exception {
+ doTest("RegionDirectory", new TestCallbacks() {
+
+ private IndexWriter writer;
+ private SearcherManager searcherManager;
+
+ @Override
+ public void init() throws Exception {
+ RegionDirectory dir = new RegionDirectory(new ConcurrentHashMap<String, File>(), new ConcurrentHashMap<ChunkKey, byte[]>());
+ IndexWriterConfig config = new IndexWriterConfig(analyzer);
+ writer = new IndexWriter(dir, config);
+ searcherManager = new SearcherManager(writer, true, null);
+ }
+
+ @Override
+ public void addObject(String key, String text) throws Exception {
+ Document doc = new Document();
+ doc.add(new TextField("key", key, Store.YES));
+ doc.add(new TextField("text", text, Store.NO));
+ writer.addDocument(doc);
+ }
+
+ @Override
+ public void commit() throws Exception {
+ writer.commit();
+ searcherManager.maybeRefresh();
+ }
+
+ @Override
+ public void cleanup() throws Exception {
+ writer.close();
+ }
+
+ @Override
+ public void waitForAsync() throws Exception {
+ //do nothing
+ }
+
+ @Override
+ public int query(Query query) throws Exception {
+ IndexSearcher searcher = searcherManager.acquire();
+ try {
+ return searcher.count(query);
+ } finally {
+ searcherManager.release(searcher);
+ }
+ }
+
+ });
+
+ }
+
+ @Test
+ public void testLucene() throws Exception {
+ doTest("Lucene", new TestCallbacks() {
+
+ private IndexWriter writer;
+ private SearcherManager searcherManager;
+
+ @Override
+ public void init() throws Exception {
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriterConfig config = new IndexWriterConfig(analyzer);
+ writer = new IndexWriter(dir, config);
+ searcherManager = new SearcherManager(writer, true, null);
+ }
+
+ @Override
+ public void addObject(String key, String text) throws Exception {
+ Document doc = new Document();
+ doc.add(new TextField("key", key, Store.YES));
+ doc.add(new TextField("text", text, Store.NO));
+ writer.addDocument(doc);
+ }
+
+ @Override
+ public void commit() throws Exception {
+ writer.commit();
+ searcherManager.maybeRefresh();
+ }
+
+ @Override
+ public void cleanup() throws Exception {
+ writer.close();
+ }
+
+ @Override
+ public void waitForAsync() throws Exception {
+ //do nothing
+ }
+
+ @Override
+ public int query(Query query) throws Exception {
+ IndexSearcher searcher = searcherManager.acquire();
+ try {
+ return searcher.count(query);
+ } finally {
+ searcherManager.release(searcher);
+ }
+ }
+
+ });
+
+ }
+
+ private void doTest(String testName, TestCallbacks callbacks) throws Exception {
+
+ //Create some random words. We need to be careful
+ //to make sure we get NUM_WORDS distinct words here
+ Set<String> wordSet = new HashSet<String>();
+ Random rand = new Random();
+ while(wordSet.size() < NUM_WORDS) {
+ int length = rand.nextInt(12) + 3;
+ char[] text = new char[length];
+ for(int i = 0; i < length; i++) {
+ text[i] = (char) (rand.nextInt(26) + 97);
+ }
+ wordSet.add(new String(text));
+ }
+ List<String> words = new ArrayList<String>(wordSet.size());
+ words.addAll(wordSet);
+
+
+
+ //warm up
+ writeRandomWords(callbacks, words, rand, NUM_ENTRIES / 10, NUM_QUERIES / 10, COMMIT_INTERVAL[0]);
+
+ //Do the actual test
+
+ for(int i = 0; i < COMMIT_INTERVAL.length; i++) {
+ Results results = writeRandomWords(callbacks, words, rand, NUM_ENTRIES, NUM_QUERIES / 10, COMMIT_INTERVAL[i]);
+
+ System.out.println(testName + " writes(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.writeTime));
+ System.out.println(testName + " queries(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.queryTime));
+ }
+ }
+
+ private Results writeRandomWords(TestCallbacks callbacks, List<String> words,
+ Random rand, int numEntries, int numQueries, int commitInterval) throws Exception {
+ Results results = new Results();
+ callbacks.init();
+ int[] counts = new int[words.size()];
+ long start = System.nanoTime();
+ try {
+ for(int i =0; i < numEntries; i++) {
+ int word1 = rand.nextInt(words.size());
+ int word2 = rand.nextInt(words.size());
+ counts[word1]++;
+ counts[word2]++;
+ String value = words.get(word1) + " " + words.get(word2);
+ callbacks.addObject("key" + i, value);
+
+ if(i % commitInterval == 0 && i != 0) {
+ callbacks.commit();
+ }
+ }
+ callbacks.commit();
+ callbacks.waitForAsync();
+ long end = System.nanoTime();
+ results.writeTime = end - start;
+
+
+ start = System.nanoTime();
+ for(int i=0; i < numQueries; i++) {
+ int wordIndex = rand.nextInt(words.size());
+ String word = words.get(wordIndex);
+ Query query = new TermQuery(new Term("text", word));
+ int size = callbacks.query(query);
+// int size = callbacks.query(parser.parse(word));
+ //All of my tests sometimes seem to be missing a couple of words, including the stock lucene
+// assertEquals("Error on query " + i + " word=" + word, counts[wordIndex], size);
+ }
+ end = System.nanoTime();
+ results.queryTime = end - start;
+
+ return results;
+ } finally {
+ callbacks.cleanup();
+ }
+ }
+
+ private static class TestObject implements DataSerializable {
+ private String text;
+
+ public TestObject() {
+
+ }
+
+ public TestObject(String text) {
+ super();
+ this.text = text;
+ }
+
+ @Override
+ public void toData(DataOutput out) throws IOException {
+ DataSerializer.writeString(text, out);
+ }
+
+ @Override
+ public void fromData(DataInput in)
+ throws IOException, ClassNotFoundException {
+ text = DataSerializer.readString(in);
+ }
+
+ @Override
+ public String toString() {
+ return text;
+ }
+
+
+ }
+
+ private interface TestCallbacks {
+ public void init() throws Exception;
+ public int query(Query query) throws Exception;
+ public void addObject(String key, String text) throws Exception;
+ public void commit() throws Exception;
+ public void waitForAsync() throws Exception;
+ public void cleanup() throws Exception;
+ }
+
+ private static class Results {
+ long writeTime;
+ long queryTime;
+ }
+}