Posted to commits@geode.apache.org by up...@apache.org on 2015/09/01 00:02:18 UTC

[2/2] incubator-geode git commit: Making binary keys work properly with the lucene index

Making binary keys work properly with the lucene index

The earlier attempt did not actually store the binary key data in the
index. Lucene 5.3 allows a StringField to be created with binary
(BytesRef) data, so this code now uses that to index non-string keys.
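
For illustration, a minimal self-contained sketch of the two paths
addKey() now takes (KeyFieldSketch is a made-up name for this example;
the "_KEY" field name and the serialization steps mirror SerializerUtil
in the diff below):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.util.BytesRef;

    import com.gemstone.gemfire.DataSerializer;

    public class KeyFieldSketch {
      public static void addKey(Object key, Document doc) throws IOException {
        if (key instanceof String) {
          // A String key is indexed directly as the term text of a StringField.
          doc.add(new StringField("_KEY", (String) key, Store.YES));
        } else {
          // Any other key is serialized with DataSerializer first; the
          // Lucene 5.3 StringField(String, BytesRef, Store) constructor
          // then indexes the raw bytes as a single binary term.
          ByteArrayOutputStream buffer = new ByteArrayOutputStream();
          DataOutputStream out = new DataOutputStream(buffer);
          DataSerializer.writeObject(key, out);
          out.flush();
          doc.add(new StringField("_KEY", new BytesRef(buffer.toByteArray()), Store.YES));
        }
      }
    }

Indexing string keys directly avoids the serialization round trip and
lets them match plain term queries on the key field.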

This change also enables the previously ignored unit tests for update
and delete, with both binary and string keys.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/e6739f86
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/e6739f86
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/e6739f86

Branch: refs/heads/feature/GEODE-11
Commit: e6739f86a1f3826ad17871454c8c43e3028b4823
Parents: 5ed921c
Author: Dan Smith <up...@apache.org>
Authored: Fri Aug 28 17:20:38 2015 -0700
Committer: Dan Smith <up...@apache.org>
Committed: Mon Aug 31 14:56:30 2015 -0700

----------------------------------------------------------------------
 .../repository/serializer/SerializerUtil.java   |  67 ++++---
 .../IndexRepositoryImplJUnitTest.java           | 175 +++++++++++++++++++
 .../SingleIndexRepositoryImplJUnitTest.java     | 110 ------------
 3 files changed, 220 insertions(+), 132 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e6739f86/gemfire-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/SerializerUtil.java
----------------------------------------------------------------------
diff --git a/gemfire-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/SerializerUtil.java b/gemfire-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/SerializerUtil.java
index 07e89c9..30224b4 100644
--- a/gemfire-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/SerializerUtil.java
+++ b/gemfire-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/SerializerUtil.java
@@ -1,40 +1,38 @@
 package com.gemstone.gemfire.cache.lucene.internal.repository.serializer;
 
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
 import java.io.IOException;
 
-import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FloatField;
 import org.apache.lucene.document.IntField;
 import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.BytesRef;
 
 import com.gemstone.gemfire.DataSerializer;
 import com.gemstone.gemfire.InternalGemFireError;
-import com.gemstone.gemfire.internal.HeapDataOutputStream;
-import com.gemstone.gemfire.internal.Version;
 import com.gemstone.gemfire.internal.util.BlobHelper;
 
 /**
  * Static utility functions for mapping objects to lucene documents
  */
 public class SerializerUtil {
-  private static final String KEY_FIELD = "_STORED_KEY";
-  private static final String KEY_SEARCH_FIELD = "_SEARCH_KEY";
+  private static final String KEY_FIELD = "_KEY";
   
   /**
    * A small buffer for converting keys to byte[] arrays.
    */
-  private static ThreadLocal<HeapDataOutputStream> buffer = new ThreadLocal<HeapDataOutputStream>() {
+  private static ThreadLocal<ByteArrayOutputStream> LOCAL_BUFFER = new ThreadLocal<ByteArrayOutputStream>() {
     @Override
-    protected HeapDataOutputStream initialValue() {
-      return new HeapDataOutputStream(Version.CURRENT);
+    protected ByteArrayOutputStream initialValue() {
+      return new ByteArrayOutputStream();
     }
   };
 
@@ -45,9 +43,11 @@ public class SerializerUtil {
    * Add a gemfire key to a document
    */
   public static void addKey(Object key, Document doc) {
-    BytesRef keyBytes = keyToBytes(key);
-    doc.add(new BinaryDocValuesField(KEY_SEARCH_FIELD, keyBytes));
-    doc.add(new StoredField(KEY_FIELD, keyBytes));
+    if(key instanceof String) {
+      doc.add(new StringField(KEY_FIELD, (String) key, Store.YES));
+    } else {
+      doc.add(new StringField(KEY_FIELD, keyToBytes(key), Store.YES));
+    }
   }
 
   /**
@@ -88,10 +88,11 @@ public class SerializerUtil {
    * Extract the gemfire key from a lucene document
    */
   public static Object getKey(Document doc) {
-    try {
-      return BlobHelper.deserializeBlob(doc.getField(KEY_FIELD).binaryValue().bytes);
-    } catch (ClassNotFoundException | IOException e) {
-      throw new InternalGemFireError("Unable to deserialize key", e);
+    IndexableField field = doc.getField(KEY_FIELD);
+    if(field.stringValue() != null) {
+      return field.stringValue();
+    } else {
+      return  keyFromBytes(field.binaryValue());
     }
   }
  
@@ -99,7 +100,12 @@ public class SerializerUtil {
    * Extract the gemfire key term from a lucene document
    */
   public static Term getKeyTerm(Document doc) {
-    return new Term(KEY_SEARCH_FIELD, doc.getField(KEY_FIELD).binaryValue());
+    IndexableField field = doc.getField(KEY_FIELD);
+    if(field.stringValue() != null) {
+      return new Term(KEY_FIELD, field.stringValue());
+    } else {
+      return new Term(KEY_FIELD, field.binaryValue());
+    }
   }
   
   /**
@@ -107,20 +113,37 @@ public class SerializerUtil {
    * update or delete the document associated with this key.
    */
   public static Term toKeyTerm(Object key) {
-    return new Term(KEY_SEARCH_FIELD, keyToBytes(key));
+    if(key instanceof String) {
+      return new Term(KEY_FIELD, (String) key);
+    } else {
+      return new Term(KEY_FIELD, keyToBytes(key));
+    }
+  }
+  
+  private static Object keyFromBytes(BytesRef bytes) {
+    try {
+      return BlobHelper.deserializeBlob(bytes.bytes);
+    } catch (ClassNotFoundException | IOException e) {
+      throw new InternalGemFireError(e);
+    }
   }
   
   /**
    * Convert a key to a byte array.
    */
   private static BytesRef keyToBytes(Object key)  {
-    buffer.get().reset();
+    ByteArrayOutputStream buffer = LOCAL_BUFFER.get();
+    
     try {
-      DataSerializer.writeObject(key, buffer.get());
+      DataOutputStream out = new DataOutputStream(buffer);
+      DataSerializer.writeObject(key, out);
+      out.flush();
+      BytesRef result = new BytesRef(buffer.toByteArray());
+      buffer.reset();
+      return result;
     } catch (IOException e) {
       throw new InternalGemFireError("Unable to serialize key", e);
     }
-    return new BytesRef(buffer.get().toByteArray());
   }
 
 }
\ No newline at end of file
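
The payoff of collapsing both key representations into the single _KEY
field is that updates and deletes can target a document with one Term,
whatever the key type. A short usage sketch (KeyTermSketch and
deleteByKey are illustrative names, not part of the commit):

    import java.io.IOException;

    import org.apache.lucene.index.IndexWriter;

    import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.SerializerUtil;

    public class KeyTermSketch {
      // toKeyTerm() resolves a String key to a text term and any other key
      // to a binary term, both over the same _KEY field, so one code path
      // covers both cases.
      static void deleteByKey(IndexWriter writer, Object key) throws IOException {
        writer.deleteDocuments(SerializerUtil.toKeyTerm(key));
        writer.commit();
      }
    }

deleteByKey(writer, "key4") and deleteByKey(writer, new ByteWrapper(bytes))
should behave identically, which is what the enabled tests below exercise
with random binary keys.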

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e6739f86/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitTest.java
new file mode 100644
index 0000000..e47b7bd
--- /dev/null
+++ b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitTest.java
@@ -0,0 +1,175 @@
+package com.gemstone.gemfire.cache.lucene.internal.repository;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey;
+import com.gemstone.gemfire.cache.lucene.internal.filesystem.File;
+import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer;
+import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.SerializerUtil;
+import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.Type2;
+import com.gemstone.gemfire.test.junit.categories.IntegrationTest;
+
+/**
+ * Test of the {@link IndexRepository} and everything below
+ * it. This tests that we can save gemfire objects or PDXInstance
+ * objects into a lucene index and search for those objects later.
+ */
+@Category(IntegrationTest.class)
+public class IndexRepositoryImplJUnitTest {
+
+  private IndexRepositoryImpl repo;
+  private HeterogenousLuceneSerializer mapper;
+  private DirectoryReader reader;
+  private StandardAnalyzer analyzer = new StandardAnalyzer();
+  private IndexWriter writer;
+
+  @Before
+  public void setUp() throws IOException {
+    ConcurrentHashMap<String, File> fileRegion = new ConcurrentHashMap<String, File>();
+    ConcurrentHashMap<ChunkKey, byte[]> chunkRegion = new ConcurrentHashMap<ChunkKey, byte[]>();
+    RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);
+    IndexWriterConfig config = new IndexWriterConfig(analyzer);
+    writer = new IndexWriter(dir, config);
+    reader = DirectoryReader.open(writer, true);
+    String[] indexedFields= new String[] {"s", "i", "l", "d", "f", "s2", "missing"};
+    mapper = new HeterogenousLuceneSerializer(indexedFields);
+    repo = new IndexRepositoryImpl(writer, mapper);
+  }
+  
+  @Test
+  public void testAddDocs() throws IOException, ParseException {
+    repo.create("key1", new Type2("bacon maple bar", 1, 2L, 3.0, 4.0f, "Grape Ape doughnut"));
+    repo.create("key2", new Type2("McMinnville Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
+    repo.create("key3", new Type2("Voodoo Doll doughnut", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
+    repo.create("key4", new Type2("Portland Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
+    repo.commit();
+    
+    checkQuery("Cream", "s", "key2", "key4");
+  }
+  
+  @Test
+  public void testUpdateAndRemoveStringKeys() throws IOException, ParseException {
+    updateAndRemove("key1", "key2", "key3", "key4");
+  }
+  
+  @Test
+  public void testUpdateAndRemoveBinaryKeys() throws IOException, ParseException {
+    
+    ByteWrapper key1 = randomKey();
+    ByteWrapper key2 = randomKey();
+    ByteWrapper key3 = randomKey();
+    ByteWrapper key4 = randomKey();
+    
+    updateAndRemove(key1, key2, key3, key4);
+  }
+
+  private void updateAndRemove(Object key1, Object key2, Object key3,
+      Object key4) throws IOException, ParseException {
+    repo.create(key1, new Type2("bacon maple bar", 1, 2L, 3.0, 4.0f, "Grape Ape doughnut"));
+    repo.create(key2, new Type2("McMinnville Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
+    repo.create(key3, new Type2("Voodoo Doll doughnut", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
+    repo.create(key4, new Type2("Portland Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
+    repo.commit();
+    
+    repo.update(key3, new Type2("Boston Cream Pie", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
+    repo.delete(key4);
+    repo.commit();
+
+//    BooleanQuery q = new BooleanQuery();
+//    q.add(new TermQuery(SerializerUtil.toKeyTerm("key3")), Occur.MUST_NOT);
+//    writer.deleteDocuments(q);
+//    writer.commit();
+    
+    //Make sure the updates and deletes were applied
+    checkQuery("doughnut", "s", key2);
+    checkQuery("Cream", "s", key2, key3);
+  }
+
+  private ByteWrapper randomKey() {
+    Random rand = new Random();
+    int size = rand.nextInt(2048) + 50;
+    byte[] key = new byte[size];
+    rand.nextBytes(key);
+    return new ByteWrapper(key);
+  }
+
+  private void checkQuery(String queryTerm, String queryField, Object ... expectedKeys)
+      throws IOException, ParseException {
+    QueryParser parser = new QueryParser(queryField, analyzer);
+    
+    DirectoryReader result = DirectoryReader.openIfChanged(reader);
+    reader = result == null ? reader : result;
+    IndexSearcher searcher = new IndexSearcher(reader);
+    TopDocs results = searcher.search(parser.parse(queryTerm), 100);
+    
+    Set<Object> expectedSet = new HashSet<Object>();
+    expectedSet.addAll(Arrays.asList(expectedKeys));
+    Set<Object> actualKeys = new HashSet<Object>();
+    for(ScoreDoc scoreDoc: results.scoreDocs) {
+      Document doc = searcher.doc(scoreDoc.doc);
+      assertEquals(1, doc.getFields().size());
+      actualKeys.add(SerializerUtil.getKey(doc));
+    }
+    assertEquals(expectedSet, actualKeys);
+  }
+
+  /**
+   * A wrapper around a byte array that implements equals,
+   * for comparison checks.
+   */
+  private static class ByteWrapper implements Serializable {
+    private byte[] bytes;
+
+    
+    public ByteWrapper(byte[] bytes) {
+      super();
+      this.bytes = bytes;
+    }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + Arrays.hashCode(bytes);
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (getClass() != obj.getClass())
+        return false;
+      ByteWrapper other = (ByteWrapper) obj;
+      if (!Arrays.equals(bytes, other.bytes))
+        return false;
+      return true;
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e6739f86/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/SingleIndexRepositoryImplJUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/SingleIndexRepositoryImplJUnitTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/SingleIndexRepositoryImplJUnitTest.java
deleted file mode 100644
index bfae55a..0000000
--- a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/SingleIndexRepositoryImplJUnitTest.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package com.gemstone.gemfire.cache.lucene.internal.repository;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory;
-import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey;
-import com.gemstone.gemfire.cache.lucene.internal.filesystem.File;
-import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer;
-import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.SerializerUtil;
-import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.Type2;
-import com.gemstone.gemfire.test.junit.categories.IntegrationTest;
-
-/**
- * Test of the SingleIndexRepostory and everything below
- * it. This tests that we can save gemfire objects or PDXInstance
- * objects into a lucene index and search for those objects later.
- */
-@Category(IntegrationTest.class)
-public class SingleIndexRepositoryImplJUnitTest {
-
-  private IndexRepositoryImpl repo;
-  private HeterogenousLuceneSerializer mapper;
-  private DirectoryReader reader;
-  private StandardAnalyzer analyzer = new StandardAnalyzer();
-
-  @Before
-  public void setUp() throws IOException {
-    ConcurrentHashMap<String, File> fileRegion = new ConcurrentHashMap<String, File>();
-    ConcurrentHashMap<ChunkKey, byte[]> chunkRegion = new ConcurrentHashMap<ChunkKey, byte[]>();
-    RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);
-    IndexWriterConfig config = new IndexWriterConfig(analyzer);
-    IndexWriter writer = new IndexWriter(dir, config);
-    reader = DirectoryReader.open(writer, true);
-    String[] indexedFields= new String[] {"s", "i", "l", "d", "f", "s2", "missing"};
-    mapper = new HeterogenousLuceneSerializer(indexedFields);
-    repo = new IndexRepositoryImpl(writer, mapper);
-  }
-  
-  @Test
-  public void testAddDocs() throws IOException, ParseException {
-    repo.create("key1", new Type2("bacon maple bar", 1, 2L, 3.0, 4.0f, "Grape Ape doughnut"));
-    repo.create("key2", new Type2("McMinnville Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
-    repo.create("key3", new Type2("Voodoo Doll doughnut", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
-    repo.create("key4", new Type2("Portland Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
-    repo.commit();
-    
-    checkQuery("Cream", "s", "key2", "key4");
-  }
-  
-  @Test
-  @Ignore("This test is not yet working. The deletes aren't recognized")
-  public void testUpdateAndRemove() throws IOException, ParseException {
-    repo.create("key1", new Type2("bacon maple bar", 1, 2L, 3.0, 4.0f, "Grape Ape doughnut"));
-    repo.create("key2", new Type2("McMinnville Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
-    repo.create("key3", new Type2("Voodoo Doll doughnut", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
-    repo.create("key4", new Type2("Portland Cream doughnut", 1, 2L, 3.0, 4.0f, "Captain my Captain doughnut"));
-    repo.commit();
-    
-    repo.update("key3", new Type2("Boston Cream Pie", 1, 2L, 3.0, 4.0f, "Toasted coconut doughnut"));
-    repo.delete("key4");
-    repo.commit();
-    
-    
-    //Make sure the updates and deletes were applied
-    checkQuery("doughnut", "s", "key2");
-    checkQuery("Cream", "s", "key2", "key3");
-  }
-
-  private void checkQuery(String queryTerm, String queryField, String ... expectedKeys)
-      throws IOException, ParseException {
-    QueryParser parser = new QueryParser(queryField, analyzer);
-    
-    reader = DirectoryReader.openIfChanged(reader);
-    IndexSearcher searcher = new IndexSearcher(reader);
-    TopDocs results = searcher.search(parser.parse(queryTerm), 100);
-//    TopDocs results = searcher.search(new TermQuery(new Term("s", "Cream")), 100);
-    
-    Set<String> expectedSet = new HashSet<String>();
-    expectedSet.addAll(Arrays.asList(expectedKeys));
-    Set<Object> actualKeys = new HashSet<Object>();
-    for(ScoreDoc scoreDoc: results.scoreDocs) {
-      Document doc = searcher.doc(scoreDoc.doc);
-      assertEquals(1, doc.getFields().size());
-      actualKeys.add(SerializerUtil.getKey(doc));
-    }
-    assertEquals(expectedSet, actualKeys);
-  }
-
-}