You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2016/07/05 04:26:03 UTC

cassandra git commit: Fix SASI PREFIX search in CONTAINS mode with partial terms

Repository: cassandra
Updated Branches:
  refs/heads/cassandra-3.9 0702e4580 -> 7107646ac


Fix SASI PREFIX search in CONTAINS mode with partial terms

patch by doanduyhai; reviewed by xedin for CASSANDRA-12073


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7107646a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7107646a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7107646a

Branch: refs/heads/cassandra-3.9
Commit: 7107646ace81fe8f9e1de1e87c5dc4cdfd9f6607
Parents: 0702e45
Author: Pavel Yaskevich <xe...@apache.org>
Authored: Mon Jul 4 14:14:59 2016 -0700
Committer: Pavel Yaskevich <xe...@apache.org>
Committed: Mon Jul 4 21:24:20 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cassandra/index/sasi/disk/OnDiskIndex.java  |  7 ++++
 .../cassandra/index/sasi/plan/Expression.java   | 11 ------
 .../unit/org/apache/cassandra/SchemaLoader.java | 35 +++++++++++++++++++
 .../cassandra/index/sasi/SASIIndexTest.java     | 30 +++++++++++++++-
 .../index/sasi/disk/OnDiskIndexTest.java        | 36 ++++++++++++++++++++
 6 files changed, 108 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ee5a4af..68854b3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.9
+ * Fix SASI PREFIX search in CONTAINS mode with partial terms (CASSANDRA-12073)
  * Increase size of flushExecutor thread pool (CASSANDRA-12071)
 Merged from 3.0:
  * Avoid digest mismatch with empty but static rows (CASSANDRA-12090)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
index 80092ef..4d43cd9 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
@@ -756,6 +756,13 @@ public class OnDiskIndex implements Iterable<OnDiskIndex.DataTerm>, Closeable
                 {
                     DataTerm currentTerm = currentBlock.getTerm(nextOffset());
 
+                    // we need to step over all of the partial terms, in PREFIX mode,
+                    // encountered by the query until upper-bound tells us to stop
+                    if (e.getOp() == Op.PREFIX && currentTerm.isPartial())
+                        continue;
+
+                    // haven't reached the start of the query range yet, let's
+                    // keep skipping the current term until the lower bound is satisfied
                     if (checkLower && !e.isLowerSatisfiedBy(currentTerm))
                         continue;
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
index ce420d1..cc156ee 100644
--- a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
@@ -322,9 +322,6 @@ public class Expression
         if (!hasLower())
             return true;
 
-        if (nonMatchingPartial(term))
-            return false;
-
         int cmp = term.compareTo(validator, lower.value, false);
         return cmp > 0 || cmp == 0 && lower.inclusive;
     }
@@ -334,9 +331,6 @@ public class Expression
         if (!hasUpper())
             return true;
 
-        if (nonMatchingPartial(term))
-            return false;
-
         int cmp = term.compareTo(validator, upper.value, false);
         return cmp < 0 || cmp == 0 && upper.inclusive;
     }
@@ -385,11 +379,6 @@ public class Expression
                 && exclusions.equals(o.exclusions);
     }
 
-    private boolean nonMatchingPartial(OnDiskIndex.DataTerm term)
-    {
-        return term.isPartial() && operation == Op.PREFIX;
-    }
-
     public static class Bound
     {
         public final ByteBuffer value;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/SchemaLoader.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java b/test/unit/org/apache/cassandra/SchemaLoader.java
index 6aea343..28fc8d5 100644
--- a/test/unit/org/apache/cassandra/SchemaLoader.java
+++ b/test/unit/org/apache/cassandra/SchemaLoader.java
@@ -645,6 +645,41 @@ public class SchemaLoader
         return cfm;
     }
 
+    public static CFMetaData fullTextSearchSASICFMD(String ksName, String cfName)
+    {
+        CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName)
+                                           .addPartitionKey("song_id", UUIDType.instance)
+                                           .addRegularColumn("title", UTF8Type.instance)
+                                           .addRegularColumn("artist", UTF8Type.instance)
+                                           .build();
+
+        Indexes indexes = cfm.getIndexes();
+        indexes = indexes.with(IndexMetadata.fromSchemaMetadata("title", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>()
+        {{
+            put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
+            put(IndexTarget.TARGET_OPTION_NAME, "title");
+            put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString());
+            put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer");
+            put("tokenization_enable_stemming", "true");
+            put("tokenization_locale", "en");
+            put("tokenization_skip_stop_words", "true");
+            put("tokenization_normalize_lowercase", "true");
+        }}));
+
+        indexes = indexes.with(IndexMetadata.fromSchemaMetadata("artist", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>()
+        {{
+            put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
+            put(IndexTarget.TARGET_OPTION_NAME, "artist");
+            put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString());
+            put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer");
+            put("case_sensitive", "false");
+
+        }}));
+
+        cfm.indexes(indexes);
+        return cfm;
+    }
+
     public static CompressionParams getCompressionParameters()
     {
         return getCompressionParameters(null);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index 498e82d..a27db74 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -88,6 +88,7 @@ public class SASIIndexTest
     private static final String CLUSTERING_CF_NAME_1 = "clustering_test_cf_1";
     private static final String CLUSTERING_CF_NAME_2 = "clustering_test_cf_2";
     private static final String STATIC_CF_NAME = "static_sasi_test_cf";
+    private static final String FTS_CF_NAME = "full_text_search_sasi_test_cf";
 
     @BeforeClass
     public static void loadSchema() throws ConfigurationException
@@ -98,7 +99,8 @@ public class SASIIndexTest
                                                                      Tables.of(SchemaLoader.sasiCFMD(KS_NAME, CF_NAME),
                                                                                SchemaLoader.clusteringSASICFMD(KS_NAME, CLUSTERING_CF_NAME_1),
                                                                                SchemaLoader.clusteringSASICFMD(KS_NAME, CLUSTERING_CF_NAME_2, "location"),
-                                                                               SchemaLoader.staticSASICFMD(KS_NAME, STATIC_CF_NAME))));
+                                                                               SchemaLoader.staticSASICFMD(KS_NAME, STATIC_CF_NAME),
+                                                                               SchemaLoader.fullTextSearchSASICFMD(KS_NAME, FTS_CF_NAME))));
     }
 
     @After
@@ -417,6 +419,32 @@ public class SASIIndexTest
     }
 
     @Test
+    public void testPrefixSearchWithContainsMode() throws Exception
+    {
+        testPrefixSearchWithContainsMode(false);
+        cleanupData();
+        testPrefixSearchWithContainsMode(true);
+    }
+
+    private void testPrefixSearchWithContainsMode(boolean forceFlush) throws Exception
+    {
+        ColumnFamilyStore store = Keyspace.open(KS_NAME).getColumnFamilyStore(FTS_CF_NAME);
+
+        executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("1a4abbcd-b5de-4c69-a578-31231e01ff09"), "Poker Face", "Lady Gaga");
+        executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("9472a394-359b-4a06-b1d5-b6afce590598"), "Forgetting the Way Home", "Our Lady of Bells");
+        executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("4f8dc18e-54e6-4e16-b507-c5324b61523b"), "Zamki na piasku", "Lady Pank");
+        executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("eaf294fa-bad5-49d4-8f08-35ba3636a706"), "Koncertowa", "Lady Pank");
+
+
+        if (forceFlush)
+            store.forceBlockingFlush();
+
+        final UntypedResultSet results = executeCQL(FTS_CF_NAME, "SELECT * FROM %s.%s WHERE artist LIKE 'lady%%'");
+        Assert.assertNotNull(results);
+        Assert.assertEquals(3, results.size());
+    }
+
+    @Test
     public void testMultiExpressionQueriesWhereRowSplitBetweenSSTables() throws Exception
     {
         testMultiExpressionQueriesWhereRowSplitBetweenSSTables(false);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
index bac23ea..a3985ca 100644
--- a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
@@ -36,6 +36,7 @@ import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.db.marshal.Int32Type;
 import org.apache.cassandra.db.marshal.LongType;
 import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.io.util.DataOutputBuffer;
 import org.apache.cassandra.utils.MurmurHash;
 import org.apache.cassandra.utils.Pair;
 
@@ -701,6 +702,41 @@ public class OnDiskIndexTest
         b.close();
     }
 
+    @Test
+    public void testPrefixSearchWithCONTAINSMode() throws Exception
+    {
+        Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>()
+        {{
+
+            put(UTF8Type.instance.decompose("lady gaga"), keyBuilder(1L));
+
+            // Partial term for 'lady of bells'
+            DataOutputBuffer ladyOfBellsBuffer = new DataOutputBuffer();
+            ladyOfBellsBuffer.writeShort(UTF8Type.instance.decompose("lady of bells").remaining() | (1 << OnDiskIndexBuilder.IS_PARTIAL_BIT));
+            ladyOfBellsBuffer.write(UTF8Type.instance.decompose("lady of bells"));
+            put(ladyOfBellsBuffer.asNewBuffer(), keyBuilder(2L));
+
+
+            put(UTF8Type.instance.decompose("lady pank"),  keyBuilder(3L));
+        }};
+
+        OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS);
+        for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet())
+            addAll(builder, e.getKey(), e.getValue());
+
+        File index = File.createTempFile("on-disk-sa-prefix-contains-search", "db");
+        index.deleteOnExit();
+
+        builder.finish(index);
+
+        OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter());
+
+        // check that lady% returns lady gaga (1) and lady pank (3) but not lady of bells (2)
+        Assert.assertEquals(convert(1, 3), convert(onDisk.search(expressionFor("lady", Operator.LIKE_PREFIX))));
+
+        onDisk.close();
+    }
+
     private void testSearchRangeWithSuperBlocks(OnDiskIndex onDiskIndex, long start, long end)
     {
         RangeIterator<Long, Token> tokens = onDiskIndex.search(expressionFor(start, true, end, false));