You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2016/07/05 04:26:03 UTC
cassandra git commit: Fix SASI PREFIX search in CONTAINS mode with
partial terms
Repository: cassandra
Updated Branches:
refs/heads/cassandra-3.9 0702e4580 -> 7107646ac
Fix SASI PREFIX search in CONTAINS mode with partial terms
patch by doanduyhai; reviewed by xedin for CASSANDRA-12073
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7107646a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7107646a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7107646a
Branch: refs/heads/cassandra-3.9
Commit: 7107646ace81fe8f9e1de1e87c5dc4cdfd9f6607
Parents: 0702e45
Author: Pavel Yaskevich <xe...@apache.org>
Authored: Mon Jul 4 14:14:59 2016 -0700
Committer: Pavel Yaskevich <xe...@apache.org>
Committed: Mon Jul 4 21:24:20 2016 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cassandra/index/sasi/disk/OnDiskIndex.java | 7 ++++
.../cassandra/index/sasi/plan/Expression.java | 11 ------
.../unit/org/apache/cassandra/SchemaLoader.java | 35 +++++++++++++++++++
.../cassandra/index/sasi/SASIIndexTest.java | 30 +++++++++++++++-
.../index/sasi/disk/OnDiskIndexTest.java | 36 ++++++++++++++++++++
6 files changed, 108 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ee5a4af..68854b3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.9
+ * Fix SASI PREFIX search in CONTAINS mode with partial terms (CASSANDRA-12073)
* Increase size of flushExecutor thread pool (CASSANDRA-12071)
Merged from 3.0:
* Avoid digest mismatch with empty but static rows (CASSANDRA-12090)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
index 80092ef..4d43cd9 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
@@ -756,6 +756,13 @@ public class OnDiskIndex implements Iterable<OnDiskIndex.DataTerm>, Closeable
{
DataTerm currentTerm = currentBlock.getTerm(nextOffset());
+ // we need to step over all of the partial terms, in PREFIX mode,
+ // encountered by the query until upper-bound tells us to stop
+ if (e.getOp() == Op.PREFIX && currentTerm.isPartial())
+ continue;
+
+ // haven't reached the start of the query range yet, let's
+ // keep skipping the current term until the lower bound is satisfied
if (checkLower && !e.isLowerSatisfiedBy(currentTerm))
continue;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
index ce420d1..cc156ee 100644
--- a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
@@ -322,9 +322,6 @@ public class Expression
if (!hasLower())
return true;
- if (nonMatchingPartial(term))
- return false;
-
int cmp = term.compareTo(validator, lower.value, false);
return cmp > 0 || cmp == 0 && lower.inclusive;
}
@@ -334,9 +331,6 @@ public class Expression
if (!hasUpper())
return true;
- if (nonMatchingPartial(term))
- return false;
-
int cmp = term.compareTo(validator, upper.value, false);
return cmp < 0 || cmp == 0 && upper.inclusive;
}
@@ -385,11 +379,6 @@ public class Expression
&& exclusions.equals(o.exclusions);
}
- private boolean nonMatchingPartial(OnDiskIndex.DataTerm term)
- {
- return term.isPartial() && operation == Op.PREFIX;
- }
-
public static class Bound
{
public final ByteBuffer value;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/SchemaLoader.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java b/test/unit/org/apache/cassandra/SchemaLoader.java
index 6aea343..28fc8d5 100644
--- a/test/unit/org/apache/cassandra/SchemaLoader.java
+++ b/test/unit/org/apache/cassandra/SchemaLoader.java
@@ -645,6 +645,41 @@ public class SchemaLoader
return cfm;
}
+ public static CFMetaData fullTextSearchSASICFMD(String ksName, String cfName)
+ {
+ CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName)
+ .addPartitionKey("song_id", UUIDType.instance)
+ .addRegularColumn("title", UTF8Type.instance)
+ .addRegularColumn("artist", UTF8Type.instance)
+ .build();
+
+ Indexes indexes = cfm.getIndexes();
+ indexes = indexes.with(IndexMetadata.fromSchemaMetadata("title", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>()
+ {{
+ put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
+ put(IndexTarget.TARGET_OPTION_NAME, "title");
+ put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString());
+ put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer");
+ put("tokenization_enable_stemming", "true");
+ put("tokenization_locale", "en");
+ put("tokenization_skip_stop_words", "true");
+ put("tokenization_normalize_lowercase", "true");
+ }}));
+
+ indexes = indexes.with(IndexMetadata.fromSchemaMetadata("artist", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>()
+ {{
+ put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
+ put(IndexTarget.TARGET_OPTION_NAME, "artist");
+ put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString());
+ put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer");
+ put("case_sensitive", "false");
+
+ }}));
+
+ cfm.indexes(indexes);
+ return cfm;
+ }
+
public static CompressionParams getCompressionParameters()
{
return getCompressionParameters(null);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index 498e82d..a27db74 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -88,6 +88,7 @@ public class SASIIndexTest
private static final String CLUSTERING_CF_NAME_1 = "clustering_test_cf_1";
private static final String CLUSTERING_CF_NAME_2 = "clustering_test_cf_2";
private static final String STATIC_CF_NAME = "static_sasi_test_cf";
+ private static final String FTS_CF_NAME = "full_text_search_sasi_test_cf";
@BeforeClass
public static void loadSchema() throws ConfigurationException
@@ -98,7 +99,8 @@ public class SASIIndexTest
Tables.of(SchemaLoader.sasiCFMD(KS_NAME, CF_NAME),
SchemaLoader.clusteringSASICFMD(KS_NAME, CLUSTERING_CF_NAME_1),
SchemaLoader.clusteringSASICFMD(KS_NAME, CLUSTERING_CF_NAME_2, "location"),
- SchemaLoader.staticSASICFMD(KS_NAME, STATIC_CF_NAME))));
+ SchemaLoader.staticSASICFMD(KS_NAME, STATIC_CF_NAME),
+ SchemaLoader.fullTextSearchSASICFMD(KS_NAME, FTS_CF_NAME))));
}
@After
@@ -417,6 +419,32 @@ public class SASIIndexTest
}
@Test
+ public void testPrefixSearchWithContainsMode() throws Exception
+ {
+ testPrefixSearchWithContainsMode(false);
+ cleanupData();
+ testPrefixSearchWithContainsMode(true);
+ }
+
+ private void testPrefixSearchWithContainsMode(boolean forceFlush) throws Exception
+ {
+ ColumnFamilyStore store = Keyspace.open(KS_NAME).getColumnFamilyStore(FTS_CF_NAME);
+
+ executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("1a4abbcd-b5de-4c69-a578-31231e01ff09"), "Poker Face", "Lady Gaga");
+ executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("9472a394-359b-4a06-b1d5-b6afce590598"), "Forgetting the Way Home", "Our Lady of Bells");
+ executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("4f8dc18e-54e6-4e16-b507-c5324b61523b"), "Zamki na piasku", "Lady Pank");
+ executeCQL(FTS_CF_NAME, "INSERT INTO %s.%s (song_id, title, artist) VALUES(?, ?, ?)", UUID.fromString("eaf294fa-bad5-49d4-8f08-35ba3636a706"), "Koncertowa", "Lady Pank");
+
+
+ if (forceFlush)
+ store.forceBlockingFlush();
+
+ final UntypedResultSet results = executeCQL(FTS_CF_NAME, "SELECT * FROM %s.%s WHERE artist LIKE 'lady%%'");
+ Assert.assertNotNull(results);
+ Assert.assertEquals(3, results.size());
+ }
+
+ @Test
public void testMultiExpressionQueriesWhereRowSplitBetweenSSTables() throws Exception
{
testMultiExpressionQueriesWhereRowSplitBetweenSSTables(false);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7107646a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
index bac23ea..a3985ca 100644
--- a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
@@ -36,6 +36,7 @@ import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.Int32Type;
import org.apache.cassandra.db.marshal.LongType;
import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.utils.MurmurHash;
import org.apache.cassandra.utils.Pair;
@@ -701,6 +702,41 @@ public class OnDiskIndexTest
b.close();
}
+ @Test
+ public void testPrefixSearchWithCONTAINSMode() throws Exception
+ {
+ Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>()
+ {{
+
+ put(UTF8Type.instance.decompose("lady gaga"), keyBuilder(1L));
+
+ // Partial term for 'lady of bells'
+ DataOutputBuffer ladyOfBellsBuffer = new DataOutputBuffer();
+ ladyOfBellsBuffer.writeShort(UTF8Type.instance.decompose("lady of bells").remaining() | (1 << OnDiskIndexBuilder.IS_PARTIAL_BIT));
+ ladyOfBellsBuffer.write(UTF8Type.instance.decompose("lady of bells"));
+ put(ladyOfBellsBuffer.asNewBuffer(), keyBuilder(2L));
+
+
+ put(UTF8Type.instance.decompose("lady pank"), keyBuilder(3L));
+ }};
+
+ OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS);
+ for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet())
+ addAll(builder, e.getKey(), e.getValue());
+
+ File index = File.createTempFile("on-disk-sa-prefix-contains-search", "db");
+ index.deleteOnExit();
+
+ builder.finish(index);
+
+ OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter());
+
+ // check that lady% returns lady gaga (1) and lady pank (3) but not lady of bells (2)
+ Assert.assertEquals(convert(1, 3), convert(onDisk.search(expressionFor("lady", Operator.LIKE_PREFIX))));
+
+ onDisk.close();
+ }
+
private void testSearchRangeWithSuperBlocks(OnDiskIndex onDiskIndex, long start, long end)
{
RangeIterator<Long, Token> tokens = onDiskIndex.search(expressionFor(start, true, end, false));