You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ad...@apache.org on 2018/06/25 11:12:20 UTC
[2/3] cassandra git commit: Validate supported column type with SASI
analyzer
Validate supported column type with SASI analyzer
patch by Zhao Yang; reviewed by Andres de la Peña for CASSANDRA-13669
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ea62d886
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ea62d886
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ea62d886
Branch: refs/heads/trunk
Commit: ea62d8862c311e3d9b64d622bea0a68d3825aa7d
Parents: 08a515d
Author: Zhao Yang <zh...@gmail.com>
Authored: Tue May 15 09:43:39 2018 +0800
Committer: Andrés de la Peña <a....@gmail.com>
Committed: Mon Jun 25 11:51:15 2018 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/index/sasi/SASIIndex.java | 5 +-
.../index/sasi/analyzer/AbstractAnalyzer.java | 8 ++
.../index/sasi/analyzer/DelimiterAnalyzer.java | 12 ++-
.../index/sasi/analyzer/NoOpAnalyzer.java | 6 ++
.../sasi/analyzer/NonTokenizingAnalyzer.java | 6 ++
.../index/sasi/analyzer/StandardAnalyzer.java | 18 +++++
.../cassandra/index/sasi/conf/IndexMode.java | 20 ++++-
.../cassandra/index/sasi/SASIIndexTest.java | 77 ++++++++++++++++++++
9 files changed, 147 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d9c62ea..6f6e009 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.11.3
+ * Validate supported column type with SASI analyzer (CASSANDRA-13669)
* Remove BTree.Builder Recycler to reduce memory usage (CASSANDRA-13929)
* Reduce nodetool GC thread count (CASSANDRA-14475)
* Fix New SASI view creation during Index Redistribution (CASSANDRA-14055)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
index f127748..2c1d088 100644
--- a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
@@ -121,6 +121,9 @@ public class SASIIndex implements Index, INotificationConsumer
CompactionManager.instance.submitIndexBuild(new SASIIndexBuilder(baseCfs, toRebuild));
}
+ /**
+ * Called via reflection at {@link IndexMetadata#validateCustomIndexOptions}
+ */
public static Map<String, String> validateOptions(Map<String, String> options, CFMetaData cfm)
{
if (!(cfm.partitioner instanceof Murmur3Partitioner))
@@ -140,7 +143,7 @@ public class SASIIndex implements Index, INotificationConsumer
if (target.left.isPartitionKey())
throw new ConfigurationException("partition key columns are not yet supported by SASI");
- IndexMode.validateAnalyzer(options);
+ IndexMode.validateAnalyzer(options, target.left);
IndexMode mode = IndexMode.getMode(target.left, options);
if (mode.mode == Mode.SPARSE)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
index 31c66cc..e3bb7a2 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
@@ -43,6 +43,14 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer>
public abstract void reset(ByteBuffer input);
/**
+ * Test whether the given validator is compatible with the underlying analyzer.
+ *
+ * @param validator
+ * @return
+ */
+ public abstract boolean isCompatibleWith(AbstractType<?> validator);
+
+ /**
* @return true if current analyzer provides text tokenization, false otherwise.
*/
public boolean isTokenizing()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
index 24acef4..fea4b4f 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
@@ -37,10 +37,10 @@ import org.apache.cassandra.utils.AbstractIterator;
public class DelimiterAnalyzer extends AbstractAnalyzer
{
- private static final Map<AbstractType<?>,Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>,Charset>()
+ private static final Map<AbstractType<?>, Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>, Charset>()
{{
- put(UTF8Type.instance, StandardCharsets.UTF_8);
- put(AsciiType.instance, StandardCharsets.US_ASCII);
+ put(UTF8Type.instance, StandardCharsets.UTF_8);
+ put(AsciiType.instance, StandardCharsets.US_ASCII);
}};
private char delimiter;
@@ -105,4 +105,10 @@ public class DelimiterAnalyzer extends AbstractAnalyzer
{
return true;
}
+
+ @Override
+ public boolean isCompatibleWith(AbstractType<?> validator)
+ {
+ return VALID_ANALYZABLE_TYPES.containsKey(validator);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
index 9939a13..5c9b748 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
@@ -51,4 +51,10 @@ public class NoOpAnalyzer extends AbstractAnalyzer
this.input = input;
this.hasNext = true;
}
+
+ @Override
+ public boolean isCompatibleWith(AbstractType<?> validator)
+ {
+ return true;
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
index 676b304..82084bc 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
@@ -123,4 +123,10 @@ public class NonTokenizingAnalyzer extends AbstractAnalyzer
builder = builder.add("to_lower", new BasicResultFilters.LowerCase());
return builder.build();
}
+
+ @Override
+ public boolean isCompatibleWith(AbstractType<?> validator)
+ {
+ return VALID_ANALYZABLE_TYPES.contains(validator);
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
index 3b58bf9..e1a4a44 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
@@ -23,10 +23,13 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
import org.apache.cassandra.index.sasi.analyzer.filter.*;
import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.io.util.DataInputBuffer;
import org.apache.cassandra.utils.ByteBufferUtil;
@@ -38,6 +41,15 @@ import com.carrotsearch.hppc.IntObjectOpenHashMap;
public class StandardAnalyzer extends AbstractAnalyzer
{
+
+ private static final Set<AbstractType<?>> VALID_ANALYZABLE_TYPES = new HashSet<AbstractType<?>>()
+ {
+ {
+ add(UTF8Type.instance);
+ add(AsciiType.instance);
+ }
+ };
+
public enum TokenType
{
EOF(-1),
@@ -198,4 +210,10 @@ public class StandardAnalyzer extends AbstractAnalyzer
{
return true;
}
+
+ @Override
+ public boolean isCompatibleWith(AbstractType<?> validator)
+ {
+ return VALID_ANALYZABLE_TYPES.contains(validator);
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
index c66dd02..5709a0f 100644
--- a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
+++ b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
@@ -93,20 +93,36 @@ public class IndexMode
return analyzer;
}
- public static void validateAnalyzer(Map<String, String> indexOptions) throws ConfigurationException
+ public static void validateAnalyzer(Map<String, String> indexOptions, ColumnDefinition cd) throws ConfigurationException
{
// validate that a valid analyzer class was provided if specified
if (indexOptions.containsKey(INDEX_ANALYZER_CLASS_OPTION))
{
+ Class<?> analyzerClass;
try
{
- Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION));
+ analyzerClass = Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION));
}
catch (ClassNotFoundException e)
{
throw new ConfigurationException(String.format("Invalid analyzer class option specified [%s]",
indexOptions.get(INDEX_ANALYZER_CLASS_OPTION)));
}
+
+ AbstractAnalyzer analyzer;
+ try
+ {
+ analyzer = (AbstractAnalyzer) analyzerClass.newInstance();
+ if (!analyzer.isCompatibleWith(cd.type))
+ throw new ConfigurationException(String.format("%s does not support type %s",
+ analyzerClass.getSimpleName(),
+ cd.type.asCQL3Type()));
+ }
+ catch (InstantiationException | IllegalAccessException e)
+ {
+ throw new ConfigurationException(String.format("Unable to initialize analyzer class option specified [%s]",
+ analyzerClass.getSimpleName()));
+ }
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index a57af61..1579857 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -31,10 +31,12 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
import org.apache.cassandra.SchemaLoader;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
+import org.apache.cassandra.config.Schema;
import org.apache.cassandra.index.Index;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.cql3.*;
@@ -55,6 +57,11 @@ import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.NoOpAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer;
import org.apache.cassandra.index.sasi.conf.ColumnIndex;
import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
import org.apache.cassandra.index.sasi.exceptions.TimeQuotaExceededException;
@@ -2411,6 +2418,76 @@ public class SASIIndexTest
Assert.assertEquals(index.searchMemtable(expression).getCount(), 0);
}
+ @Test
+ public void testAnalyzerValidation()
+ {
+ final String TABLE_NAME = "analyzer_validation";
+ QueryProcessor.executeOnceInternal(String.format("CREATE TABLE %s.%s (" +
+ " pk text PRIMARY KEY, " +
+ " ascii_v ascii, " +
+ " bigint_v bigint, " +
+ " blob_v blob, " +
+ " boolean_v boolean, " +
+ " date_v date, " +
+ " decimal_v decimal, " +
+ " double_v double, " +
+ " float_v float, " +
+ " inet_v inet, " +
+ " int_v int, " +
+ " smallint_v smallint, " +
+ " text_v text, " +
+ " time_v time, " +
+ " timestamp_v timestamp, " +
+ " timeuuid_v timeuuid, " +
+ " tinyint_v tinyint, " +
+ " uuid_v uuid, " +
+ " varchar_v varchar, " +
+ " varint_v varint" +
+ ");",
+ KS_NAME,
+ TABLE_NAME));
+
+ Columns regulars = Schema.instance.getCFMetaData(KS_NAME, TABLE_NAME).partitionColumns().regulars;
+ List<String> allColumns = regulars.stream().map(ColumnDefinition::toString).collect(Collectors.toList());
+ List<String> textColumns = Arrays.asList("text_v", "ascii_v", "varchar_v");
+
+ new HashMap<Class<? extends AbstractAnalyzer>, List<String>>()
+ {{
+ put(StandardAnalyzer.class, textColumns);
+ put(NonTokenizingAnalyzer.class, textColumns);
+ put(DelimiterAnalyzer.class, textColumns);
+ put(NoOpAnalyzer.class, allColumns);
+ }}
+ .forEach((analyzer, supportedColumns) -> {
+ for (String column : allColumns)
+ {
+ String query = String.format("CREATE CUSTOM INDEX ON %s.%s(%s) " +
+ "USING 'org.apache.cassandra.index.sasi.SASIIndex' " +
+ "WITH OPTIONS = {'analyzer_class': '%s', 'mode':'PREFIX'};",
+ KS_NAME, TABLE_NAME, column, analyzer.getName());
+
+ if (supportedColumns.contains(column))
+ {
+ QueryProcessor.executeOnceInternal(query);
+ }
+ else
+ {
+ try
+ {
+ QueryProcessor.executeOnceInternal(query);
+ Assert.fail("Expected ConfigurationException");
+ }
+ catch (ConfigurationException e)
+ {
+ // expected
+ Assert.assertTrue("Unexpected error message " + e.getMessage(),
+ e.getMessage().contains("does not support type"));
+ }
+ }
+ }
+ });
+ }
+
private static ColumnFamilyStore loadData(Map<String, Pair<String, Integer>> data, boolean forceFlush)
{
return loadData(data, System.currentTimeMillis(), forceFlush);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org