You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ad...@apache.org on 2018/06/25 11:12:20 UTC

[2/3] cassandra git commit: Validate supported column type with SASI analyzer

Validate supported column type with SASI analyzer

patch by Zhao Yang; reviewed by Andres de la Peña for CASSANDRA-13669


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ea62d886
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ea62d886
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ea62d886

Branch: refs/heads/trunk
Commit: ea62d8862c311e3d9b64d622bea0a68d3825aa7d
Parents: 08a515d
Author: Zhao Yang <zh...@gmail.com>
Authored: Tue May 15 09:43:39 2018 +0800
Committer: Andrés de la Peña <a....@gmail.com>
Committed: Mon Jun 25 11:51:15 2018 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/index/sasi/SASIIndex.java  |  5 +-
 .../index/sasi/analyzer/AbstractAnalyzer.java   |  8 ++
 .../index/sasi/analyzer/DelimiterAnalyzer.java  | 12 ++-
 .../index/sasi/analyzer/NoOpAnalyzer.java       |  6 ++
 .../sasi/analyzer/NonTokenizingAnalyzer.java    |  6 ++
 .../index/sasi/analyzer/StandardAnalyzer.java   | 18 +++++
 .../cassandra/index/sasi/conf/IndexMode.java    | 20 ++++-
 .../cassandra/index/sasi/SASIIndexTest.java     | 77 ++++++++++++++++++++
 9 files changed, 147 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d9c62ea..6f6e009 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.11.3
+ * Validate supported column type with SASI analyzer (CASSANDRA-13669)
  * Remove BTree.Builder Recycler to reduce memory usage (CASSANDRA-13929)
  * Reduce nodetool GC thread count (CASSANDRA-14475)
  * Fix New SASI view creation during Index Redistribution (CASSANDRA-14055)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
index f127748..2c1d088 100644
--- a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java
@@ -121,6 +121,9 @@ public class SASIIndex implements Index, INotificationConsumer
         CompactionManager.instance.submitIndexBuild(new SASIIndexBuilder(baseCfs, toRebuild));
     }
 
+    /**
+     * Called via reflection at {@link IndexMetadata#validateCustomIndexOptions}
+     */
     public static Map<String, String> validateOptions(Map<String, String> options, CFMetaData cfm)
     {
         if (!(cfm.partitioner instanceof Murmur3Partitioner))
@@ -140,7 +143,7 @@ public class SASIIndex implements Index, INotificationConsumer
         if (target.left.isPartitionKey())
             throw new ConfigurationException("partition key columns are not yet supported by SASI");
 
-        IndexMode.validateAnalyzer(options);
+        IndexMode.validateAnalyzer(options, target.left);
 
         IndexMode mode = IndexMode.getMode(target.left, options);
         if (mode.mode == Mode.SPARSE)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
index 31c66cc..e3bb7a2 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java
@@ -43,6 +43,14 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer>
     public abstract void reset(ByteBuffer input);
 
     /**
+     * Test whether the given validator is compatible with the underlying analyzer.
+     *
+     * @param validator
+     * @return
+     */
+    public abstract boolean isCompatibleWith(AbstractType<?> validator);
+
+    /**
      * @return true if current analyzer provides text tokenization, false otherwise.
      */
     public boolean isTokenizing()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
index 24acef4..fea4b4f 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
@@ -37,10 +37,10 @@ import org.apache.cassandra.utils.AbstractIterator;
 public class DelimiterAnalyzer extends AbstractAnalyzer
 {
 
-    private static final Map<AbstractType<?>,Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>,Charset>()
+    private static final Map<AbstractType<?>, Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>, Charset>()
     {{
-            put(UTF8Type.instance, StandardCharsets.UTF_8);
-            put(AsciiType.instance, StandardCharsets.US_ASCII);
+        put(UTF8Type.instance, StandardCharsets.UTF_8);
+        put(AsciiType.instance, StandardCharsets.US_ASCII);
     }};
 
     private char delimiter;
@@ -105,4 +105,10 @@ public class DelimiterAnalyzer extends AbstractAnalyzer
     {
         return true;
     }
+
+    @Override
+    public boolean isCompatibleWith(AbstractType<?> validator)
+    {
+        return VALID_ANALYZABLE_TYPES.containsKey(validator);
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
index 9939a13..5c9b748 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java
@@ -51,4 +51,10 @@ public class NoOpAnalyzer extends AbstractAnalyzer
         this.input = input;
         this.hasNext = true;
     }
+
+    @Override
+    public boolean isCompatibleWith(AbstractType<?> validator)
+    {
+        return true;
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
index 676b304..82084bc 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java
@@ -123,4 +123,10 @@ public class NonTokenizingAnalyzer extends AbstractAnalyzer
             builder = builder.add("to_lower", new BasicResultFilters.LowerCase());
         return builder.build();
     }
+
+    @Override
+    public boolean isCompatibleWith(AbstractType<?> validator)
+    {
+        return VALID_ANALYZABLE_TYPES.contains(validator);
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
index 3b58bf9..e1a4a44 100644
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java
@@ -23,10 +23,13 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.cassandra.index.sasi.analyzer.filter.*;
 import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
 import org.apache.cassandra.db.marshal.UTF8Type;
 import org.apache.cassandra.io.util.DataInputBuffer;
 import org.apache.cassandra.utils.ByteBufferUtil;
@@ -38,6 +41,15 @@ import com.carrotsearch.hppc.IntObjectOpenHashMap;
 
 public class StandardAnalyzer extends AbstractAnalyzer
 {
+
+    private static final Set<AbstractType<?>> VALID_ANALYZABLE_TYPES = new HashSet<AbstractType<?>>()
+    {
+        {
+            add(UTF8Type.instance);
+            add(AsciiType.instance);
+        }
+    };
+
     public enum TokenType
     {
         EOF(-1),
@@ -198,4 +210,10 @@ public class StandardAnalyzer extends AbstractAnalyzer
     {
         return true;
     }
+
+    @Override
+    public boolean isCompatibleWith(AbstractType<?> validator)
+    {
+        return VALID_ANALYZABLE_TYPES.contains(validator);
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
index c66dd02..5709a0f 100644
--- a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
+++ b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java
@@ -93,20 +93,36 @@ public class IndexMode
         return analyzer;
     }
 
-    public static void validateAnalyzer(Map<String, String> indexOptions) throws ConfigurationException
+    public static void validateAnalyzer(Map<String, String> indexOptions, ColumnDefinition cd) throws ConfigurationException
     {
         // validate that a valid analyzer class was provided if specified
         if (indexOptions.containsKey(INDEX_ANALYZER_CLASS_OPTION))
         {
+            Class<?> analyzerClass;
             try
             {
-                Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION));
+                analyzerClass = Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION));
             }
             catch (ClassNotFoundException e)
             {
                 throw new ConfigurationException(String.format("Invalid analyzer class option specified [%s]",
                                                                indexOptions.get(INDEX_ANALYZER_CLASS_OPTION)));
             }
+
+            AbstractAnalyzer analyzer;
+            try
+            {
+                analyzer = (AbstractAnalyzer) analyzerClass.newInstance();
+                if (!analyzer.isCompatibleWith(cd.type))
+                    throw new ConfigurationException(String.format("%s does not support type %s",
+                                                                   analyzerClass.getSimpleName(),
+                                                                   cd.type.asCQL3Type()));
+            }
+            catch (InstantiationException | IllegalAccessException e)
+            {
+                throw new ConfigurationException(String.format("Unable to initialize analyzer class option specified [%s]",
+                                                               analyzerClass.getSimpleName()));
+            }
         }
     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index a57af61..1579857 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -31,10 +31,12 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
 
 import org.apache.cassandra.SchemaLoader;
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;
+import org.apache.cassandra.config.Schema;
 import org.apache.cassandra.index.Index;
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.cql3.*;
@@ -55,6 +57,11 @@ import org.apache.cassandra.dht.Murmur3Partitioner;
 import org.apache.cassandra.dht.Range;
 import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.NoOpAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer;
+import org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer;
 import org.apache.cassandra.index.sasi.conf.ColumnIndex;
 import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.exceptions.TimeQuotaExceededException;
@@ -2411,6 +2418,76 @@ public class SASIIndexTest
         Assert.assertEquals(index.searchMemtable(expression).getCount(), 0);
     }
 
+    @Test
+    public void testAnalyzerValidation()
+    {
+        final String TABLE_NAME = "analyzer_validation";
+        QueryProcessor.executeOnceInternal(String.format("CREATE TABLE %s.%s (" +
+                                                         "  pk text PRIMARY KEY, " +
+                                                         "  ascii_v ascii, " +
+                                                         "  bigint_v bigint, " +
+                                                         "  blob_v blob, " +
+                                                         "  boolean_v boolean, " +
+                                                         "  date_v date, " +
+                                                         "  decimal_v decimal, " +
+                                                         "  double_v double, " +
+                                                         "  float_v float, " +
+                                                         "  inet_v inet, " +
+                                                         "  int_v int, " +
+                                                         "  smallint_v smallint, " +
+                                                         "  text_v text, " +
+                                                         "  time_v time, " +
+                                                         "  timestamp_v timestamp, " +
+                                                         "  timeuuid_v timeuuid, " +
+                                                         "  tinyint_v tinyint, " +
+                                                         "  uuid_v uuid, " +
+                                                         "  varchar_v varchar, " +
+                                                         "  varint_v varint" +
+                                                         ");",
+                                                         KS_NAME,
+                                                         TABLE_NAME));
+
+        Columns regulars = Schema.instance.getCFMetaData(KS_NAME, TABLE_NAME).partitionColumns().regulars;
+        List<String> allColumns = regulars.stream().map(ColumnDefinition::toString).collect(Collectors.toList());
+        List<String> textColumns = Arrays.asList("text_v", "ascii_v", "varchar_v");
+
+        new HashMap<Class<? extends AbstractAnalyzer>, List<String>>()
+        {{
+            put(StandardAnalyzer.class, textColumns);
+            put(NonTokenizingAnalyzer.class, textColumns);
+            put(DelimiterAnalyzer.class, textColumns);
+            put(NoOpAnalyzer.class, allColumns);
+        }}
+        .forEach((analyzer, supportedColumns) -> {
+            for (String column : allColumns)
+            {
+                String query = String.format("CREATE CUSTOM INDEX ON %s.%s(%s) " +
+                                             "USING 'org.apache.cassandra.index.sasi.SASIIndex' " +
+                                             "WITH OPTIONS = {'analyzer_class': '%s', 'mode':'PREFIX'};",
+                                             KS_NAME, TABLE_NAME, column, analyzer.getName());
+
+                if (supportedColumns.contains(column))
+                {
+                    QueryProcessor.executeOnceInternal(query);
+                }
+                else
+                {
+                    try
+                    {
+                        QueryProcessor.executeOnceInternal(query);
+                        Assert.fail("Expected ConfigurationException");
+                    }
+                    catch (ConfigurationException e)
+                    {
+                        // expected
+                        Assert.assertTrue("Unexpected error message " + e.getMessage(),
+                                          e.getMessage().contains("does not support type"));
+                    }
+                }
+            }
+        });
+    }
+
     private static ColumnFamilyStore loadData(Map<String, Pair<String, Integer>> data, boolean forceFlush)
     {
         return loadData(data, System.currentTimeMillis(), forceFlush);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org