You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2016/10/21 03:46:48 UTC

asterixdb git commit: ASTERIXDB-1700: fixed multiple same type of index application error on the same field

Repository: asterixdb
Updated Branches:
  refs/heads/master 6ae219d92 -> 68c8e9bef


ASTERIXDB-1700: fixed multiple same type of index application error on the same field

 - Fixed an issue where multiple indexes of the same type could be applied to the same field.
   In this situation, applying only one index is enough.
   (e.g., 2-gram and 3-gram index on the same field)

Change-Id: I450f3adb20c777d5b9a8f638e010076b9d817942
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1307
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Jianfeng Jia <ji...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/68c8e9be
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/68c8e9be
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/68c8e9be

Branch: refs/heads/master
Commit: 68c8e9befabfa7def67ce3a1cc93dba05966bd15
Parents: 6ae219d
Author: Taewoo Kim <wa...@yahoo.com>
Authored: Thu Oct 20 13:01:57 2016 -0700
Committer: Taewoo Kim <wa...@yahoo.com>
Committed: Thu Oct 20 20:45:58 2016 -0700

----------------------------------------------------------------------
 .../am/AbstractIntroduceAccessMethodRule.java   | 34 +++++++++++-----
 ...edit-distance-with-two-ngram-index.1.ddl.aql | 42 ++++++++++++++++++++
 ...t-distance-with-two-ngram-index.2.update.aql | 24 +++++++++++
 ...edit-distance-with-two-ngram-index.3.ddl.aql | 23 +++++++++++
 ...it-distance-with-two-ngram-index.4.query.aql | 24 +++++++++++
 ...ram-edit-distance-with-two-ngram-index.1.adm |  1 +
 .../src/test/resources/runtimets/testsuite.xml  |  5 +++
 7 files changed, 142 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
index ec29b53..8e78d1a 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
@@ -20,6 +20,7 @@ package org.apache.asterix.optimizer.rules.am;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -145,9 +146,19 @@ public abstract class AbstractIntroduceAccessMethodRule implements IAlgebraicRew
         return list.isEmpty() ? null : list.get(0);
     }
 
+    /**
+     * Choose all indexes that match the given access method. These indexes will be used as index-search
+     * to replace the given predicates in a SELECT operator. Also, if there are multiple indexes of the same type
+     * on the same field, only one of them will be chosen. Allowed cases (AccessMethod, IndexType) are:
+     * [BTreeAccessMethod , IndexType.BTREE], [RTreeAccessMethod , IndexType.RTREE],
+     * [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX || SINGLE_PARTITION_NGRAM_INVIX ||
+     * LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX]
+     */
     protected List<Pair<IAccessMethod, Index>> chooseAllIndex(
             Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
         List<Pair<IAccessMethod, Index>> result = new ArrayList<Pair<IAccessMethod, Index>>();
+        // Maps each list of variables (fields) to the index types that have already been chosen for those vars.
+        Map<List<Pair<Integer, Integer>>, List<IndexType>> resultVarsToIndexTypesMap = new HashMap<>();
         Iterator<Map.Entry<IAccessMethod, AccessMethodAnalysisContext>> amIt = analyzedAMs.entrySet().iterator();
         while (amIt.hasNext()) {
             Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry = amIt.next();
@@ -156,15 +167,6 @@ public abstract class AbstractIntroduceAccessMethodRule implements IAlgebraicRew
                     .iterator();
             while (indexIt.hasNext()) {
                 Map.Entry<Index, List<Pair<Integer, Integer>>> indexEntry = indexIt.next();
-                // To avoid a case where the chosen access method and a chosen
-                // index type is different.
-                // Allowed Case: [BTreeAccessMethod , IndexType.BTREE],
-                //               [RTreeAccessMethod , IndexType.RTREE],
-                //               [InvertedIndexAccessMethod,
-                //                 IndexType.SINGLE_PARTITION_WORD_INVIX ||
-                //                           SINGLE_PARTITION_NGRAM_INVIX ||
-                //                           LENGTH_PARTITIONED_WORD_INVIX ||
-                //                           LENGTH_PARTITIONED_NGRAM_INVIX]
                 IAccessMethod chosenAccessMethod = amEntry.getKey();
                 Index chosenIndex = indexEntry.getKey();
                 IndexType indexType = chosenIndex.getIndexType();
@@ -172,11 +174,21 @@ public abstract class AbstractIntroduceAccessMethodRule implements IAlgebraicRew
                         || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX
                         || indexType == IndexType.SINGLE_PARTITION_WORD_INVIX
                         || indexType == IndexType.SINGLE_PARTITION_NGRAM_INVIX;
-
                 if ((chosenAccessMethod == BTreeAccessMethod.INSTANCE && indexType == IndexType.BTREE)
                         || (chosenAccessMethod == RTreeAccessMethod.INSTANCE && indexType == IndexType.RTREE)
                         || (chosenAccessMethod == InvertedIndexAccessMethod.INSTANCE && isKeywordOrNgramIndexChosen)) {
-                    result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex));
+                    if (resultVarsToIndexTypesMap.containsKey(indexEntry.getValue())) {
+                        List<IndexType> appliedIndexTypes = resultVarsToIndexTypesMap.get(indexEntry.getValue());
+                        if (!appliedIndexTypes.contains(indexType)) {
+                            appliedIndexTypes.add(indexType);
+                            result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex));
+                        }
+                    } else {
+                        List<IndexType> addedIndexTypes = new ArrayList<>();
+                        addedIndexTypes.add(indexType);
+                        resultVarsToIndexTypesMap.put(indexEntry.getValue(), addedIndexTypes);
+                        result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex));
+                    }
                 }
             }
         }

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql
new file mode 100644
index 0000000..3756b81
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ /*
+ * Description     : Test that conducts an inverted index search on a field that has multiple indexes of the same type.
+ * Expected Result : Success
+ * Issue           : ASTERIXDB-1700
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int64,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create nodegroup group1 if not exists on asterix_nc1, asterix_nc2;
+
+create dataset DBLP(DBLPType)
+  primary key id on group1;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql
new file mode 100644
index 0000000..88653a2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse test;
+
+load dataset DBLP
+using localfs
+(("path"="asterix_nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql
new file mode 100644
index 0000000..0b7ef02
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse test;
+
+create index ngram2_index on DBLP(authors) type ngram(2);
+
+create index ngram3_index on DBLP(authors) type ngram(3);

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql
new file mode 100644
index 0000000..b61b765
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse test;
+
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 1)
+where $ed[0]
+return $o

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm
new file mode 100644
index 0000000..528c4a5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm
@@ -0,0 +1 @@
+{ "id": 22, "dblpid": "books/acm/kim95/Motro95", "title": "Management of Uncerainty in database Systems.", "authors": "Amihai Motro", "misc": "2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95" }

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/68c8e9be/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index c5afa97..942b546 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -2818,6 +2818,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="index-selection">
+      <compilation-unit name="inverted-index-ngram-edit-distance-with-two-ngram-index">
+        <output-dir compare="Text">inverted-index-ngram-edit-distance-with-two-ngram-index</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="index-selection">
       <compilation-unit name="inverted-index-ngram-edit-distance-word-tokens">
         <output-dir compare="Text">inverted-index-ngram-edit-distance-word-tokens</output-dir>
       </compilation-unit>