You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by im...@apache.org on 2020/04/17 07:32:45 UTC

[asterixdb] 02/03: [NO ISSUE] Rename tokenizer factories

This is an automated email from the ASF dual-hosted git repository.

imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 934ce9d903d36990d22febd69e08a81d0c8e40b2
Author: Rui Guo <ru...@uci.edu>
AuthorDate: Tue Apr 14 12:17:13 2020 -0700

    [NO ISSUE] Rename tokenizer factories
    
    Rename the fields that hold tokenizer factories so their names end in
    "Factory". Previously they were named like tokenizers (e.g.
    orderedListTokenizer), which could be confusing.
    
    Change-Id: Ie72d420e18509489d0fc0f9c98b162202a62be55
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5804
    Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ian Maxon <im...@uci.edu>
---
 .../nontagged/BinaryTokenizerFactoryProvider.java  | 27 ++++++++++++----------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
index 5ef7702..6333e92 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
@@ -30,26 +30,29 @@ import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8Strin
 import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory;
 import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8WordTokenFactory;
 
+// TODO: consider making the tokenizer constructors private so that tokenizers can only be created via this provider.
+// Currently, different call sites construct tokenizers with **different parameters**, which is error-prone.
+// A centralized provider would avoid bugs caused by inconsistent parameters.
 public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryProvider {
 
     public static final BinaryTokenizerFactoryProvider INSTANCE = new BinaryTokenizerFactoryProvider();
 
-    private static final IBinaryTokenizerFactory aqlStringTokenizer =
+    private static final IBinaryTokenizerFactory stringTokenizerFactory =
             new DelimitedUTF8StringBinaryTokenizerFactory(true, true,
                     new UTF8WordTokenFactory(ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG));
 
-    private static final IBinaryTokenizerFactory aqlStringNoTypeTagTokenizer =
+    private static final IBinaryTokenizerFactory stringNoTypeTagTokenizerFactory =
             new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
                     new UTF8WordTokenFactory(ATypeTag.STRING.serialize(), ATypeTag.INTEGER.serialize()));
 
-    private static final IBinaryTokenizerFactory aqlHashingStringTokenizer =
+    private static final IBinaryTokenizerFactory stringHashingTokenizerFactory =
             new DelimitedUTF8StringBinaryTokenizerFactory(true, true, new HashedUTF8WordTokenFactory(
                     ATypeTag.SERIALIZED_INT32_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG));
 
-    private static final IBinaryTokenizerFactory orderedListTokenizer =
+    private static final IBinaryTokenizerFactory orderedListTokenizerFactory =
             new AOrderedListBinaryTokenizerFactory(new AListElementTokenFactory());
 
-    private static final IBinaryTokenizerFactory unorderedListTokenizer =
+    private static final IBinaryTokenizerFactory unorderedListTokenizerFactory =
             new AUnorderedListBinaryTokenizerFactory(new AListElementTokenFactory());
 
     @Override
@@ -58,16 +61,16 @@ public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryPr
         switch (typeTag) {
             case STRING:
                 if (hashedTokens) {
-                    return aqlHashingStringTokenizer;
+                    return stringHashingTokenizerFactory;
                 } else if (!typeTageAlreadyRemoved) {
-                    return aqlStringTokenizer;
+                    return stringTokenizerFactory;
                 } else {
-                    return aqlStringNoTypeTagTokenizer;
+                    return stringNoTypeTagTokenizerFactory;
                 }
             case ARRAY:
-                return orderedListTokenizer;
+                return orderedListTokenizerFactory;
             case MULTISET:
-                return unorderedListTokenizer;
+                return unorderedListTokenizerFactory;
             default:
                 return null;
         }
@@ -86,9 +89,9 @@ public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryPr
                                     ATypeTag.SERIALIZED_INT32_TYPE_TAG));
                 }
             case ARRAY:
-                return orderedListTokenizer;
+                return orderedListTokenizerFactory;
             case MULTISET:
-                return unorderedListTokenizer;
+                return unorderedListTokenizerFactory;
             default:
                 return null;
         }