You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/06/13 02:27:48 UTC

[kylin] branch master updated (30ff09f -> f19f16a)

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git.


    from 30ff09f  KYLIN-4022 when Adhoc Push Down then Unrecognized column type: DECIMAL(xx,xx)
     new b0ae003  KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
     new f19f16a  KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../kylin/engine/mr/steps/UHCDictionaryJob.java    |  2 +
 .../engine/mr/steps/filter/UHCDictPathFilter.java  | 46 +++++++++++-----------
 2 files changed, 26 insertions(+), 22 deletions(-)
 copy datasource-sdk/src/main/java/org/apache/kylin/sdk/datasource/framework/conv/ParamNodeParser.java => engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java (57%)


[kylin] 01/02: KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.

Posted by sh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit b0ae00315e1fe3d9301581387668fde3ec0efd6f
Author: shqmh <sh...@126.com>
AuthorDate: Sun May 26 23:44:55 2019 +0800

    KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
---
 .../kylin/engine/mr/steps/UHCDictionaryJob.java    |  2 +
 .../engine/mr/steps/filter/UHCDictPathFilter.java  | 48 ++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
index 0903228..79565a9 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
@@ -40,6 +40,7 @@ import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -81,6 +82,7 @@ public class UHCDictionaryJob extends AbstractHadoopJob {
                 Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
                 if (HadoopUtil.getFileSystem(path).exists(path)) {
                     FileInputFormat.addInputPath(job, path);
+                    FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                     hasUHCValue = true;
                 }
             }
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
new file mode 100644
index 0000000..44a837b
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps.filter;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class UHCDictPathFilter implements PathFilter {
+    private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
+
+    private static final String DCIFILE_POSTFIX = ".dci";
+
+    @Override
+    public boolean accept(Path path) {
+
+        Pattern pattern = Pattern.compile(DCIFILE_POSTFIX);
+        Matcher matcher = pattern.matcher(path.getName().toLowerCase(Locale.ROOT));
+
+        if (matcher.find()) {
+            logger.info("filter file: " + path.getName());
+            return false;
+        }
+
+        return true;
+    }
+}


[kylin] 02/02: KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX

Posted by sh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit f19f16aad334fd65f9946dd51d369497a15cb9e4
Author: shqmh <sh...@126.com>
AuthorDate: Wed Jun 12 20:22:10 2019 +0800

    KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX
---
 .../org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
index 44a837b..4c5b84d 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.steps.filter;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -30,16 +31,14 @@ import java.util.regex.Pattern;
 public class UHCDictPathFilter implements PathFilter {
     private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
 
-    private static final String DCIFILE_POSTFIX = ".dci";
-
     @Override
     public boolean accept(Path path) {
 
-        Pattern pattern = Pattern.compile(DCIFILE_POSTFIX);
+        Pattern pattern = Pattern.compile(FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX);
         Matcher matcher = pattern.matcher(path.getName().toLowerCase(Locale.ROOT));
 
         if (matcher.find()) {
-            logger.info("filter file: " + path.getName());
+            logger.info("filter dict file: " + path.getName());
             return false;
         }