You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/06/13 02:31:40 UTC

[kylin] branch 2.6.x updated (5af92e2 -> fb48674)

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a change to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git.


    from 5af92e2  update KylinVersion to 2.6.3
     new fc3e6e5  KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
     new fb48674  KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../kylin/engine/mr/steps/UHCDictionaryJob.java    |  2 +
 .../engine/mr/steps/filter/UHCDictPathFilter.java  | 46 +++++++++++-----------
 2 files changed, 26 insertions(+), 22 deletions(-)
 copy datasource-sdk/src/main/java/org/apache/kylin/sdk/datasource/framework/conv/ParamNodeParser.java => engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java (57%)


[kylin] 01/02: KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.

Posted by sh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit fc3e6e52140b8e724043e7a0134d06aa4c9e34ca
Author: shqmh <sh...@126.com>
AuthorDate: Sun May 26 23:44:55 2019 +0800

    KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
---
 .../kylin/engine/mr/steps/UHCDictionaryJob.java    |  2 +
 .../engine/mr/steps/filter/UHCDictPathFilter.java  | 48 ++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
index 0903228..79565a9 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
@@ -40,6 +40,7 @@ import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -81,6 +82,7 @@ public class UHCDictionaryJob extends AbstractHadoopJob {
                 Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
                 if (HadoopUtil.getFileSystem(path).exists(path)) {
                     FileInputFormat.addInputPath(job, path);
+                    FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                     hasUHCValue = true;
                 }
             }
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
new file mode 100644
index 0000000..44a837b
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps.filter;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class UHCDictPathFilter implements PathFilter {
+    private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
+
+    private static final String DCIFILE_POSTFIX = ".dci";
+
+    @Override
+    public boolean accept(Path path) {
+
+        Pattern pattern = Pattern.compile(DCIFILE_POSTFIX);
+        Matcher matcher = pattern.matcher(path.getName().toLowerCase(Locale.ROOT));
+
+        if (matcher.find()) {
+            logger.info("filter file: " + path.getName());
+            return false;
+        }
+
+        return true;
+    }
+}


[kylin] 02/02: KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX

Posted by sh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit fb486745ba7ca024b117ba2cc7ec95f64c41f401
Author: shqmh <sh...@126.com>
AuthorDate: Wed Jun 12 20:22:10 2019 +0800

    KYLIN-4015 change uhc path filter .dci to FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX
---
 .../org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
index 44a837b..4c5b84d 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.steps.filter;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -30,16 +31,14 @@ import java.util.regex.Pattern;
 public class UHCDictPathFilter implements PathFilter {
     private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
 
-    private static final String DCIFILE_POSTFIX = ".dci";
-
     @Override
     public boolean accept(Path path) {
 
-        Pattern pattern = Pattern.compile(DCIFILE_POSTFIX);
+        Pattern pattern = Pattern.compile(FactDistinctColumnsReducer.DIMENSION_COL_INFO_FILE_POSTFIX);
         Matcher matcher = pattern.matcher(path.getName().toLowerCase(Locale.ROOT));
 
         if (matcher.find()) {
-            logger.info("filter file: " + path.getName());
+            logger.info("filter dict file: " + path.getName());
             return false;
         }