You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/06/13 02:27:49 UTC

[kylin] 01/02: KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit b0ae00315e1fe3d9301581387668fde3ec0efd6f
Author: shqmh <sh...@126.com>
AuthorDate: Sun May 26 23:44:55 2019 +0800

    KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
---
 .../kylin/engine/mr/steps/UHCDictionaryJob.java    |  2 +
 .../engine/mr/steps/filter/UHCDictPathFilter.java  | 48 ++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
index 0903228..79565a9 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
@@ -40,6 +40,7 @@ import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -81,6 +82,7 @@ public class UHCDictionaryJob extends AbstractHadoopJob {
                 Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
                 if (HadoopUtil.getFileSystem(path).exists(path)) {
                     FileInputFormat.addInputPath(job, path);
+                    FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                     hasUHCValue = true;
                 }
             }
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
new file mode 100644
index 0000000..44a837b
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps.filter;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Hadoop {@link PathFilter} that rejects paths whose name ends with ".dci" (case-insensitive). */
+public class UHCDictPathFilter implements PathFilter {
+    private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
+    private static final String DCIFILE_POSTFIX = ".dci";
+    // Quoted so '.' is a literal dot and anchored with \z so only a true suffix matches; compiled once.
+    private static final Pattern DCIFILE_PATTERN = Pattern.compile(Pattern.quote(DCIFILE_POSTFIX) + "\\z");
+
+    /** Returns {@code false} for names ending in ".dci" (logging the skipped name), {@code true} otherwise. */
+    @Override
+    public boolean accept(Path path) {
+        String name = path.getName();
+        Matcher matcher = DCIFILE_PATTERN.matcher(name.toLowerCase(Locale.ROOT));
+        if (matcher.find()) {
+            logger.info("filter file: {}", name);
+            return false;
+        }
+        return true;
+    }
+}