You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/06/13 02:27:49 UTC
[kylin] 01/02: KYLIN-4015 "Build UHC Dictionary" step filter ".dci"
files to solve the problem that MR engine run failed because the ".dci"
file is not a Sequence file.
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit b0ae00315e1fe3d9301581387668fde3ec0efd6f
Author: shqmh <sh...@126.com>
AuthorDate: Sun May 26 23:44:55 2019 +0800
KYLIN-4015 "Build UHC Dictionary" step filter ".dci" files to solve the problem that MR engine run failed because the ".dci" file is not a Sequence file.
---
.../kylin/engine/mr/steps/UHCDictionaryJob.java | 2 +
.../engine/mr/steps/filter/UHCDictPathFilter.java | 48 ++++++++++++++++++++++
2 files changed, 50 insertions(+)
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
index 0903228..79565a9 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UHCDictionaryJob.java
@@ -40,6 +40,7 @@ import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -81,6 +82,7 @@ public class UHCDictionaryJob extends AbstractHadoopJob {
Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
if (HadoopUtil.getFileSystem(path).exists(path)) {
FileInputFormat.addInputPath(job, path);
+ FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
hasUHCValue = true;
}
}
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
new file mode 100644
index 0000000..44a837b
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/filter/UHCDictPathFilter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps.filter;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Hadoop input path filter that rejects ".dci" files, which the MR engine cannot read as Sequence files. */
+public class UHCDictPathFilter implements PathFilter {
+    private static final Logger logger = LoggerFactory.getLogger(UHCDictPathFilter.class);
+
+    private static final String DCIFILE_POSTFIX = ".dci";
+    // Compiled once; LITERAL so '.' matches only a dot (the old regex ".dci" also hit names such as "xdci").
+    private static final Pattern DCIFILE_PATTERN = Pattern.compile(DCIFILE_POSTFIX, Pattern.LITERAL);
+
+    /** Returns false (and logs the name) when the lower-cased file name contains ".dci"; true otherwise. */
+    @Override
+    public boolean accept(Path path) {
+        // Match anywhere in the name, as before: text may follow the marker (NOTE(review): confirm naming).
+        Matcher matcher = DCIFILE_PATTERN.matcher(path.getName().toLowerCase(Locale.ROOT));
+        if (matcher.find()) {
+            logger.info("filter file: " + path.getName());
+            return false;
+        }
+        return true;
+    }
+}