You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2017/12/09 14:39:30 UTC

[1/2] kylin git commit: KYLIN-3070 Add a config property for flat table storage format

Repository: kylin
Updated Branches:
  refs/heads/master 7e6aa3471 -> fab517b95


KYLIN-3070 Add a config property for flat table storage format

Signed-off-by: shaofengshi <sh...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/9df8572d
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/9df8572d
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/9df8572d

Branch: refs/heads/master
Commit: 9df8572d787e03577c515b5818f5f11814b321f9
Parents: 7e6aa34
Author: Seva Ostapenko <se...@anovadata.com>
Authored: Wed Dec 6 23:57:48 2017 -0500
Committer: shaofengshi <sh...@apache.org>
Committed: Sat Dec 9 21:58:59 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    |  8 +++++++
 .../org/apache/kylin/job/JoinedFlatTable.java   | 23 ++++++++++----------
 .../test_case_data/sandbox/kylin.properties     |  7 ++++++
 3 files changed, 27 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index e1a10a8..d0cc3be 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -663,6 +663,14 @@ abstract public class KylinConfigBase implements Serializable {
         return this.getOptional("kylin.source.hive.database-for-flat-table", "default");
     }
 
+    public String getFlatTableStorageFormat() {
+        return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE").toUpperCase();
+    }
+
+    public String getFlatTableFieldDelimiter() {
+        return this.getOptional("kylin.source.hive.flat-table-field-delimiter", "\\u001F");
+    }
+
     public boolean isHiveRedistributeEnabled() {
         return Boolean.parseBoolean(this.getOptional("kylin.source.hive.redistribute-flat-table", "true"));
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
----------------------------------------------------------------------
diff --git a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
index d136ec6..316fc99 100644
--- a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
+++ b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
@@ -56,11 +56,18 @@ public class JoinedFlatTable {
     }
 
     public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir) {
-        return generateCreateTableStatement(flatDesc, storageDfsDir, "SEQUENCEFILE");
+        String storageFormat = flatDesc.getDataModel().getConfig().getFlatTableStorageFormat();
+        return generateCreateTableStatement(flatDesc, storageDfsDir, storageFormat);
     }
 
     public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
-            String format, String filedDelimiter) {
+            String storageFormat) {
+        String fieldDelimiter = flatDesc.getDataModel().getConfig().getFlatTableFieldDelimiter();
+        return generateCreateTableStatement(flatDesc, storageDfsDir, storageFormat, fieldDelimiter);
+    }
+
+    public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
+            String storageFormat, String filedDelimiter) {
         StringBuilder ddl = new StringBuilder();
 
         ddl.append("CREATE EXTERNAL TABLE IF NOT EXISTS " + flatDesc.getTableName() + "\n");
@@ -74,21 +81,15 @@ public class JoinedFlatTable {
             ddl.append(colName(col) + " " + getHiveDataType(col.getDatatype()) + "\n");
         }
         ddl.append(")" + "\n");
-        if ("TEXTFILE".equals(format)) {
+        if ("TEXTFILE".equals(storageFormat)) {
             ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + filedDelimiter + "'\n");
         }
-        ddl.append("STORED AS " + format + "\n");
+        ddl.append("STORED AS " + storageFormat + "\n");
         ddl.append("LOCATION '" + getTableDir(flatDesc, storageDfsDir) + "';").append("\n");
         ddl.append("ALTER TABLE " + flatDesc.getTableName() + " SET TBLPROPERTIES('auto.purge'='true');\n");
         return ddl.toString();
     }
 
-    public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
-            String format) {
-        String fieldDelimiter = flatDesc.getDataModel().getConfig().getSourceFieldDelimiter();
-        return generateCreateTableStatement(flatDesc, storageDfsDir, format, fieldDelimiter);
-    }
-
     public static String generateDropTableStatement(IJoinedFlatTableDesc flatDesc) {
         StringBuilder ddl = new StringBuilder();
         ddl.append("DROP TABLE IF EXISTS " + flatDesc.getTableName() + ";").append("\n");
@@ -277,4 +278,4 @@ public class JoinedFlatTable {
         return sql.toString();
     }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/examples/test_case_data/sandbox/kylin.properties
----------------------------------------------------------------------
diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties
index 90c0b95..7271e90 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -94,6 +94,13 @@ kylin.engine.mr.yarn-check-interval-seconds=10
 # Hive database name for putting the intermediate flat tables
 kylin.source.hive.database-for-flat-table=default
 
+# Hive flat table storage format, defaults to sequencefile
+#kylin.source.hive.flat-table-storage-format=textfile
+
+# Hive flat table field delimiter; used only when kylin.source.hive.flat-table-storage-format is set to textfile; defaults to \u001F (unit separator)
+#kylin.source.hive.flat-table-field-delimiter=\\u001F
+
+
 #default compression codec for htable,snappy,lzo,gzip,lz4
 kylin.storage.hbase.compression-codec=gzip
 


[2/2] kylin git commit: KYLIN-3070 give a better method name for jdbc delimiter

Posted by sh...@apache.org.
KYLIN-3070 give a better method name for jdbc delimiter


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fab517b9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fab517b9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fab517b9

Branch: refs/heads/master
Commit: fab517b951fdcc7cd2f74f38f3f2e6991bcfada0
Parents: 9df8572
Author: shaofengshi <sh...@apache.org>
Authored: Sat Dec 9 22:34:57 2017 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Sat Dec 9 22:34:57 2017 +0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/kylin/common/KylinConfigBase.java   | 4 ++--
 .../main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/fab517b9/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index d0cc3be..66805df 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -664,7 +664,7 @@ abstract public class KylinConfigBase implements Serializable {
     }
 
     public String getFlatTableStorageFormat() {
-        return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE").toUpperCase();
+        return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE");
     }
 
     public String getFlatTableFieldDelimiter() {
@@ -768,7 +768,7 @@ abstract public class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.source.jdbc.sqoop-mapper-num", "4"));
     }
 
-    public String getSourceFieldDelimiter() {
+    public String getJdbcSourceFieldDelimiter() {
         return getOptional("kylin.source.jdbc.field-delimiter", "|");
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/fab517b9/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
index 0bfc145..d05f14e 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
@@ -67,7 +67,7 @@ public class JdbcHiveMRInput extends HiveMRInput {
 
         private AbstractExecutable createFlatHiveTableFromFiles(String hiveInitStatements, String jobWorkingDir) {
             final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
-            String filedDelimiter = getConfig().getSourceFieldDelimiter();
+            String filedDelimiter = getConfig().getJdbcSourceFieldDelimiter();
             // Sqoop does not support exporting SEQUENSEFILE to Hive now SQOOP-869
             final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir,
                     "TEXTFILE", filedDelimiter);
@@ -153,7 +153,7 @@ public class JdbcHiveMRInput extends HiveMRInput {
             String jdbcUser = config.getJdbcSourceUser();
             String jdbcPass = config.getJdbcSourcePass();
             String sqoopHome = config.getSqoopHome();
-            String filedDelimiter = config.getSourceFieldDelimiter();
+            String filedDelimiter = config.getJdbcSourceFieldDelimiter();
             int mapperNum = config.getSqoopMapperNum();
 
             String bquery = String.format("SELECT min(%s), max(%s) FROM %s.%s", splitColumn, splitColumn, splitDatabase,