You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/08/01 15:28:55 UTC

[1/2] incubator-carbondata git commit: Refactored and cleaned up POM

Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 1ae4e4015 -> 645eafadb


Refactored and cleaned up POM

Updated the documentation to reflect the build change

Updated the documentation to reflect the build change


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/55ed7f83
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/55ed7f83
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/55ed7f83

Branch: refs/heads/master
Commit: 55ed7f83593433bdc911a5b2bf759da8a7704e90
Parents: 1ae4e40
Author: ravipesala <ra...@gmail.com>
Authored: Sun Jul 24 17:31:20 2016 +0530
Committer: chenliang613 <ch...@apache.org>
Committed: Mon Aug 1 23:25:50 2016 +0800

----------------------------------------------------------------------
 assembly/pom.xml                                |  24 ++++-
 common/pom.xml                                  |  14 +--
 core/pom.xml                                    |  50 ++-------
 ...stalling-CarbonData-And-IDE-Configuartion.md |   4 +
 examples/pom.xml                                |  33 +-----
 format/pom.xml                                  |   4 +-
 hadoop/pom.xml                                  |  20 +---
 integration-testcases/pom.xml                   |  40 +------
 integration/spark/pom.xml                       |  41 +------
 pom.xml                                         | 107 ++++++++++++++++---
 processing/pom.xml                              |  41 +------
 .../processing/csvload/DataGraphExecuter.java   |  23 ----
 .../CarbonCSVBasedDimSurrogateKeyGen.java       |  35 ++----
 .../csvbased/CarbonCSVBasedSeqGenStep.java      |  10 +-
 14 files changed, 149 insertions(+), 297 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index e41b99a..4c39180 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -18,11 +18,10 @@
  -->
 <project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
-  <groupId>com.huawei.datasight</groupId>
+  <groupId>org.carbondata</groupId>
   <artifactId>carbon-assembly</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
-  <name>carbon-assembly</name>
+  <name>CarbonData Assembly</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -124,6 +123,25 @@
   </build>
   <profiles>
     <profile>
+      <id>provided</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <properties>
+        <hadoop.deps.scope>provided</hadoop.deps.scope>
+        <spark.deps.scope>provided</spark.deps.scope>
+        <scala.deps.scope>provided</scala.deps.scope>
+      </properties>
+    </profile>
+    <profile>
+      <id>include-all</id>
+      <properties>
+        <hadoop.deps.scope>compile</hadoop.deps.scope>
+        <spark.deps.scope>compile</spark.deps.scope>
+        <scala.deps.scope>compile</scala.deps.scope>
+      </properties>
+    </profile>
+    <profile>
       <id>dist</id>
       <build>
         <plugins>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/common/pom.xml
----------------------------------------------------------------------
diff --git a/common/pom.xml b/common/pom.xml
index d7b6ad3..1ebcbb0 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -20,9 +20,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-common</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-common</name>
+  <name>CarbonData Common</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -41,25 +40,14 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.jmockit</groupId>
       <artifactId>jmockit</artifactId>
-      <version>1.10</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
   </dependencies>
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 0ad09ee..de8467b 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -20,9 +20,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-core</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-core</name>
+  <name>CarbonData Core</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -41,6 +40,11 @@
   <dependencies>
     <dependency>
       <groupId>org.carbondata</groupId>
+      <artifactId>carbon-format</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.carbondata</groupId>
       <artifactId>carbon-common</artifactId>
       <version>${project.version}</version>
     </dependency>
@@ -60,43 +64,17 @@
       <version>${kettle.version}</version>
     </dependency>
     <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.5.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
       <groupId>com.google.code.gson</groupId>
       <artifactId>gson</artifactId>
       <version>2.3.1</version>
     </dependency>
     <dependency>
-      <groupId>commons-vfs</groupId>
-      <artifactId>commons-vfs</artifactId>
-      <version>1.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.xerial.snappy</groupId>
@@ -106,25 +84,14 @@
     <dependency>
       <groupId>org.jmockit</groupId>
       <artifactId>jmockit</artifactId>
-      <version>1.10</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-sql_${scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-      <scope>${spark.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.carbondata</groupId>
@@ -132,11 +99,6 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.thrift</groupId>
-      <artifactId>libthrift</artifactId>
-      <version>0.9.3</version>
-    </dependency>
-    <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
       <version>3.4.7</version>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/docs/Installing-CarbonData-And-IDE-Configuartion.md
----------------------------------------------------------------------
diff --git a/docs/Installing-CarbonData-And-IDE-Configuartion.md b/docs/Installing-CarbonData-And-IDE-Configuartion.md
index 5015a48..3b2095d 100644
--- a/docs/Installing-CarbonData-And-IDE-Configuartion.md
+++ b/docs/Installing-CarbonData-And-IDE-Configuartion.md
@@ -26,6 +26,10 @@ $ mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.0 clean package
 $ mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.1 clean package
 $ mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.2 clean package
 ```
+* Build the assembly jar which includes Spark and Hadoop jars
+```
+$ mvn clean -DskipTests -Pinclude-all package
+```
 * Build with test
 ```
 $ mvn clean package

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index 11e5bf2..9ad21b3 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -20,9 +20,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-examples</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-examples</name>
+  <name>CarbonData Examples</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -71,50 +70,20 @@
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.5.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>pentaho</groupId>
-      <artifactId>pentaho-hdfs-vfs</artifactId>
-      <version>1.0.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-compiler</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-reflect</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-library</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-      <scope>${spark.deps.scope}</scope>
     </dependency>
   </dependencies>
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/format/pom.xml
----------------------------------------------------------------------
diff --git a/format/pom.xml b/format/pom.xml
index e0136d3..673b2c5 100644
--- a/format/pom.xml
+++ b/format/pom.xml
@@ -20,13 +20,13 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-format</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-format</name>
+  <name>CarbonData Format</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
     <artifactId>carbondata-parent</artifactId>
+    <relativePath>../pom.xml</relativePath>
   </parent>
   <properties>
     <dev.path>${basedir}/../dev</dev.path>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/hadoop/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index 7cd445b..f16cbce 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -20,9 +20,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-hadoop</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-hadoop</name>
+  <name>CarbonData Hadoop</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -60,27 +59,12 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>commons-vfs</groupId>
-      <artifactId>commons-vfs</artifactId>
-      <version>1.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
@@ -90,8 +74,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
-      <scope>test</scope>
     </dependency>
   </dependencies>
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/integration-testcases/pom.xml
----------------------------------------------------------------------
diff --git a/integration-testcases/pom.xml b/integration-testcases/pom.xml
index 832ea5d..c437b93 100644
--- a/integration-testcases/pom.xml
+++ b/integration-testcases/pom.xml
@@ -16,9 +16,8 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>org.carbondata</groupId>
     <artifactId>carbon-integration-testcases</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
     <packaging>jar</packaging>
-    <name>carbon-integration-testcases</name>
+    <name>CarbonData Integration Testcases</name>
     <parent>
         <groupId>org.carbondata</groupId>
         <version>0.1.0-SNAPSHOT</version>
@@ -78,61 +77,24 @@
             </exclusions>
         </dependency>
         <dependency>
-            <groupId>it.unimi.dsi</groupId>
-            <artifactId>fastutil</artifactId>
-            <version>6.5.0</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>*</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>pentaho</groupId>
-            <artifactId>pentaho-hdfs-vfs</artifactId>
-            <version>1.0.0</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>*</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>javax.servlet</groupId>
-            <artifactId>servlet-api</artifactId>
-            <version>2.5</version>
-        </dependency>
-        <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-compiler</artifactId>
-            <version>${scala.version}</version>
-            <scope>${scala.deps.scope}</scope>
         </dependency>
         <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-reflect</artifactId>
-            <version>${scala.version}</version>
-            <scope>${scala.deps.scope}</scope>
         </dependency>
         <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-library</artifactId>
-            <version>${scala.version}</version>
-            <scope>${scala.deps.scope}</scope>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
-            <version>${spark.version}</version>
-            <scope>${spark.deps.scope}</scope>
         </dependency>
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
-            <version>4.11</version>
-            <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/integration/spark/pom.xml
----------------------------------------------------------------------
diff --git a/integration/spark/pom.xml b/integration/spark/pom.xml
index e88ef6e..5eb760a 100644
--- a/integration/spark/pom.xml
+++ b/integration/spark/pom.xml
@@ -22,7 +22,7 @@
   <artifactId>carbon-spark</artifactId>
   <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-spark</name>
+  <name>CarbonData Spark</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -65,67 +65,28 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.5.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>pentaho</groupId>
-      <artifactId>pentaho-hdfs-vfs</artifactId>
-      <version>1.0.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>javax.servlet</groupId>
-      <artifactId>servlet-api</artifactId>
-      <version>2.5</version>
-    </dependency>
-    <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-compiler</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-reflect</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-library</artifactId>
-      <version>${scala.version}</version>
-      <scope>${scala.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-      <scope>${spark.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-repl_${scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-      <scope>${spark.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 61609b5..0e5e566 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,12 +22,22 @@
   <artifactId>carbondata-parent</artifactId>
   <version>0.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
-  <name>carbondata</name>
+  <name>CarbonData Parent POM</name>
+  <modules>
+    <module>common</module>
+    <module>format</module>
+    <module>core</module>
+    <module>processing</module>
+    <module>hadoop</module>
+    <module>integration/spark</module>
+    <module>assembly</module>
+    <module>examples</module>
+  </modules>
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <spark.version>1.5.2</spark.version>
     <scala.binary.version>2.10</scala.binary.version>
-    <snappy.version>1.1.1.7</snappy.version>
+    <snappy.version>1.1.2.6</snappy.version>
     <hadoop.version>2.2.0</hadoop.version>
     <scala.version>2.10.4</scala.version>
     <kettle.version>4.4.0-stable</kettle.version>
@@ -36,16 +46,83 @@
     <scala.deps.scope>compile</scala.deps.scope>
     <dev.path>${basedir}/dev</dev.path>
   </properties>
-  <modules>
-    <module>common</module>
-    <module>format</module>
-    <module>core</module>
-    <module>processing</module>
-    <module>hadoop</module>
-    <module>integration/spark</module>
-    <module>assembly</module>
-    <module>examples</module>
-  </modules>
+
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-core</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-sql_${scala.binary.version}</artifactId>
+        <version>${spark.version}</version>
+        <scope>${spark.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
+        <version>${spark.version}</version>
+        <scope>${spark.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-repl_${scala.binary.version}</artifactId>
+        <version>${spark.version}</version>
+        <scope>${spark.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-compiler</artifactId>
+        <version>${scala.version}</version>
+        <scope>${scala.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-reflect</artifactId>
+        <version>${scala.version}</version>
+        <scope>${scala.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-library</artifactId>
+        <version>${scala.version}</version>
+        <scope>${scala.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.jmockit</groupId>
+        <artifactId>jmockit</artifactId>
+        <version>1.10</version>
+        <exclusions>
+          <exclusion>
+            <groupId>*</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <version>4.11</version>
+        <scope>test</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
   <build>
     <plugins>
       <plugin>
@@ -160,5 +237,11 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <id>provided</id>
+    </profile>
+    <profile>
+      <id>include-all</id>
+    </profile>
   </profiles>
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/processing/pom.xml
----------------------------------------------------------------------
diff --git a/processing/pom.xml b/processing/pom.xml
index 8e60bc8..66d0820 100644
--- a/processing/pom.xml
+++ b/processing/pom.xml
@@ -20,9 +20,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.carbondata</groupId>
   <artifactId>carbon-processing</artifactId>
-  <version>0.1.0-SNAPSHOT</version>
   <packaging>jar</packaging>
-  <name>carbon-processing</name>
+  <name>CarbonData Processing</name>
   <parent>
     <groupId>org.carbondata</groupId>
     <version>0.1.0-SNAPSHOT</version>
@@ -81,54 +80,20 @@
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.5.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>pentaho</groupId>
-      <artifactId>pentaho-hdfs-vfs</artifactId>
-      <version>1.0.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>*</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
+      <groupId>org.jmockit</groupId>
+      <artifactId>jmockit</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>${hadoop.deps.scope}</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-      <version>3.3.2</version>
-    </dependency>
-    <dependency>
-      <groupId>net.jpountz.lz4</groupId>
-      <artifactId>lz4</artifactId>
-      <version>1.3.0</version>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
-      <scope>test</scope>
     </dependency>
   </dependencies>
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
index 610746e..d4a7f4e 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
@@ -21,7 +21,6 @@ package org.carbondata.processing.csvload;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -42,16 +41,12 @@ import org.carbondata.processing.dataprocessor.IDataProcessStatus;
 import org.carbondata.processing.etl.DataLoadingException;
 import org.carbondata.processing.surrogatekeysgenerator.csvbased.BadRecordslogger;
 
-import org.apache.commons.vfs.FileSystemException;
-import org.apache.commons.vfs.FileSystemManager;
-import org.apache.commons.vfs.impl.DefaultFileSystemManager;
 import org.pentaho.di.core.KettleEnvironment;
 import org.pentaho.di.core.exception.KettleException;
 import org.pentaho.di.core.exception.KettleXMLException;
 import org.pentaho.di.core.logging.LogLevel;
 import org.pentaho.di.core.logging.LoggingObjectInterface;
 import org.pentaho.di.core.logging.LoggingRegistry;
-import org.pentaho.di.core.vfs.KettleVFS;
 import org.pentaho.di.core.xml.XMLHandlerCache;
 import org.pentaho.di.trans.Trans;
 import org.pentaho.di.trans.TransMeta;
@@ -59,7 +54,6 @@ import org.pentaho.di.trans.step.StepMeta;
 import org.pentaho.di.trans.steps.getfilenames.GetFileNamesMeta;
 import org.pentaho.di.trans.steps.hadoopfileinput.HadoopFileInputMeta;
 import org.pentaho.di.trans.steps.textfileinput.TextFileInputField;
-import org.pentaho.hdfs.vfs.HDFSFileProvider;
 
 public class DataGraphExecuter {
   /**
@@ -171,23 +165,6 @@ public class DataGraphExecuter {
         // Register HDFS as a file system type with VFS to make HadoopFileInputMeta work
         boolean hdfsReadMode =
             model.getCsvFilePath() != null && model.getCsvFilePath().startsWith("hdfs:");
-        if (hdfsReadMode) {
-          try {
-            FileSystemManager fsm = KettleVFS.getInstance().getFileSystemManager();
-            if (fsm instanceof DefaultFileSystemManager) {
-              if (!Arrays.asList(fsm.getSchemes()).contains("hdfs")
-                  && !((DefaultFileSystemManager) fsm).hasProvider("hdfs")) {
-                ((DefaultFileSystemManager) fsm).addProvider("hdfs", new HDFSFileProvider());
-              }
-            }
-          } catch (FileSystemException e) {
-            if (!e.getMessage().contains("Multiple providers registered for URL scheme")) {
-              LOGGER.error(e,
-                  e.getMessage());
-            }
-          }
-        }
-
         trans.setVariable("modifiedDimNames", model.getDimTables());
         trans.setVariable("csvInputFilePath", model.getCsvFilePath());
         trans.setVariable("dimFileLocDir", model.getDimCSVDirLoc());

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
index ee6cfd0..3c1eaac 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
@@ -21,7 +21,6 @@ package org.carbondata.processing.surrogatekeysgenerator.csvbased;
 
 import java.sql.Connection;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -45,9 +44,6 @@ import org.carbondata.processing.schema.metadata.ArrayWrapper;
 import org.carbondata.processing.schema.metadata.ColumnSchemaDetails;
 import org.carbondata.processing.schema.metadata.ColumnsInfo;
 
-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
-import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
-
 import org.pentaho.di.core.exception.KettleException;
 
 public abstract class CarbonCSVBasedDimSurrogateKeyGen {
@@ -108,8 +104,8 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
   /**
    * hierCache
    */
-  private Map<String, Int2ObjectMap<int[]>> hierCache =
-      new HashMap<String, Int2ObjectMap<int[]>>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+  private Map<String, Map<Integer, int[]>> hierCache =
+      new HashMap<String, Map<Integer, int[]>>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
   /**
    *
    */
@@ -206,23 +202,6 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
     return key;
   }
 
-  public void checkHierExists(int[] val, String hier, int primaryKey) throws KettleException {
-    Int2ObjectMap<int[]> cache = hierCache.get(hier);
-
-    int[] hCache = cache.get(primaryKey);
-    if (hCache != null && Arrays.equals(hCache, val)) {
-      return;
-    } else {
-      wLock2.lock();
-      try {
-        // Store in cache
-        cache.put(primaryKey, val);
-      } finally {
-        wLock2.unlock();
-      }
-    }
-  }
-
   public void checkNormalizedHierExists(int[] val, String hier,
       HierarchyValueWriterForCSV hierWriter) throws KettleException {
     Map<ArrayWrapper, Integer> cache = hierCacheReverse.get(hier);
@@ -309,10 +288,10 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
   public abstract int getSurrogateForMeasure(String tuple, String columnName)
       throws KettleException;
 
-  private Int2ObjectMap<int[]> getHCache(String hName) {
-    Int2ObjectMap<int[]> hCache = hierCache.get(hName);
+  private Map<Integer, int[]> getHCache(String hName) {
+    Map<Integer, int[]> hCache = hierCache.get(hName);
     if (hCache == null) {
-      hCache = new Int2ObjectOpenHashMap<int[]>();
+      hCache = new HashMap<Integer, int[]>();
       hierCache.put(hName, hCache);
     }
 
@@ -456,14 +435,14 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
   /**
    * @return Returns the hierCache.
    */
-  public Map<String, Int2ObjectMap<int[]>> getHierCache() {
+  public Map<String, Map<Integer, int[]>> getHierCache() {
     return hierCache;
   }
 
   /**
    * @param hierCache The hierCache to set.
    */
-  public void setHierCache(Map<String, Int2ObjectMap<int[]>> hierCache) {
+  public void setHierCache(Map<String, Map<Integer, int[]>> hierCache) {
     this.hierCache = hierCache;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/55ed7f83/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
index 003b629..4baf2f8 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
@@ -55,7 +55,10 @@ import org.carbondata.core.keygenerator.KeyGenerator;
 import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
 import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
 import org.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
-import org.carbondata.core.util.*;
+import org.carbondata.core.util.CarbonProperties;
+import org.carbondata.core.util.CarbonTimeStatisticsFactory;
+import org.carbondata.core.util.CarbonUtil;
+import org.carbondata.core.util.DataTypeUtil;
 import org.carbondata.core.writer.ByteArrayHolder;
 import org.carbondata.core.writer.HierarchyValueWriterForCSV;
 import org.carbondata.processing.dataprocessor.manager.CarbonDataProcessorManager;
@@ -70,7 +73,6 @@ import org.carbondata.processing.schema.metadata.HierarchiesInfo;
 import org.carbondata.processing.util.CarbonDataProcessorUtil;
 import org.carbondata.processing.util.RemoveDictionaryUtil;
 
-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
 import org.pentaho.di.core.exception.KettleException;
 import org.pentaho.di.core.row.RowMetaInterface;
 import org.pentaho.di.core.row.ValueMeta;
@@ -1028,7 +1030,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
 
           }
 
-          Int2ObjectMap<int[]> cache = surrogateKeyGen.getHierCache().get(actualHierName);
+          Map<Integer, int[]> cache = surrogateKeyGen.getHierCache().get(actualHierName);
           int[] surrogateKeyForHierarchy = null;
           if (null != cache) {
 
@@ -1095,7 +1097,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
 
         }
 
-        Int2ObjectMap<int[]> cache = surrogateKeyGen.getHierCache().get(actualHierName);
+        Map<Integer, int[]> cache = surrogateKeyGen.getHierCache().get(actualHierName);
         int[] surrogateKeyForHrrchy = null;
         if (null != cache) {
           Integer keyFromCsv = dicCache.getSurrogateKey(tuple);


[2/2] incubator-carbondata git commit: [CARBONDATA-102] Reduce the size of the CarbonData jar file. This closes #53

Posted by ch...@apache.org.
[CARBONDATA-102] Reduce the size of the CarbonData jar file. This closes #53


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/645eafad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/645eafad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/645eafad

Branch: refs/heads/master
Commit: 645eafadbaf1dcd1bbd021fbb22ae35d93e3e424
Parents: 1ae4e40 55ed7f8
Author: chenliang613 <ch...@apache.org>
Authored: Mon Aug 1 23:28:10 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Mon Aug 1 23:28:10 2016 +0800

----------------------------------------------------------------------
 assembly/pom.xml                                |  24 ++++-
 common/pom.xml                                  |  14 +--
 core/pom.xml                                    |  50 ++-------
 ...stalling-CarbonData-And-IDE-Configuartion.md |   4 +
 examples/pom.xml                                |  33 +-----
 format/pom.xml                                  |   4 +-
 hadoop/pom.xml                                  |  20 +---
 integration-testcases/pom.xml                   |  40 +------
 integration/spark/pom.xml                       |  41 +------
 pom.xml                                         | 107 ++++++++++++++++---
 processing/pom.xml                              |  41 +------
 .../processing/csvload/DataGraphExecuter.java   |  23 ----
 .../CarbonCSVBasedDimSurrogateKeyGen.java       |  35 ++----
 .../csvbased/CarbonCSVBasedSeqGenStep.java      |  10 +-
 14 files changed, 149 insertions(+), 297 deletions(-)
----------------------------------------------------------------------