You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by ma...@apache.org on 2012/10/07 10:26:55 UTC

git commit: CRUNCH-87: Fix archetype for Hadoop 2.

Updated Branches:
  refs/heads/master 8b3cc0015 -> eda3c776b


CRUNCH-87: Fix archetype for Hadoop 2.

Mark Hadoop dependencies in Crunch as "provided" and change dependent
modules so that integration tests work for both Hadoop 1 and 2.
Change the archetype so that generated projects contain the necessary
dependencies for using LocalJobRunner from the IDE.
Add logging configuration to the archetype.
Use jcl instead of slf4j in the HBase example for consistency.


Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/eda3c776
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/eda3c776
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/eda3c776

Branch: refs/heads/master
Commit: eda3c776b34ba58eabd11674cac668ab4a40bfcc
Parents: 8b3cc00
Author: Matthias Friedrich <ma...@mafr.de>
Authored: Sat Oct 6 17:36:19 2012 +0200
Committer: Matthias Friedrich <ma...@mafr.de>
Committed: Sun Oct 7 10:15:25 2012 +0200

----------------------------------------------------------------------
 .../src/main/resources/archetype-resources/pom.xml |   27 +++++++++--
 .../src/main/resources/log4j.properties            |   28 +++++++++++
 crunch-examples/pom.xml                            |   32 ++++++++++--
 .../crunch/examples/WordAggregationHBase.java      |    9 ++--
 .../src/main/resources/log4j.properties            |   24 +++++++++
 crunch/pom.xml                                     |   37 ++++++---------
 pom.xml                                            |    1 +
 7 files changed, 122 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch-archetype/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/pom.xml b/crunch-archetype/src/main/resources/archetype-resources/pom.xml
index 4c5d8cf..bcb1d70 100644
--- a/crunch-archetype/src/main/resources/archetype-resources/pom.xml
+++ b/crunch-archetype/src/main/resources/archetype-resources/pom.xml
@@ -45,16 +45,35 @@
       <artifactId>crunch</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <!-- We use hadoop-core instead of hadoop-client because it contains
-         dependencies that are necessary for running LocalJobRunner from
-         an IDE. -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-core</artifactId>
+      <artifactId>hadoop-client</artifactId>
       <version>${hadoop.version}</version>
       <scope>provided</scope>
     </dependency>
 
+    <!-- The following libraries come with Hadoop but are missing from
+         the client POM. You need them if you want to use LocalJobRunner
+         from the IDE. If you don't, you can safely delete them. -->
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>${commons-cli.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons-io.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <version>${commons-httpclient.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch-archetype/src/main/resources/archetype-resources/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/main/resources/log4j.properties b/crunch-archetype/src/main/resources/archetype-resources/src/main/resources/log4j.properties
new file mode 100644
index 0000000..ef6654d
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/main/resources/log4j.properties
@@ -0,0 +1,28 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#
+# Logging configuration for LocalJobRunner, when running from an IDE.
+#
+# NOTE: This configuration has NO EFFECT when running on the cluster!
+#
+log4j.rootLogger=INFO, A
+
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch-examples/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-examples/pom.xml b/crunch-examples/pom.xml
index 087a3b4..ae292b8 100644
--- a/crunch-examples/pom.xml
+++ b/crunch-examples/pom.xml
@@ -35,26 +35,46 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.crunch</groupId>
+      <artifactId>crunch</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.crunch</groupId>
+      <artifactId>crunch-hbase</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-client</artifactId>
       <scope>provided</scope>
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
       <scope>provided</scope>
     </dependency>
 
     <dependency>
-      <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>provided</scope>
     </dependency>
 
+    <!-- Required for running LocalJobRunner from the IDE, but missing
+         from hadoop-client -->
     <dependency>
-      <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch-hbase</artifactId>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <scope>provided</scope>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+    </dependency>
+
   </dependencies>
 
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch-examples/src/main/java/org/apache/crunch/examples/WordAggregationHBase.java
----------------------------------------------------------------------
diff --git a/crunch-examples/src/main/java/org/apache/crunch/examples/WordAggregationHBase.java b/crunch-examples/src/main/java/org/apache/crunch/examples/WordAggregationHBase.java
index 680752f..691721d 100644
--- a/crunch-examples/src/main/java/org/apache/crunch/examples/WordAggregationHBase.java
+++ b/crunch-examples/src/main/java/org/apache/crunch/examples/WordAggregationHBase.java
@@ -23,6 +23,8 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.crunch.CombineFn;
 import org.apache.crunch.DoFn;
 import org.apache.crunch.Emitter;
@@ -50,8 +52,7 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+
 
 /**
  * You need to have a HBase instance running. Required dependencies : hbase /!\
@@ -61,7 +62,7 @@ import org.slf4j.LoggerFactory;
  */
 @SuppressWarnings("serial")
 public class WordAggregationHBase extends Configured implements Tool, Serializable {
-  private final static Logger LOGGER = LoggerFactory.getLogger(WordAggregationHBase.class);
+  private final static Log LOG = LogFactory.getLog(WordAggregationHBase.class);
 
   // Configuration parameters. Here configured for a hbase instance running
   // locally
@@ -187,7 +188,7 @@ public class WordAggregationHBase extends Configured implements Tool, Serializab
   private List<Put> createPuts(final List<String> character, final List<String> play, final List<String> quote) throws IllegalArgumentException {
     List<Put> list = new ArrayList<Put>();
     if (character.size() != play.size() || quote.size() != play.size()) {
-      LOGGER.error("Every list should have the same number of elements");
+      LOG.error("Every list should have the same number of elements");
       throw new IllegalArgumentException("Every list should have the same number of elements");
     }
     for (int i = 0; i < character.size(); i++) {

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch-examples/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch-examples/src/main/resources/log4j.properties b/crunch-examples/src/main/resources/log4j.properties
new file mode 100644
index 0000000..116c69a
--- /dev/null
+++ b/crunch-examples/src/main/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ***** Set root logger level to INFO and its only appender to A.
+log4j.rootLogger=INFO, A
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/crunch/pom.xml
----------------------------------------------------------------------
diff --git a/crunch/pom.xml b/crunch/pom.xml
index 29c7263..5dcc489 100644
--- a/crunch/pom.xml
+++ b/crunch/pom.xml
@@ -35,12 +35,6 @@ under the License.
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
       <groupId>org.apache.avro</groupId>
       <artifactId>avro</artifactId>
     </dependency>
@@ -50,20 +44,18 @@ under the License.
       <artifactId>avro-mapred</artifactId>
     </dependency>
 
-    <!-- Override the slf4j dependency from Avro, which is incompatible with
-         Hadoop's. We don't use the "provided" scope so that clients get the
-	 correct version, too. -->
     <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
     </dependency>
 
-    <!-- LocalJobRunner needs commons-io, but hadoop-core doesn't reference
-         it. We add it here so that clients can run Crunch-based applications
-	 from the IDE. Note that this wouldn't work with "provided" scope! -->
+    <!-- Override the slf4j dependency from Avro, which is incompatible with
+         Hadoop's. -->
     <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>provided</scope>
     </dependency>
 
     <dependency>
@@ -108,12 +100,6 @@ under the License.
     </dependency>
 
     <dependency>
-      <groupId>commons-httpclient</groupId>
-      <artifactId>commons-httpclient</artifactId>
-      <scope>test</scope> <!-- only needed for LocalJobRunner -->
-    </dependency>
-
-    <dependency>
       <groupId>commons-logging</groupId>
       <artifactId>commons-logging</artifactId>
       <scope>provided</scope>
@@ -131,6 +117,13 @@ under the License.
       <scope>provided</scope>
     </dependency>
 
+    <!-- Used by LocalJobRunner in integration tests -->
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <scope>test</scope>
+    </dependency>
+
     <dependency>
       <groupId>org.apache.crunch</groupId>
       <artifactId>crunch-test</artifactId>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/eda3c776/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0791ee5..b2a7ba2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -69,6 +69,7 @@ under the License.
     <commons-codec.version>1.4</commons-codec.version>
     <commons-httpclient.version>3.0.1</commons-httpclient.version>
     <commons-logging.version>1.1.1</commons-logging.version>
+    <commons-cli.version>1.2</commons-cli.version>
     <avro.version>1.7.0</avro.version>
     <jackson.version>1.8.8</jackson.version>
     <protobuf-java.version>2.3.0</protobuf-java.version>