You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by jw...@apache.org on 2012/08/13 03:33:51 UTC

[2/3] git commit: CRUNCH-4: Create profiles for hadoop 1.0.3 and hadoop 2.0.0-alpha

CRUNCH-4: Create profiles for hadoop 1.0.3 and hadoop 2.0.0-alpha


Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/629f5734
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/629f5734
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/629f5734

Branch: refs/heads/master
Commit: 629f57342bb8fb41b1484de7869c90509fdbfb9d
Parents: 38d7b3a
Author: jwills <jw...@apache.org>
Authored: Fri Jul 27 11:14:11 2012 -0700
Committer: jwills <jw...@apache.org>
Committed: Sun Aug 12 17:24:29 2012 -0700

----------------------------------------------------------------------
 .../java/org/apache/crunch/WordCountHBaseIT.java   |   13 +-
 .../it/java/org/apache/crunch/lib/AggregateIT.java |    4 +-
 crunch/src/it/resources/log4j.properties           |    2 +
 .../org/apache/crunch/lib/join/MapsideJoin.java    |    2 +-
 pom.xml                                            |  213 ++++++++++-----
 5 files changed, 162 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
index f7c9836..b7531b9 100644
--- a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
+++ b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
@@ -98,6 +98,7 @@ public class WordCountHBaseIT {
     conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
     conf.setInt("hbase.master.info.port", -1);
     conf.setInt("hbase.regionserver.info.port", -1);
+    conf.set("dfs.datanode.data.dir.perm", "775");
 
     hbaseTestUtil.startMiniZKCluster();
     hbaseTestUtil.startMiniCluster();
@@ -116,16 +117,16 @@ public class WordCountHBaseIT {
       }
 
       // Create a programmatic container for this jar.
-      JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseTest.jar"));
+      JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseIT.jar"));
       File baseDir = new File("target/test-classes");
       String prefix = "org/apache/crunch/";
-      jarUp(jos, baseDir, prefix + "WordCountHBaseTest.class");
-      jarUp(jos, baseDir, prefix + "WordCountHBaseTest$1.class");
-      jarUp(jos, baseDir, prefix + "WordCountHBaseTest$2.class");
+      jarUp(jos, baseDir, prefix + "WordCountHBaseIT.class");
+      jarUp(jos, baseDir, prefix + "WordCountHBaseIT$1.class");
+      jarUp(jos, baseDir, prefix + "WordCountHBaseIT$2.class");
       jos.close();
 
-      Path target = new Path(tmpPath, "WordCountHBaseTest.jar");
-      fs.copyFromLocalFile(true, new Path("WordCountHBaseTest.jar"), target);
+      Path target = new Path(tmpPath, "WordCountHBaseIT.jar");
+      fs.copyFromLocalFile(true, new Path("WordCountHBaseIT.jar"), target);
       DistributedCache.addFileToClassPath(target, conf, fs);
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
index 0202d09..4f47d8f 100644
--- a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
+++ b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
@@ -148,7 +148,7 @@ public class AggregateIT {
 
     assertEquals(1, collectionMap.size());
 
-    assertEquals(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a")), collectionMap.get(1));
+    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a"))));
   }
 
   @Test
@@ -166,7 +166,7 @@ public class AggregateIT {
     Employee empD = mapFn.map("d").second();
     Employee empA = mapFn.map("a").second();
 
-    assertEquals(Lists.newArrayList(empC, empD, empA), collectionMap.get(1));
+    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(empC, empD, empA)));
   }
 
   private static class MapStringToTextPair extends MapFn<String, Pair<Integer, Text>> {

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/log4j.properties b/crunch/src/it/resources/log4j.properties
index a04cec8..5d144a0 100644
--- a/crunch/src/it/resources/log4j.properties
+++ b/crunch/src/it/resources/log4j.properties
@@ -19,6 +19,8 @@ log4j.logger.org.apache.crunch=info, A
 
 # Log warnings on Hadoop for the local runner when testing
 log4j.logger.org.apache.hadoop=warn, A
+# Except for Configuration, which is chatty.
+log4j.logger.org.apache.hadoop.conf.Configuration=error, A
 
 # ***** A is set to be a ConsoleAppender.
 log4j.appender.A=org.apache.log4j.ConsoleAppender

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
index 226ad90..0ca1ab3 100644
--- a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
+++ b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
@@ -88,7 +88,7 @@ public class MapsideJoin {
     Path path = sourcePathTarget.getPath();
     DistributedCache.addCacheFile(path.toUri(), pipeline.getConfiguration());
 
-    MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.toString(), right.getPType());
+    MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.getName(), right.getPType());
     PTypeFamily typeFamily = left.getTypeFamily();
     return left.parallelDo("mapjoin", mapJoinDoFn,
         typeFamily.tableOf(left.getKeyType(), typeFamily.pairs(left.getValueType(), right.getValueType())));

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9afd875..c0a6526 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,8 +46,6 @@ under the License.
     <java.target.version>1.6</java.target.version>
     <scala.version>2.9.2</scala.version>
     <avro.version>1.7.0</avro.version>
-    <hadoop.version>0.20.2-cdh3u4</hadoop.version>
-    <hbase.version>0.90.6-cdh3u4</hbase.version>
   </properties>
 
   <scm>
@@ -120,12 +118,6 @@ under the License.
       </dependency>
 
       <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-client</artifactId>
-        <version>${hadoop.version}</version>
-      </dependency>
-
-      <dependency>
         <groupId>org.apache.avro</groupId>
         <artifactId>avro</artifactId>
         <version>${avro.version}</version>
@@ -180,12 +172,6 @@ under the License.
       </dependency>
            
       <dependency>
-       <groupId>org.apache.hadoop</groupId>
-       <artifactId>hadoop-minicluster</artifactId>
-       <version>${hadoop.version}</version>
-      </dependency>
-
-      <dependency>
         <groupId>org.hamcrest</groupId>
         <artifactId>hamcrest-all</artifactId>
         <version>1.1</version>
@@ -228,55 +214,6 @@ under the License.
       </dependency>
 
       <dependency>
-        <groupId>org.apache.hbase</groupId>
-        <artifactId>hbase</artifactId>
-        <version>${hbase.version}</version>
-        <exclusions>
-          <exclusion>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>avro</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-core</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>commons-logging</groupId>
-            <artifactId>commons-logging</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.google.guava</groupId>
-            <artifactId>guava</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>log4j</groupId>
-            <artifactId>log4j</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.slf4j</groupId>
-            <artifactId>slf4j-log4j12</artifactId>
-          </exclusion>
-        </exclusions>
-      </dependency>
-
-      <dependency>
-        <groupId>org.apache.hbase</groupId>
-        <artifactId>hbase</artifactId>
-        <version>${hbase.version}</version>
-        <type>test-jar</type>
-        <exclusions>
-          <exclusion>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>avro</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.slf4j</groupId>
-            <artifactId>slf4j-log4j12</artifactId>
-          </exclusion>
-        </exclusions>
-      </dependency>
-
-      <dependency>
         <groupId>org.scala-lang</groupId>
         <artifactId>scala-library</artifactId>
         <version>${scala.version}</version>
@@ -320,6 +257,156 @@ under the License.
     </site>
   </distributionManagement>
 
+  <profiles>
+    <profile>
+      <id>hadoop-1</id>
+      <activation>
+        <property>
+          <name>!crunch.platform</name>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>1.0.3</hadoop.version>
+        <hbase.version>0.90.4</hbase.version>
+      </properties>
+      <dependencyManagement>
+        <dependencies>
+          <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>${hadoop.version}</version>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-minicluster</artifactId>
+            <version>${hadoop.version}</version>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase</artifactId>
+            <version>${hbase.version}</version>
+            <exclusions>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>avro</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-core</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>commons-logging</groupId>
+                <artifactId>commons-logging</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>com.google.guava</groupId>
+                <artifactId>guava</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>log4j</groupId>
+                <artifactId>log4j</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-log4j12</artifactId>
+              </exclusion>
+            </exclusions>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase</artifactId>
+            <version>${hbase.version}</version>
+            <type>test-jar</type>
+            <exclusions>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>avro</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-log4j12</artifactId>
+              </exclusion>
+            </exclusions>
+          </dependency>
+        </dependencies>
+      </dependencyManagement>
+    </profile>
+    <profile>
+      <id>hadoop-2</id>
+      <activation>
+        <property>
+          <name>crunch.platform</name>
+          <value>2</value>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>2.0.0-alpha</hadoop.version>
+        <hbase.version>0.92.1-cdh4.0.0</hbase.version>
+      </properties>
+      <dependencyManagement>
+        <dependencies>
+          <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>${hadoop.version}</version>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-minicluster</artifactId>
+            <version>${hadoop.version}</version>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase</artifactId>
+            <version>${hbase.version}</version>
+            <exclusions>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>avro</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-core</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>commons-logging</groupId>
+                <artifactId>commons-logging</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>com.google.guava</groupId>
+                <artifactId>guava</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>log4j</groupId>
+                <artifactId>log4j</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-log4j12</artifactId>
+              </exclusion>
+            </exclusions>
+          </dependency>
+          <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase</artifactId>
+            <version>${hbase.version}</version>
+            <type>test-jar</type>
+            <exclusions>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>avro</artifactId>
+              </exclusion>
+              <exclusion>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-log4j12</artifactId>
+              </exclusion>
+            </exclusions>
+          </dependency>
+        </dependencies>
+      </dependencyManagement>
+    </profile>
+  </profiles>
+
   <build>
     <plugins>
       <plugin>