You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by jw...@apache.org on 2012/08/13 03:33:51 UTC
[2/3] git commit: CRUNCH-4: Create profiles for hadoop 1.0.3 and hadoop 2.0.0-alpha
CRUNCH-4: Create profiles for hadoop 1.0.3 and hadoop 2.0.0-alpha
Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/629f5734
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/629f5734
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/629f5734
Branch: refs/heads/master
Commit: 629f57342bb8fb41b1484de7869c90509fdbfb9d
Parents: 38d7b3a
Author: jwills <jw...@apache.org>
Authored: Fri Jul 27 11:14:11 2012 -0700
Committer: jwills <jw...@apache.org>
Committed: Sun Aug 12 17:24:29 2012 -0700
----------------------------------------------------------------------
.../java/org/apache/crunch/WordCountHBaseIT.java | 13 +-
.../it/java/org/apache/crunch/lib/AggregateIT.java | 4 +-
crunch/src/it/resources/log4j.properties | 2 +
.../org/apache/crunch/lib/join/MapsideJoin.java | 2 +-
pom.xml | 213 ++++++++++-----
5 files changed, 162 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
index f7c9836..b7531b9 100644
--- a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
+++ b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java
@@ -98,6 +98,7 @@ public class WordCountHBaseIT {
conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
conf.setInt("hbase.master.info.port", -1);
conf.setInt("hbase.regionserver.info.port", -1);
+ conf.set("dfs.datanode.data.dir.perm", "775");
hbaseTestUtil.startMiniZKCluster();
hbaseTestUtil.startMiniCluster();
@@ -116,16 +117,16 @@ public class WordCountHBaseIT {
}
// Create a programmatic container for this jar.
- JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseTest.jar"));
+ JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseIT.jar"));
File baseDir = new File("target/test-classes");
String prefix = "org/apache/crunch/";
- jarUp(jos, baseDir, prefix + "WordCountHBaseTest.class");
- jarUp(jos, baseDir, prefix + "WordCountHBaseTest$1.class");
- jarUp(jos, baseDir, prefix + "WordCountHBaseTest$2.class");
+ jarUp(jos, baseDir, prefix + "WordCountHBaseIT.class");
+ jarUp(jos, baseDir, prefix + "WordCountHBaseIT$1.class");
+ jarUp(jos, baseDir, prefix + "WordCountHBaseIT$2.class");
jos.close();
- Path target = new Path(tmpPath, "WordCountHBaseTest.jar");
- fs.copyFromLocalFile(true, new Path("WordCountHBaseTest.jar"), target);
+ Path target = new Path(tmpPath, "WordCountHBaseIT.jar");
+ fs.copyFromLocalFile(true, new Path("WordCountHBaseIT.jar"), target);
DistributedCache.addFileToClassPath(target, conf, fs);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
index 0202d09..4f47d8f 100644
--- a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
+++ b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
@@ -148,7 +148,7 @@ public class AggregateIT {
assertEquals(1, collectionMap.size());
- assertEquals(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a")), collectionMap.get(1));
+ assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a"))));
}
@Test
@@ -166,7 +166,7 @@ public class AggregateIT {
Employee empD = mapFn.map("d").second();
Employee empA = mapFn.map("a").second();
- assertEquals(Lists.newArrayList(empC, empD, empA), collectionMap.get(1));
+ assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(empC, empD, empA)));
}
private static class MapStringToTextPair extends MapFn<String, Pair<Integer, Text>> {
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/log4j.properties b/crunch/src/it/resources/log4j.properties
index a04cec8..5d144a0 100644
--- a/crunch/src/it/resources/log4j.properties
+++ b/crunch/src/it/resources/log4j.properties
@@ -19,6 +19,8 @@ log4j.logger.org.apache.crunch=info, A
# Log warnings on Hadoop for the local runner when testing
log4j.logger.org.apache.hadoop=warn, A
+# Except for Configuration, which is chatty.
+log4j.logger.org.apache.hadoop.conf.Configuration=error, A
# ***** A is set to be a ConsoleAppender.
log4j.appender.A=org.apache.log4j.ConsoleAppender
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
index 226ad90..0ca1ab3 100644
--- a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
+++ b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
@@ -88,7 +88,7 @@ public class MapsideJoin {
Path path = sourcePathTarget.getPath();
DistributedCache.addCacheFile(path.toUri(), pipeline.getConfiguration());
- MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.toString(), right.getPType());
+ MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.getName(), right.getPType());
PTypeFamily typeFamily = left.getTypeFamily();
return left.parallelDo("mapjoin", mapJoinDoFn,
typeFamily.tableOf(left.getKeyType(), typeFamily.pairs(left.getValueType(), right.getValueType())));
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9afd875..c0a6526 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,8 +46,6 @@ under the License.
<java.target.version>1.6</java.target.version>
<scala.version>2.9.2</scala.version>
<avro.version>1.7.0</avro.version>
- <hadoop.version>0.20.2-cdh3u4</hadoop.version>
- <hbase.version>0.90.6-cdh3u4</hbase.version>
</properties>
<scm>
@@ -120,12 +118,6 @@ under the License.
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
-
- <dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
@@ -180,12 +172,6 @@ under the License.
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-minicluster</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
-
- <dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>1.1</version>
@@ -228,55 +214,6 @@ under the License.
</dependency>
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>${hbase.version}</version>
- <type>test-jar</type>
- <exclusions>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
@@ -320,6 +257,156 @@ under the License.
</site>
</distributionManagement>
+ <profiles>
+ <profile>
+ <id>hadoop-1</id>
+ <activation>
+ <property>
+ <name>!crunch.platform</name>
+ </property>
+ </activation>
+ <properties>
+ <hadoop.version>1.0.3</hadoop.version>
+ <hbase.version>0.90.4</hbase.version>
+ </properties>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+ </profile>
+ <profile>
+ <id>hadoop-2</id>
+ <activation>
+ <property>
+ <name>crunch.platform</name>
+ <value>2</value>
+ </property>
+ </activation>
+ <properties>
+ <hadoop.version>2.0.0-alpha</hadoop.version>
+ <hbase.version>0.92.1-cdh4.0.0</hbase.version>
+ </properties>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+ </profile>
+ </profiles>
+
<build>
<plugins>
<plugin>