You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/11/07 06:24:05 UTC
[3/5] git commit: ACCUMULO-1783 Lift some pig test classes to write a
better "functional" test that ensures that joins actually work.
ACCUMULO-1783 Lift some pig test classes to write a better "functional"
test that ensures that joins actually work.
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/9b398d4a
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/9b398d4a
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/9b398d4a
Branch: refs/heads/ACCUMULO-1783
Commit: 9b398d4a32e50d3503b4ecb2f86a306e9db0221b
Parents: d72e1cb
Author: Josh Elser <el...@apache.org>
Authored: Tue Nov 5 17:14:33 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Tue Nov 5 17:14:33 2013 -0500
----------------------------------------------------------------------
.../accumulo/pig/AccumuloPigClusterTest.java | 165 +++++++++++++++++++
.../java/org/apache/pig/test/MiniCluster.java | 86 ++++++++++
.../org/apache/pig/test/MiniGenericCluster.java | 123 ++++++++++++++
3 files changed, 374 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/9b398d4a/src/test/java/org/apache/accumulo/pig/AccumuloPigClusterTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/accumulo/pig/AccumuloPigClusterTest.java b/src/test/java/org/apache/accumulo/pig/AccumuloPigClusterTest.java
new file mode 100644
index 0000000..0e2abb5
--- /dev/null
+++ b/src/test/java/org/apache/accumulo/pig/AccumuloPigClusterTest.java
@@ -0,0 +1,165 @@
+package org.apache.accumulo.pig;
+
+import java.io.File;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.admin.TableOperations;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.minicluster.MiniAccumuloCluster;
+import org.apache.accumulo.minicluster.MiniAccumuloConfig;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+public class AccumuloPigClusterTest {
+
+  private static final File tmpdir = Files.createTempDir();
+  private static MiniAccumuloCluster accumuloCluster;
+  private static MiniCluster cluster;
+  private static Configuration conf;
+  private PigServer pig;
+
+  /** Starts a one-tserver MiniAccumuloCluster and the Hadoop mini cluster whose configuration is given to Pig. */
+  @BeforeClass
+  public static void setupClusters() throws Exception {
+    MiniAccumuloConfig macConf = new MiniAccumuloConfig(tmpdir, "password");
+    macConf.setNumTservers(1);
+
+    accumuloCluster = new MiniAccumuloCluster(macConf);
+    accumuloCluster.start();
+
+    // This is needed by Pig
+    cluster = MiniCluster.buildCluster();
+    conf = cluster.getConfiguration();
+  }
+
+  /** Resets the Accumulo input/output format counters and builds a fresh local-mode PigServer for each test. */
+  @Before
+  public void beforeTest() throws Exception {
+    AccumuloInputFormat.resetCounters();
+    AccumuloOutputFormat.resetCounters();
+    pig = new PigServer(ExecType.LOCAL, conf);
+  }
+
+  /**
+   * Stops both clusters and deletes the temporary directory. Either cluster may still be null when
+   * setupClusters() failed part-way, and the nested finally blocks keep one failing step from skipping the rest.
+   */
+  @AfterClass
+  public static void stopClusters() throws Exception {
+    try {
+      if (null != accumuloCluster) {
+        accumuloCluster.stop();
+      }
+    } finally {
+      try {
+        if (null != cluster) {
+          cluster.shutDown();
+        }
+      } finally {
+        FileUtils.deleteDirectory(tmpdir);
+      }
+    }
+  }
+
+  /** Creates the "airports" and "flights" tables when missing and writes six records into each of them. */
+  private void loadTestData() throws Exception {
+    ZooKeeperInstance inst = new ZooKeeperInstance(accumuloCluster.getInstanceName(), accumuloCluster.getZooKeepers());
+    Connector c = inst.getConnector("root", "password");
+
+    TableOperations tops = c.tableOperations();
+    if (!tops.exists("airports")) {
+      tops.create("airports");
+    }
+    if (!tops.exists("flights")) {
+      tops.create("flights");
+    }
+
+    @SuppressWarnings("unchecked")
+    final List<ImmutableMap<String,String>> airportData = Lists.newArrayList(ImmutableMap.of("code", "SJC", "name", "San Jose"),
+        ImmutableMap.of("code", "SFO", "name", "San Francisco"), ImmutableMap.of("code", "MDO", "name", "Orlando"),
+        ImmutableMap.of("code", "MDW", "name", "Chicago-Midway"), ImmutableMap.of("code", "JFK", "name", "JFK International"),
+        ImmutableMap.of("code", "BWI", "name", "Baltimore-Washington"));
+    writeRecords(c, "airports", airportData);
+
+    @SuppressWarnings("unchecked")
+    final List<ImmutableMap<String,String>> flightData = Lists.newArrayList(ImmutableMap.of("origin", "BWI", "destination", "SFO"),
+        ImmutableMap.of("origin", "BWI", "destination", "SJC"), ImmutableMap.of("origin", "MDW", "destination", "MDO"),
+        ImmutableMap.of("origin", "MDO", "destination", "SJC"), ImmutableMap.of("origin", "SJC", "destination", "JFK"),
+        ImmutableMap.of("origin", "JFK", "destination", "MDW"));
+    writeRecords(c, "flights", flightData);
+  }
+
+  /**
+   * Writes each record to the table as one Mutation whose row is the record's 1-based list position; every map entry
+   * becomes a column with the key as family, an empty qualifier and the entry value as the cell value.
+   */
+  private static void writeRecords(Connector c, String table, List<? extends Map<String,String>> records) throws Exception {
+    BatchWriter bw = c.createBatchWriter(table, 100000l, 1000l, 1);
+    try {
+      int row = 1;
+      for (Map<String,String> record : records) {
+        Mutation m = new Mutation(Integer.toString(row));
+        for (Entry<String,String> entry : record.entrySet()) {
+          m.put(entry.getKey(), "", entry.getValue());
+        }
+        bw.addMutation(m);
+        row++;
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /** Joins flights to airports (origin = code) through AccumuloStorage and expects at least one joined tuple. */
+  @Test
+  public void test() throws Exception {
+    loadTestData();
+
+    final String loadFlights = "flights = LOAD 'accumulo://flights?instance=" + accumuloCluster.getInstanceName() +
+        "&user=root&password=password&zookeepers=" + accumuloCluster.getZooKeepers() + "' using org.apache.accumulo.pig.AccumuloStorage()" +
+        " as (rowKey:chararray, column_map:map[]);";
+
+    final String loadAirports = "airports = LOAD 'accumulo://airports?instance=" + accumuloCluster.getInstanceName() +
+        "&user=root&password=password&zookeepers=" + accumuloCluster.getZooKeepers() + "' using org.apache.accumulo.pig.AccumuloStorage()" +
+        " as (rowKey:chararray, column_map:map[]);";
+
+    final String joinQuery = "joined = JOIN flights BY column_map#'origin', airports BY column_map#'code';";
+
+    pig.registerQuery(loadFlights);
+    pig.registerQuery(loadAirports);
+    pig.registerQuery(joinQuery);
+
+    Iterator<Tuple> it = pig.openIterator("joined");
+
+    int i = 0;
+    while (it.hasNext()) {
+      Tuple t = it.next();
+      System.out.println(t);
+      i++;
+    }
+
+    // TODO actually verify something here
+    Assert.assertTrue("Should have found records but found none", i > 0);
+  }
+}
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/9b398d4a/src/test/java/org/apache/pig/test/MiniCluster.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/pig/test/MiniCluster.java b/src/test/java/org/apache/pig/test/MiniCluster.java
new file mode 100644
index 0000000..64467ae
--- /dev/null
+++ b/src/test/java/org/apache/pig/test/MiniCluster.java
@@ -0,0 +1,86 @@
+package org.apache.pig.test;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+public class MiniCluster extends MiniGenericCluster {
+    private static final File CONF_DIR = new File("build/classes");
+    private static final File CONF_FILE = new File(CONF_DIR, "hadoop-site.xml");
+
+    // No "= null" initializer on purpose: it would run after the superclass constructor has called
+    // setupMiniDfsAndMrClusters() and would discard the MiniMRCluster that was just started.
+    private MiniMRCluster m_mr;
+    public MiniCluster() { super(); }
+
+    /** Starts the mini DFS and MapReduce clusters, writes hadoop-site.xml and sets the system properties needed by Pig. */
+    @Override
+    protected void setupMiniDfsAndMrClusters() {
+        try {
+            System.setProperty("hadoop.log.dir", "build/test/logs");
+            final int dataNodes = 4; // There will be 4 data nodes
+            final int taskTrackers = 4; // There will be 4 task tracker nodes
+            // Create the dir that holds hadoop-site.xml and drop any stale copy (delete() is a no-op if absent)
+            CONF_DIR.mkdirs();
+            CONF_FILE.delete();
+
+            // Builds and starts the mini dfs and mapreduce clusters
+            Configuration config = new Configuration();
+            m_dfs = new MiniDFSCluster(config, dataNodes, true, null);
+            m_fileSys = m_dfs.getFileSystem();
+            m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1);
+
+            // Write the necessary config info to hadoop-site.xml
+            m_conf = m_mr.createJobConf();
+            m_conf.setInt("mapred.submit.replication", 2);
+            m_conf.set("dfs.datanode.address", "0.0.0.0:0");
+            m_conf.set("dfs.datanode.http.address", "0.0.0.0:0");
+            m_conf.set("mapred.map.max.attempts", "2");
+            m_conf.set("mapred.reduce.max.attempts", "2");
+            m_conf.set("pig.jobcontrol.sleep", "100");
+            FileOutputStream out = new FileOutputStream(CONF_FILE);
+            try {
+                m_conf.writeXml(out);
+            } finally {
+                out.close(); // release the file handle even if writeXml fails
+            }
+
+            // Set the system properties needed by Pig
+            System.setProperty("cluster", m_conf.get("mapred.job.tracker"));
+            System.setProperty("namenode", m_conf.get("fs.default.name"));
+            System.setProperty("junit.hadoop.conf", CONF_DIR.getPath());
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Removes hadoop-site.xml and shuts the MapReduce cluster down if one was started. */
+    @Override
+    protected void shutdownMiniMrClusters() {
+        CONF_FILE.delete(); // no-op when the file does not exist
+        if (m_mr != null) { m_mr.shutdown(); }
+        m_mr = null;
+    }
+}
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/9b398d4a/src/test/java/org/apache/pig/test/MiniGenericCluster.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/pig/test/MiniGenericCluster.java b/src/test/java/org/apache/pig/test/MiniGenericCluster.java
new file mode 100644
index 0000000..584631a
--- /dev/null
+++ b/src/test/java/org/apache/pig/test/MiniGenericCluster.java
@@ -0,0 +1,123 @@
+package org.apache.pig.test;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.*;
+import java.util.Properties;
+
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+
+/**
+ * This class builds a single instance of itself with the Singleton design pattern. While building the
+ * single instance, it sets up a mini cluster that actually consists of a mini DFS cluster and a mini
+ * MapReduce cluster on the local machine and also sets up the environment for Pig to run on top of it.
+ *
+ * This class is the base class for MiniCluster, which differs slightly among different versions of
+ * hadoop. MiniCluster implementation is located in $PIG_HOME/shims.
+ */
+abstract public class MiniGenericCluster {
+    protected MiniDFSCluster m_dfs = null;
+    protected FileSystem m_fileSys = null;
+    protected Configuration m_conf = null;
+
+    protected final static MiniCluster INSTANCE = new MiniCluster();
+    protected static boolean isSetup = true;
+
+    /** Starts the clusters during construction by invoking the subclass hook. */
+    protected MiniGenericCluster() {
+        setupMiniDfsAndMrClusters();
+    }
+
+    /** Hook implemented by the subclass to start the mini DFS and MapReduce clusters. */
+    abstract protected void setupMiniDfsAndMrClusters();
+
+    /**
+     * Returns the single MiniCluster instance, first setting its clusters up again if they were shut down.
+     */
+    public static MiniCluster buildCluster() {
+        if (isSetup) {
+            return INSTANCE;
+        }
+        INSTANCE.setupMiniDfsAndMrClusters();
+        isSetup = true;
+        return INSTANCE;
+    }
+
+    /** Shuts down the clusters of the shared INSTANCE. */
+    public void shutDown() { INSTANCE.shutdownMiniDfsAndMrClusters(); }
+
+    /** Shuts this object's clusters down when it is finalized. */
+    protected void finalize() { shutdownMiniDfsAndMrClusters(); }
+
+    /** Clears the isSetup flag, then shuts down the DFS cluster followed by the MapReduce cluster. */
+    protected void shutdownMiniDfsAndMrClusters() {
+        isSetup = false;
+        shutdownMiniDfsClusters();
+        shutdownMiniMrClusters();
+    }
+
+    /** Closes the file system (an IOException is only printed), stops the DFS cluster and nulls both fields. */
+    protected void shutdownMiniDfsClusters() {
+        if (m_fileSys != null) {
+            try {
+                m_fileSys.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+        if (m_dfs != null) { m_dfs.shutdown(); }
+        m_fileSys = null;
+        m_dfs = null;
+    }
+
+    /** Hook implemented by the subclass to stop the mini MapReduce cluster. */
+    abstract protected void shutdownMiniMrClusters();
+
+    /** Returns the configuration as Properties; throws RuntimeException once the cluster is shut down. */
+    public Properties getProperties() {
+        errorIfNotSetup();
+        return ConfigurationUtil.toProperties(m_conf);
+    }
+
+    /** Returns a new Configuration copied from the cluster configuration; performs no setup check. */
+    public Configuration getConfiguration() { return new Configuration(m_conf); }
+
+    /** Sets a property on the cluster configuration; throws RuntimeException once the cluster is shut down. */
+    public void setProperty(String name, String value) {
+        errorIfNotSetup();
+        m_conf.set(name, value);
+    }
+
+    /** Returns the cluster's file system; throws RuntimeException once the cluster is shut down. */
+    public FileSystem getFileSystem() {
+        errorIfNotSetup();
+        return m_fileSys;
+    }
+
+    /** Throws RuntimeException if isSetup is false. */
+    private void errorIfNotSetup(){
+        if (!isSetup) {
+            throw new RuntimeException("function called on MiniCluster that has been shutdown");
+        }
+    }
+}