You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by ma...@apache.org on 2014/04/16 02:46:22 UTC

[1/2] BigPetStore - initial code drop - (lines commited by others : nigelsavage~200, mattfenwick~200, michaelmcune~5, anushshetty~10, jeffvance~10)

Repository: bigtop
Updated Branches:
  refs/heads/master 3298063c6 -> d3da8ceb1


http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java
new file mode 100755
index 0000000..3319064
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.bigtop.bigpetstore.util.DeveloperTools;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This is a mapreduce implementation of a generator of a large sentiment
+ * analysis data set. The scenario is as follows:
+ *
+ * The number of records will (roughly) correspond to the output size - each
+ * record is about 80 bytes.
+ *
+ * For 1KB set bigpetstore_records=10; for 1MB, 10,000; for 1GB, 10,000,000;
+ * for 1TB, 10,000,000,000.
+ */
+public class BPSGenerator {
+
+    final static Logger log = LoggerFactory.getLogger(BPSGenerator.class);
+
+    public enum props {
+        // bigpetstore_splits,
+        bigpetstore_records
+    }
+
+    public static Job createJob(Path output, int records) throws IOException {
+        Configuration c = new Configuration();
+        c.setInt(props.bigpetstore_records.name(), 10);
+        return createJob(output, c);
+    }
+
+    public static Job createJob(Path output, Configuration conf)
+            throws IOException {
+        Job job = new Job(conf, "PetStoreTransaction_ETL_"
+                + System.currentTimeMillis());
+        // recursively delete the data set if it exists.
+        FileSystem.get(output.toUri(),conf).delete(output, true);
+        job.setJarByClass(BPSGenerator.class);
+        job.setMapperClass(MyMapper.class);
+        // use the default reducer
+        // job.setReducerClass(PetStoreTransactionGeneratorJob.Red.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(Text.class);
+        job.setMapOutputKeyClass(Text.class);
+        job.setMapOutputValueClass(Text.class);
+        job.setInputFormatClass(GeneratePetStoreTransactionsInputFormat.class);
+        job.setOutputFormatClass(TextOutputFormat.class);
+        FileOutputFormat.setOutputPath(job, output);
+        return job;
+    }
+
+    public static class MyMapper extends Mapper<Text, Text, Text, Text> {
+
+        @Override
+        protected void setup(Context context) throws IOException,
+                InterruptedException {
+            super.setup(context);
+        }
+
+        protected void map(Text key, Text value, Context context)
+                throws java.io.IOException, InterruptedException {
+            context.write(key, value);
+            // TODO: Add multiple outputs here which writes mock addresses for
+            // generated users
+            // to a corresponding data file.
+        };
+    }
+
+    public static void main(String args[]) throws Exception {
+        if (args.length != 2) {
+            System.err.println("USAGE : [number of records] [output path]");
+            System.exit(0);
+        } else {
+            Configuration conf = new Configuration();
+            DeveloperTools.validate(
+                    args,
+                    "# of records",
+                    "output path");
+
+            conf.setInt(
+                    GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(),
+                    Integer.parseInt(args[0]));
+            createJob(new Path(args[1]), conf).waitForCompletion(true);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/GeneratePetStoreTransactionsInputFormat.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/GeneratePetStoreTransactionsInputFormat.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/GeneratePetStoreTransactionsInputFormat.java
new file mode 100755
index 0000000..a779428
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/GeneratePetStoreTransactionsInputFormat.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.KeyVal;
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+/**
+ * A simple input split that fakes input.
+ */
+public class GeneratePetStoreTransactionsInputFormat extends
+        FileInputFormat<Text, Text> {
+
+    @Override
+    public RecordReader<Text, Text> createRecordReader(
+            final InputSplit inputSplit, TaskAttemptContext arg1)
+            throws IOException, InterruptedException {
+        return new RecordReader<Text, Text>() {
+
+            @Override
+            public void close() throws IOException {
+
+            }
+
+            /**
+             * We need the "state" information to generate records. - Each state
+             * has a probability associated with it, so that our data set can be
+             * realistic (i.e. Colorado should have more transactions than rhode
+             * island).
+             *
+             * - Each state also will its name as part of the key.
+             *
+             * - This task would be distributed, for example, into 50 nodes on a
+             * real cluster, each creating the data for a given state.
+             */
+
+            // String storeCode = ((Split) inputSplit).storeCode;
+            int records = ((PetStoreTransactionInputSplit) inputSplit).records;
+            Iterator<KeyVal<String, String>> data = (new TransactionIteratorFactory(
+                    records, ((PetStoreTransactionInputSplit) inputSplit).state))
+                    .getData();
+            KeyVal<String, String> currentRecord;
+
+            @Override
+            public Text getCurrentKey() throws IOException,
+                    InterruptedException {
+                return new Text(currentRecord.key);
+            }
+
+            @Override
+            public Text getCurrentValue() throws IOException,
+                    InterruptedException {
+                return new Text(currentRecord.val);
+            }
+
+            @Override
+            public void initialize(InputSplit arg0, TaskAttemptContext arg1)
+                    throws IOException, InterruptedException {
+            }
+
+            @Override
+            public boolean nextKeyValue() throws IOException,
+                    InterruptedException {
+                if (data.hasNext()) {
+                    currentRecord = data.next();
+                    return true;
+                }
+                return false;
+            }
+
+            @Override
+            public float getProgress() throws IOException, InterruptedException {
+                return 0f;
+            }
+
+        };
+    }
+
+    public enum props {
+        // bigpetstore_splits,
+        bigpetstore_records
+    }
+
+    @Override
+    public List<InputSplit> getSplits(JobContext arg) throws IOException {
+        int num_records_desired = arg
+                .getConfiguration()
+                .getInt(GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records
+                        .name(), -1);
+        if (num_records_desired == -1) {
+            throw new RuntimeException(
+                    "# of total records not set in configuration object: "
+                            + arg.getConfiguration());
+        }
+
+        ArrayList<InputSplit> list = new ArrayList<InputSplit>();
+
+        /**
+         * Generator class will take a state as input and generate all the data
+         * for that state.
+         */
+        for (TransactionIteratorFactory.STATE s : STATE.values()) {
+            PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(
+                    (int) (Math.ceil(num_records_desired * s.probability)), s);
+            System.out.println(s + " _ " + split.records);
+            list.add(split);
+        }
+        return list;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java
new file mode 100755
index 0000000..71aa6d6
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.util.Date;
+
+public interface PetStoreTransaction {
+    /** Returns the first name recorded for this transaction. */
+    String getFirstName();
+    /** Returns the last name recorded for this transaction. */
+    String getLastName();
+    /** Returns the product recorded for this transaction. */
+    String getProduct();
+    /** Returns the date recorded for this transaction. */
+    Date getDate();
+    /** Returns the price recorded for this transaction (units unspecified). */
+    Integer getPrice();
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java
new file mode 100755
index 0000000..9b32344
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+/**
+ * Split describing one unit of generated data: a STATE and the number of
+ * records to produce for it. It is serialized as the record count followed
+ * by the state's enum name, and reports no host locations and a fixed length.
+ *
+ * Note: Writable must be implemented here; the original author observed that
+ * omitting it leads to strange and un-debuggable null pointer exceptions.
+ */
+public class PetStoreTransactionInputSplit extends InputSplit implements
+        Writable {
+
+    public int records;
+    public STATE state;
+
+    public PetStoreTransactionInputSplit() {
+    }
+
+    public PetStoreTransactionInputSplit(int records, STATE state) {
+        this.state = state;
+        this.records = records;
+    }
+
+    public void readFields(DataInput in) throws IOException {
+        this.records = in.readInt();
+        this.state = STATE.valueOf(in.readUTF());
+    }
+
+    public void write(DataOutput out) throws IOException {
+        out.writeInt(this.records);
+        out.writeUTF(this.state.name());
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
+        return new String[0];
+    }
+
+    @Override
+    public long getLength() throws IOException, InterruptedException {
+        return 100L;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
new file mode 100755
index 0000000..0ea81ee
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
@@ -0,0 +1,468 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.util.Pair;
+import org.apache.bigtop.bigpetstore.util.StringUtils;
+
+/**
+ * This class generates our data. Over time we will use it to embed bias which
+ * can then be teased out, i.e. by clustering/classifiers. For example:
+ *
+ * certain products <--> certain years or days
+ *
+ *
+ */
+public class TransactionIteratorFactory {
+
+    /**
+     * Each "state" has a pet store, with a certain "proportion" of the
+     * transactions. In this case California (CA) has the largest share of
+     * the transactions.
+     */
+
+    public static enum STATE {
+
+        // Each product is separated with an _ for its base price.
+        // That is just to make it easy to add new products.
+        // Each state is associated with a relative probability.
+        AZ(.1f, "dog-food_10", "cat-food_8", "leather-collar_25",
+                "snake-bite ointment_30", "turtle-food_11"),
+        AK(.1f,
+                "dog-food_10", "cat-food_8", "fuzzy-collar_19",
+                "antelope-caller_20", "salmon-bait_30"),
+        CT(.1f, "dog-food_10",
+                "cat-food_8", "fuzzy-collar_19", "turtle-pellets_5"),
+        OK(.1f,
+                "dog-food_10", "cat-food_8", "duck-caller_13",
+                "rodent-cage_40", "hay-bail_5", "cow-dung_2"),
+        CO(.1f,
+                "dog-food_10", "cat-food_8", "choke-collar_15",
+                "antelope snacks_30", "duck-caller_18"),
+        CA(.3f, "dog-food_10",
+                "cat-food_8", "fish-food_12", "organic-dog-food_16",
+                "turtle-pellets_5"),
+        NY(.2f, "dog-food_10", "cat-food_8", "steel-leash_20",
+                "fish-food_20", "seal-spray_25");
+
+        public static Random rand = new Random();
+        public float probability;
+        public String[] products;
+
+        private STATE(float probability, String... products) {
+            this.probability = probability;
+            this.products = products;
+        }
+
+        /** Returns a randomly chosen product as a (name, base price) pair. */
+        public Pair<String, Integer> randProduct() {
+            // Exclusive bound: the full length makes the last product reachable.
+            String product = products[rand.nextInt(products.length)];
+            String name = StringUtils.substringBefore(product, "_");
+            String price = StringUtils.substringAfter(product, "_");
+            return new Pair<String, Integer>(name, Integer.valueOf(price));
+        }
+    }
+
+    public static class KeyVal<K, V> {
+        // Holder pairing one key with its value; both fields are final.
+        public final K key;
+        public final V val;
+
+        public KeyVal(K key, V val) {
+            this.val = val;
+            this.key = key;
+        }
+    }
+
+    private Iterator<KeyVal<String, String>> dataIterator;
+
+    Random r;
+
+    public TransactionIteratorFactory(final int records, final STATE state) {
+
+        /**
+         * Random is seeded by STATE. This way similar names will be randomly
+         * selected for states .
+         */
+        r = new Random(state.hashCode());
+
+        if (records == 0) {
+            throw new RuntimeException(
+                    "Cant create a data iterator with no records (records==0) !");
+        }
+
+        this.dataIterator = new Iterator<KeyVal<String, String>>() {
+            int trans_id = 1;
+
+            @Override
+            public boolean hasNext() {
+                // TODO Auto-generated method stub
+                return trans_id <= records;
+            }
+
+            int repeat = 0;
+            String fname = randFirstName();
+            String lname = randLastName();
+
+            @Override
+            public KeyVal<String, String> next() {
+                /**
+                 * Some customers come back for more :) We repeat a name up to
+                 * ten times.
+                 */
+                if (repeat > 0)
+                    repeat--;
+                else {
+                    fname = randFirstName();
+                    lname = randLastName();
+                    repeat = (int) (r.nextGaussian() * 10f);
+                }
+                String key, val;
+                key = join(",", "BigPetStore", "storeCode_" + state.name(),
+                        trans_id++ + "");
+                Pair<String, Integer> product_price = state.randProduct();
+                val = join(
+                        ",",
+                        fname,
+                        lname,
+                        getDate().toString(),
+                        fudgePrice(product_price.getFirst(),
+                                product_price.getSecond())
+                                + "", product_price.getFirst()); // products are
+                                                                 // biased by
+                                                                 // state
+
+                return new KeyVal<String, String>(key, val);
+            }
+
+            @Override
+            public void remove() {
+                // TODO Auto-generated method stub
+
+            }
+
+        };
+    }
+
+    /**
+     * Adds a product-specific fraction to the base price {@code i}: +.50 for
+     * "dog" products, -.50 for "cat", -.25 for "fish", otherwise +.10. The
+     * product name is tested by substring, in that order.
+     * @return the adjusted price
+     */
+    public Float fudgePrice(String product, Integer i) {
+        final float offset;
+        if (product.contains("dog")) {
+            offset = .50f;
+        } else if (product.contains("cat")) {
+            offset = -.50f;
+        } else if (product.contains("fish")) {
+            offset = -.25f;
+        } else {
+            offset = .10f;
+        }
+        return i + offset;
+    }
+
+    /** Joins {@code strs}, inserting {@code sep} between consecutive elements. */
+    static String join(String sep, String... strs) {
+        if (strs.length == 0) {
+            return "";
+        } else if (strs.length == 1) {
+            return strs[0];
+        }
+        StringBuilder joined = new StringBuilder().append(strs[0]);
+        for (int i = 1; i < strs.length; i++) {
+            joined.append(sep).append(strs[i]);
+        }
+        return joined.toString();
+    }
+
+    public Iterator<KeyVal<String, String>> getData() {
+        return dataIterator; // the single iterator built by the constructor
+    }
+
+    private String randFirstName() { // bound is exclusive: use full length
+        return FIRSTNAMES[this.r.nextInt(FIRSTNAMES.length)].toLowerCase();
+    }
+
+    private String randLastName() { // bound is exclusive: use full length
+        return LASTNAMES[this.r.nextInt(LASTNAMES.length)].toLowerCase();
+    }
+
+    private Date getDate() { // NOTE(review): int millis stay near 1970 - confirm
+        return new Date((long) r.nextInt());
+    }
+
+    private Integer getPrice() { // uniform draw from [0, MAX_PRICE)
+        return Integer.valueOf(r.nextInt(MAX_PRICE));
+    }
+
+    public static final Integer MINUTES_IN_DAY = 60 * 24;
+    public static final Integer MAX_PRICE = 10000;
+
+    private static String[] FIRSTNAMES = { "Aaron", "Abby", "Abigail", "Adam",
+            "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia",
+            "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda",
+            "Amber", "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela",
+            "Angie", "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio",
+            "April", "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby",
+            "Barb", "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty",
+            "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie",
+            "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon",
+            "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian",
+            "Brianna", "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan",
+            "Caleb", "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol",
+            "Carolyn", "Carrie", "Casey", "Cassandra", "Catherine", "Cathy",
+            "Chad", "Charlene", "Charles", "Charlie", "Charlotte", "Chase",
+            "Chasity", "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne",
+            "Chris", "Christian", "Christina", "Christine", "Christoph",
+            "Christopher", "Christy", "Chuck", "Cindy", "Clara", "Clarence",
+            "Clayton", "Clifford", "Clint", "Cody", "Colton", "Connie",
+            "Corey", "Cory", "Courtney", "Craig", "Crystal", "Curtis",
+            "Cynthia", "Dakota", "Dale", "Dallas", "Dalton", "Dan", "Dana",
+            "Daniel", "Danielle", "Danny", "Darla", "Darlene", "Darrell",
+            "Darren", "Dave", "David", "Dawn", "Dean", "Deanna", "Debbie",
+            "Deborah", "Debra", "Denise", "Dennis", "Derek", "Derrick",
+            "Destiny", "Devin", "Diana", "Diane", "Dillon", "Dixie", "Dominic",
+            "Don", "Donald", "Donna", "Donnie", "Doris", "Dorothy", "Doug",
+            "Douglas", "Drew", "Duane", "Dustin", "Dusty", "Dylan", "Earl",
+            "Ed", "Eddie", "Edward", "Elaine", "Elizabeth", "Ellen", "Emily",
+            "Eric", "Erica", "Erika", "Erin", "Ernest", "Ethan", "Eugene",
+            "Eva", "Evelyn", "Everett", "Faith", "Father", "Felicia", "Floyd",
+            "Francis", "Frank", "Fred", "Gabriel", "Gage", "Gail", "Gary",
+            "Gene", "George", "Gerald", "Gina", "Ginger", "Glen", "Glenn",
+            "Gloria", "Grace", "Greg", "Gregory", "Haley", "Hannah", "Harley",
+            "Harold", "Harry", "Heath", "Heather", "Heidi", "Helen", "Herbert",
+            "Holly", "Hope", "Howard", "Hunter", "Ian", "Isaac", "Jack",
+            "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie", "Jan", "Jane",
+            "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay", "Jean",
+            "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer",
+            "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica",
+            "Jessie", "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi",
+            "Jody", "Joe", "Joel", "Joey", "John", "Johnathan", "Johnny",
+            "Jon", "Jonathan", "Jonathon", "Jordan", "Joseph", "Josh",
+            "Joshua", "Joyce", "Juanita", "Judy", "Julia", "Julie", "Justin",
+            "Kaitlyn", "Karen", "Katelyn", "Katherine", "Kathleen", "Kathryn",
+            "Kathy", "Katie", "Katrina", "Kay", "Kayla", "Kaylee", "Keith",
+            "Kelly", "Kelsey", "Ken", "Kendra", "Kenneth", "Kenny", "Kevin",
+            "Kim", "Kimberly", "Kris", "Krista", "Kristen", "Kristin",
+            "Kristina", "Kristy", "Kyle", "Kylie", "Lacey", "Laken", "Lance",
+            "Larry", "Laura", "Lawrence", "Leah", "Lee", "Leonard", "Leroy",
+            "Leslie", "Levi", "Lewis", "Linda", "Lindsay", "Lindsey", "Lisa",
+            "Lloyd", "Logan", "Lois", "Loretta", "Lori", "Louis", "Lynn",
+            "Madison", "Mandy", "Marcus", "Margaret", "Maria", "Mariah",
+            "Marie", "Marilyn", "Marion", "Mark", "Marlene", "Marsha",
+            "Martha", "Martin", "Marty", "Marvin", "Mary", "Mary ann", "Mason",
+            "Matt", "Matthew", "Max", "Megan", "Melanie", "Melinda", "Melissa",
+            "Melody", "Michael", "Michelle", "Mickey", "Mike", "Mindy",
+            "Miranda", "Misty", "Mitchell", "Molly", "Monica", "Morgan",
+            "Mother", "Myron", "Nancy", "Natasha", "Nathan", "Nicholas",
+            "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman", "Olivia",
+            "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick", "Patty",
+            "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip", "Phyllis",
+            "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy", "Ray",
+            "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda",
+            "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert",
+            "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger",
+            "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy",
+            "Russ", "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally",
+            "Sam", "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah",
+            "Savannah", "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna",
+            "Shannon", "Sharon", "Shaun", "Shawn", "Shawna", "Sheila",
+            "Shelly", "Sher", "Sherri", "Sherry", "Shirley", "Sierra",
+            "Skyler", "Stacey", "Stacy", "Stanley", "Stephanie", "Stephen",
+            "Steve", "Steven", "Sue", "Summer", "Susan", "Sydney", "Tabatha",
+            "Tabitha", "Tamara", "Tammy", "Tara", "Tasha", "Tashia", "Taylor",
+            "Ted", "Teresa", "Terri", "Terry", "Tessa", "Thelma", "Theresa",
+            "Thomas", "Tia", "Tiffany", "Tim", "Timmy", "Timothy", "Tina",
+            "Todd", "Tom", "Tommy", "Toni", "Tony", "Tonya", "Tracey",
+            "Tracie", "Tracy", "Travis", "Trent", "Trevor", "Trey", "Trisha",
+            "Tristan", "Troy", "Tyler", "Tyrone", "Unborn", "Valerie",
+            "Vanessa", "Vernon", "Veronica", "Vicki", "Vickie", "Vicky",
+            "Victor", "Victoria", "Vincent", "Virginia", "Vivian", "Walter",
+            "Wanda", "Wayne", "Wendy", "Wesley", "Whitney", "William",
+            "Willie", "Wyatt", "Zachary" };
+
+    public static String[] LASTNAMES = { "Abbott", "Acevedo", "Acosta",
+            "Adams", "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander",
+            "Alford", "Allen", "Allison", "Alston", "Alvarado", "Alvarez",
+            "Anderson", "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley",
+            "Atkins", "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers",
+            "Bailey", "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks",
+            "Barber", "Smith", "Johnson", "Williams", "Jones", "Brown",
+            "Davis", "Miller", "Wilson", "Moore", "Taylor", "Thomas",
+            "Jackson", "Barker", "Barlow", "Barnes", "Barnett", "Barr",
+            "Barrera", "Barrett", "Barron", "Barry", "Bartlett", "Barton",
+            "Bass", "Bates", "Battle", "Bauer", "Baxter", "Beach", "Bean",
+            "Beard", "Beasley", "Beck", "Becker", "Bell", "Bender", "Benjamin",
+            "Bennett", "Benson", "Bentley", "Benton", "Berg", "Berger",
+            "Bernard", "Berry", "Best", "Bird", "Bishop", "Black", "Blackburn",
+            "Blackwell", "Blair", "Blake", "Blanchard", "Blankenship",
+            "Blevins", "Bolton", "Bond", "Bonner", "Booker", "Boone", "Booth",
+            "Bowen", "Bowers", "Bowman", "Boyd", "Boyer", "Boyle", "Bradford",
+            "Bradley", "Bradshaw", "Brady", "Branch", "Bray", "Brennan",
+            "Brewer", "Bridges", "Briggs", "Bright", "Britt", "Brock",
+            "Brooks", "Browning", "Bruce", "Bryan", "Bryant", "Buchanan",
+            "Buck", "Buckley", "Buckner", "Bullock", "Burch", "Burgess",
+            "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt", "Burton",
+            "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain", "Calderon",
+            "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron",
+            "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas",
+            "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo",
+            "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash",
+            "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan",
+            "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase",
+            "Chavez", "Chen", "Cherry", "Christensen", "Christian", "Church",
+            "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons",
+            "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole",
+            "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton",
+            "Conley", "Conner", "Conrad", "Contreras", "Conway", "Cook",
+            "Cooke", "Cooley", "Cooper", "Copeland", "Cortez", "Cote",
+            "Cotton", "Cox", "Craft", "Craig", "Crane", "Crawford", "Crosby",
+            "Cross", "Cruz", "Cummings", "Cunningham", "Curry", "Curtis",
+            "Dale", "Dalton", "Daniel", "Daniels", "Daugherty", "Davenport",
+            "David", "Davidson", "Dawson", "Day", "Dean", "Decker", "Dejesus",
+            "Delacruz", "Delaney", "Deleon", "Delgado", "Dennis", "Diaz",
+            "Dickerson", "Dickinson", "Dillard", "Dillon", "Dixon", "Dodson",
+            "Dominguez", "Donaldson", "Donovan", "Dorsey", "Dotson", "Douglas",
+            "Downs", "Doyle", "Drake", "Dudley", "Duffy", "Duke", "Duncan",
+            "Dunlap", "Dunn", "Duran", "Durham", "Dyer", "Eaton", "Edwards",
+            "Elliott", "Ellis", "Ellison", "Emerson", "England", "English",
+            "Erickson", "Espinoza", "Estes", "Estrada", "Evans", "Everett",
+            "Ewing", "Farley", "Farmer", "Farrell", "Faulkner", "Ferguson",
+            "Fernandez", "Ferrell", "Fields", "Figueroa", "Finch", "Finley",
+            "Fischer", "Fisher", "Fitzgerald", "Fitzpatrick", "Fleming",
+            "Fletcher", "Flores", "Flowers", "Floyd", "Flynn", "Foley",
+            "Forbes", "Ford", "Foreman", "Foster", "Fowler", "Fox", "Francis",
+            "Franco", "Frank", "Franklin", "Franks", "Frazier", "Frederick",
+            "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes", "Fuller",
+            "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway", "Gamble",
+            "Garcia", "Gardner", "Garner", "Garrett", "Garrison", "Garza",
+            "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson", "Gilbert",
+            "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore", "Glass",
+            "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales",
+            "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould",
+            "Graham", "Grant", "Graves", "Gray", "Green", "Greene", "Greer",
+            "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra",
+            "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn",
+            "Hale", "Haley", "Hall", "Hamilton", "Hammond", "Hampton",
+            "Hancock", "Haney", "Hansen", "Hanson", "Hardin", "Harding",
+            "Hardy", "Harmon", "Harper", "Harris", "Harrington", "Harrison",
+            "Hart", "Hartman", "Harvey", "Hatfield", "Hawkins", "Hayden",
+            "Hayes", "Haynes", "Hays", "Head", "Heath", "Hebert", "Henderson",
+            "Hendricks", "Hendrix", "Henry", "Hensley", "Henson", "Herman",
+            "Hernandez", "Herrera", "Herring", "Hess", "Hester", "Hewitt",
+            "Hickman", "Hicks", "Higgins", "Hill", "Hines", "Hinton", "Hobbs",
+            "Hodge", "Hodges", "Hoffman", "Hogan", "Holcomb", "Holden",
+            "Holder", "Holland", "Holloway", "Holman", "Holmes", "Holt",
+            "Hood", "Hooper", "Hoover", "Hopkins", "Hopper", "Horn", "Horne",
+            "Horton", "House", "Houston", "Howard", "Howe", "Howell",
+            "Hubbard", "Huber", "Hudson", "Huff", "Huffman", "Hughes", "Hull",
+            "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst", "Hutchinson",
+            "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson", "James", "Jarvis",
+            "Jefferson", "Jenkins", "Jennings", "Jensen", "Jimenez", "Johns",
+            "Johnston", "Jordan", "Joseph", "Joyce", "Joyner", "Juarez",
+            "Justice", "Kane", "Kaufman", "Keith", "Keller", "Kelley", "Kelly",
+            "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd", "Kim", "King",
+            "Kinney", "Kirby", "Kirk", "Kirkland", "Klein", "Kline", "Knapp",
+            "Knight", "Knowles", "Knox", "Koch", "Kramer", "Lamb", "Lambert",
+            "Lancaster", "Landry", "Lane", "Lang", "Langley", "Lara", "Larsen",
+            "Larson", "Lawrence", "Lawson", "Le", "Leach", "Leblanc", "Lee",
+            "Leon", "Leonard", "Lester", "Levine", "Levy", "Lewis", "Lindsay",
+            "Lindsey", "Little", "Livingston", "Lloyd", "Logan", "Long",
+            "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas", "Luna",
+            "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack", "Madden",
+            "Maddox", "Maldonado", "Malone", "Mann", "Manning", "Marks",
+            "Marquez", "Marsh", "Marshall", "Martin", "Martinez", "Mason",
+            "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May",
+            "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall",
+            "McCarthy", "McCarty", "McClain", "McClure", "McConnell",
+            "McCormick", "McCoy", "McCray", "McCullough", "McDaniel",
+            "McDonald", "McDowell", "McFadden", "McFarland", "McGee",
+            "McGowan", "McGuire", "McIntosh", "McIntyre", "McKay", "McKee",
+            "McKenzie", "McKinney", "McKnight", "McLaughlin", "McLean",
+            "McLeod", "McMahon", "McMillan", "McNeil", "McPherson", "Meadows",
+            "Medina", "Mejia", "Melendez", "Melton", "Mendez", "Mendoza",
+            "Mercado", "Mercer", "Merrill", "Merritt", "Meyer", "Meyers",
+            "Michael", "Middleton", "Miles", "Mills", "Miranda", "Mitchell",
+            "Molina", "Monroe", "Montgomery", "Montoya", "Moody", "Moon",
+            "Mooney", "Morales", "Moran", "Moreno", "Morgan", "Morin",
+            "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses",
+            "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz",
+            "Murphy", "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson",
+            "Newman", "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen",
+            "Nieves", "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris",
+            "Norton", "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom",
+            "Odonnell", "Oliver", "Olsen", "Olson", "O'neal", "O'neil",
+            "O'neill", "Orr", "Ortega", "Ortiz", "Osborn", "Osborne", "Owen",
+            "Owens", "Pace", "Pacheco", "Padilla", "Page", "Palmer", "Park",
+            "Parker", "Parks", "Parrish", "Parsons", "Pate", "Patel",
+            "Patrick", "Patterson", "Patton", "Paul", "Payne", "Pearson",
+            "Peck", "Pena", "Pennington", "Perez", "Perkins", "Perry",
+            "Peters", "Petersen", "Peterson", "Petty", "Phelps", "Phillips",
+            "Pickett", "Pierce", "Pittman", "Pitts", "Pollard", "Poole",
+            "Pope", "Porter", "Potter", "Potts", "Powell", "Powers", "Pratt",
+            "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh", "Quinn",
+            "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph", "Rasmussen",
+            "Ratliff", "Ray", "Raymond", "Reed", "Reese", "Reeves", "Reid",
+            "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich", "Richard",
+            "Richards", "Richardson", "Richmond", "Riddle", "Riggs", "Riley",
+            "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins",
+            "Roberson", "Roberts", "Robertson", "Robinson", "Robles", "Rocha",
+            "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins",
+            "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross",
+            "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell",
+            "Russo", "Rutledge", "Ryan", "Salas", "Salazar", "Salinas",
+            "Sampson", "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana",
+            "Santiago", "Santos", "Sargent", "Saunders", "Savage", "Sawyer",
+            "Schmidt", "Schneider", "Schroeder", "Schultz", "Schwartz",
+            "Scott", "Sears", "Sellers", "Serrano", "Sexton", "Shaffer",
+            "Shannon", "Sharp", "Sharpe", "Shaw", "Shelton", "Shepard",
+            "Shepherd", "Sheppard", "Sherman", "Shields", "Short", "Silva",
+            "Simmons", "Simon", "Simpson", "Sims", "Singleton", "Skinner",
+            "Slater", "Sloan", "Small", "Snider", "Snow", "Snyder", "Solis",
+            "Solomon", "Sosa", "Soto", "Sparks", "Spears", "Spence", "Spencer",
+            "Stafford", "Stanley", "Stanton", "Stark", "Steele", "Stein",
+            "Stephens", "Stephenson", "Stevens", "Stevenson", "Stewart",
+            "Stokes", "Stone", "Stout", "Strickland", "Strong", "Stuart",
+            "Suarez", "Sullivan", "Summers", "Sutton", "Swanson", "Sweeney",
+            "Sweet", "Sykes", "Talley", "Tanner", "Tate", "Terrell", "Terry",
+            "Thompson", "Thornton", "Tillman", "Todd", "Torres", "Townsend",
+            "Tran", "Travis", "Trevino", "Trujillo", "Tucker", "Turner",
+            "Tyler", "Tyson", "Underwood", "Valdez", "Valencia", "Valentine",
+            "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan",
+            "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez",
+            "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker",
+            "Wall", "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters",
+            "Walton", "Ward", "Ware", "Warner", "Warren", "Washington",
+            "Waters", "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber",
+            "Webster", "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler",
+            "Whitaker", "White", "Whitehead", "Whitfield", "Whitley",
+            "Whitney", "Wiggins", "Wilcox", "Wilder", "Wiley", "Wilkerson",
+            "Wilkins", "Wilkinson", "William", "Williamson", "Willis",
+            "Winters", "Wise", "Witt", "Wolf", "Wolfe", "Wong", "Wood",
+            "Woodard", "Woods", "Woodward", "Wooten", "Workman", "Wright",
+            "Wyatt", "Wynn", "Yang", "Yates", "York", "Young", "Zamora",
+            "Zimmerman"
+    };
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
new file mode 100755
index 0000000..29f7c67
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ * Static final constants.
+ *
+ * It is useful to keep the basic SQL here, because the Hive SQL can vary
+ * between Hive versions: if it is updated here, it is updated everywhere.
+ */
+
+package org.apache.bigtop.bigpetstore.util;
+
+/**
+ * Constants used by the BigPetStore code.
+ */
+public class BigPetStoreConstants {
+
+    /**
+     * Names of the BigPetStore outputs. These names should also be recorded
+     * in the graphviz file arch.dot. The declaration order is left untouched
+     * so that ordinals do not shift.
+     */
+    public enum OUTPUTS {
+        /** generator */
+        generated,
+        /** pig */
+        cleaned,
+        pig_ad_hoc_script,
+        /** hive view over data for mahout */
+        MAHOUT_CF_IN,
+        /** mahout cf results */
+        MAHOUT_CF_OUT,
+        /** crunch */
+        CUSTOMER_PAGE
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/DeveloperTools.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/DeveloperTools.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/DeveloperTools.java
new file mode 100755
index 0000000..9c2d684
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/DeveloperTools.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.util;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Dev utilities for testing arguments etc...
+ */
+public class DeveloperTools {
+
+    /**
+     * Validates that the expected args are present in the "args" array.
+     * Just some syntactic sugar for good arg error handling.
+     *
+     * For each expected name, in order, prints "VALUE OF name = value" using
+     * the value at the same position in args. At the first position that has
+     * no value, prints which argument is missing and how many were expected,
+     * then stops. Missing arguments are only reported on standard output;
+     * nothing is thrown for them.
+     *
+     * @param args the actual arguments; may be null or shorter than expected.
+     * @param expected names of the expected arguments, by position.
+     */
+    public static void validate(String[] args, String... expected) {
+        // A null array is reported the same way as an empty one.
+        int available = (args == null) ? 0 : args.length;
+
+        for (int i = 0; i < expected.length; i++) {
+            // Explicit bounds check instead of catching Throwable, so that
+            // unrelated Errors are never swallowed here.
+            if (i >= available) {
+                System.out.println("Argument " + i + " not available.");
+                System.out.println("We expected " + expected.length + " arguments for this phase");
+                return;
+            }
+            System.out.println("VALUE OF " + expected[i] + " = " + args[i]);
+        }
+    }
+
+    /**
+     * Obtains a commons-logging Log for the Hadoop Job class and does nothing
+     * else with it.
+     * NOTE(review): presumably a quick check that logging and Hadoop classes
+     * are on the classpath -- confirm before removing.
+     */
+    public static void main(String[] args) throws Exception {
+        Log LOG = LogFactory.getLog(Job.class);
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
new file mode 100644
index 0000000..9fa9455
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.bigpetstore.util;
+
+import java.math.BigInteger;
+
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+
+/**
+ * User and Product IDs need numerical identifiers for recommender
+ * algorithms which attempt to interpolate new products.
+ *
+ * TODO: Delete this class. It's not necessarily required: We might just use HIVE HASH() as our
+ * standard for this.
+ */
+public class NumericalIdUtils {
+
+    /**
+     * People: the id is the String hash code of the state's enum name, an
+     * underscore, and the given name. When state is null only the name is
+     * hashed. The int hash code is widened to long, so ids may be negative.
+     */
+    public static long toId(STATE state, String name){
+        if (state == null) {
+            return name.hashCode();
+        }
+        String prefixed = state.name() + "_" + name;
+        return prefixed.hashCode();
+    }
+
+    /**
+     * Id for a name that has no state: same as the two-argument form with a
+     * null state.
+     */
+    public static long toId(String name){
+        STATE noState = null;
+        return toId(noState, name);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
new file mode 100644
index 0000000..a96fa44
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.bigpetstore.util;
+
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory;
+
+import java.util.Comparator;
+
+/**
+ * A pair of two final references, either of which may be null.
+ *
+ * Null components are accepted by every method: equals, hashCode, toString,
+ * compareTo and the two element comparators. For ordering purposes a null
+ * component sorts before any non-null one.
+ *
+ * The class is marked deprecated; no replacement is named in this file.
+ */
+@Deprecated
+public class Pair<S, T> implements Comparable<Pair<S, T>> {
+
+  private final S first;
+  private final T second;
+
+  public Pair(final S car, final T cdr) {
+    first = car;
+    second = cdr;
+  }
+
+  public S getFirst() { return first; }
+  public T getSecond() { return second; }
+
+  /**
+   * Two pairs are equal when both components are equal; a null component
+   * only equals another null component.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof Pair)) {
+      // also covers o == null
+      return false;
+    }
+    Pair<?, ?> other = (Pair<?, ?>) o;
+    return nullSafeEquals(first, other.first)
+        && nullSafeEquals(second, other.second);
+  }
+
+  /**
+   * Hash of the first component plus the hash of the second shifted left by
+   * one bit; a null component contributes 0.
+   */
+  @Override
+  public int hashCode() {
+    int code = 0;
+
+    if (null != first) {
+      code += first.hashCode();
+    }
+
+    if (null != second) {
+      code += second.hashCode() << 1;
+    }
+
+    return code;
+  }
+
+  /**
+   * Orders by the first component, then by the second. Null components sort
+   * first. As before, a null argument makes this pair the greater one
+   * (returns 1) rather than throwing.
+   *
+   * @throws ClassCastException if a non-null component that has to be
+   *         compared does not implement Comparable.
+   */
+  @Override
+  public int compareTo(Pair<S, T> p) {
+    if (null == p) {
+      return 1;
+    }
+
+    int firstResult = compareNullsFirst(first, p.first);
+    if (firstResult != 0) {
+      return firstResult;
+    }
+    return compareNullsFirst(second, p.second);
+  }
+
+  // TODO: Can this be made static? Same with SecondElemComparator?
+  // Kept as inner classes so that existing "pair.new FirstElemComparator()"
+  // callers keep compiling.
+  /** Compares pairs by their first component only, nulls first. */
+  public class FirstElemComparator implements Comparator<Pair<S, T>> {
+    public FirstElemComparator() {
+    }
+
+    public int compare(Pair<S, T> p1, Pair<S, T> p2) {
+      return compareNullsFirst(p1.first, p2.first);
+    }
+  }
+
+  /** Compares pairs by their second component only, nulls first. */
+  public class SecondElemComparator implements Comparator<Pair<S, T>> {
+    public SecondElemComparator() {
+    }
+
+    public int compare(Pair<S, T> p1, Pair<S, T> p2) {
+      return compareNullsFirst(p1.second, p2.second);
+    }
+  }
+
+  /** Renders as "(first, second)", printing "null" for a null component. */
+  @Override
+  public String toString() {
+    String firstString = "null";
+    String secondString = "null";
+
+    if (null != first) {
+      firstString = first.toString();
+    }
+
+    if (null != second) {
+      secondString = second.toString();
+    }
+
+    return "(" + firstString + ", " + secondString + ")";
+  }
+
+  /** equals() that tolerates null on either side. */
+  private static boolean nullSafeEquals(Object a, Object b) {
+    return (a == null) ? (b == null) : a.equals(b);
+  }
+
+  /**
+   * Natural-order comparison in which null is smaller than any value.
+   * The cast is unchecked because S and T are not bounded by Comparable;
+   * a non-Comparable value fails here with ClassCastException.
+   */
+  @SuppressWarnings("unchecked")
+  private static <E> int compareNullsFirst(E a, E b) {
+    if (a == null) {
+      return (b == null) ? 0 : -1;
+    }
+    if (b == null) {
+      return 1;
+    }
+    return ((Comparable<E>) a).compareTo(b);
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java
new file mode 100755
index 0000000..7b6bede
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.bigpetstore.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * TODO: This might be dead code.
+ */
+public class PetStoreParseFunctions {
+
+    // Column names, in the order the values appear on a line.
+    String[] headers = { "code", "city", "country", "lat", "lon" };
+
+    /**
+     * Splits a comma separated line and maps each value to the header at the
+     * same position.
+     *
+     * Lines with fewer values than headers yield a map with only the leading
+     * headers. Values beyond the last header are ignored (they used to cause
+     * an ArrayIndexOutOfBoundsException). Trailing empty values are dropped
+     * by String.split, so a line such as "," yields an empty map.
+     *
+     * @param line the raw line; must not be null.
+     * @return a new map from header name to the raw String value.
+     */
+    public Map<String, Object> parse(String line) {
+
+        Map<String, Object> resultMap = new HashMap<String, Object>();
+
+        String[] values = line.split(",");
+
+        // Never index past the known headers.
+        int columns = Math.min(values.length, headers.length);
+
+        for (int k = 0; k < columns; k++) {
+            resultMap.put(headers[k], values[k]);
+        }
+
+        return resultMap;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/StringUtils.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/StringUtils.java
new file mode 100644
index 0000000..02399bf
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/StringUtils.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.util;
+
+import java.util.ArrayList;
+
+/**
+ * A few string helpers borrowed from apache-commons-lang StringUtils; over
+ * time we might add more elements here.
+ *
+ * Keeping these methods in-tree keeps cluster dependencies minimal: this is
+ * sometimes easier than adding jars manually to the hadoop classpath or
+ * shipping them via DistributedCache.
+ */
+public class StringUtils {
+
+     /**
+      * Returns the part of str before the first occurrence of separator.
+      *
+      * @param str the string to cut; null or empty is returned unchanged.
+      * @param separator the text to search for; when null, or when it does
+      *        not occur in str, str is returned unchanged. An empty
+      *        separator yields "".
+      * @return the text before the separator, without the separator.
+      */
+     public static String substringBefore(String str, String separator) {
+         // Same guards as substringAfter, so both methods accept null input.
+         if (isEmpty(str) || separator == null) {
+             return str;
+         }
+         int pos = str.indexOf(separator);
+         if (pos == -1) {
+             return str;
+         }
+         return str.substring(0, pos);
+     }
+
+
+     /**
+      * Returns the part of str after the first occurrence of separator.
+      *
+      * @param str the string to cut; null or empty is returned unchanged.
+      * @param separator the text to search for; when null, or when it does
+      *        not occur in str, "" is returned.
+      * @return the text after the separator, without the separator.
+      */
+     public static String substringAfter(String str, String separator) {
+         if (isEmpty(str)) {
+             return str;
+         }
+         if (separator == null) {
+             return "";
+         }
+         int pos = str.indexOf(separator);
+         if (pos == -1) {
+             return "";
+         }
+         return str.substring(pos + separator.length());
+     }
+
+     /** True for null and for the empty string. */
+     private static boolean isEmpty(String str) {
+         return str == null || str.length() == 0;
+     }
+ }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/resources/hive-log4j.properties
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/resources/hive-log4j.properties b/bigtop-bigpetstore/src/main/resources/hive-log4j.properties
new file mode 100755
index 0000000..9236008
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/resources/hive-log4j.properties
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hive.log.threshold=ERROR
+hive.root.logger=ERROR,DRFA
+hive.log.dir=/tmp/${user.name}
+hive.log.file=hive.log
+
+# Define the root logger from the "hive.root.logger" property set above.
+log4j.rootLogger=${hive.root.logger}, EventCounter, console
+
+# Logging Threshold
+log4j.threshold=${hive.log.threshold}
+
+#
+# Daily Rolling File Appender
+#
+# Use the PidDailyRollingFileAppender class instead if you want to use separate log files
+# for different CLI sessions.
+#
+# log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+
+log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# "console" is already listed in log4j.rootLogger above; remove it there to turn this appender off
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.console.encoding=UTF-8
+
+#custom logging levels
+#log4j.logger.xxx=DEBUG
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
+
+
+log4j.category.DataNucleus=OFF
+log4j.category.Datastore=OFF
+log4j.category.Datastore.Schema=OFF
+log4j.category.JPOX.Datastore=OFF
+log4j.category.JPOX.Plugin=OFF
+log4j.category.JPOX.MetaData=OFF
+log4j.category.JPOX.Query=OFF
+log4j.category.JPOX.General=OFF
+log4j.category.JPOX.Enhancer=OFF
+

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/resources/hive-site.xml
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/resources/hive-site.xml b/bigtop-bigpetstore/src/main/resources/hive-site.xml
new file mode 100644
index 0000000..dd96f32
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/resources/hive-site.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files  -->
+<!-- that are implied by Hadoop setup variables.                                                -->
+<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive    -->
+<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
+<!-- resource).                                                                                 -->
+
+<!-- Hive Execution Parameters -->
+
+<property>
+  <name>javax.jdo.option.ConnectionURL</name>
+  <!-- value>jdbc:derby:;databaseName=/var/lib/hive/metastore/metastore_db;create=true</value -->
+    <value>jdbc:derby:;databaseName=/tmp/metastore/metastore_db;create=true</value>
+  <description>JDBC connect string for a JDBC metastore</description>
+</property>
+
+<property>
+  <name>hive.metastore.warehouse.dir</name>
+  <value>/tmp</value>
+  <description>location of default database for the warehouse</description>
+</property>
+
+
+<property>
+  <name>javax.jdo.option.ConnectionDriverName</name>
+  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+  <description>Driver class name for a JDBC metastore</description>
+</property>
+
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java
new file mode 100644
index 0000000..883bb55
--- /dev/null
+++ b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.docs;
+
+import java.io.File;
+
+import junit.framework.Assert;
+
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants.OUTPUTS;
+import org.apache.commons.io.FileUtils;
+import org.junit.Test;
+
+public class TestDocs {
+
+    /**
+     * Reads the graphviz description of the architecture (arch.dot) and
+     * checks that it mentions the names of the OUTPUTS constants, so the
+     * diagram cannot silently drift away from the code.
+     */
+    @Test
+    public void testGraphViz() throws Exception{
+        final File dotFile = new File("arch.dot");
+        final String dotSource = FileUtils.readFileToString(dotFile);
+        System.out.println(dotSource);
+
+        // The diagram must reference the "generated" output ...
+        final String generatedName = OUTPUTS.generated.name();
+        org.junit.Assert.assertTrue(dotSource.contains(generatedName));
+
+        // ... as well as the "cleaned" output.
+        final String cleanedName = OUTPUTS.cleaned.name();
+        org.junit.Assert.assertTrue(dotSource.contains(cleanedName));
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
new file mode 100644
index 0000000..c68d471
--- /dev/null
+++ b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import junit.framework.Assert;
+
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.bigtop.bigpetstore.util.NumericalIdUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+public class TestNumericalIdUtils {
+
+    /**
+     * The same customer name prefixed with two different state names must
+     * be mapped to two different numerical ids.
+     */
+    @Test
+    public void testName() {
+        final String customer = "jay vyas";
+
+        final String oklahomaKey = STATE.OK.name() + "_" + customer;
+        final long oklahomaId = NumericalIdUtils.toId(oklahomaKey);
+
+        final String coloradoKey = STATE.CO.name() + "_" + customer;
+        final long coloradoId = NumericalIdUtils.toId(coloradoKey);
+
+        System.out.println(oklahomaId + " " + coloradoId);
+        Assert.assertFalse(oklahomaId == coloradoId);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
new file mode 100755
index 0000000..d1a60b3
--- /dev/null
+++ b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.InputStreamReader;
+import java.lang.management.ManagementFactory;
+import java.util.Date;
+
+import junit.framework.Assert;
+
+import org.apache.bigtop.bigpetstore.generator.BPSGenerator;
+import org.apache.bigtop.bigpetstore.generator.BPSGenerator.props;
+import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * End-to-end check of the BigPetStore transaction generator: runs the
+ * generator job for a small number of records and inspects the output file.
+ *
+ * run this test with vm options -XX:MaxPermSize=256m -Xms512m -Xmx1024m
+ */
+public class TestPetStoreTransactionGeneratorJob {
+
+    final static Logger log = LoggerFactory
+            .getLogger(TestPetStoreTransactionGeneratorJob.class);
+
+    /**
+     * Configures the generator for 20 records, runs it, then verifies the
+     * number of output lines and that both the CT and AZ states show up.
+     *
+     * @throws Exception
+     *             if the job cannot be run or its output cannot be read
+     */
+    @Test
+    public void test() throws Exception {
+
+        // Informational only: the free-memory figure is printed, not enforced.
+        System.out.println("memory : " + Runtime.getRuntime().freeMemory()
+                / 1000000);
+
+        int records = 20;
+        /**
+         * Setup configuration with prop.
+         */
+        Configuration c = new Configuration();
+        c.setInt(props.bigpetstore_records.name(), records);
+
+        /**
+         * Run the job
+         */
+        Path output = new Path("petstoredata/" + (new Date()).toString());
+        Job createInput = BPSGenerator.createJob(output, c);
+        // NOTE(review): submit() is kept ahead of the println because
+        // Job.toString() appears to require an already-submitted job in
+        // Hadoop 2 -- confirm before removing it.
+        createInput.submit();
+        System.out.println(createInput);
+        // Fail right here if the job itself failed, instead of failing later
+        // with a confusing "file not found" while reading its output.
+        Assert.assertTrue("generator job did not complete successfully",
+                createInput.waitForCompletion(true));
+
+        FileSystem fs = FileSystem.getLocal(new Configuration());
+        Path part = new Path(output, "part-r-00000");
+
+        /**
+         * Read file output line by line.
+         */
+        int recordsSeen = 0;
+        boolean CTseen = false;
+        boolean AZseen = false;
+
+        DataInputStream f = fs.open(part);
+        BufferedReader br = new BufferedReader(new InputStreamReader(f));
+        try {
+            // confirm that both CT and AZ are seen in the outputs.
+            // readLine() returning null is the end-of-file signal; ready()
+            // only says whether a read would block and must not be used
+            // for that.
+            String s;
+            while ((s = br.readLine()) != null) {
+                System.out.println("===>" + s);
+                recordsSeen++;
+                if (s.contains(STATE.CT.name())) {
+                    CTseen = true;
+                }
+                if (s.contains(STATE.AZ.name())) {
+                    AZseen = true;
+                }
+            }
+        } finally {
+            // try/finally rather than try-with-resources: the build
+            // targets Java 1.6.
+            br.close();
+        }
+
+        // records seen should = 20
+        Assert.assertEquals(records, recordsSeen);
+        // Assert that a couple of the states are seen (todo make it
+        // comprehensive for all states).
+        Assert.assertTrue(CTseen);
+        Assert.assertTrue(AZseen);
+        log.info("Created {} , file was {} bytes.", records,
+                fs.getFileStatus(part).getLen());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/resources/log4j.properties b/bigtop-bigpetstore/src/test/resources/log4j.properties
new file mode 100644
index 0000000..1e33093
--- /dev/null
+++ b/bigtop-bigpetstore/src/test/resources/log4j.properties
@@ -0,0 +1,47 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+#
+# Job Summary Appender
+#
+# Use following logger to send summary to separate file defined by
+# hadoop.mapreduce.jobsummary.log.file rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+#
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.EventCounter=org.apache.log4j.ConsoleAppender
+log4j.appender.EventCounter.layout=org.apache.log4j.PatternLayout
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout


[2/2] git commit: BigPetStore - initial code drop - (lines commited by others : nigelsavage~200, mattfenwick~200, michaelmcune~5, anushshetty~10, jeffvance~10)

Posted by ma...@apache.org.
BigPetStore - initial code drop - (lines commited by others : nigelsavage~200,mattfenwick~200,michaelmcune~5,anushshetty~10,jeffvance~10)


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/d3da8ceb
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/d3da8ceb
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/d3da8ceb

Branch: refs/heads/master
Commit: d3da8ceb165ea8692a5432224ef4b116476498be
Parents: 3298063
Author: jayunit100 <ja...@gmail.com>
Authored: Mon Apr 14 21:28:45 2014 -0400
Committer: Sean Mackrory <ma...@apache.org>
Committed: Tue Apr 15 18:29:53 2014 -0600

----------------------------------------------------------------------
 bigtop-bigpetstore/BPS_analytics.pig            |  77 ++
 bigtop-bigpetstore/README.md                    | 140 ++++
 bigtop-bigpetstore/arch.dot                     |  44 +
 bigtop-bigpetstore/pom.xml                      | 797 +++++++++++++++++++
 bigtop-bigpetstore/setuphive.sh                 |  22 +
 .../bigtop/bigpetstore/BigPetStoreHiveIT.java   | 108 +++
 .../bigtop/bigpetstore/BigPetStoreMahoutIT.java |  88 ++
 .../bigtop/bigpetstore/BigPetStorePigIT.java    | 165 ++++
 .../org/apache/bigtop/bigpetstore/ITUtils.java  | 145 ++++
 .../bigpetstore/clustering/BPSRecommnder.java   |  83 ++
 .../contract/PetStoreStatistics.java            |  34 +
 .../bigtop/bigpetstore/etl/CrunchETL.java       | 142 ++++
 .../bigtop/bigpetstore/etl/HiveViewCreator.java | 157 ++++
 .../apache/bigtop/bigpetstore/etl/LineItem.java | 112 +++
 .../bigtop/bigpetstore/etl/PigCSVCleaner.java   | 171 ++++
 .../bigpetstore/generator/BPSGenerator.java     | 116 +++
 ...GeneratePetStoreTransactionsInputFormat.java | 134 ++++
 .../generator/PetStoreTransaction.java          |  32 +
 .../PetStoreTransactionInputSplit.java          |  67 ++
 .../generator/TransactionIteratorFactory.java   | 468 +++++++++++
 .../bigpetstore/util/BigPetStoreConstants.java  |  36 +
 .../bigtop/bigpetstore/util/DeveloperTools.java |  58 ++
 .../bigpetstore/util/NumericalIdUtils.java      |  50 ++
 .../apache/bigtop/bigpetstore/util/Pair.java    | 125 +++
 .../util/PetStoreParseFunctions.java            |  55 ++
 .../bigtop/bigpetstore/util/StringUtils.java    |  53 ++
 .../src/main/resources/hive-log4j.properties    |  84 ++
 .../src/main/resources/hive-site.xml            |  36 +
 .../bigtop/bigpetstore/docs/TestDocs.java       |  46 ++
 .../generator/TestNumericalIdUtils.java         |  36 +
 .../TestPetStoreTransactionGeneratorJob.java    | 106 +++
 .../src/test/resources/log4j.properties         |  47 ++
 32 files changed, 3834 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/BPS_analytics.pig
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/BPS_analytics.pig b/bigtop-bigpetstore/BPS_analytics.pig
new file mode 100755
index 0000000..44ed541
--- /dev/null
+++ b/bigtop-bigpetstore/BPS_analytics.pig
@@ -0,0 +1,77 @@
+----------------------------------------------------------------------------
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+-- http://www.apache.org/licenses/LICENSE-2.0
+-- 
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-----------------------------------------------------------------------------
+
+-- This is the analytics script that BigPetStore uses as an example for 
+-- demos of how to do ad-hoc analytics on the cleaned transaction data.
+-- It is used in conjunction with the big pet store web app, soon to be 
+-- added to apache bigtop (As of 4/12/2014, the
+-- corresponding web app to consume this scripts output is 
+-- in jayunit100.github.io/bigpetstore).
+
+-- invoke with two arguments, the input file , and the output file. -input /bps/gen -output /bps/analytics
+
+-- FYI...
+-- If you run into errors, you can see them in
+-- ./target/failsafe-reports/TEST-org.bigtop.bigpetstore.integration.BigPetStorePigIT.xml
+
+-- First, we load data in from a file, as tuples.
+-- In pig, relations are like tables in a relational database,
+-- so each relation is just a bunch of tuples.
+-- In this case csvdata will be a relation,
+-- where each tuple is a single petstore transaction.
+csvdata =
+    LOAD '$input' using PigStorage()
+        AS (
+          dump:chararray,
+          state:chararray,
+          transaction:int,
+          fname:chararray,
+          lname:chararray,
+          date:chararray,
+          price:float,
+          product:chararray);
+
+-- RESULT:
+-- (BigPetStore,storeCode_AK,1,jay,guy,Thu Dec 18 12:17:10 EST 1969,10.5,dog-food)
+-- ...
+
+-- Okay! Now lets group our data so we can do some stats.
+-- lets create a new relation,
+-- where each tuple will contain all transactions for a product in a state.
+
+state_product = group csvdata by ( state, product ) ;
+
+-- RESULT
+-- ((storeCode_AK,dog-food) , {(BigPetStore,storeCode_AK,1,jay,guy,Thu Dec 18 12:17:10 EST 1969,10.5,dog-food)}) --
+-- ...
+
+
+-- Okay now lets make some summary stats so that the boss man can
+-- decide which products are hottest in which states.
+
+-- Note that for the "groups", we tease out each individual field here for formatting with
+-- the BigPetStore visualization app.
+summary1 = FOREACH state_product generate STRSPLIT(group.state,'_').$1 as sp, group.product, COUNT($1);
+
+
+-- Okay, the stats look like this.  Lets clean them up.
+-- (storeCode_AK,cat-food)      2530
+-- (storeCode_AK,dog-food)      2540
+-- (storeCode_AK,fuzzy-collar)     2495
+
+dump summary1;
+
+store summary1 into '$output';

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/README.md
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/README.md b/bigtop-bigpetstore/README.md
new file mode 100644
index 0000000..95245a8
--- /dev/null
+++ b/bigtop-bigpetstore/README.md
@@ -0,0 +1,140 @@
+(See accompanying source code for licensing information)
+
+BigPetStore
+============
+
+test mvn deploy1
+
+Apache Bigtop/Hadoop Ecosystem Demo
+-----------------------------------
+This software is created to demonstrate Apache Bigtop for processing
+big data sets.
+
+Architecture
+------------
+The application consists of the following modules
+
+* generator: generates raw data on the dfs
+* clustering: Apache Mahout demo code for processing the data using Itembased Collaborative Filtering
+* Pig: demo code for processing the data using Apache Pig
+* Hive: demo code for processing the data using Apache Hive
+* Crunch: demo code for processing the data using Apache Crunch
+
+Build Instructions
+------------------
+
+* BUILD THE JAR
+
+  "mvn clean package" will build the bigpetstore jar
+
+* Run integration tests with
+
+  * Pig profile: mvn clean verify -P pig
+  * Crunch profile: mvn clean verify -P crunch
+  * Hive profile:
+     * First, see and run the setuphive.sh script.  Read it and try to
+     understand what it does.
+
+     * mvn clean verify -P pig
+
+For Eclipse Users
+-----------------
+
+1) run "mvn eclipse:eclipse" to create an IDE loadable project.
+
+2) open .classpath and add
+    `<classpathentry kind="src" path="src/integration/java" including="**/*.java"/>`
+
+3) import the project into eclipse
+
+
+High level summary
+------------------
+
+
+The bigpetstore project exemplifies the hadoop ecosystem for newcomers, and also for benchmarking and
+comparing functional space of tools.
+
+The end goal is to run many different implementations of each phase
+using different tools, thus exemplifying overlap of tools in the hadoop ecosystem, and allowing people to benchmark/compare tools
+using a common framework and easily understood use case
+
+
+How it works (To Do)
+--------------------
+
+* Phase 1: Generating pet store data:
+
+The first step is to generate a raw data set.  This is done by the "GeneratePetStoreTransactionsInputFormat":
+
+The first MapReduce job in the pipeline runs a simple job which takes this input format and forwards
+its output.  The result is a list of "transactions".  Each transaction is a tuple of the format
+
+  *{state,name,date,price,product}.*
+
+* Phase 2: Processing the data
+
+The next phase of the application processes the data to create basic aggregations.
+For example with both pig and hive these could easily include
+
+  *Number of transactions by state* or
+  *Most valuable customer by state* or
+  *Most popular items by state*
+
+
+* Phase 3: Clustering the states by all fields
+
+  Now, say we want to cluster the states, so as to put different states into different buying categories
+  for our marketing team to deal with differently.
+
+* Phase 4: Visualizing the Data in D3.
+
+ - try it [on the gh-pages branch](http://jayunit100.github.io/bigpetstore/)
+
+Running on a hadoop cluster
+---------------------------
+
+wget s3://bigpetstore/bigpetstore.jar
+
+hadoop jar bigpetstore.jar org.apache.bigtop.bigpetstore.generator.BPSGenerator 1000000 bigpetstore/gen
+
+hadoop jar bigpetstore.jar org.apache.bigtop.bigpetstore.etl.PigCSVCleaner bigpetstore/gen/ bigpetstore/pig/ custom_pigscript.pig
+... (will add more steps as we add more phases to the workflow) ...
+
+
+Example of running in EMR
+--------------------------
+- Put the jar in s3.  Right now there is a copy of it at the url below.
+
+- Download the elastic-mapreduce ruby shell script.
+create your "credentials.json" file.
+
+Now run this to generate 1,000,000 pet store transactions:
+
+./elastic-mapreduce --create --jar s3://bigpetstore/bigpetstore.jar \
+--main-class org.apache.bigtop.bigpetstore.generator.BPSGenerator \
+--num-instances 10  \
+--arg 1000000 \
+--arg s3://bigpetstore/data/generated \
+--hadoop-version "2.2.0"  \
+--master-instance-type m1.medium \
+--slave-instance-type m1.medium
+
+...Now lets clean the data with pig...
+
+Replace the above "main-class", and "--arg" options with
+--main-class org.apache.bigtop.bigpetstore.etl.PigCSVCleaner
+--arg s3://bigpetstore/data/generated
+--arg s3://bigpetstore/data/pig_out
+(optional, you can send a script referencing the cleaned $input path to do some
+custom analytics; see the BPS_analytics.pig script and its companion
+http://jayunit100.github.io/bigpetstore as an example).
+--arg s3://path_to_custom_analytics_script.pig
+
+(note about pig: We support custom pig scripts.... for EMR, custom pig scripts will need to point to a
+local path, so you'll have to put that script on the machine as part
+of EMR setup w/ a custom script).
+
+...
+
+And so on.

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/arch.dot
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/arch.dot b/bigtop-bigpetstore/arch.dot
new file mode 100644
index 0000000..4eb8ac4
--- /dev/null
+++ b/bigtop-bigpetstore/arch.dot
@@ -0,0 +1,44 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+// Data-flow diagram of the BigPetStore pipeline: record-shaped nodes are
+// data sets, the remaining nodes are the tools that produce or consume them,
+// and edge labels show the command that performs each step.
+digraph bigpetstore {
+
+   node [shape=record];
+
+   // Data sets (label format: name | contents | location).
+   PIG_ANALYTICS [label="PIG_ANALYTICS|Unstructured-unsupported-pigscripts| pig_ad_hoc(0-n)"];
+
+   CUSTOMER_PAGE [label="CUSTOMER_PAGE|json|CUSTOMER_PAGE/part*"];
+   DIRTY_CSV [label="DIRTY_CSV|fname   lname -prod , price ,prod,..|generated/part*"];
+   CSV [label="CSV|fname,lname,prod,price,date,xcoord,ycoord,...|cleaned/part*"];
+   MAHOUT_VIEW_INPUT [label="MAHOUT_VIEW  |  (hashed name) 10001, (hashed purchases) 203 |  <hive_warehouse>/mahout_cf_in/part*" ];
+   MAHOUT_CF [label="MAHOUT_CF  | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ];
+
+   // Generation of the raw data.
+   Generate -> DIRTY_CSV [label="hadoop jar bigpetstore.jar org.apache.bigtop.bigpetstore.generator.BPSGenerator 100 bps/generated/"] ;
+   DIRTY_CSV -> pig [label=""];
+
+   // Cleaning and ad-hoc analytics with pig.
+   pig -> CSV [label="hadoop jar bigpetstore.jar org.apache.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/"];
+   pig -> PIG_ANALYTICS [label="same as CSV job, but add your scripts to end... p1.pig p2.pig ..."];
+   PIG_ANALYTICS -> CSV;
+   PROD_HASH -> hive [label="hive hash udf"];
+   USER_HASH -> hive  [label="hive hash udf"];
+
+   // Hive view feeding the mahout recommender, joined back with crunch.
+   CSV -> hive ;
+   hive -> MAHOUT_VIEW_INPUT [label="hadoop jar bigpetstore.jar org.apache.bigtop.bigpetstore.etl.HiveViewCreator bps/pig_out mahout_cf_in"];
+   MAHOUT_VIEW_INPUT -> mahout_collab_filter_recomender  -> MAHOUT_CF;
+   MAHOUT_CF  -> crunch ;
+   CSV -> crunch ;
+   crunch -> CUSTOMER_PAGE [label="high performance joining"];
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/pom.xml
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/pom.xml b/bigtop-bigpetstore/pom.xml
new file mode 100644
index 0000000..0bc226e
--- /dev/null
+++ b/bigtop-bigpetstore/pom.xml
@@ -0,0 +1,797 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+        <parent>
+           <groupId>org.apache.bigtop</groupId>
+           <artifactId>bigtop</artifactId>
+           <version>0.8.0-SNAPSHOT</version>
+           <relativePath>../pom.xml</relativePath>
+        </parent>
+
+	<artifactId>BigPetStore</artifactId>
+
+	<properties>
+		<!-- Source and report encoding (each was previously declared twice) -->
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+		<!-- Versions shared by the dependency declarations below -->
+		<slf4j.version>1.7.5</slf4j.version>
+		<guava.version>15.0</guava.version>
+		<hadoop.version>2.2.0</hadoop.version>
+		<hive.version>0.12.0</hive.version>
+		<datanucleus.version>3.2.2</datanucleus.version>
+		<datanucleus.jpa.version>3.2.1</datanucleus.jpa.version>
+		<bonecp.version>0.8.0.RELEASE</bonecp.version>
+		<!-- derby.version used to be declared twice (10.8.1.2, then 10.10.1.1).
+			The later declaration is kept; presumably it is the one Maven already
+			used. TODO confirm with "mvn help:effective-pom". -->
+		<derby.version>10.10.1.1</derby.version>
+	</properties>
+
+	<dependencies>
+
+		<dependency>
+			<groupId>org.kohsuke</groupId>
+			<artifactId>graphviz-api</artifactId>
+			<version>1.0</version>
+		</dependency>
+
+		<!-- CRUNCH : These are repeated in the profile and necessary for compilation
+			even without the profile -->
+		<dependency>
+			<groupId>org.apache.crunch</groupId>
+			<artifactId>crunch-core</artifactId>
+			<version>0.9.0-hadoop2</version>
+		</dependency>
+
+		<!-- misc deps -->
+		<dependency>
+			<groupId>com.jolbox</groupId>
+			<artifactId>bonecp</artifactId>
+			<version>${bonecp.version}</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.derby</groupId>
+			<artifactId>derby</artifactId>
+			<version>${derby.version}</version>
+		</dependency>
+		<!-- <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId>
+			<version>3.1</version> </dependency> -->
+
+		<dependency>
+			<groupId>com.google.guava</groupId>
+			<artifactId>guava</artifactId>
+			<version>15.0</version>
+		</dependency>
+
+		<!--
+		  We keep this at top level so that mvn eclipse:eclipse creates  a nice 
+		  tidy project, but its  a little messy.  later we'll create a profile for 
+		  eclipse and move this (and other deps) into profiles as needed.
+		  Important: Remove this dependency when running hive integration tests...
+		-->		
+		  <dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-client</artifactId>
+			<version>${hadoop.version}</version>
+		  </dependency>
+               <!-- mahout deps : may need to turn these on/off when testing mahout locally-->
+		
+		<dependency> <groupId>org.apache.mahout</groupId> <artifactId>mahout-core</artifactId>
+			<version>0.9</version> <exclusions> </exclusions> </dependency>
+		<!-- pig deps -->
+		<dependency>
+			<groupId>org.apache.pig</groupId>
+			<artifactId>pig</artifactId>
+			<classifier>h2</classifier>
+			<version>0.12.0</version>
+			<scope>provided</scope>
+		</dependency>
+
+		<!--logging -->
+
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-api</artifactId>
+			<version>${slf4j.version}</version>
+		</dependency>
+
+		<!-- SLF4J binding provided at runtime -->
+		<dependency>
+			<groupId>log4j</groupId>
+			<artifactId>log4j</artifactId>
+			<version>1.2.12</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-log4j12</artifactId>
+			<version>${slf4j.version}</version>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- hive -->
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-common</artifactId>
+			<version>${hive.version}</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-serde</artifactId>
+			<version>${hive.version}</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-jdbc</artifactId>
+			<version>${hive.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-contrib</artifactId>
+			<version>${hive.version}</version>
+		</dependency>
+
+		<!-- datanucleus -->
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-core</artifactId>
+			<version>${datanucleus.version}</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-rdbms</artifactId>
+			<version>${datanucleus.jpa.version}</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-api-jdo</artifactId>
+			<version>${datanucleus.jpa.version}</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-accessplatform-jdo-rdbms</artifactId>
+			<version>${datanucleus.jpa.version}</version>
+			<type>pom</type>
+		</dependency>
+
+		<!-- Unit test artifacts -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.11</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.hamcrest</groupId>
+			<artifactId>hamcrest-all</artifactId>
+			<version>1.3</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.mrunit</groupId>
+			<artifactId>mrunit</artifactId>
+			<version>1.0.0</version>
+			<classifier>hadoop2</classifier>
+		</dependency>
+
+	</dependencies>
+
+	<build>
+		<extensions>
+			<extension>
+				<groupId>org.springframework.build.aws</groupId>
+				<artifactId>org.springframework.build.aws.maven</artifactId>
+				<version>3.0.0.RELEASE</version>
+			</extension>
+		</extensions>
+		<!-- ${project.version} replaces the deprecated ${version} expression; the value is the same -->
+		<finalName>bigpetstore-${project.version}</finalName>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-release-plugin</artifactId>
+				<version>2.5</version>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-eclipse-plugin</artifactId>
+				<configuration>
+					<downloadSources>true</downloadSources>
+					<downloadJavadocs>true</downloadJavadocs>
+				</configuration>
+			</plugin>
+
+			<!-- Sources are compiled for Java 1.6 -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.3.2</version>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-jar-plugin</artifactId>
+				<version>2.4</version>
+				<configuration>
+					<outputDirectory>${basedir}/target</outputDirectory>
+				</configuration>
+			</plugin>
+			<!-- The pig, hive and crunch unit tests are excluded from the default build -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<configuration>
+					<excludes>
+						<exclude>**/*TestPig.java</exclude>
+						<exclude>**/*TestHiveEmbedded.java</exclude>
+						<exclude>**/*TestCrunch.java</exclude>
+					</excludes>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+	<profiles>
+		<profile>
+			<id>pig</id>
+			<activation>
+				<activeByDefault>false</activeByDefault>
+			</activation>
+			<properties>
+				<skip.unit.tests>false</skip.unit.tests>
+			</properties>
+			<dependencies>
+				<!-- misc -->
+				<dependency>
+					<groupId>org.apache.commons</groupId>
+					<artifactId>commons-lang3</artifactId>
+					<version>3.1</version>
+				</dependency>
+				<dependency>
+					<groupId>joda-time</groupId>
+					<artifactId>joda-time</artifactId>
+					<version>2.3</version>
+				</dependency>
+				<dependency>
+					<groupId>com.google.guava</groupId>
+					<artifactId>guava</artifactId>
+					<version>${guava.version}</version>
+				</dependency>
+
+				<!-- pig -->
+				<dependency>
+					<groupId>org.apache.pig</groupId>
+					<artifactId>pig</artifactId>
+					<classifier>h2</classifier>
+					<version>0.12.0</version>
+					<scope>provided</scope>
+				</dependency>
+
+				<!-- hadoop -->
+				<dependency>
+					<groupId>org.apache.hadoop</groupId>
+					<artifactId>hadoop-client</artifactId>
+					<version>${hadoop.version}</version>
+				</dependency>
+				<!-- <dependency> <groupId>org.apache.mrunit</groupId> <artifactId>mrunit</artifactId>
+					<version>1.0.0</version> <classifier>hadoop2</classifier> </dependency> -->
+			</dependencies>
+
+			<build>
+				<plugins>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-surefire-plugin</artifactId>
+						<configuration>
+
+							<excludes>
+								<exclude>**/*TestPig.java</exclude>
+								<exclude>**/*TestHiveEmbedded.java</exclude>
+								<exclude>**/*TestCrunch.java</exclude>
+								<exclude>**/*TestPetStoreTransactionGeneratorJob.java</exclude>
+							</excludes>
+
+						</configuration>
+					</plugin>
+					<plugin>
+						<groupId>org.codehaus.mojo</groupId>
+						<artifactId>build-helper-maven-plugin</artifactId>
+						<version>1.5</version>
+						<executions>
+							<execution>
+								<id>add-test-source</id>
+								<phase>generate-test-sources</phase>
+								<goals>
+									<goal>add-test-source</goal>
+								</goals>
+								<configuration>
+									<sources>
+										<source>src/integration/java</source>
+									</sources>
+								</configuration>
+							</execution>
+						</executions>
+					</plugin>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-failsafe-plugin</artifactId>
+						<version>2.12</version>
+
+						<configuration>
+							<argLine>-Xmx1g</argLine>
+							<excludes>
+								<exclude>**/*BigPetStoreMahoutIT.java</exclude>
+								<exclude>**/*BigPetStoreHiveIT.java</exclude>
+								<exclude>**/*BigPetStoreCrunchIT.java</exclude>
+							</excludes>
+						</configuration>
+						<executions>
+							<!-- States that both integration-test and verify goals of the Failsafe
+								Maven plugin are executed. -->
+							<execution>
+								<id>integration-tests</id>
+								<goals>
+									<goal>integration-test</goal>
+									<goal>verify</goal>
+								</goals>
+							</execution>
+						</executions>
+					</plugin>
+				</plugins>
+			</build>
+		</profile>
+
+		<profile>
+			<id>hive</id>
+			<activation>
+				<activeByDefault>false</activeByDefault>
+			</activation>
+			<properties>
+				<hive.version>0.12.0</hive.version>
+				<datanucleus.version>3.2.2</datanucleus.version>
+				<datanucleus.jpa.version>3.2.1</datanucleus.jpa.version>
+				<bonecp.version>0.8.0.RELEASE</bonecp.version>
+				<!-- derby.version used to be declared twice (10.8.1.2, then 10.10.1.1); only the later declaration is kept -->
+				<derby.version>10.10.1.1</derby.version>
+				<skip.unit.tests>false</skip.unit.tests>
+			</properties>
+
+			<build>
+				<plugins>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-surefire-plugin</artifactId>
+						<configuration>
+
+							<excludes>
+								<exclude>**/*TestPig.java</exclude>
+								<exclude>**/*TestHiveEmbedded.java</exclude>
+								<exclude>**/*TestCrunch.java</exclude>
+								<exclude>**/*TestPetStoreTransactionGeneratorJob.java</exclude>
+							</excludes>
+
+						</configuration>
+					</plugin>
+					<plugin>
+						<groupId>org.codehaus.mojo</groupId>
+						<artifactId>build-helper-maven-plugin</artifactId>
+						<version>1.5</version>
+						<executions>
+							<execution>
+								<id>add-test-source</id>
+								<phase>generate-test-sources</phase>
+								<goals>
+									<goal>add-test-source</goal>
+								</goals>
+								<configuration>
+									<sources>
+										<source>src/integration/java</source>
+									</sources>
+								</configuration>
+							</execution>
+						</executions>
+					</plugin>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-failsafe-plugin</artifactId>
+						<version>2.12</version>
+						<configuration>
+							<excludes>
+								<exclude>**/*BigPetStoreMahoutIT.java</exclude>
+								<exclude>**/*BigPetStorePigIT.java</exclude>
+								<exclude>**/*BigPetStoreCrunchIT.java</exclude>
+							</excludes>
+						</configuration>
+						<executions>
+							<!-- States that both integration-test and verify goals of the Failsafe
+								Maven plugin are executed. -->
+							<execution>
+								<id>integration-tests</id>
+								<goals>
+									<goal>integration-test</goal>
+									<goal>verify</goal>
+								</goals>
+							</execution>
+						</executions>
+					</plugin>
+				</plugins>
+			</build>
+
+
+			<dependencies>
+				<!-- misc -->
+				<dependency>
+					<groupId>org.apache.commons</groupId>
+					<artifactId>commons-lang3</artifactId>
+					<version>3.1</version>
+				</dependency>
+
+				<dependency>
+					<groupId>com.google.guava</groupId>
+					<artifactId>guava</artifactId>
+					<version>${guava.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.derby</groupId>
+					<artifactId>derby</artifactId>
+					<version>${derby.version}</version>
+				</dependency>
+
+
+				<dependency>
+					<groupId>org.datanucleus</groupId>
+					<artifactId>datanucleus-core</artifactId>
+					<version>${datanucleus.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>org.datanucleus</groupId>
+					<artifactId>datanucleus-rdbms</artifactId>
+					<version>${datanucleus.jpa.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>org.datanucleus</groupId>
+					<artifactId>datanucleus-api-jdo</artifactId>
+					<version>${datanucleus.jpa.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>org.datanucleus</groupId>
+					<artifactId>datanucleus-accessplatform-jdo-rdbms</artifactId>
+					<version>${datanucleus.jpa.version}</version>
+					<type>pom</type>
+				</dependency>
+
+				<!-- hadoop -->
+				<dependency>
+					<groupId>org.apache.hadoop</groupId>
+					<artifactId>hadoop-common</artifactId>
+					<version>${hadoop.version}</version>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.hadoop</groupId>
+					<artifactId>hadoop-mapreduce-client-app</artifactId>
+					<version>2.3.0</version>
+				</dependency>
+				<!-- hive -->
+				<dependency>
+					<groupId>org.apache.hive</groupId>
+					<artifactId>hive-common</artifactId>
+					<version>${hive.version}</version>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.hive</groupId>
+					<artifactId>hive-serde</artifactId>
+					<version>${hive.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.hive</groupId>
+					<artifactId>hive-jdbc</artifactId>
+					<version>${hive.version}</version>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.hive</groupId>
+					<artifactId>hive-contrib</artifactId>
+					<version>${hive.version}</version>
+				</dependency>
+
+				<dependency>
+					<groupId>com.jolbox</groupId>
+					<artifactId>bonecp</artifactId>
+					<version>${bonecp.version}</version>
+				</dependency>
+
+				<!-- logging -->
+				<dependency>
+					<groupId>org.slf4j</groupId>
+					<artifactId>slf4j-api</artifactId>
+					<version>${slf4j.version}</version>
+				</dependency>
+
+				<!-- SLF4J binding provided at runtime -->
+				<dependency>
+					<groupId>log4j</groupId>
+					<artifactId>log4j</artifactId>
+					<version>1.2.12</version>
+					<scope>provided</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.slf4j</groupId>
+					<artifactId>slf4j-log4j12</artifactId>
+					<version>${slf4j.version}</version>
+					<scope>provided</scope>
+				</dependency>
+
+				<!-- Unit test artifacts -->
+				<dependency>
+					<groupId>junit</groupId>
+					<artifactId>junit</artifactId>
+					<version>4.11</version>
+					<scope>test</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.hamcrest</groupId>
+					<artifactId>hamcrest-all</artifactId>
+					<version>1.3</version>
+					<scope>test</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.mrunit</groupId>
+					<artifactId>mrunit</artifactId>
+					<version>1.0.0</version>
+					<classifier>hadoop2</classifier>
+				</dependency>
+
+			</dependencies>
+		</profile>
+		<profile>
+			<id>crunch</id>
+			<activation>
+				<activeByDefault>false</activeByDefault>
+			</activation>
+			<properties>
+				<skip.unit.tests>true</skip.unit.tests>
+			</properties>
+			<build>
+				<plugins>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-surefire-plugin</artifactId>
+						<configuration>
+							<excludes>
+								<exclude>**/*TestPig.java</exclude>
+								<exclude>**/*TestHiveEmbedded.java</exclude>
+								<exclude>**/*TestCrunch.java</exclude>
+								<exclude>**/*TestPetStoreTransactionGeneratorJob.java</exclude>
+							</excludes>
+						</configuration>
+					</plugin>
+					<plugin>
+						<groupId>org.codehaus.mojo</groupId>
+						<artifactId>build-helper-maven-plugin</artifactId>
+						<version>1.5</version>
+						<executions>
+							<execution>
+								<id>add-test-source</id>
+								<phase>generate-test-sources</phase>
+								<goals>
+									<goal>add-test-source</goal>
+								</goals>
+								<configuration>
+									<sources>
+										<source>src/integration/java</source>
+									</sources>
+								</configuration>
+							</execution>
+						</executions>
+					</plugin>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-failsafe-plugin</artifactId>
+						<version>2.12</version>
+						<configuration>
+							<excludes>
+								<exclude>**/*BigPetStorePigIT.java</exclude>
+								<exclude>**/*BigPetStoreHiveIT.java</exclude>
+								<exclude>**/*BigPetStoreMahoutIT.java</exclude>
+							</excludes>
+						</configuration>
+						<executions>
+							<!-- States that both integration-test and verify goals of the Failsafe
+								Maven plugin are executed. -->
+							<execution>
+								<id>integration-tests</id>
+								<goals>
+									<goal>integration-test</goal>
+									<goal>verify</goal>
+								</goals>
+							</execution>
+						</executions>
+					</plugin>
+				</plugins>
+			</build>
+
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.crunch</groupId>
+					<artifactId>crunch-core</artifactId>
+					<version>0.9.0-hadoop2</version>
+				</dependency>
+				<dependency>
+					<groupId>com.google.protobuf</groupId>
+					<artifactId>protobuf-java</artifactId>
+					<version>2.5.0</version>
+				</dependency>
+			</dependencies>
+		</profile>
+
+		<profile>
+			<id>mahout</id>
+			<activation>
+				<activeByDefault>false</activeByDefault>
+			</activation>
+			<properties>
+				<skip.unit.tests>true</skip.unit.tests>
+			</properties>
+			<build>
+				<plugins>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-surefire-plugin</artifactId>
+						<configuration>
+							<excludes>
+								<exclude>**/*TestPig.java</exclude>
+								<exclude>**/*TestHiveEmbedded.java</exclude>
+								<exclude>**/*TestCrunch.java</exclude>
+								<exclude>**/*TestPetStoreTransactionGeneratorJob.java</exclude>
+							</excludes>
+						</configuration>
+					</plugin>
+					<plugin>
+						<groupId>org.codehaus.mojo</groupId>
+						<artifactId>build-helper-maven-plugin</artifactId>
+						<version>1.5</version>
+						<executions>
+							<execution>
+								<id>add-test-source</id>
+								<phase>generate-test-sources</phase>
+								<goals>
+									<goal>add-test-source</goal>
+								</goals>
+								<configuration>
+									<sources>
+										<source>src/integration/java</source>
+									</sources>
+								</configuration>
+							</execution>
+						</executions>
+					</plugin>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-failsafe-plugin</artifactId>
+						<version>2.12</version>
+						<configuration>
+							<excludes>
+								<exclude>**/*BigPetStorePigIT.java</exclude>
+								<exclude>**/*BigPetStoreCrunchIT.java</exclude>
+								<exclude>**/*BigPetStoreHiveIT.java</exclude>
+							</excludes>
+						</configuration>
+						<executions>
+							<!-- States that both integration-test and verify goals of the Failsafe
+								Maven plugin are executed. -->
+							<execution>
+								<id>integration-tests</id>
+								<goals>
+									<goal>integration-test</goal>
+									<goal>verify</goal>
+								</goals>
+							</execution>
+						</executions>
+					</plugin>
+				</plugins>
+			</build>
+
+			<dependencies>
+
+				<dependency>
+				    <groupId>commons-logging</groupId>
+				    <artifactId>commons-logging</artifactId>
+				    <version>1.1.3</version>
+				</dependency>
+
+			        <!--
+				     The bigpetstore:mahout-core artifact below is a locally
+				     built mahout jar, compiled for 2.2.0 (presumably the
+				     hadoop version), that was created for testing on the
+				     author's machine. The standard apache mahout-core may be
+				     substituted, but that has not been verified to work.
+			        -->	
+				<dependency>
+					<groupId>bigpetstore</groupId>
+					<artifactId>mahout-core</artifactId>
+					<version>1.0-SNAPSHOT</version>
+					<exclusions>
+					</exclusions>
+				</dependency>
+
+				<dependency>
+				    <groupId>org.apache.mahout</groupId>
+				    <artifactId>mahout-math</artifactId>
+				    <version>0.9</version>
+				</dependency>
+
+
+				<dependency>
+					<groupId>org.slf4j</groupId>
+					<artifactId>slf4j-api</artifactId>
+					<version>LATEST</version>
+
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.commons</groupId>
+					<artifactId>commons-lang3</artifactId>
+					<version>LATEST</version>
+				</dependency>
+
+				<dependency>
+					<groupId>com.thoughtworks.xstream</groupId>
+					<artifactId>xstream</artifactId>
+					<version>LATEST</version>
+
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.lucene</groupId>
+					<artifactId>lucene-core</artifactId>
+					<version>LATEST</version>
+
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.lucene</groupId>
+					<artifactId>lucene-analyzers-common</artifactId>
+					<version>LATEST</version>
+
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.mahout.commons</groupId>
+					<artifactId>commons-cli</artifactId>
+					<version>LATEST</version>
+
+				</dependency>
+
+				<dependency>
+					<groupId>org.apache.commons</groupId>
+					<artifactId>commons-math3</artifactId>
+					<version>LATEST</version>
+				</dependency>
+
+
+				<dependency>
+					<groupId>org.apache.solr</groupId>
+					<artifactId>solr-commons-csv</artifactId>
+					<version>3.5.0</version>
+				</dependency>
+
+			</dependencies>
+
+
+
+		</profile>
+
+	</profiles>
+
+</project>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/setuphive.sh
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/setuphive.sh b/bigtop-bigpetstore/setuphive.sh
new file mode 100755
index 0000000..8dff6dd
--- /dev/null
+++ b/bigtop-bigpetstore/setuphive.sh
@@ -0,0 +1,22 @@
+### THIS SCRIPT SETS UP HIVE AND HADOOP TARBALLS FOR YOU ###
+HIVE_TARBALL="http://archive.apache.org/dist/hive/hive-0.12.0/hive-0.12.0.tar.gz"
+HADOOP_TARBALL="https://archive.apache.org/dist/hadoop/core/hadoop-1.2.1/hadoop-1.2.1.tar.gz"
+wget $HIVE_TARBALL
+wget $HADOOP_TARBALL
+
+
+# REMEBER SO WE CAN CD BACK AT END 
+mydir=`pwd`
+
+## HADOOP SETUP
+mkdir -p /opt/bigpetstore
+cd /opt/bigpetstore
+tar -xvf hadoop-1.2.1.tar.gz
+export HADOOP_HOME=`pwd`/hadoop-1.2.1
+
+## HIVE SETUP 
+tar -xvf hive-0.12.0.tar.gz
+cp /opt/hive-0.12.0/lib/hive*.jar $HADOOP_HOME/lib
+
+## CD BACK TO ORIGINAL DIR
+cd $mydir

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreHiveIT.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreHiveIT.java b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreHiveIT.java
new file mode 100644
index 0000000..c3646a4
--- /dev/null
+++ b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreHiveIT.java
@@ -0,0 +1,108 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.bigtop.bigpetstore;
+
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.ITUtils;
+import org.apache.bigtop.bigpetstore.etl.HiveViewCreator;
+import org.apache.bigtop.bigpetstore.etl.PigCSVCleaner;
+import org.apache.bigtop.bigpetstore.generator.BPSGenerator;
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.pig.ExecType;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import com.google.common.base.Function;
+import com.google.common.io.Files;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for the hive stage: runs {@link HiveViewCreator} on the
+ * cleaned pig output and validates the lines it writes.
+ * Run this after running the {@link BigPetStorePigIT} test.
+ */
+public class BigPetStoreHiveIT extends ITUtils{
+    final static Logger log = LoggerFactory.getLogger(BigPetStoreHiveIT.class);
+
+    /**
+     * Runs the shared setup, then removes output left over from an earlier run.
+     */
+    @Before
+    public void setupTest() throws Throwable {
+        super.setup();
+        try {
+            // Explicitly recursive: the one-argument delete(Path) is deprecated.
+            FileSystem.get(new Configuration()).delete(BPS_TEST_MAHOUT_IN, true);
+        } catch (Exception e) {
+            // not necessarily an error
+            System.out.println("didnt need to delete hive output.");
+        }
+    }
+
+    /**
+     * Runs the hive view creator and checks that the first three comma
+     * separated columns of every output line parse as longs.
+     */
+    @Test
+    public void testPetStorePipeline() throws Exception {
+        new HiveViewCreator().run(
+                new String[]{
+                        BPS_TEST_PIG_CLEANED.toString(),
+                        BPS_TEST_MAHOUT_IN.toString()});
+
+        assertOutput(BPS_TEST_MAHOUT_IN, new Function<String, Boolean>() {
+            public Boolean apply(String x) {
+                System.out.println("Verifying "+x);
+                String[] cols = x.split(",");
+                // parseLong throws on a malformed column, which fails the test
+                Long.parseLong(cols[0].trim());
+                Long.parseLong(cols[1].trim());
+                Long.parseLong(cols[2].trim());
+                return true;
+            }
+        });
+    }
+
+    /**
+     * Prints the files found under {@code base}, then applies
+     * {@code validator} to every line of the {@code 000000_0} file in it.
+     *
+     * @param base directory, on the local file system, holding the output
+     * @param validator returns true for every acceptable line
+     * @throws Exception if the file cannot be read or a line is rejected
+     */
+    public static void assertOutput(Path base,
+            Function<String, Boolean> validator) throws Exception {
+        FileSystem fs = FileSystem.getLocal(new Configuration());
+
+        // print out all the files.
+        for (FileStatus stat : fs.listStatus(base)) {
+            System.out.println(stat.getPath() + "  " + stat.getLen());
+        }
+
+        Path p = new Path(base, "000000_0");
+        BufferedReader r = new BufferedReader(
+                new InputStreamReader(fs.open(p), "UTF-8"));
+        try {
+            // Loop until readLine() reports end of file; ready() only says
+            // whether a read would block and can end the loop too early.
+            String line;
+            while ((line = r.readLine()) != null) {
+                log.info("line:" + line);
+                System.out.println("line:" + line);
+                Assert.assertTrue("validationg line : " + line,
+                        validator.apply(line));
+            }
+        } finally {
+            // close the reader even when an assertion fails
+            r.close();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreMahoutIT.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreMahoutIT.java b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreMahoutIT.java
new file mode 100644
index 0000000..5e6f69c
--- /dev/null
+++ b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStoreMahoutIT.java
@@ -0,0 +1,88 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.bigtop.bigpetstore;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import org.apache.bigtop.bigpetstore.clustering.BPSRecommnder;
+import org.apache.bigtop.bigpetstore.etl.HiveViewCreator;
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Function;
+
+public class BigPetStoreMahoutIT extends ITUtils{
+
+    final static Logger log = LoggerFactory.getLogger(BigPetStoreHiveIT.class);
+
+    @Before
+    public void setupTest() throws Throwable {
+        super.setup();
+        try {
+            FileSystem.get(new Configuration()).delete(super.BPS_TEST_MAHOUT_OUT);
+        }
+        catch (Exception e) {
+            System.out.println("didnt need to delete mahout output.");
+        }
+    }
+
+    @Test
+    public void testPetStorePipeline() throws Exception {
+        new BPSRecommnder().run(
+                new String[]{
+                        BPS_TEST_MAHOUT_IN.toString(),
+                        BPS_TEST_MAHOUT_OUT.toString()});
+
+        assertOutput(BPS_TEST_MAHOUT_OUT, new Function<String, Boolean>() {
+            public Boolean apply(String x) {
+                System.out.println("Verifying "+x);
+                return true;
+            }
+        });
+    }
+
+    public static void assertOutput(Path base,
+            Function<String, Boolean> validator) throws Exception {
+        FileSystem fs = FileSystem.getLocal(new Configuration());
+
+        FileStatus[] files = fs.listStatus(base);
+        // print out all the files.
+        for (FileStatus stat : files) {
+            System.out.println(stat.getPath() + "  " + stat.getLen());
+        }
+
+        Path p = new Path(base, "part-r-00000");
+        BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(p)));
+
+        // line:{"product":"big chew toy","count":3}
+        while (r.ready()) {
+            String line = r.readLine();
+            log.info("line:" + line);
+            System.out.println("line:" + line);
+            Assert.assertTrue("validationg line : " + line,
+                    validator.apply(line));
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStorePigIT.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStorePigIT.java b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStorePigIT.java
new file mode 100644
index 0000000..db766de
--- /dev/null
+++ b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/BigPetStorePigIT.java
@@ -0,0 +1,165 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.bigtop.bigpetstore;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import junit.framework.Assert;
+
+import org.apache.bigtop.bigpetstore.etl.PigCSVCleaner;
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.ExecType;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.io.Files;
+
+/**
+*  This is the main integration test for pig.
+*  Like all BPS integration tests, it is designed 
+*  to simulate exactly what will happen on the 
+*  actual cluster, except with a small amount of records.
+*
+*  In addition to cleaning the dataset, it also runs the BPS_analytics.pig
+*  script which BigPetStore ships with. 
+*/
+public class BigPetStorePigIT extends ITUtils{
+
+    final static Logger log = LoggerFactory.getLogger(BigPetStorePigIT.class);
+
+    /**
+     * An extra unsupported code path that we have so
+     * people can do ad hoc analytics on pig data after it is
+     * cleaned.
+     */
+    public static final Path BPS_TEST_PIG_COUNT_PRODUCTS = fs.makeQualified(
+            new Path("bps_integration_",
+                    BigPetStoreConstants.OUTPUTS.pig_ad_hoc_script.name()+"0"));
+
+    /**
+     * The pig script handed to PigCSVCleaner. The path is relative, so it is
+     * resolved against the working directory of the test run.
+     */
+    static final File PIG_SCRIPT = new File("BPS_analytics.pig");
+
+    static {
+        // Fail at class-load time, with the absolute path, when the script is missing.
+        if (!PIG_SCRIPT.exists()) {
+            throw new RuntimeException("Couldnt find pig script at " + PIG_SCRIPT.getAbsolutePath());
+        }
+    }
+
+    /**
+     * Runs the shared setup, then removes output left over from an earlier run.
+     */
+    @Before
+    public void setupTest() throws Throwable {
+        super.setup();
+        try{
+            // Explicitly recursive: the one-argument delete(Path) is deprecated.
+            FileSystem.get(new Configuration()).delete(BPS_TEST_PIG_CLEANED, true);
+            FileSystem.get(new Configuration()).delete(BPS_TEST_PIG_COUNT_PRODUCTS, true);
+        }
+        catch(Exception e){
+            //not necessarily an error
+            System.out.println("didnt need to delete pig output.");
+        }
+    }
+
+    /**
+     * Output directories to check after the pig run, each mapped to the
+     * validator applied to its lines. Both validators currently accept
+     * every line.
+     */
+    static Map<Path,Function<String,Boolean>> TESTS = ImmutableMap.of(
+            //Test of the main output
+            BPS_TEST_PIG_CLEANED,
+            new Function<String, Boolean>(){
+                public Boolean apply(String x){
+                    return true;
+                }
+            },
+            //Example of how to count products
+            //after doing basic pig data cleanup
+            BPS_TEST_PIG_COUNT_PRODUCTS,
+            new Function<String, Boolean>(){
+                public Boolean apply(String x){
+                    return true;
+                }
+            });
+
+    /**
+     * The "core" task reformats data to TSV.  lets test that first.
+     */
+    @Test
+    public void testPetStoreCorePipeline()  throws Exception {
+        runPig(
+               BPS_TEST_GENERATED,
+               BPS_TEST_PIG_CLEANED,
+               PIG_SCRIPT);
+        for(Entry<Path,Function<String,Boolean>> e : TESTS.entrySet()) {
+            assertOutput(e.getKey(),e.getValue());
+        }
+    }
+
+    /**
+     * Prints the files found under {@code base}, then applies
+     * {@code validator} to every line of its first map or reduce part file.
+     *
+     * @param base directory, on the local file system, holding the output
+     * @param validator returns true for every acceptable line
+     * @throws Exception if the file cannot be read or a line is rejected
+     */
+    public static void assertOutput(Path base,Function<String, Boolean> validator) throws Exception{
+        FileSystem fs = FileSystem.getLocal(new Configuration());
+
+        //print out all the files.
+        for(FileStatus stat : fs.listStatus(base)){
+            System.out.println(stat.getPath() +"  " + stat.getLen());
+        }
+
+        // Support map OR reduce outputs: prefer part-m-00000 when it exists.
+        Path partm = new Path(base,"part-m-00000");
+        Path partr = new Path(base,"part-r-00000");
+        Path p = fs.exists(partm)?partm:partr;
+
+        // Now we read through the file and validate its contents.
+        BufferedReader r =
+                new BufferedReader(
+                        new InputStreamReader(fs.open(p), "UTF-8"));
+        try {
+            // Loop until readLine() reports end of file; ready() only says
+            // whether a read would block and can end the loop too early.
+            String line;
+            while((line = r.readLine()) != null){
+                log.info("line:"+line);
+                Assert.assertTrue("validationg line : " + line, validator.apply(line));
+            }
+        } finally {
+            // close the reader even when an assertion fails
+            r.close();
+        }
+    }
+
+    // NOTE(review): never read or written in this class; kept only so the field set is unchanged.
+    Map pigResult;
+
+    /**
+     * Constructs a PigCSVCleaner for the given paths in local exec mode.
+     * NOTE(review): presumably the constructor itself runs the script, since
+     * nothing else is invoked on the instance - confirm against PigCSVCleaner.
+     */
+    private void runPig(Path input, Path output, File pigscript) throws Exception {
+        new PigCSVCleaner(
+                input,
+                output,
+                ExecType.LOCAL,
+                pigscript);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/ITUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/ITUtils.java b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/ITUtils.java
new file mode 100644
index 0000000..e93d9ce
--- /dev/null
+++ b/bigtop-bigpetstore/src/integration/java/org/apache/bigtop/bigpetstore/ITUtils.java
@@ -0,0 +1,145 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.bigtop.bigpetstore;
+
+import java.net.InetAddress;
+import java.nio.charset.Charset;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.generator.BPSGenerator;
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Files;
+
+public class ITUtils {
+
+    static final Logger log = LoggerFactory.getLogger(ITUtils.class);
+
+    static FileSystem fs;
+    static{
+        try{
+            fs=FileSystem.getLocal(new Configuration());
+        }
+        catch(Throwable e)
+        {
+           String cpath = (String) System.getProperties().get("java.class.path");
+           String msg="";
+           for(String cp : cpath.split(":")) {
+               if(cp.contains("hadoop")) {
+                   msg+=cp.replaceAll("hadoop", "**HADOOP**")+"\n";
+               }
+           }
+           throw new RuntimeException("Major error:  Probably issue.   " +
+            		"Check hadoop version?  "+ e.getMessage() +" .... check these classpath elements:"
+                    +msg);
+        }
+    }
+    public static final Path BPS_TEST_GENERATED = fs.makeQualified(
+            new Path("bps_integration_",BigPetStoreConstants.OUTPUTS.generated.name())) ;
+
+    public static final Path BPS_TEST_PIG_CLEANED = fs.makeQualified(
+            new Path("bps_integration_",BigPetStoreConstants.OUTPUTS.cleaned.name()));
+
+    public static final Path BPS_TEST_MAHOUT_IN = fs.makeQualified(
+            new Path("bps_integration_",BigPetStoreConstants.OUTPUTS.MAHOUT_CF_IN.name()));
+
+    public static final Path BPS_TEST_MAHOUT_OUT = fs.makeQualified(
+            new Path("bps_integration_",BigPetStoreConstants.OUTPUTS.MAHOUT_CF_OUT.name()));
+
+    public static void main(String[] args){
+
+    }
+    //public static final Path CRUNCH_OUT = new Path("bps_integration_",BigPetStoreConstants.OUTPUT_3).makeQualified(fs);
+
+    /**
+     * Some simple checks to make sure that unit tests in local FS.
+     * these arent designed to be run against a distribtued system.
+     */
+    public static void checkConf(Configuration conf) throws Exception {
+        if(conf.get("mapreduce.jobtracker.address")==null) {
+            log.warn("Missing mapreduce.jobtracker.address???????!!!! " +
+            		"This can be the case in hive tests which use special " +
+            		"configurations, but we should fix it sometime.");
+            return;
+        }
+        if(! conf.get("mapreduce.jobtracker.address").equals("local")) {
+            throw new RuntimeException("ERROR: bad conf : " + "mapreduce.jobtracker.address");
+        }
+        if(! conf.get("fs.AbstractFileSystem.file.impl").contains("Local")) {
+            throw new RuntimeException("ERROR: bad conf : " + "mapreduce.jobtracker.address");
+        }
+        try {
+            InetAddress addr = java.net.InetAddress.getLocalHost();
+            System.out.println("Localhost = hn=" + addr.getHostName() +" / ha="+addr.getHostAddress());
+        }
+        catch (Throwable e) {
+            throw new RuntimeException(
+            " ERROR : Hadoop wont work at all  on this machine yet"+
+            "...I can't get / resolve localhost ! Check java version/ " +
+            "/etc/hosts / DNS or other networking related issues on your box" +
+            e.getMessage());
+        }
+    }
+
+
+    /**
+     * Creates a generated input data set in
+     *
+     * test_data_directory/generated.
+     * i.e.
+     *  test_data_directory/generated/part-r-00000
+     */
+    public static void setup() throws Throwable{
+        int records = 10;
+        /**
+         * Setup configuration with prop.
+         */
+        Configuration conf = new Configuration();
+
+        //debugging for jeff and others in local fs
+        //that wont build
+        checkConf(conf);
+
+        conf.setInt(BPSGenerator.props.bigpetstore_records.name(), records);
+
+        /**
+         * Only create if doesnt exist already.....
+         */
+        if(FileSystem.getLocal(conf).exists(BPS_TEST_GENERATED)){
+            return;
+        }
+
+        /**
+         * Create the data set.
+         */
+        Job createInput= BPSGenerator.createJob(BPS_TEST_GENERATED, conf);
+        createInput.waitForCompletion(true);
+
+        Path outputfile = new Path(BPS_TEST_GENERATED,"part-r-00000");
+        List<String> lines = Files.readLines(FileSystem.getLocal(conf).pathToFile(outputfile), Charset.defaultCharset());
+        log.info("output : " + FileSystem.getLocal(conf).pathToFile(outputfile));
+        for(String l : lines){
+            System.out.println(l);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/clustering/BPSRecommnder.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/clustering/BPSRecommnder.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/clustering/BPSRecommnder.java
new file mode 100644
index 0000000..748578a
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/clustering/BPSRecommnder.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.clustering;
+
+import org.apache.bigtop.bigpetstore.util.DeveloperTools;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
+import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
+import org.apache.pig.builtin.LOG;
+
+/**
+ * Implement user based collab filter.
+ *
+ * The input set is the
+ *
+ * userid,productid,weight
+ *
+ * rows.
+ */
+public class BPSRecommnder implements Tool {
+
+
+    /** Configuration injected through {@link #setConf(Configuration)}; null until set. */
+    Configuration c;
+    @Override
+    public void setConf(Configuration conf) {
+        c=conf;
+    }
+
+    @Override
+    public Configuration getConf() {
+        return c;
+    }
+
+    /**
+     * Runs Mahout's {@link RecommenderJob} on the given input.
+     *
+     * The job is started with a fixed users file ({@code /tmp/users.txt}),
+     * a fresh temp dir under {@code /tmp}, pearson correlation similarity
+     * and 4 recommendations per user.
+     *
+     * @param args {@code args[0]} is the input path, {@code args[1]} the
+     *             output path
+     * @return the exit code returned by the recommender job
+     */
+    @Override
+    public int run(String[] args) throws Exception {
+        DeveloperTools.validate(args,"input path","output path");
+
+        // Honor the configuration handed to this Tool (e.g. by ToolRunner)
+        // and fall back to the defaults only when none was supplied.
+        Configuration conf = getConf();
+        if(conf == null) {
+            conf = new Configuration();
+        }
+
+        System.out.println("Runnning recommender against : " + args[0] +" -> " + args[1]);
+
+        RecommenderJob recommenderJob = new RecommenderJob();
+        recommenderJob.setConf(conf);
+        /**
+        int x = ToolRunner.run(getConf(), new BPSPreparePreferenceMatrixJob(), new String[]{
+            "--input", args[0],
+            "--output", args[1],
+            "--tempDir", "/tmp",
+          });
+        System.out.println("RETURN = " + x);
+         **/
+
+        int ret = recommenderJob.run(new String[] {
+             "--input",args[0],
+             "--output",args[1],
+             "--usersFile","/tmp/users.txt",
+             "--tempDir", "/tmp/mahout_"+System.currentTimeMillis(),
+             "--similarityClassname", "SIMILARITY_PEARSON_CORRELATION",
+             "--threshold",".00000000001",
+             "--numRecommendations", "4",
+             //"--encodeLongsAsInts",
+             //Boolean.FALSE.toString(),
+             //"--itemBased", Boolean.FALSE.toString()
+        });
+
+        System.out.println("Exit of recommender: " + ret);
+        return ret;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/contract/PetStoreStatistics.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/contract/PetStoreStatistics.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/contract/PetStoreStatistics.java
new file mode 100755
index 0000000..ed618a8
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/contract/PetStoreStatistics.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.bigtop.bigpetstore.contract;
+
+import java.util.Map;
+
+/**
+ * This is the contract for the web site. This object is created by each ETL
+ * tool : Summary stats.
+ */
+public abstract class PetStoreStatistics {
+    /**
+     * Summary statistic: transaction counts per state, going by the
+     * method name. NOTE(review): the exact key format of the returned map
+     * is left to the implementing ETL tool -- confirm against each one.
+     */
+    public abstract Map<String, ? extends Number> numberOfTransactionsByState()
+            throws Exception;
+    /**
+     * Summary statistic: counts per product, going by the method name.
+     * NOTE(review): the exact key format of the returned map is left to
+     * the implementing ETL tool -- confirm against each one.
+     */
+    public abstract Map<String, ? extends Number> numberOfProductsByProduct()
+            throws Exception;
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/CrunchETL.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/CrunchETL.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/CrunchETL.java
new file mode 100755
index 0000000..f6f459c
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/CrunchETL.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.etl;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.contract.PetStoreStatistics;
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Crunch based implementation of the {@link PetStoreStatistics} contract.
+ *
+ * The constructor reads the {@code part-r-00000} file under the input path
+ * with an in-memory pipeline and turns every line into a {@link LineItem};
+ * the statistics methods then count those items per key.
+ */
+public class CrunchETL extends PetStoreStatistics {
+
+    /** Keys a line item by its description (the last field of a record). */
+    public static MapFn<LineItem, String> COUNT_BY_PRODUCT = new MapFn<LineItem, String>() {
+        @Override
+        public String map(LineItem lineItem) {
+            return lineItem.getDescription();
+        }
+    };
+    /**
+     * Keys a line item by its store code (e.g. storeCode_AK), which is the
+     * field that carries the state. This used to return the description,
+     * which made the by-state counts a copy of the by-product counts.
+     */
+    public static MapFn<LineItem, String> COUNT_BY_STATE = new MapFn<LineItem, String>() {
+        @Override
+        public String map(LineItem lineItem) {
+            return lineItem.getStoreCode();
+        }
+    };
+
+    /** Parsed records of the input file. */
+    PCollection<LineItem> lineItems;
+
+    /**
+     * @param input directory that contains the part-r-00000 file to read
+     * @param output currently unused
+     */
+    public CrunchETL(Path input, Path output) throws Exception {
+        Pipeline pipeline = MemPipeline.getInstance();
+        PCollection<String> lines = pipeline.read(From.textFile(new Path(input,
+                "part-r-00000")));
+        System.out.println("crunch : " + lines.getName() + "  "
+                + lines.getSize());
+        lineItems = lines.parallelDo(ETL, Avros.reflects(LineItem.class));
+
+    }
+
+    /**
+     * Parses one comma separated record into a {@link LineItem}.
+     *
+     * Field 1 is stored as both app name (as before) and store code, field
+     * 3 as first name and the last field as description.
+     */
+    public static MapFn<String, LineItem> ETL = new MapFn<String, LineItem>() {
+        @Override
+        public LineItem map(String input) {
+            String[] fields = input.split(",");
+            // Fail with the offending record instead of a bare
+            // ArrayIndexOutOfBoundsException on short lines.
+            if (fields.length < 4) {
+                throw new IllegalArgumentException(
+                        "Expected at least 4 comma separated fields : " + input);
+            }
+            LineItem li = new LineItem();
+            li.setAppName(fields[1]);
+            // COUNT_BY_STATE groups on the store code, so it must be set.
+            li.setStoreCode(fields[1]);
+            li.setFirstName(fields[3]);
+            // ...
+            li.setDescription(fields[fields.length - 1]);
+            return li;
+        }
+    };
+
+    /** Counts the parsed line items per store code. */
+    @Override
+    public Map<String, ? extends Number> numberOfTransactionsByState()
+            throws Exception {
+        PTable<String, Long> counts = lineItems.parallelDo(COUNT_BY_STATE,
+                Avros.strings()).count();
+        Map<String, Long> m = counts.materializeToMap();
+
+        System.out.println("Crunch:::  " + m);
+        return m;
+    }
+
+    /** Counts the parsed line items per description. */
+    @Override
+    public Map<String, ? extends Number> numberOfProductsByProduct()
+            throws Exception {
+        PTable<String, Long> counts = lineItems.parallelDo(COUNT_BY_PRODUCT,
+                Avros.strings()).count();
+        Map<String, Long> m = counts.materializeToMap();
+        //CrunchETL. System.out.println("Crunch:::  " + m);
+        return m;
+    }
+
+    /**
+     * Debugging entry point: parses a hard coded generated file with a
+     * map reduce pipeline and prints each resulting line item.
+     */
+    public static void main(String... args) throws Exception {
+        /**
+         * PCollection<String> lines = MemPipeline .collectionOf(
+         *  "BigPetStore,storeCode_AK,1  lindsay,franco,Sat Jan 10 00:11:10 EST 1970,10.5,dog-food"
+         *  "BigPetStore,storeCode_AZ,1  tom,giles,Sun Dec 28 23:08:45 EST 1969,10.5,dog-food"
+         *  "BigPetStore,storeCode_CA,1  brandon,ewing,Mon Dec 08 20:23:57 EST 1969,16.5,organic-dog-food"
+         *  "BigPetStore,storeCode_CA,2  angie,coleman,Thu Dec 11 07:00:31 EST 1969,10.5,dog-food"
+         *  "BigPetStore,storeCode_CA,3  angie,coleman,Tue Jan 20 06:24:23 EST 1970,7.5,cat-food"
+         *  "BigPetStore,storeCode_CO,1  sharon,trevino,Mon Jan 12 07:52:10 EST 1970,30.1,antelope snacks"
+         *  "BigPetStore,storeCode_CT,1  kevin,fitzpatrick,Wed Dec 10 05:24:13 EST 1969,10.5,dog-food"
+         *  "BigPetStore,storeCode_NY,1  dale,holden,Mon Jan 12 23:02:13 EST 1970,19.75,fish-food"
+         *  "BigPetStore,storeCode_NY,2  dale,holden,Tue Dec 30 12:29:52 EST 1969,10.5,dog-food"
+         *  "BigPetStore,storeCode_OK,1  donnie,tucker,Sun Jan 18 04:50:26 EST 1970,7.5,cat-food"
+         * );
+         **/
+        // FAILS
+        Pipeline pipeline = new MRPipeline(CrunchETL.class);
+
+        PCollection<String> lines = pipeline.read(From.textFile(new Path(
+                "/tmp/BigPetStore1388719888255/generated/part-r-00000")));
+
+
+        PCollection<LineItem> lineItems = lines.parallelDo(
+                new MapFn<String, LineItem>() {
+                    @Override
+                    public LineItem map(String input) {
+
+                        System.out.println("proc1 " + input);
+                        String[] fields = input.split(",");
+                        LineItem li = new LineItem();
+                        li.setAppName("" + fields[1]);
+                        li.setFirstName("" + fields[3]);
+                        li.setDescription("" + fields[fields.length - 1]);
+                        return li;
+                    }
+                }, Avros.reflects(LineItem.class));
+
+        for (LineItem i : lineItems.materialize()) {
+            System.out.println(i);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/HiveViewCreator.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/HiveViewCreator.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/HiveViewCreator.java
new file mode 100755
index 0000000..4fabb6f
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/HiveViewCreator.java
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.etl;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.bigtop.bigpetstore.util.NumericalIdUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.booleanValue_return;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.Tool;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ *
+ * Hive View creator is designed to read from Pigs cleaned output.
+ * The basic strategy is:
+ *
+ * 1) store pig output as a hive table
+ * 2) use "select .. as" to select a subset
+ *
+ * Note on running locally:
+ *
+ * 1) Local mode requires a hive and hadoop tarball, with HIVE_HOME and
+ * HADOOP_HOME pointing to it. 2) In HADOOP_HOME, you will need to cp the
+ * HIVE_HOME/lib/hive-serde*jar file into HADOOP_HOME/lib.
+ *
+ * Then, the below queries will run.
+ *
+ * The reason for this is that the hive SerDe stuff is used in the MapReduce
+ * phase of things, so those utils need to be available to hadoop itself. That
+ * is because the regex input/output is processed by the mappers.
+ *
+ */
+public class HiveViewCreator implements Tool {
+
+    static {
+        try{
+            Class.forName("org.apache.hadoop.hive.ql.exec.mr.ExecDriver");
+            System.out.println("found exec driver !!!!!!!!!!!!!!!!");
+        }
+        catch(Throwable t) {
+            throw new RuntimeException(t);
+        }
+        try{
+            //Class.forName("org.apache.hadoop.hive.ql.exec.mr.ExecDriver");
+        }
+        catch(Throwable t) {
+            throw new RuntimeException(t);
+        }
+    }
+    Configuration conf;
+    @Override
+    public void setConf(Configuration conf) {
+        this.conf=conf;
+    }
+
+    @Override
+    public Configuration getConf() {
+        return conf;
+    }
+
+    /**
+     * Input args:
+     *  Cleaned data files from pig (tsv)
+     *  Ouptut table (desired path to mahout input data set)
+     *
+     */
+    @Override
+    public int run(String[] args) throws Exception {
+        Statement stmt = getConnection();
+        stmt.execute("DROP TABLE IF EXISTS " + BigPetStoreConstants.OUTPUTS.MAHOUT_CF_IN.name());
+        System.out.println("input data " + args[0]);
+        System.out.println("output table " + args[1]);
+
+        Path inTablePath =  new Path(args[0]);
+        String inTableName = "cleaned"+System.currentTimeMillis();
+        String outTableName = BigPetStoreConstants.OUTPUTS.MAHOUT_CF_IN.name();
+
+        Path outTablePath = new Path (inTablePath.getParent(),outTableName);
+
+        final String create = "CREATE EXTERNAL TABLE "+inTableName+" ("
+                + "  dump STRING,"
+                + "  state STRING,"
+                + "  trans_id STRING,"
+                + "  lname STRING,"
+                + "  fname STRING,"
+                + "  date STRING,"
+                + "  price STRING,"
+                + "  product STRING"
+                + ") ROW FORMAT "
+                + "DELIMITED FIELDS TERMINATED BY '\t' "
+                + "LINES TERMINATED BY '\n' "
+                + "STORED AS TEXTFILE "
+                + "LOCATION '"+inTablePath+"'";
+        boolean res = stmt.execute(create);
+        System.out.println("Execute return code : " +res);
+        //will change once we add hashes into pig ETL clean
+        String create2 =
+                "create table "+outTableName+" as "+
+                "select hash(concat(state,fname,lname)),',',hash(product),',',1 "
+                + "from "+inTableName;
+
+        System.out.println("CREATE = " + create2  );
+        System.out.println("OUT PATH = " + outTablePath);
+        boolean res2 = stmt.execute(create2);
+
+        String finalOutput = String.format(
+                "INSERT OVERWRITE DIRECTORY '%s' SELECT * FROM %s",outTablePath, outTableName) ;
+
+        stmt.execute(finalOutput);
+        System.out.println("FINAL OUTPUT STORED : " + outTablePath);
+        return 0;
+    }
+
+    public static final String HIVE_JDBC_DRIVER = "org.apache.hive.jdbc.HiveDriver";
+    public static final String HIVE_JDBC_EMBEDDED_CONNECTION = "jdbc:hive2://";
+
+    final static Logger log = LoggerFactory.getLogger(HiveViewCreator.class);
+
+
+    private Statement getConnection() throws ClassNotFoundException,
+            SQLException {
+        Class.forName(HIVE_JDBC_DRIVER);
+        Connection con = DriverManager.getConnection(
+                HIVE_JDBC_EMBEDDED_CONNECTION, "", "");
+        System.out.println("hive con = " + con.getClass().getName());
+        Statement stmt = con.createStatement();
+        return stmt;
+    }
+
+    public static void main(String[] args) throws Exception {
+        new HiveViewCreator()
+            .run(args);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/LineItem.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/LineItem.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/LineItem.java
new file mode 100755
index 0000000..87e5d0d
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/LineItem.java
@@ -0,0 +1,112 @@
+/** 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.etl;
+
+import java.io.Serializable;
+
+/**
+ * Plain bean for one record of the pet store data set.
+ *
+ * Line id and price are kept as nullable wrapper fields; their getters
+ * return 0 while the value is unset, so that instances created with the
+ * no-arg constructor can be read without a NullPointerException.
+ */
+public class LineItem implements Serializable{
+
+    public LineItem(String appName, String storeCode, Integer lineId, String firstName, String lastName, String timestamp, Double price, String description){
+        super();
+        this.appName=appName;
+        this.storeCode=storeCode;
+        this.lineId=lineId;
+        this.firstName=firstName;
+        this.lastName=lastName;
+        this.timestamp=timestamp;
+        this.price=price;
+        this.description=description;
+    }
+
+    String appName;
+    String storeCode;
+    Integer lineId;
+    String firstName;
+    String lastName;
+    String timestamp;
+    Double price;
+    String description;
+
+    /** Creates an item with every field unset (null). */
+    public LineItem(){
+        super();
+    }
+
+    public String getAppName(){
+        return appName;
+    }
+
+    public void setAppName(String appName){
+        this.appName=appName;
+    }
+
+    public String getStoreCode(){
+        return storeCode;
+    }
+
+    public void setStoreCode(String storeCode){
+        this.storeCode=storeCode;
+    }
+
+    /**
+     * @return the line id, or 0 when it has not been set
+     */
+    public int getLineId(){
+        // Unboxing a null Integer would throw a NullPointerException.
+        return lineId == null ? 0 : lineId;
+    }
+
+    public void setLineId(int lineId){
+        this.lineId=lineId;
+    }
+
+    public String getFirstName(){
+        return firstName;
+    }
+
+    public void setFirstName(String firstName){
+        this.firstName=firstName;
+    }
+
+    public String getLastName(){
+        return lastName;
+    }
+
+    public void setLastName(String lastName){
+        this.lastName=lastName;
+    }
+
+    public String getTimestamp(){
+        return timestamp;
+    }
+
+    public void setTimestamp(String timestamp){
+        this.timestamp=timestamp;
+    }
+
+    /**
+     * @return the price, or 0.0 when it has not been set
+     */
+    public double getPrice(){
+        // Unboxing a null Double would throw a NullPointerException.
+        return price == null ? 0.0 : price;
+    }
+
+    public void setPrice(double price){
+        this.price=price;
+    }
+
+    public String getDescription(){
+        return description;
+    }
+
+    public void setDescription(String description){
+        this.description=description;
+    }
+
+    // other constructors, parsers, etc.
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d3da8ceb/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java
new file mode 100644
index 0000000..01ddd6e
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.etl;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants;
+import org.apache.bigtop.bigpetstore.util.DeveloperTools;
+import org.apache.bigtop.bigpetstore.util.NumericalIdUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+
+/**
+ * Cleans the generated BigPetStore data set by ETL'ing it through pig.
+ *
+ * <p>All of the work happens in the constructor: the raw input is loaded,
+ * every record is split into uniform comma separated fields, the result is
+ * stored at the output path, and any ad-hoc pig scripts are then registered.
+ * The pigServer is kept in a field for the life of the instance, so that the
+ * intermediate data sets created in the constructor can be reused.
+ */
+public class PigCSVCleaner  {
+
+    PigServer pigServer;
+
+    /**
+     * Runs the cleaning pipeline.
+     *
+     * @param inputPath  location of the generated data; must already exist
+     * @param outputPath location the cleaned records are stored to; must not
+     *                   exist yet
+     * @param ex         pig execution type handed to the PigServer
+     * @param scripts    optional pig scripts, registered after the cleaned
+     *                   data has been stored; each one is given "input" and
+     *                   "output" parameters
+     * @throws RuntimeException if inputPath is missing or outputPath exists
+     * @throws Exception        on any file system or pig failure
+     */
+    public PigCSVCleaner(Path inputPath, Path outputPath, ExecType ex, File... scripts)
+            throws Exception {
+
+        // NOTE(review): a fresh Configuration is built here, so a
+        // Configuration prepared by the caller (e.g. the one ToolRunner hands
+        // to the Tool in main) is not consulted -- confirm this is intended.
+        // The file system is resolved from inputPath only and is then also
+        // used to check outputPath.
+        FileSystem fs = FileSystem.get(inputPath.toUri(), new Configuration());
+
+        if(! fs.exists(inputPath)){
+            throw new RuntimeException("INPUT path DOES NOT exist : " + inputPath);
+        }
+
+        if(fs.exists(outputPath)){
+            throw new RuntimeException("OUTPUT already exists : " + outputPath);
+        }
+
+        // The execution mode is whatever the caller asked for.
+        pigServer = new PigServer(ex);
+
+        /*
+         * First, load the raw records. Each line holds two tab delimited
+         * fields, for example
+         *
+         *   BigPetStore,storeCode_OK,2
+         *   yang,jay,Mon Dec 15 23:33:49 EST 1969,69.56,flea collar
+         *
+         * which are loaded as
+         *
+         *   ("BigPetStore,storeCode_OK,2",
+         *    "yang,jay,Mon Dec 15 23:33:49 EST 1969,69.56,flea collar")
+         *
+         * The path is concatenated into the statement rather than substituted
+         * with String.replaceAll: replaceAll treats '$' and '\' in the
+         * replacement text as regex syntax, so a path containing either one
+         * would be mangled or rejected.
+         */
+        pigServer.registerQuery(
+                "csvdata = LOAD '" + inputPath.toString() + "' AS (ID,DETAILS);");
+
+        /*
+         * Now, we want to split the two tab delimited fields into uniform
+         * fields of comma separated values. To do this, we
+         * 1) internally split the FIRST and SECOND fields by commas:
+         *    "a,b,c" --> (a,b,c)
+         * 2) FLATTEN the FIRST and SECOND fields:
+         *    (d,e) (a,b,c) -> d e a b c
+         */
+        pigServer.registerQuery(
+                "id_details = FOREACH csvdata GENERATE "
+                + "FLATTEN" + "(STRSPLIT(ID,',',3)) AS "
+                + "(drop, code, transaction) ,"
+                + "FLATTEN" + "(STRSPLIT(DETAILS,',',5)) AS "
+                + "(lname, fname, date, price,"
+                + "product:chararray);");
+
+        pigServer.store("id_details", outputPath.toString());
+
+        /*
+         * Now we run scripts... this is where you can add some
+         * arbitrary analytics.
+         *
+         * We add "input" and "output" parameters so that each
+         * script can read them and use them if they want.
+         *
+         * Otherwise, just hardcode your inputs into your pig scripts.
+         */
+        for(int i = 0 ; i < scripts.length ; i++) {
+            // Every script gets its own numbered output directory, placed
+            // next to the cleaned data.
+            Path adHocOut = new Path(
+                    outputPath.getParent(),
+                    BigPetStoreConstants.OUTPUTS.pig_ad_hoc_script.name() + i);
+            System.out.println("Setting default output to " + adHocOut);
+
+            Map<String,String> parameters = new HashMap<String,String>();
+            parameters.put("input", outputPath.toString());
+            parameters.put("output", adHocOut.toString());
+
+            pigServer.registerScript(scripts[i].getAbsolutePath(), parameters);
+        }
+    }
+
+    /**
+     * Turns the trailing command line arguments into pig script files.
+     *
+     * @param args       the full argument array
+     * @param startIndex index of the first argument that names a script
+     * @return one File per argument from startIndex onwards; empty when
+     *         there are none
+     * @throws RuntimeException if any named script does not exist
+     */
+    private static File[] files(String[] args,int startIndex) {
+        List<File> files = new ArrayList<File>();
+        for(int i = startIndex ; i < args.length ; i++) {
+            File f = new File(args[i]);
+            if(! f.exists()) {
+                throw new RuntimeException("Pig script arg " + i+ " " + f.getAbsolutePath() + " not found. ");
+            }
+            files.add(f);
+        }
+        System.out.println(
+                "Ad-hoc analytics:"+
+                "Added  " + files.size() + " pig scripts to post process.  "+
+                "Each one will be given $input and $output arguments.");
+        return files.toArray(new File[files.size()]);
+    }
+
+    /**
+     * Command line entry point.
+     *
+     * <p>Expects the generated data directory, then the pig output
+     * directory, then any number of pig script paths. The job is run through
+     * ToolRunner with MAPREDUCE as the pig execution type, and the JVM exits
+     * with the value ToolRunner returns.
+     *
+     * @param args command line arguments
+     * @throws Exception if the run fails
+     */
+    public static void main(final String[] args) throws Exception {
+        System.out.println("Starting pig etl " + args.length);
+
+        Configuration c = new Configuration();
+        int res = ToolRunner.run(
+                c,
+
+                new Tool() {
+                    Configuration conf;
+                    @Override
+                    public void setConf(Configuration conf) {
+                        this.conf=conf;
+                    }
+
+                    @Override
+                    public Configuration getConf() {
+                        return this.conf;
+                    }
+
+                    @Override
+                    public int run(String[] args) throws Exception {
+                        DeveloperTools.validate(
+                                args,
+                                "generated data directory",
+                                "pig output directory");
+                        // Everything after the two directories is treated
+                        // as a pig script to run once the data is cleaned.
+                        new PigCSVCleaner(
+                                new Path(args[0]),
+                                new Path(args[1]),
+                                ExecType.MAPREDUCE,
+                                files(args,2));
+                        return 0;
+                    }
+                }, args);
+        System.exit(res);
+    }
+}
\ No newline at end of file