You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by rn...@apache.org on 2015/03/30 19:04:34 UTC
[12/13] bigtop git commit: BIGTOP-1783: Import BigPetStore Data
Generator into BigTop
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
new file mode 100644
index 0000000..91db010
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategoryBuilder;
+
+import com.google.common.collect.Lists;
+import com.google.gson.Gson;
+
+/**
+ * Reads product categories, and the products listed inside each category,
+ * from a JSON document supplied as an {@link InputStream}.
+ *
+ * The document is decoded with Gson into plain lists and maps and is expected
+ * to be a JSON array whose elements are objects describing one category each.
+ */
+public class ProductsReader
+{
+    InputStream path;
+
+    /**
+     * @param path stream holding the JSON document; {@link #readData()} closes it
+     */
+    public ProductsReader(InputStream path)
+    {
+        this.path = path;
+    }
+
+    /**
+     * Wraps the fields of one decoded JSON product object in a {@link Product}.
+     *
+     * @param productJson the decoded JSON object; it is cast to a {@code Map}
+     * @return a product constructed from those fields
+     */
+    @SuppressWarnings("unchecked") // the decoded JSON object is treated as Map<String, Object>
+    protected Product parseProduct(Object productJson)
+    {
+        Map<String, Object> fields = (Map<String, Object>) productJson;
+        return new Product(fields);
+    }
+
+    /**
+     * Converts one decoded JSON object into a {@link ProductCategory}.
+     *
+     * Recognized keys are {@code category}, {@code species},
+     * {@code trigger_transaction}, {@code fields}, {@code daily_usage_rate},
+     * {@code base_amount_used_average}, {@code base_amount_used_variance},
+     * {@code transaction_trigger_rate}, {@code transaction_purchase_rate}
+     * and {@code items}.
+     *
+     * @param productCategoryObject the decoded JSON object; it is cast to a {@code Map}
+     * @return the category produced by the builder
+     * @throws Exception if an unknown key, or a species other than
+     *         {@code dog} or {@code cat}, is encountered
+     */
+    @SuppressWarnings("unchecked") // list/map element types follow the JSON layout described above
+    protected ProductCategory parseProductCategory(Object productCategoryObject) throws Exception
+    {
+        Map<String, Object> jsonProductCategory = (Map<String, Object>) productCategoryObject;
+
+        ProductCategoryBuilder builder = new ProductCategoryBuilder();
+
+        for(Map.Entry<String, Object> entry : jsonProductCategory.entrySet())
+        {
+            String key = entry.getKey();
+            Object value = entry.getValue();
+
+            if(key.equals("category"))
+            {
+                builder.setCategory((String) value);
+            }
+            else if(key.equals("species"))
+            {
+                for(String species : (List<String>) value)
+                {
+                    builder.addApplicableSpecies(parseSpecies(species));
+                }
+            }
+            else if(key.equals("trigger_transaction"))
+            {
+                builder.setTriggerTransaction((Boolean) value);
+            }
+            else if(key.equals("fields"))
+            {
+                for(String fieldName : (List<String>) value)
+                {
+                    builder.addFieldName(fieldName);
+                }
+            }
+            else if(key.equals("daily_usage_rate"))
+            {
+                builder.setDailyUsageRate((Double) value);
+            }
+            else if(key.equals("base_amount_used_average"))
+            {
+                builder.setAmountUsedPetPetAverage((Double) value);
+            }
+            else if(key.equals("base_amount_used_variance"))
+            {
+                builder.setAmountUsedPetPetVariance((Double) value);
+            }
+            else if(key.equals("transaction_trigger_rate"))
+            {
+                builder.setTriggerTransactionRate((Double) value);
+            }
+            else if(key.equals("transaction_purchase_rate"))
+            {
+                builder.setTriggerPurchaseRate((Double) value);
+            }
+            else if(key.equals("items"))
+            {
+                for(Object productJson : (List<Object>) value)
+                {
+                    builder.addProduct(parseProduct(productJson));
+                }
+            }
+            else
+            {
+                throw new Exception("Invalid field " + key + " encountered when parsing product categories JSON.");
+            }
+        }
+
+        return builder.build();
+    }
+
+    /** Maps a species name from the JSON document onto the matching {@link PetSpecies}. */
+    private static PetSpecies parseSpecies(String species) throws Exception
+    {
+        if(species.equals("dog"))
+        {
+            return PetSpecies.DOG;
+        }
+        if(species.equals("cat"))
+        {
+            return PetSpecies.CAT;
+        }
+        throw new Exception("Invalid species " + species + " encountered when parsing product categories JSON.");
+    }
+
+    /**
+     * Decodes the whole JSON document and converts every top-level element
+     * into a {@link ProductCategory}.
+     *
+     * @return the categories in document order
+     * @throws Exception if the document cannot be read or contains an
+     *         unknown field or species
+     */
+    @SuppressWarnings("unchecked") // the top-level JSON value is treated as a list
+    public List<ProductCategory> readData() throws Exception
+    {
+        // name the charset explicitly so decoding does not depend on the platform default
+        Reader reader = new InputStreamReader(path, "UTF-8");
+        try
+        {
+            Object json = new Gson().fromJson(reader, Object.class);
+            List<Object> productCategoryObjects = (List<Object>) json;
+
+            List<ProductCategory> productCategories = Lists.newArrayList();
+            for(Object obj : productCategoryObjects)
+            {
+                productCategories.add(parseProductCategory(obj));
+            }
+
+            return productCategories;
+        }
+        finally
+        {
+            // runs on success and on failure, so a malformed document no longer leaks the stream
+            reader.close();
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
new file mode 100644
index 0000000..a4ccdd6
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+/**
+ * Builds the list of {@link ZipcodeRecord}s by joining three delimited text
+ * inputs on zipcode: median household incomes, populations, and coordinates
+ * (which also carry city and state).
+ *
+ * Only zipcodes that appear in all three inputs are kept.
+ */
+public class ZipcodeReader
+{
+    /** City, state and coordinates parsed from one row of the coordinates input. */
+    private static class ZipcodeLocationRecord
+    {
+        public final Pair<Double, Double> coordinates;
+        public final String state;
+        public final String city;
+
+        public ZipcodeLocationRecord(Pair<Double, Double> coordinates,
+                String city, String state)
+        {
+            this.coordinates = coordinates;
+            this.city = city;
+            this.state = state;
+        }
+    }
+
+    InputStream zipcodeIncomesFile = null;
+    InputStream zipcodePopulationFile = null;
+    InputStream zipcodeCoordinatesFile = null;
+
+    /** @param path stream supplying the incomes input */
+    public void setIncomesFile(InputStream path)
+    {
+        this.zipcodeIncomesFile = path;
+    }
+
+    /** @param path stream supplying the population input */
+    public void setPopulationFile(InputStream path)
+    {
+        this.zipcodePopulationFile = path;
+    }
+
+    /** @param path stream supplying the coordinates input */
+    public void setCoordinatesFile(InputStream path)
+    {
+        this.zipcodeCoordinatesFile = path;
+    }
+
+    /**
+     * Parses the incomes input: comma-separated, two header lines, zipcode in
+     * column 2 (as "ZCTA5 XXXXX") and the income in column 5.
+     *
+     * @return income keyed by zipcode; rows whose income is not an integer are omitted
+     */
+    private ImmutableMap<String, Double> readIncomeData(InputStream path)
+    {
+        // explicit charset keeps parsing independent of the platform default
+        Scanner scanner = new Scanner(path, "UTF-8");
+        try
+        {
+            // skip headers
+            scanner.nextLine();
+            scanner.nextLine();
+
+            Map<String, Double> entries = Maps.newHashMap();
+            while(scanner.hasNextLine())
+            {
+                String line = scanner.nextLine().trim();
+
+                // tolerate stray blank lines, as readPopulationData does
+                if(line.length() == 0)
+                    continue;
+
+                String[] cols = line.split(",");
+                // zipcodes are in the form "ZCTA5 XXXXX"
+                String zipcode = cols[2].split(" ")[1].trim();
+                try
+                {
+                    double medianHouseholdIncome = Integer.parseInt(cols[5].trim());
+                    entries.put(zipcode, medianHouseholdIncome);
+                }
+                catch(NumberFormatException ignored)
+                {
+                    // presumably deliberate: a row without a plain integer income
+                    // is left out of the map rather than failing the whole read
+                }
+            }
+
+            return ImmutableMap.copyOf(entries);
+        }
+        finally
+        {
+            // close even when a malformed row aborts parsing
+            scanner.close();
+        }
+    }
+
+    /**
+     * Parses the population input: comma-separated, one header line, zipcode
+     * in column 0 and population in column 1.
+     *
+     * @return population keyed by zipcode; when a zipcode repeats, the largest value wins
+     */
+    private ImmutableMap<String, Long> readPopulationData(InputStream path)
+    {
+        Scanner scanner = new Scanner(path, "UTF-8");
+        try
+        {
+            // skip header
+            scanner.nextLine();
+
+            Map<String, Long> entries = Maps.newHashMap();
+            while(scanner.hasNextLine())
+            {
+                String line = scanner.nextLine().trim();
+
+                if(line.length() == 0)
+                    continue;
+
+                String[] cols = line.split(",");
+
+                String zipcode = cols[0].trim();
+                Long population = Long.parseLong(cols[1].trim());
+
+                if(entries.containsKey(zipcode))
+                {
+                    entries.put(zipcode, Math.max(entries.get(zipcode), population));
+                }
+                else
+                {
+                    entries.put(zipcode, population);
+                }
+            }
+
+            return ImmutableMap.copyOf(entries);
+        }
+        finally
+        {
+            scanner.close();
+        }
+    }
+
+    /**
+     * Parses the coordinates input: one header line, columns separated by
+     * ", " and each wrapped in a leading and trailing quote character, in the
+     * order zipcode, state, latitude, longitude, city.
+     *
+     * @return location record keyed by zipcode
+     */
+    private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path)
+    {
+        Scanner scanner = new Scanner(path, "UTF-8");
+        try
+        {
+            // skip header
+            scanner.nextLine();
+
+            Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap();
+            while(scanner.hasNextLine())
+            {
+                String line = scanner.nextLine().trim();
+
+                // tolerate stray blank lines, as readPopulationData does
+                if(line.length() == 0)
+                    continue;
+
+                String[] cols = line.split(", ");
+
+                // remove quote marks
+                String zipcode = cols[0].substring(1, cols[0].length() - 1);
+                String state = cols[1].substring(1, cols[1].length() - 1);
+                Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1));
+                Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1));
+                String city = cols[4].substring(1, cols[4].length() - 1);
+
+                Pair<Double, Double> coords = new Pair<Double, Double>(latitude, longitude);
+
+                entries.put(zipcode, new ZipcodeLocationRecord(coords, city, state));
+            }
+
+            return ImmutableMap.copyOf(entries);
+        }
+        finally
+        {
+            scanner.close();
+        }
+    }
+
+    /**
+     * Reads the three configured inputs and joins them on zipcode.
+     *
+     * Each input stream is closed through its scanner once it has been read.
+     *
+     * @return one record per zipcode present in all three inputs
+     * @throws FileNotFoundException never thrown by this implementation; the
+     *         declaration is kept so existing callers continue to compile
+     */
+    public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException
+    {
+        ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile);
+        ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile);
+        ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile);
+
+        // intersection of the three key sets
+        Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet());
+        zipcodeSubset.retainAll(populations.keySet());
+        zipcodeSubset.retainAll(coordinates.keySet());
+
+        List<ZipcodeRecord> table = new Vector<ZipcodeRecord>();
+        for(String zipcode : zipcodeSubset)
+        {
+            ZipcodeLocationRecord location = coordinates.get(zipcode);
+            ZipcodeRecord record = new ZipcodeRecord(zipcode,
+                    location.coordinates,
+                    location.city,
+                    location.state,
+                    incomes.get(zipcode),
+                    populations.get(zipcode));
+            table.add(record);
+        }
+
+        return ImmutableList.copyOf(table);
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
new file mode 100644
index 0000000..aea004e
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework;
+
+import java.util.Random;
+
+/**
+ * Hands out {@code long} seed values drawn one after another from a single
+ * {@link Random} instance.
+ */
+public class SeedFactory
+{
+    Random rng;
+
+    /** Creates a factory backed by a {@link Random} built with its no-argument constructor. */
+    public SeedFactory()
+    {
+        this(new Random());
+    }
+
+    /**
+     * Creates a factory backed by a {@link Random} built from the given seed.
+     *
+     * @param seed value handed to {@link Random#Random(long)}
+     */
+    public SeedFactory(long seed)
+    {
+        this(new Random(seed));
+    }
+
+    /** Shared tail of the public constructors: stores the generator to draw from. */
+    private SeedFactory(Random source)
+    {
+        this.rng = source;
+    }
+
+    /**
+     * @return the next {@code long} produced by the underlying generator
+     */
+    public long getNextSeed()
+    {
+        return this.rng.nextLong();
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
new file mode 100644
index 0000000..cf2a40d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * Holds the weights that describe a Markov model over states of type
+ * {@code T}: a weight for each possible start state and, for each state, a
+ * weight for each state that may follow it.
+ *
+ * The maps passed to the constructor are stored and returned as-is; no
+ * copies are made.
+ */
+public class MarkovModel<T> implements Serializable
+{
+    // pinned explicitly so the serialized form does not depend on the
+    // compiler-derived default, which can change with unrelated edits
+    private static final long serialVersionUID = 1L;
+
+    final Map<T, Map<T, Double>> transitionWeights;
+    final Map<T, Double> startWeights;
+
+    /**
+     * @param transitionWeights for each state, the weights of the states that may follow it
+     * @param startWeights the weight of each possible start state
+     */
+    public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights)
+    {
+        this.transitionWeights = transitionWeights;
+        this.startWeights = startWeights;
+    }
+
+    /**
+     * @return the transition-weight map given to the constructor
+     */
+    public Map<T, Map<T, Double>> getTransitionWeights()
+    {
+        return transitionWeights;
+    }
+
+    /**
+     * @return the start-weight map given to the constructor
+     */
+    public Map<T, Double> getStartWeights()
+    {
+        return startWeights;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
new file mode 100644
index 0000000..861c0ef
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableTable;
+
+/**
+ * Collects start-state weights and state-to-state transition weights and
+ * assembles them into a {@link MarkovModel}.
+ */
+public class MarkovModelBuilder<S>
+{
+    ImmutableTable.Builder<S, S, Double> transitionWeights = ImmutableTable.builder();
+    ImmutableMap.Builder<S, Double> startWeights = ImmutableMap.builder();
+
+    /** Creates a builder with no start states and no transitions. */
+    public MarkovModelBuilder()
+    {
+    }
+
+    /**
+     * Static factory equivalent to calling the constructor.
+     *
+     * @return a new, empty builder
+     */
+    public static <T> MarkovModelBuilder<T> create()
+    {
+        return new MarkovModelBuilder<T>();
+    }
+
+    /**
+     * Records the weight of a possible start state.
+     *
+     * @param state the start state
+     * @param weight the weight associated with starting in {@code state}
+     */
+    public void addStartState(S state, double weight)
+    {
+        this.startWeights.put(state, weight);
+    }
+
+    /**
+     * Records the weight of moving from one state to another.
+     *
+     * @param state1 the state being left
+     * @param state2 the state being entered
+     * @param weight the weight associated with that transition
+     */
+    public void addTransition(S state1, S state2, double weight)
+    {
+        this.transitionWeights.put(state1, state2, weight);
+    }
+
+    /**
+     * @return a model holding the row view of the collected transition table
+     *         and the collected start weights
+     */
+    public MarkovModel<S> build()
+    {
+        ImmutableTable<S, S, Double> transitions = this.transitionWeights.build();
+        return new MarkovModel<S>(transitions.rowMap(), this.startWeights.build());
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
new file mode 100644
index 0000000..2a72e65
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableTable;
+
+/**
+ * A {@link Sampler} that walks a {@link MarkovModel}: the first sample is
+ * drawn from the model's start weights, and every later sample is drawn from
+ * the transition weights of the state returned by the previous call.
+ */
+public class MarkovProcess<T> implements Sampler<T>
+{
+    final ImmutableMap<T, Sampler<T>> transitionSamplers;
+    final Sampler<T> startStateSampler;
+
+    // null until the first call to sample()
+    T currentState;
+
+    /**
+     * Builds one roulette-wheel sampler for the start weights and one for
+     * each row of the model's transition weights.
+     *
+     * @param model supplies the start and transition weights
+     * @param factory passed to every sampler that is created
+     */
+    public MarkovProcess(MarkovModel<T> model, SeedFactory factory)
+    {
+        Map<T, Map<T, Double>> transitionTable = model.getTransitionWeights();
+
+        startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory);
+
+        ImmutableMap.Builder<T, Sampler<T>> builder = ImmutableMap.builder();
+        for(Map.Entry<T, Map<T, Double>> entry : transitionTable.entrySet())
+        {
+            builder.put(entry.getKey(), RouletteWheelSampler.create(entry.getValue(), factory));
+        }
+
+        this.transitionSamplers = builder.build();
+
+        currentState = null;
+    }
+
+    /**
+     * Static factory equivalent to calling the constructor.
+     */
+    public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory)
+    {
+        return new MarkovProcess<T>(model, factory);
+    }
+
+    /**
+     * Advances the process by one step.
+     *
+     * @return the start state on the first call, otherwise the state reached
+     *         from the previously returned one
+     * @throws IllegalStateException if the current state has no outgoing
+     *         transitions in the model
+     * @throws Exception if the underlying sampler fails
+     */
+    public T sample() throws Exception
+    {
+        if(currentState == null)
+        {
+            currentState = startStateSampler.sample();
+            return currentState;
+        }
+
+        Sampler<T> nextStateSampler = transitionSamplers.get(currentState);
+        if(nextStateSampler == null)
+        {
+            // a state with no row in the transition table used to surface as a bare NullPointerException
+            throw new IllegalStateException("No transitions defined from state " + currentState);
+        }
+
+        currentState = nextStateSampler.sample();
+        return currentState;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
new file mode 100644
index 0000000..f879870
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * A density over values of type {@code T} that additionally depends on a
+ * conditioning value of type {@code S}.
+ */
+public interface ConditionalProbabilityDensityFunction<T, S>
+{
+    /**
+     * @param datum the value being evaluated
+     * @param conditionalDatum the value being conditioned on
+     * @return the probability assigned to {@code datum} given {@code conditionalDatum}
+     */
+    double probability(T datum, S conditionalDatum);
+
+    /**
+     * @param conditionalDatum the value to condition on
+     * @return a density over {@code T} alone; presumably equivalent to this
+     *         function with {@code conditionalDatum} held fixed (implementations
+     *         are not visible here)
+     */
+    ProbabilityDensityFunction<T> fixConditional(S conditionalDatum);
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
new file mode 100644
index 0000000..9d0d6f2
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * A density defined by an explicit table of value-to-probability entries.
+ * The table is copied on construction; values absent from it have
+ * probability zero.
+ */
+public class DiscretePDF<T> implements ProbabilityDensityFunction<T>
+{
+    private final ImmutableMap<T, Double> probabilities;
+
+    /**
+     * @param probabilities the probability of each value; copied into an immutable map
+     */
+    public DiscretePDF(Map<T, Double> probabilities)
+    {
+        this.probabilities = ImmutableMap.copyOf(probabilities);
+    }
+
+    /**
+     * @return the values that have an entry in the table
+     */
+    public Set<T> getData()
+    {
+        return this.probabilities.keySet();
+    }
+
+    /**
+     * @param value the value to look up
+     * @return the tabulated probability, or {@code 0.0} when {@code value} has no entry
+     */
+    public double probability(T value)
+    {
+        // the immutable map never stores null, so a null lookup result means "no entry"
+        Double tabulated = this.probabilities.get(value);
+        if(tabulated == null)
+        {
+            return 0.0;
+        }
+
+        return tabulated;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
new file mode 100644
index 0000000..dcc1278
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Density of the exponential distribution with rate {@code lambda}:
+ * {@code lambda * exp(-lambda * x)} for {@code x >= 0} and zero below.
+ */
+public class ExponentialPDF implements ProbabilityDensityFunction<Double>
+{
+    private final double lambda;
+
+    /**
+     * @param lambda the rate parameter of the distribution
+     */
+    public ExponentialPDF(double lambda)
+    {
+        this.lambda = lambda;
+    }
+
+    /**
+     * @param value the point at which to evaluate the density
+     * @return the density at {@code value}; {@code 0.0} for negative values
+     */
+    public double probability(Double value)
+    {
+        // the distribution is supported on [0, inf); without this guard the
+        // formula below grows without bound for negative arguments
+        if(value < 0.0)
+        {
+            return 0.0;
+        }
+
+        return lambda * Math.exp(-1.0 * value * lambda);
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
new file mode 100644
index 0000000..55ebc93
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Density of the normal distribution with the given mean and standard
+ * deviation.
+ */
+public class GaussianPDF implements ProbabilityDensityFunction<Double>
+{
+    private final double mean;
+    private final double std;
+
+    /**
+     * @param mean the mean of the distribution
+     * @param std the standard deviation of the distribution
+     */
+    public GaussianPDF(double mean, double std)
+    {
+        this.mean = mean;
+        this.std = std;
+    }
+
+    /**
+     * @param value the point at which to evaluate the density
+     * @return {@code exp(-(mean - value)^2 / (2 * std^2)) / (std * sqrt(2 * pi))}
+     */
+    public double probability(Double value)
+    {
+        double delta = mean - value;
+        double exponent = -1.0 * (delta * delta) / (2.0 * (std * std));
+        double scale = std * Math.sqrt(2.0 * Math.PI);
+
+        return Math.exp(exponent) / scale;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
new file mode 100644
index 0000000..fdf2db0
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.List;
+
+/**
+ * Combines several densities over the same type by multiplying them and
+ * dividing by a normalization factor computed once, at construction, as the
+ * sum of that product over a given list of data.
+ */
+public class JointPDF<T> implements ProbabilityDensityFunction<T>
+{
+    double normalizationFactor;
+    ProbabilityDensityFunction<T>[] pdfs;
+
+    /**
+     * @param data the values whose product-of-densities is summed to obtain the normalization factor
+     * @param pdfs the densities to multiply together; the array is stored as given
+     */
+    public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs)
+    {
+        this.pdfs = pdfs;
+
+        double total = 0.0d;
+        for(T datum : data)
+        {
+            total += unnormalized(datum);
+        }
+        this.normalizationFactor = total;
+    }
+
+    /**
+     * @param datum the value to evaluate
+     * @return the product of all component densities at {@code datum}, divided by the normalization factor
+     */
+    public double probability(T datum)
+    {
+        return unnormalized(datum) / normalizationFactor;
+    }
+
+    /** Product of every component density evaluated at {@code datum}. */
+    private double unnormalized(T datum)
+    {
+        double product = 1.0;
+        for(ProbabilityDensityFunction<T> pdf : pdfs)
+        {
+            product *= pdf.probability(datum);
+        }
+        return product;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
new file mode 100644
index 0000000..1b691ca
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Maps a datum to a probability.
+ *
+ * @param <T> type of datum being scored
+ */
+public interface ProbabilityDensityFunction<T>
+{
+    /**
+     * @param datum value to score
+     * @return probability assigned to {@code datum}
+     */
+    double probability(T datum);
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
new file mode 100644
index 0000000..ea8e77e
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Probability function that returns the same fixed value for every datum.
+ *
+ * @param <T> type of datum being scored (the datum itself is ignored)
+ */
+public class UniformPDF<T> implements ProbabilityDensityFunction<T>
+{
+    private final double probability;
+
+    /**
+     * Uses {@code 1.0 / count} as the fixed probability.
+     *
+     * @param count divisor applied to 1.0
+     */
+    public UniformPDF(long count)
+    {
+        this(1.0 / count);
+    }
+
+    /**
+     * @param probability value returned for every datum
+     */
+    public UniformPDF(double probability)
+    {
+        this.probability = probability;
+    }
+
+    /**
+     * @param datum ignored
+     * @return the fixed probability chosen at construction
+     */
+    public double probability(T datum)
+    {
+        return this.probability;
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
new file mode 100644
index 0000000..475b24d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.List;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Samples from one of several Gaussian distributions, each given as a
+ * (mean, standard deviation) pair, and clamps the result to
+ * {@code [min, max]}.
+ */
+public class BoundedMultiModalGaussianSampler implements Sampler<Double>
+{
+    ImmutableList<Pair<Double, Double>> distributions;
+
+    double min;
+    double max;
+    Random rng;
+
+    /**
+     * @param distributions (mean, standard deviation) pairs; copied into an
+     *        immutable list
+     * @param min lower clamp applied to every sample
+     * @param max upper clamp applied to every sample
+     * @param seedFactory supplies the seed for this sampler's random generator
+     */
+    public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory)
+    {
+        this.rng = new Random(seedFactory.getNextSeed());
+        this.distributions = ImmutableList.copyOf(distributions);
+
+        this.min = min;
+        this.max = max;
+    }
+
+    /**
+     * Picks one distribution with equal chance, draws a Gaussian value from
+     * it, and limits the value first to {@code max} and then to {@code min}.
+     *
+     * @return the clamped draw
+     */
+    public Double sample()
+    {
+        // The index is drawn before the Gaussian so the random stream is
+        // consumed in a fixed order.
+        Pair<Double, Double> chosen = distributions.get(rng.nextInt(distributions.size()));
+
+        double mean = chosen.getFirst();
+        double std = chosen.getSecond();
+
+        double unbounded = mean + rng.nextGaussian() * std;
+
+        return Math.max(Math.min(unbounded, this.max), this.min);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
new file mode 100644
index 0000000..54506e2
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Sampler whose draws depend on a conditioning value.
+ *
+ * @param <T> type of the sampled values
+ * @param <S> type of the conditioning value
+ */
+public interface ConditionalSampler<T, S>
+{
+    /**
+     * @param conditional value the draw is conditioned on
+     * @return a sampled value
+     * @throws Exception if a value cannot be sampled
+     */
+    T sample(S conditional) throws Exception;
+
+    /**
+     * @param conditional value to condition on
+     * @return a sampler for the given conditioning value
+     * @throws Exception if the sampler cannot be created
+     */
+    Sampler<T> fixConditional(S conditional) throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
new file mode 100644
index 0000000..82e4d2d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Emits the arithmetic sequence {@code start, start + step, start + 2 * step, ...}
+ * one value per call, stopping before {@code end} when an end is given.
+ */
+public class DoubleSequenceSampler implements Sampler<Double>
+{
+    Double start;
+    Double end;
+    Double step;
+    Double next;
+
+    // Number of values handed out so far; used to derive each value directly
+    // from start and step instead of from the previous value.
+    private long sampled = 0L;
+
+    /**
+     * Unbounded sequence starting at 0.0 with a step of 1.0.
+     */
+    public DoubleSequenceSampler()
+    {
+        this(0.0, null, 1.0);
+    }
+
+    /**
+     * Unbounded sequence with a step of 1.0.
+     *
+     * @param start first value returned
+     */
+    public DoubleSequenceSampler(Double start)
+    {
+        this(start, null, 1.0);
+    }
+
+    /**
+     * Sequence with a step of 1.0.
+     *
+     * @param start first value returned
+     * @param end exclusive upper limit, or {@code null} for no limit
+     */
+    public DoubleSequenceSampler(Double start, Double end)
+    {
+        this(start, end, 1.0);
+    }
+
+    /**
+     * @param start first value returned
+     * @param end exclusive upper limit, or {@code null} for no limit
+     * @param step difference between consecutive values
+     */
+    public DoubleSequenceSampler(Double start, Double end, Double step)
+    {
+        this.start = start;
+        this.end = end;
+        this.step = step;
+        next = start;
+    }
+
+    /**
+     * Returns the next value of the sequence.
+     *
+     * @return the next value
+     * @throws Exception once the next value is no longer below {@code end}
+     */
+    public Double sample() throws Exception
+    {
+        if(end == null || next < end)
+        {
+            Double current = next;
+
+            // Derive the following value from start rather than adding step to
+            // the previous value: repeated addition accumulates rounding error
+            // (ten additions of 0.1 give 0.9999999999999999, which would let an
+            // extra value slip under an end of 1.0).
+            sampled++;
+            next = start + sampled * step;
+            return current;
+        }
+
+        throw new Exception("All values have been sampled");
+    }
+
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
new file mode 100644
index 0000000..082f3ac
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class ExponentialSampler implements Sampler<Double>
+{
+ final private Random rng;
+ final private double lambda;
+
+ public ExponentialSampler(double lambda, SeedFactory seedFactory)
+ {
+ rng = new Random(seedFactory.getNextSeed());
+ this.lambda = lambda;
+ }
+
+ public Double sample()
+ {
+ return - Math.log(1.0 - rng.nextDouble()) / lambda;
+ }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
new file mode 100644
index 0000000..ed40cc8
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class GaussianSampler implements Sampler<Double>
+{
+ double mean;
+ double std;
+ Random rng;
+
+ public GaussianSampler(double mean, double std, SeedFactory seedFactory)
+ {
+ rng = new Random(seedFactory.getNextSeed());
+ this.mean = mean;
+ this.std = std;
+ }
+
+ public Double sample()
+ {
+ return rng.nextGaussian() * std + mean;
+ }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
new file mode 100644
index 0000000..0db8200
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+
+/**
+ * Rejection sampler: proposes states from an underlying sampler and accepts
+ * each proposal with the probability reported by an acceptance function.
+ *
+ * @param <T> type of the sampled states
+ */
+public class MonteCarloSampler<T> implements Sampler<T>
+{
+    private final Sampler<T> stateSampler;
+    private final Random rng;
+    private final ProbabilityDensityFunction<T> acceptancePDF;
+
+    /**
+     * @param stateGenerator source of proposed states
+     * @param acceptancePDF gives the acceptance probability of a proposal
+     * @param seedFactory supplies the seed for this sampler's random generator
+     */
+    public MonteCarloSampler(Sampler<T> stateGenerator,
+            ProbabilityDensityFunction<T> acceptancePDF,
+            SeedFactory seedFactory)
+    {
+        this.acceptancePDF = acceptancePDF;
+        this.stateSampler = stateGenerator;
+
+        this.rng = new Random(seedFactory.getNextSeed());
+    }
+
+    /**
+     * Keeps proposing states until one is accepted; a proposal is accepted
+     * when a fresh uniform draw is below its acceptance probability. The loop
+     * has no iteration limit.
+     *
+     * @return the first accepted state
+     * @throws Exception if the underlying sampler fails
+     */
+    public T sample() throws Exception
+    {
+        T candidate;
+        boolean accepted;
+        do
+        {
+            candidate = this.stateSampler.sample();
+            // Probability is evaluated before the random draw is taken.
+            double acceptance = acceptancePDF.probability(candidate);
+            double draw = rng.nextDouble();
+            accepted = draw < acceptance;
+        }
+        while(!accepted);
+
+        return candidate;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
new file mode 100644
index 0000000..72681f8
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.DiscretePDF;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Maps;
+
+/**
+ * Samples items with probability proportional to their weights by walking a
+ * list of cumulative probabilities ("roulette wheel").
+ *
+ * @param <T> type of the items being sampled
+ */
+public class RouletteWheelSampler<T> implements Sampler<T>
+{
+    Random rng;
+    final ImmutableList<Pair<T, Double>> wheel;
+
+    /**
+     * @param domainWeights weight of each item; weights need not sum to 1
+     * @param factory supplies the seed for the sampler's random generator
+     * @return a sampler over the keys of {@code domainWeights}
+     */
+    public static <T> RouletteWheelSampler<T> create(Map<T, Double> domainWeights, SeedFactory factory)
+    {
+        return new RouletteWheelSampler<T>(domainWeights, factory);
+    }
+
+    /**
+     * @param pdf supplies both the items ({@code getData()}) and their weights
+     * @param factory supplies the seed for the sampler's random generator
+     * @return a sampler over the data of {@code pdf}
+     */
+    public static <T> RouletteWheelSampler<T> create(DiscretePDF<T> pdf, SeedFactory factory)
+    {
+        return new RouletteWheelSampler<T>(pdf.getData(), pdf, factory);
+    }
+
+    /**
+     * @param data items to sample from
+     * @param pdf gives the weight of each item
+     * @param factory supplies the seed for the sampler's random generator
+     * @return a sampler over {@code data}
+     */
+    public static <T> RouletteWheelSampler<T> create(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory)
+    {
+        return new RouletteWheelSampler<T>(data, pdf, factory);
+    }
+
+    /**
+     * @param data items to sample from, each given a weight of 1.0
+     * @param factory supplies the seed for the sampler's random generator
+     * @return a sampler that weights every distinct item equally
+     */
+    public static <T> RouletteWheelSampler<T> createUniform(Collection<T> data, SeedFactory factory)
+    {
+        Map<T, Double> pdf = Maps.newHashMap();
+        for(T datum : data)
+        {
+            pdf.put(datum, 1.0);
+        }
+
+        return create(pdf, factory);
+    }
+
+    /**
+     * @param domainWeights weight of each item; weights need not sum to 1
+     * @param factory supplies the seed for the sampler's random generator
+     */
+    public RouletteWheelSampler(Map<T, Double> domainWeights, SeedFactory factory)
+    {
+        this.rng = new Random(factory.getNextSeed());
+        this.wheel = this.normalize(domainWeights);
+    }
+
+    /**
+     * @param data items to sample from
+     * @param pdf gives the weight of each item
+     * @param factory supplies the seed for the sampler's random generator
+     */
+    public RouletteWheelSampler(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory)
+    {
+        this.rng = new Random(factory.getNextSeed());
+
+        Map<T, Double> domainWeights = Maps.newHashMap();
+        for(T datum : data)
+        {
+            double prob = pdf.probability(datum);
+            domainWeights.put(datum, prob);
+        }
+
+        this.wheel = this.normalize(domainWeights);
+    }
+
+    /**
+     * Divides every weight by the sum of all weights and pairs each item with
+     * the running total of those probabilities, in the iteration order of
+     * the map's entry set.
+     */
+    private ImmutableList<Pair<T, Double>> normalize(Map<T, Double> domainWeights)
+    {
+        double weightSum = 0.0;
+        for(Map.Entry<T, Double> entry : domainWeights.entrySet())
+        {
+            weightSum += entry.getValue();
+        }
+
+        double cumProb = 0.0;
+        ImmutableList.Builder<Pair<T, Double>> builder = ImmutableList.builder();
+        for(Map.Entry<T, Double> entry : domainWeights.entrySet())
+        {
+            double prob = entry.getValue() / weightSum;
+            cumProb += prob;
+
+            builder.add(Pair.create(entry.getKey(), cumProb));
+        }
+
+        return builder.build();
+    }
+
+    /**
+     * Draws a uniform number and returns the first item whose cumulative
+     * probability exceeds it.
+     *
+     * @return the sampled item
+     * @throws IllegalStateException if the wheel is empty or its cumulative
+     *         probabilities are unusable (NaN or not positive)
+     */
+    public T sample()
+    {
+        double r = rng.nextDouble();
+        for(Pair<T, Double> cumProbPair : wheel)
+            if(r < cumProbPair.getSecond())
+                return cumProbPair.getFirst();
+
+        // The cumulative probabilities are built by repeated addition, so the
+        // final one can land a hair below 1.0 (ten weights of 0.1 sum to
+        // 0.9999999999999999) and the draw can equal or exceed it. In that
+        // case return the first entry that reaches the final total, i.e. the
+        // last entry with a positive weight. A NaN or non-positive total fails
+        // the checks below and still reports the invalid state.
+        if(!wheel.isEmpty())
+        {
+            double total = wheel.get(wheel.size() - 1).getSecond();
+            if(total > 0.0)
+            {
+                for(Pair<T, Double> cumProbPair : wheel)
+                    if(cumProbPair.getSecond() >= total)
+                        return cumProbPair.getFirst();
+            }
+        }
+
+        throw new IllegalStateException("Invalid state -- RouletteWheelSampler should never fail to sample!");
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
new file mode 100644
index 0000000..08af7e0
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Source of sampled values.
+ *
+ * @param <T> type of the sampled values
+ */
+public interface Sampler<T>
+{
+    /**
+     * @return a sampled value
+     * @throws Exception if a value cannot be sampled
+     */
+    T sample() throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
new file mode 100644
index 0000000..a81c846
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Emits the integer sequence {@code start, start + step, start + 2 * step, ...}
+ * one value per call, stopping before {@code end} when an end is given.
+ */
+public class SequenceSampler implements Sampler<Integer>
+{
+    Integer start;
+    Integer end;
+    Integer step;
+    Integer next;
+
+    /**
+     * Unbounded sequence starting at 0 with a step of 1.
+     */
+    public SequenceSampler()
+    {
+        this(0, null, 1);
+    }
+
+    /**
+     * Unbounded sequence with a step of 1.
+     *
+     * @param start first value returned
+     */
+    public SequenceSampler(Integer start)
+    {
+        this(start, null, 1);
+    }
+
+    /**
+     * Sequence with a step of 1.
+     *
+     * @param start first value returned
+     * @param end exclusive upper limit, or {@code null} for no limit
+     */
+    public SequenceSampler(Integer start, Integer end)
+    {
+        this(start, end, 1);
+    }
+
+    /**
+     * @param start first value returned
+     * @param end exclusive upper limit, or {@code null} for no limit
+     * @param step difference between consecutive values
+     */
+    public SequenceSampler(Integer start, Integer end, Integer step)
+    {
+        this.start = start;
+        this.end = end;
+        this.step = step;
+        this.next = start;
+    }
+
+    /**
+     * Returns the pending value and advances the sequence by {@code step}.
+     *
+     * @return the next value
+     * @throws Exception once the pending value is no longer below {@code end}
+     */
+    public Integer sample() throws Exception
+    {
+        if(end != null && next >= end)
+        {
+            throw new Exception("All values have been sampled");
+        }
+
+        Integer emitted = next;
+        next = emitted + step;
+        return emitted;
+    }
+
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
new file mode 100644
index 0000000..c447692
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ConditionalProbabilityDensityFunction;
+
+
+/**
+ * Rejection sampler whose acceptance probability depends on the previously
+ * accepted state as well as on the proposed one.
+ *
+ * @param <T> type of the sampled states
+ */
+public class StatefulMonteCarloSampler<T> implements Sampler<T>
+{
+    private final Sampler<T> stateSampler;
+    private final Random rng;
+    private final ConditionalProbabilityDensityFunction<T, T> acceptancePDF;
+    private T currentState;
+
+    /**
+     * @param stateGenerator source of proposed states
+     * @param acceptancePDF gives the acceptance probability of a proposal
+     *        given the current state
+     * @param initialState state used as "current" for the first sample
+     * @param seedFactory supplies the seed for this sampler's random generator
+     */
+    public StatefulMonteCarloSampler(Sampler<T> stateGenerator,
+            ConditionalProbabilityDensityFunction<T, T> acceptancePDF,
+            T initialState,
+            SeedFactory seedFactory)
+    {
+        this.acceptancePDF = acceptancePDF;
+        this.stateSampler = stateGenerator;
+
+        this.rng = new Random(seedFactory.getNextSeed());
+
+        this.currentState = initialState;
+    }
+
+    /**
+     * Keeps proposing states until one is accepted; a proposal is accepted
+     * when a fresh uniform draw is below its acceptance probability given the
+     * current state. The accepted proposal becomes the new current state.
+     * The loop has no iteration limit.
+     *
+     * @return the accepted state
+     * @throws Exception if the underlying sampler fails
+     */
+    public T sample() throws Exception
+    {
+        T candidate;
+        boolean accepted;
+        do
+        {
+            candidate = this.stateSampler.sample();
+            // Probability is evaluated before the random draw is taken.
+            double acceptance = acceptancePDF.probability(candidate, currentState);
+            double draw = rng.nextDouble();
+            accepted = draw < acceptance;
+        }
+        while(!accepted);
+
+        this.currentState = candidate;
+        return candidate;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
new file mode 100644
index 0000000..3fdf550
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+/**
+ * Draws integers uniformly from {@code lowerbound} to {@code upperbound},
+ * both inclusive.
+ */
+public class UniformIntSampler implements Sampler<Integer>
+{
+    int lowerbound;
+    int upperbound;
+    Random rng;
+
+    /**
+     * @param lowerbound smallest value that can be returned
+     * @param upperbound largest value that can be returned (inclusive)
+     * @param seedFactory supplies the seed for this sampler's random generator
+     */
+    public UniformIntSampler(int lowerbound, int upperbound, SeedFactory seedFactory)
+    {
+        this.lowerbound = lowerbound;
+        this.upperbound = upperbound;
+        this.rng = new Random(seedFactory.getNextSeed());
+    }
+
+    /**
+     * @return a value in {@code [lowerbound, upperbound]}
+     */
+    public Integer sample()
+    {
+        // The span is computed in int arithmetic, so bounds further apart
+        // than Integer.MAX_VALUE - 1 overflow here.
+        int span = upperbound - lowerbound + 1;
+        return lowerbound + rng.nextInt(span);
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
new file mode 100644
index 0000000..3f78471
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class UniformSampler implements Sampler<Double>
+{
+ final Random rng;
+ final double lowerbound;
+ final double upperbound;
+
+ public UniformSampler(SeedFactory seedFactory)
+ {
+ rng = new Random(seedFactory.getNextSeed());
+ lowerbound = 0.0;
+ upperbound = 1.0;
+ }
+
+ public UniformSampler(double lowerbound, double upperbound, SeedFactory seedFactory)
+ {
+ rng = new Random(seedFactory.getNextSeed());
+ this.lowerbound = lowerbound;
+ this.upperbound = upperbound;
+ }
+
+ public Double sample()
+ {
+ return (upperbound - lowerbound) * rng.nextDouble() + lowerbound;
+ }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
new file mode 100644
index 0000000..21d0109
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.wfs;
+
+/**
+ * A weight function over values of type T whose result also depends on a
+ * second, conditioning value of type S.
+ *
+ * @param <T> type of the datum being weighted
+ * @param <S> type of the value the weight is conditioned on
+ */
+public interface ConditionalWeightFunction<T, S>
+{
+    /**
+     * Computes the weight of a datum for a conditioning value.
+     *
+     * @param datum value to weigh
+     * @param given conditioning value
+     * @return the weight of datum for the supplied conditioning value
+     */
+    double weight(T datum, S given);
+
+    /**
+     * Produces a single-argument weight function for a conditioning value.
+     * NOTE(review): presumably the result behaves like weight(datum, given)
+     * with given held fixed; confirm against the implementations.
+     *
+     * @param given conditioning value
+     * @return a weight function over T alone
+     */
+    WeightFunction<T> fixConditional(S given);
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
new file mode 100644
index 0000000..1145043
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.wfs;
+
+/**
+ * Assigns a numeric weight to values of type T.
+ *
+ * @param <T> type of the datum being weighted
+ */
+public interface WeightFunction<T>
+{
+    /**
+     * Computes the weight of a single datum.
+     *
+     * @param datum value to weigh
+     * @return the weight assigned to datum
+     */
+    double weight(T datum);
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
new file mode 100644
index 0000000..bb97a60
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+/**
+ * Discrete probability distribution over zipcodes for the customers of a
+ * single store. Each zipcode is weighted by an exponential decay of its
+ * distance from the store, and the weights are divided by their sum.
+ */
+public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRecord>
+{
+    private final Map<ZipcodeRecord, Double> pdf;
+
+    /**
+     * Builds the distribution for one store.
+     *
+     * @param zipcodes candidate customer locations
+     * @param store store whose location the distances are measured from
+     * @param averageDistance reciprocal of the decay rate used for the weights
+     */
+    public CustomerLocationPDF(List<ZipcodeRecord> zipcodes, Store store, double averageDistance)
+    {
+        // NOTE(review): build is protected and overridable, so a subclass
+        // override runs before the subclass constructor body.
+        this.pdf = build(zipcodes, store, averageDistance);
+    }
+
+    /**
+     * Computes the normalized weight of every zipcode in the table.
+     *
+     * @param zipcodeTable zipcodes to assign probabilities to
+     * @param store store whose location the distances are measured from
+     * @param averageDistance reciprocal of the decay rate
+     * @return immutable map from zipcode to its share of the total weight
+     */
+    protected ImmutableMap<ZipcodeRecord, Double> build(List<ZipcodeRecord> zipcodeTable,
+            Store store, double averageDistance)
+    {
+        final double rate = 1.0 / averageDistance;
+
+        // First pass: raw weight rate * exp(-rate * distance) per zipcode,
+        // kept in list order, plus the running total used to normalize.
+        final double[] rawWeights = new double[zipcodeTable.size()];
+        double weightSum = 0.0;
+        int position = 0;
+        for(ZipcodeRecord zipcode : zipcodeTable)
+        {
+            double distance = zipcode.distance(store.getLocation());
+            double raw = rate * Math.exp(-rate * distance);
+
+            rawWeights[position] = raw;
+            position++;
+            weightSum += raw;
+        }
+
+        // Second pass: divide each raw weight by the total.
+        Map<ZipcodeRecord, Double> normalized = Maps.newHashMap();
+        position = 0;
+        for(ZipcodeRecord zipcode : zipcodeTable)
+        {
+            normalized.put(zipcode, rawWeights[position] / weightSum);
+            position++;
+        }
+
+        return ImmutableMap.copyOf(normalized);
+    }
+
+    /**
+     * Looks up the probability of a zipcode.
+     *
+     * @param record zipcode to look up
+     * @return the stored probability, or 0.0 if the zipcode is not in the distribution
+     */
+    @Override
+    public double probability(ZipcodeRecord record)
+    {
+        Double known = this.pdf.get(record);
+        if(known == null)
+        {
+            return 0.0;
+        }
+
+        return known.doubleValue();
+    }
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
new file mode 100644
index 0000000..4e5689c
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+/**
+ * Assembles Customer instances from independent samplers for the id, the
+ * first and last name and the store, plus a location sampler that is
+ * conditioned on the sampled store.
+ */
+public class CustomerSampler implements Sampler<Customer>
+{
+    private final Sampler<Integer> idSampler;
+    private final Sampler<String> firstNameSampler;
+    private final Sampler<String> lastNameSampler;
+    private final Sampler<Store> storeSampler;
+    private final ConditionalSampler<ZipcodeRecord, Store> locationSampler;
+
+    /**
+     * @param idSampler source of customer ids
+     * @param firstNameSampler source of first names
+     * @param lastNameSampler source of last names
+     * @param storeSampler source of the store assigned to each customer
+     * @param locationSampler source of a zipcode given the assigned store
+     */
+    public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler,
+            Sampler<String> lastNameSampler, Sampler<Store> storeSampler,
+            ConditionalSampler<ZipcodeRecord, Store> locationSampler)
+    {
+        this.idSampler = idSampler;
+        this.firstNameSampler = firstNameSampler;
+        this.lastNameSampler = lastNameSampler;
+        this.storeSampler = storeSampler;
+        this.locationSampler = locationSampler;
+    }
+
+    /**
+     * Draws one customer. The component samplers are consulted in the order
+     * id, first name, last name, store, location.
+     *
+     * @return a new Customer built from the sampled components
+     * @throws Exception if any of the component samplers fails
+     */
+    @Override
+    public Customer sample() throws Exception
+    {
+        Integer customerId = idSampler.sample();
+
+        String firstName = firstNameSampler.sample();
+        String lastName = lastNameSampler.sample();
+        Pair<String, String> fullName = Pair.create(firstName, lastName);
+
+        // The location is drawn from the distribution of the sampled store.
+        Store assignedStore = storeSampler.sample();
+        ZipcodeRecord home = locationSampler.sample(assignedStore);
+
+        return new Customer(customerId, fullName, assignedStore, home);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
new file mode 100644
index 0000000..209b099
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.Constants;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.SequenceSampler;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Wires together the samplers needed to generate customers for a fixed list
+ * of stores.
+ */
+public class CustomerSamplerBuilder
+{
+    private final List<Store> stores;
+    private final InputData inputData;
+    private final SeedFactory seedFactory;
+
+    /**
+     * @param stores stores that customers can be assigned to
+     * @param inputData source of the zipcode table and the name lists
+     * @param seedFactory passed to every random sampler that is created
+     */
+    public CustomerSamplerBuilder(List<Store> stores, InputData inputData, SeedFactory seedFactory)
+    {
+        this.stores = stores;
+        this.inputData = inputData;
+        this.seedFactory = seedFactory;
+    }
+
+    /**
+     * Creates one zipcode sampler per store and wraps them in a conditional
+     * sampler that dispatches on the store.
+     *
+     * @return sampler of customer zipcodes given a store
+     */
+    protected ConditionalSampler<ZipcodeRecord, Store> buildLocationSampler()
+    {
+        final Map<Store, Sampler<ZipcodeRecord>> samplersByStore = Maps.newHashMap();
+        for(Store store : stores)
+        {
+            samplersByStore.put(store, newLocationSampler(store));
+        }
+
+        return new ConditionalSampler<ZipcodeRecord, Store>()
+        {
+            public ZipcodeRecord sample(Store store) throws Exception
+            {
+                Sampler<ZipcodeRecord> forStore = samplersByStore.get(store);
+                return forStore.sample();
+            }
+
+            public Sampler<ZipcodeRecord> fixConditional(Store store)
+            {
+                return samplersByStore.get(store);
+            }
+        };
+    }
+
+    /**
+     * Creates the zipcode sampler for a single store, using a
+     * CustomerLocationPDF built from the zipcode table.
+     */
+    private Sampler<ZipcodeRecord> newLocationSampler(Store store)
+    {
+        ProbabilityDensityFunction<ZipcodeRecord> distanceWeighted = new CustomerLocationPDF(
+                inputData.getZipcodeTable(), store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE);
+        Sampler<ZipcodeRecord> sampler = RouletteWheelSampler.create(
+                inputData.getZipcodeTable(), distanceWeighted, seedFactory);
+
+        return sampler;
+    }
+
+    /**
+     * Builds the complete customer sampler.
+     *
+     * @return sampler producing Customer instances
+     */
+    public Sampler<Customer> build()
+    {
+        ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores);
+
+        // The samplers are created in a fixed order: first name, last name,
+        // store, then the per-store location samplers. Each creation is
+        // handed the shared seed factory.
+        Sampler<Integer> ids = new SequenceSampler();
+        Sampler<String> firstNames = RouletteWheelSampler.create(
+                inputData.getNames().getFirstNames(), seedFactory);
+        Sampler<String> lastNames = RouletteWheelSampler.create(
+                inputData.getNames().getLastNames(), seedFactory);
+        Sampler<Store> storeChoice = RouletteWheelSampler.create(stores, storePDF, seedFactory);
+        ConditionalSampler<ZipcodeRecord, Store> locations = buildLocationSampler();
+
+        return new CustomerSampler(ids, firstNames, lastNames, storeChoice, locations);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
new file mode 100644
index 0000000..400b02a
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+public class CustomerStorePDF implements ProbabilityDensityFunction<Store>
+{
+ double populationSum = 0.0;
+
+ public CustomerStorePDF(List<Store> stores)
+ {
+ for(Store store : stores)
+ {
+ populationSum += (double) store.getLocation().getPopulation();
+ }
+ }
+
+ @Override
+ public double probability(Store store)
+ {
+ return ((double) store.getLocation().getPopulation()) / populationSum;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
new file mode 100644
index 0000000..cae8794
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.purchase;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovModel;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovProcess;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
+
+/**
+ * Purchasing model that keeps one Markov model of products per product
+ * category.
+ */
+public class MarkovPurchasingModel implements PurchasingModel
+{
+
+    private static final long serialVersionUID = 3098355461347511619L;
+    ImmutableMap<String, MarkovModel<Product>> productCategoryProfiles;
+
+    /**
+     * @param productCategoryProfiles Markov model per product category; copied
+     *        into an immutable map
+     */
+    public MarkovPurchasingModel(Map<String, MarkovModel<Product>> productCategoryProfiles)
+    {
+        this.productCategoryProfiles = ImmutableMap.copyOf(productCategoryProfiles);
+    }
+
+    /**
+     * @return the product categories that have a profile
+     */
+    @Override
+    public ImmutableSet<String> getProductCategories()
+    {
+        return productCategoryProfiles.keySet();
+    }
+
+    /**
+     * @param productCategory category to look up
+     * @return the Markov model stored for the category, or null if there is none
+     */
+    public MarkovModel<Product> getProfile(String productCategory)
+    {
+        return productCategoryProfiles.get(productCategory);
+    }
+
+    /**
+     * Creates one MarkovProcess per product category from the stored models.
+     *
+     * @param seedFactory passed to every process that is created
+     * @return the processes keyed by product category
+     */
+    @Override
+    public PurchasingProcesses buildProcesses(SeedFactory seedFactory)
+    {
+        Map<String, Sampler<Product>> samplersByCategory = Maps.newHashMap();
+        for(String productCategory : getProductCategories())
+        {
+            Sampler<Product> process =
+                    new MarkovProcess<Product>(getProfile(productCategory), seedFactory);
+            samplersByCategory.put(productCategory, process);
+        }
+
+        return new PurchasingProcesses(samplersByCategory);
+    }
+}