You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by rn...@apache.org on 2015/03/30 19:04:34 UTC

[12/13] bigtop git commit: BIGTOP-1783: Import BigPetStore Data Generator into BigTop

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
new file mode 100644
index 0000000..91db010
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategoryBuilder;
+
+import com.google.common.collect.Lists;
+import com.google.gson.Gson;
+
+public class ProductsReader // Reads a JSON array of product categories from a stream and builds ProductCategory objects.
+{
+	InputStream path; // source of the JSON document; an InputStream despite the name
+	
+	public ProductsReader(InputStream path) // only stores the stream; nothing is read until readData()
+	{
+		this.path = path;
+	}
+	
+	protected Product parseProduct(Object productJson) // wraps one parsed JSON object in a Product
+	{
+		Map<String, Object> fields = (Map<String, Object>) productJson; // unchecked cast: ClassCastException if productJson is not a Map
+		Product product = new Product(fields);
+		return product;
+	}
+	
+	protected ProductCategory parseProductCategory(Object productCategoryObject) throws Exception // builds one category; throws Exception on an unknown key or species
+	{
+		Map<String, Object> jsonProductCategory = (Map<String, Object>) productCategoryObject; // unchecked cast of the parsed JSON object
+
+		ProductCategoryBuilder builder = new ProductCategoryBuilder();
+		
+		for(Map.Entry<String, Object> entry : jsonProductCategory.entrySet()) // each JSON key is dispatched to the matching builder call
+		{
+			Object key = entry.getKey();
+			Object value = entry.getValue();
+
+			if(key.equals("category"))
+			{
+				builder.setCategory( (String) entry.getValue());
+			}
+			else if(key.equals("species"))
+			{
+				for(String species : (List<String>) value) // only "dog" and "cat" are accepted
+				{
+					if(species.equals("dog"))
+					{
+						builder.addApplicableSpecies(PetSpecies.DOG);
+					}
+					else if(species.equals("cat"))
+					{
+						builder.addApplicableSpecies(PetSpecies.CAT);
+					}
+					else
+					{
+						throw new Exception("Invalid species " + species + " encountered when parsing product categories JSON.");
+					}
+				}
+			}
+			else if(key.equals("trigger_transaction"))
+			{
+				builder.setTriggerTransaction((Boolean) entry.getValue()); // ClassCastException if the value is not a Boolean
+			}
+			else if(key.equals("fields"))
+			{
+				for(String fieldName : (List<String>) value)
+				{
+					builder.addFieldName(fieldName);
+				}
+			}
+			else if(key.equals("daily_usage_rate"))
+			{
+				builder.setDailyUsageRate((Double) value); // numeric values here and below must already be Double, otherwise ClassCastException
+			}
+			else if(key.equals("base_amount_used_average"))
+			{
+				builder.setAmountUsedPetPetAverage((Double) value);
+			}
+			else if(key.equals("base_amount_used_variance"))
+			{
+				builder.setAmountUsedPetPetVariance((Double) value);
+			}
+			else if(key.equals("transaction_trigger_rate"))
+			{
+				builder.setTriggerTransactionRate((Double) value);
+			}
+			else if(key.equals("transaction_purchase_rate"))
+			{
+				builder.setTriggerPurchaseRate((Double) value);
+			}
+			else if(key.equals("items"))
+			{
+				for(Object productJson : (List<Object>) value) // every element of "items" becomes a Product
+				{
+					Product product = parseProduct(productJson);
+					builder.addProduct(product);
+				}
+			}
+			else
+			{
+				throw new Exception("Invalid field " + key + " encountered when parsing product categories JSON."); // unknown keys are rejected rather than ignored
+			}
+			
+		}
+		
+		return builder.build();
+	}
+	
+	public List<ProductCategory> readData() throws Exception // parses the whole stream and returns one ProductCategory per top-level array element
+	{
+		Gson gson = new Gson();
+		
+		Reader reader = new InputStreamReader(path); // NOTE(review): no charset given, so the platform default is used - confirm the data is ASCII/UTF-8 safe
+		Object json = gson.fromJson(reader, Object.class); // untyped parse; the structure is checked only by the casts below
+		
+		List<Object> productCategoryObjects = (List<Object>) json; // the top level of the document must be a JSON array
+
+		List<ProductCategory> productCategories = Lists.newArrayList();
+		
+		for(Object obj : productCategoryObjects)
+		{
+			ProductCategory productCategory = parseProductCategory(obj);
+			productCategories.add(productCategory);
+		}
+		
+		reader.close(); // NOTE(review): not in a finally block, so the reader stays open if parsing above throws
+		
+		return productCategories;
+		
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
new file mode 100644
index 0000000..a4ccdd6
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+public class ZipcodeReader // Joins three zipcode inputs (incomes, populations, coordinates) into a list of ZipcodeRecord.
+{
+	private static class ZipcodeLocationRecord // immutable holder for one row of the coordinates input
+	{
+		public final Pair<Double, Double> coordinates; // (latitude, longitude), in that order - see readCoordinates
+		public final String state;
+		public final String city;
+		
+		public ZipcodeLocationRecord(Pair<Double, Double> coordinates, // note: parameters are (coordinates, city, state), unlike the field declaration order
+				String city, String state)
+		{
+			this.coordinates = coordinates;
+			this.city = city;
+			this.state = state;
+		}
+	}
+	
+	InputStream zipcodeIncomesFile = null; // all three streams must be set through the setters before readData() is called
+	InputStream zipcodePopulationFile = null;
+	InputStream zipcodeCoordinatesFile = null;
+	
+	public void setIncomesFile(InputStream path) // stream consumed by readIncomeData
+	{
+		this.zipcodeIncomesFile = path;
+	}
+	
+	public void setPopulationFile(InputStream path) // stream consumed by readPopulationData
+	{
+		this.zipcodePopulationFile = path;
+	}
+	
+	public void setCoordinatesFile(InputStream path) // stream consumed by readCoordinates
+	{
+		this.zipcodeCoordinatesFile = path;
+	}
+	
+	private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException // zipcode -> income value from column 5; NOTE(review): nothing visible here throws FileNotFoundException
+	{
+		Scanner scanner = new Scanner(path);
+		
+		// skip headers
+		scanner.nextLine();
+		scanner.nextLine();
+		
+		Map<String, Double> entries = Maps.newHashMap();
+		while(scanner.hasNextLine())
+		{
+			String line = scanner.nextLine().trim();
+			String[] cols = line.split(","); // NOTE(review): no bounds checks; a blank or short line would throw ArrayIndexOutOfBoundsException below
+			// zipcodes are in the form "ZCTA5 XXXXX"
+			String zipcode = cols[2].split(" ")[1].trim(); // keeps the second space-separated token of column 2
+			try
+			{
+				double medianHouseholdIncome = Integer.parseInt(cols[5].trim()); // parsed as int, then widened to double
+				entries.put(zipcode, medianHouseholdIncome);
+			}
+			catch(NumberFormatException e) // rows whose column 5 is not an integer are dropped silently
+			{
+				
+			}
+		}
+		
+		scanner.close();
+		
+		return ImmutableMap.copyOf(entries);
+	}
+	
+	private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException // zipcode -> population; duplicates keep the largest value
+	{
+		Scanner scanner = new Scanner(path);
+		
+		// skip header
+		scanner.nextLine();
+		
+		Map<String, Long> entries = Maps.newHashMap();
+		while(scanner.hasNextLine())
+		{
+			String line = scanner.nextLine().trim();
+			
+			if(line.length() == 0) // blank lines are ignored
+				continue;
+			
+			String[] cols = line.split(",");
+			
+			String zipcode = cols[0].trim();
+			Long population = Long.parseLong(cols[1].trim()); // NumberFormatException propagates here, unlike in readIncomeData
+			
+			if(entries.containsKey(zipcode)) // a zipcode seen before keeps the maximum of the old and new population
+			{
+				entries.put(zipcode, Math.max(entries.get(zipcode), population));
+			}
+			else
+			{
+				entries.put(zipcode, population);
+			}
+		}
+		
+		scanner.close();
+		
+		return ImmutableMap.copyOf(entries);
+	}
+	
+	private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException // zipcode -> location; columns are zipcode, state, latitude, longitude, city
+	{
+		Scanner scanner = new Scanner(path);
+		
+		// skip header
+		scanner.nextLine();
+		
+		Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap();
+		while(scanner.hasNextLine())
+		{
+			String line = scanner.nextLine().trim(); // NOTE(review): no blank-line guard here; an empty line would fail in the substring calls below
+			
+			String[] cols = line.split(", "); // separator is comma followed by a space
+			
+			// remove quote marks
+			String zipcode = cols[0].substring(1, cols[0].length() - 1); // drops the first and last character of each column
+			String state = cols[1].substring(1, cols[1].length() - 1);
+			Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1));
+			Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1));
+			String city = cols[4].substring(1, cols[4].length() - 1);
+			
+			Pair<Double, Double> coords = new Pair<Double, Double>(latitude, longitude);
+
+			ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, state);
+			
+			entries.put(zipcode, record); // a repeated zipcode overwrites the earlier row
+		}
+		
+		scanner.close();
+		
+		return ImmutableMap.copyOf(entries);
+	}
+	
+	public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException // returns one record per zipcode present in all three inputs
+	{
+		ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile);
+		ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile);
+		ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile);
+		
+		Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); // intersection of the three key sets
+		zipcodeSubset.retainAll(populations.keySet());
+		zipcodeSubset.retainAll(coordinates.keySet());
+		
+		List<ZipcodeRecord> table = new Vector<ZipcodeRecord>();
+		for(String zipcode : zipcodeSubset) // HashSet iteration, so the output order is unspecified
+		{
+			ZipcodeRecord record = new ZipcodeRecord(zipcode, 
+					coordinates.get(zipcode).coordinates, 
+					coordinates.get(zipcode).city,
+					coordinates.get(zipcode).state,
+					incomes.get(zipcode),
+					populations.get(zipcode));
+			table.add(record);
+		}
+		
+		return ImmutableList.copyOf(table);
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
new file mode 100644
index 0000000..aea004e
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework;
+
+import java.util.Random;
+
+public class SeedFactory // Hands out a stream of long seed values drawn from a single java.util.Random.
+{
+	Random rng; // generator that every seed is drawn from
+	
+	public SeedFactory() // unseeded: uses Random's default seeding, so the sequence differs between runs
+	{
+		rng = new Random();
+	}
+	
+	public SeedFactory(long seed) // seeded: the same seed yields the same sequence of getNextSeed() values
+	{
+		rng = new Random(seed);
+	}
+	
+	public long getNextSeed() // returns the next long from rng; each call advances the generator
+	{
+		return rng.nextLong();
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
new file mode 100644
index 0000000..cf2a40d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.io.Serializable;
+import java.util.Map;
+
+public class MarkovModel<T> implements Serializable // Value holder for Markov transition weights and start-state weights over states of type T.
+{
+	final Map<T, Map<T, Double>> transitionWeights; // source state -> (destination state -> weight)
+	final Map<T, Double> startWeights; // state -> weight of starting in that state
+	
+	public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights) // stores the given maps by reference; no copy or validation is done
+	{
+		this.transitionWeights = transitionWeights;
+		this.startWeights = startWeights;
+	}
+
+	public Map<T, Map<T, Double>> getTransitionWeights() // returns the same map instance passed to the constructor
+	{
+		return transitionWeights;
+	}
+
+	public Map<T, Double> getStartWeights() // returns the same map instance passed to the constructor
+	{
+		return startWeights;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
new file mode 100644
index 0000000..861c0ef
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableTable;
+
+public class MarkovModelBuilder<S> // Accumulates start weights and transition weights, then produces a MarkovModel.
+{
+	ImmutableTable.Builder<S, S, Double> transitionWeights; // (from state, to state) -> weight
+	ImmutableMap.Builder<S, Double> startWeights; // state -> start weight
+	
+	public MarkovModelBuilder() // starts with empty Guava builders
+	{
+		transitionWeights = ImmutableTable.builder();
+		startWeights = ImmutableMap.builder();
+	}
+	
+	public static <T> MarkovModelBuilder<T> create() // static factory equivalent to the public constructor
+	{
+		return new MarkovModelBuilder<T>();
+	}
+	
+	public void addStartState(S state, double weight) // records the start weight for a state
+	{
+		startWeights.put(state, weight);
+	}
+	
+	public void addTransition(S state1, S state2, double weight) // records the weight of moving from state1 to state2
+	{
+		transitionWeights.put(state1, state2, weight);
+	}
+	
+	public MarkovModel<S> build() // NOTE(review): Guava's immutable builders may reject duplicate keys at build() - confirm callers never add the same entry twice
+	{
+		return new MarkovModel<S>(transitionWeights.build().rowMap(), startWeights.build()); // rowMap() exposes the table as from-state -> (to-state -> weight)
+	}
+	
+	
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
new file mode 100644
index 0000000..2a72e65
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableTable;
+
+public class MarkovProcess<T> implements Sampler<T> // Stateful sampler that walks a MarkovModel, returning one state per sample() call.
+{
+	final ImmutableMap<T, Sampler<T>> transitionSamplers; // one sampler per source state that appears in the transition table
+	final Sampler<T> startStateSampler; // used only while currentState is null
+	
+	T currentState; // last state returned by sample(); null until the first call
+	
+	
+	public MarkovProcess(MarkovModel<T> model, SeedFactory factory) // builds the start sampler and one transition sampler per source state
+	{
+		Map<T, Map<T, Double>> transitionTable = model.getTransitionWeights();
+		
+		startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory);
+		
+		ImmutableMap.Builder<T, Sampler<T>> builder = ImmutableMap.builder();
+		for(Map.Entry<T, Map<T, Double>> entry : transitionTable.entrySet())
+		{
+			builder.put(entry.getKey(), RouletteWheelSampler.create(entry.getValue(), factory)); // sampler built from this state's outgoing weights
+		}
+		
+		
+		this.transitionSamplers = builder.build();
+		
+		currentState = null;
+	}
+	
+	public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory) // static factory equivalent to the constructor
+	{
+		return new MarkovProcess<T>(model, factory);
+	}
+	
+	public T sample() throws Exception // first call draws a start state; later calls draw the successor of the previous state
+	{
+		if(currentState == null)
+		{
+			currentState = startStateSampler.sample();
+			return currentState;
+		}
+		
+		currentState = transitionSamplers.get(currentState).sample(); // NOTE(review): get() is null for a state with no outgoing transitions, which would throw NullPointerException
+		return currentState;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
new file mode 100644
index 0000000..f879870
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+public interface ConditionalProbabilityDensityFunction<T, S> // Density over T values that also takes a second value of type S.
+{
+	public double probability(T datum, S conditionalDatum); // presumably the density of datum given conditionalDatum - confirm against implementations
+	
+	public ProbabilityDensityFunction<T> fixConditional(S conditionalDatum); // presumably returns a single-argument PDF with conditionalDatum held fixed - confirm against implementations
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
new file mode 100644
index 0000000..9d0d6f2
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableMap;
+
+public class DiscretePDF<T> implements ProbabilityDensityFunction<T> // Probability lookup backed by an immutable value -> probability map.
+{
+	private final ImmutableMap<T, Double> probabilities; // snapshot taken at construction
+	
+	public DiscretePDF(Map<T, Double> probabilities) // copies the map; values are stored as given, with no normalization or validation
+	{
+		this.probabilities = ImmutableMap.copyOf(probabilities);
+	}
+	
+	public Set<T> getData() // the set of values that have an explicit probability
+	{
+		return probabilities.keySet();
+	}
+	
+	public double probability(T value) // stored probability for value, or 0.0 when value is not in the map
+	{
+		if(probabilities.containsKey(value))
+		{
+			return probabilities.get(value);
+		}
+		
+		return 0.0;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
new file mode 100644
index 0000000..dcc1278
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+public class ExponentialPDF implements ProbabilityDensityFunction<Double> // Evaluates lambda * exp(-lambda * value).
+{
+	private final double lambda; // rate parameter; stored as given, not validated
+	
+	public ExponentialPDF(double lambda)
+	{
+		this.lambda = lambda;
+	}
+	
+	public double probability(Double value) // the formula is applied as-is for any value, including negative ones; a null value fails on unboxing
+	{
+		return lambda * Math.exp(-1.0 * value * lambda);
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
new file mode 100644
index 0000000..55ebc93
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+public class GaussianPDF implements ProbabilityDensityFunction<Double> // Evaluates the normal density with the given mean and standard deviation.
+{
+	private double mean; // assigned only in the constructor
+	private double std; // standard deviation; NOTE(review): not validated, and std == 0 leads to division by zero below
+	
+	public GaussianPDF(double mean, double std)
+	{
+		this.mean = mean;
+		this.std = std;
+	}
+	
+	public double probability(Double value) // exp(-(mean - value)^2 / (2 * std^2)) / (std * sqrt(2 * pi))
+	{
+		double diff = (mean - value) * (mean - value); // squared distance from the mean
+		double var = std * std;
+		double exp = Math.exp(-1.0 * diff / (2.0 * var));
+		
+		return exp / (std * Math.sqrt(2.0 * Math.PI));
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
new file mode 100644
index 0000000..fdf2db0
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.List;
+
+/**
+ * Product of several probability density functions over the same domain,
+ * divided by the sum of that product over the data set supplied at
+ * construction time.
+ */
+public class JointPDF<T> implements ProbabilityDensityFunction<T>
+{
+
+	double normalizationFactor;
+	ProbabilityDensityFunction<T>[] pdfs;
+
+	/**
+	 * @param data items over which the normalization factor is summed
+	 * @param pdfs density functions whose values are multiplied together
+	 */
+	public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs)
+	{
+		this.pdfs = pdfs;
+
+		double total = 0.0d;
+		for(T item : data)
+		{
+			total += unnormalized(item);
+		}
+		normalizationFactor = total;
+	}
+
+	/**
+	 * @param datum item to evaluate
+	 * @return product of all component densities at {@code datum}, divided
+	 *         by the normalization factor computed in the constructor
+	 */
+	@Override
+	public double probability(T datum)
+	{
+		return unnormalized(datum) / normalizationFactor;
+	}
+
+	/** Multiplies the values of every component density at the given item. */
+	private double unnormalized(T item)
+	{
+		double product = 1.0;
+		for(int i = 0; i < pdfs.length; i++)
+		{
+			product *= pdfs[i].probability(item);
+		}
+		return product;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
new file mode 100644
index 0000000..1b691ca
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Maps a datum to a probability (or density) value.
+ *
+ * @param <T> type of the items being evaluated
+ */
+public interface ProbabilityDensityFunction<T>
+{
+	/**
+	 * @param datum item to evaluate
+	 * @return the probability assigned to {@code datum}
+	 */
+	double probability(T datum);
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
new file mode 100644
index 0000000..ea8e77e
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Density function that returns the same probability for every datum.
+ */
+public class UniformPDF<T> implements ProbabilityDensityFunction<T>
+{
+	private final double probability;
+
+	/**
+	 * Uses {@code 1 / count} as the constant probability.
+	 *
+	 * @param count number of equally likely items
+	 */
+	public UniformPDF(long count)
+	{
+		this(1.0 / ((double) count));
+	}
+
+	/**
+	 * @param probability constant value returned for every datum
+	 */
+	public UniformPDF(double probability)
+	{
+		this.probability = probability;
+	}
+
+	/**
+	 * @param datum ignored
+	 * @return the constant probability fixed at construction time
+	 */
+	@Override
+	public double probability(T datum)
+	{
+		return this.probability;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
new file mode 100644
index 0000000..475b24d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.List;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Draws from a mixture of Gaussians, each given as a (mean, standard
+ * deviation) pair, and clamps the result to the range [min, max].
+ */
+public class BoundedMultiModalGaussianSampler implements Sampler<Double>
+{
+	ImmutableList<Pair<Double, Double>> distributions;
+
+	double min;
+	double max;
+	Random rng;
+
+	/**
+	 * @param distributions (mean, standard deviation) pairs, one per mode
+	 * @param min lower clamp applied to every sample
+	 * @param max upper clamp applied to every sample
+	 * @param seedFactory source of the seed for this sampler's generator
+	 */
+	public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory)
+	{
+		this.rng = new Random(seedFactory.getNextSeed());
+		this.distributions = ImmutableList.copyOf(distributions);
+
+		this.max = max;
+		this.min = min;
+	}
+
+	/**
+	 * Picks one mode with equal chance, draws a Gaussian value from it and
+	 * clamps that value first to the upper and then to the lower bound.
+	 */
+	@Override
+	public Double sample()
+	{
+		Pair<Double, Double> mode = distributions.get(rng.nextInt(distributions.size()));
+
+		double mean = mode.getFirst();
+		double std = mode.getSecond();
+
+		double draw = mean + rng.nextGaussian() * std;
+		double capped = Math.min(draw, this.max);
+
+		return Math.max(capped, this.min);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
new file mode 100644
index 0000000..54506e2
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Sampler whose output depends on a supplied conditional value.
+ *
+ * @param <T> type of the sampled values
+ * @param <S> type of the conditional value
+ */
+public interface ConditionalSampler<T, S>
+{
+	/**
+	 * @param conditional value the sample is conditioned on
+	 * @return a sampled value
+	 * @throws Exception if a value cannot be produced
+	 */
+	T sample(S conditional) throws Exception;
+
+	/**
+	 * @param conditional value to condition on
+	 * @return a {@link Sampler} for the given conditional
+	 * @throws Exception if the sampler cannot be created
+	 */
+	Sampler<T> fixConditional(S conditional) throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
new file mode 100644
index 0000000..82e4d2d
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Produces the arithmetic sequence start, start + step, start + 2 * step, ...
+ * while the next value is below the optional (exclusive) end.
+ */
+public class DoubleSequenceSampler implements Sampler<Double>
+{
+	Double start;
+	Double end;
+	Double step;
+	Double next;
+
+	/** Unbounded sequence starting at 0.0 with a step of 1.0. */
+	public DoubleSequenceSampler()
+	{
+		this(0.0, null, 1.0);
+	}
+
+	/** Unbounded sequence starting at {@code start} with a step of 1.0. */
+	public DoubleSequenceSampler(Double start)
+	{
+		this(start, null, 1.0);
+	}
+
+	/** Sequence from {@code start} up to (excluding) {@code end}, step 1.0. */
+	public DoubleSequenceSampler(Double start, Double end)
+	{
+		this(start, end, 1.0);
+	}
+
+	/**
+	 * @param start first value returned
+	 * @param end exclusive upper limit, or {@code null} for no limit
+	 * @param step increment added after every sample
+	 */
+	public DoubleSequenceSampler(Double start, Double end, Double step)
+	{
+		this.start = start;
+		this.end = end;
+		this.step = step;
+		this.next = start;
+	}
+
+	/**
+	 * @return the current value of the sequence, which is then advanced by
+	 *         one step
+	 * @throws Exception once the next value is no longer below the end
+	 */
+	@Override
+	public Double sample() throws Exception
+	{
+		// Written as a negated "<" so that NaN comparisons behave exactly
+		// like the plain "next < end" test.
+		if(end != null && !(next < end))
+		{
+			throw new Exception("All values have been sampled");
+		}
+
+		Double current = next;
+		next = current + step;
+		return current;
+	}
+
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
new file mode 100644
index 0000000..082f3ac
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class ExponentialSampler implements Sampler<Double>
+{
+	final private Random rng;
+	final private double lambda;
+	
+	public ExponentialSampler(double lambda, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.lambda = lambda;
+	}
+	
+	public Double sample()
+	{
+		return - Math.log(1.0 - rng.nextDouble()) / lambda;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
new file mode 100644
index 0000000..ed40cc8
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class GaussianSampler implements Sampler<Double>
+{
+	double mean;
+	double std;
+	Random rng;
+	
+	public GaussianSampler(double mean, double std, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.mean = mean;
+		this.std = std;
+	}
+	
+	public Double sample()
+	{
+		return rng.nextGaussian() * std + mean;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
new file mode 100644
index 0000000..0db8200
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+
+/**
+ * Repeatedly proposes states from an underlying sampler and returns the
+ * first one that is accepted, where acceptance means a uniform random
+ * number is below the acceptance probability of the proposed state.
+ */
+public class MonteCarloSampler<T> implements Sampler<T>
+{
+	private final Sampler<T> stateSampler;
+	private final Random rng;
+	private final ProbabilityDensityFunction<T> acceptancePDF;
+
+	/**
+	 * @param stateGenerator sampler that proposes candidate states
+	 * @param acceptancePDF gives the acceptance probability of a candidate
+	 * @param seedFactory source of the seed for the acceptance draws
+	 */
+	public MonteCarloSampler(Sampler<T> stateGenerator,
+			ProbabilityDensityFunction<T> acceptancePDF,
+			SeedFactory seedFactory)
+	{
+		this.stateSampler = stateGenerator;
+		this.acceptancePDF = acceptancePDF;
+
+		this.rng = new Random(seedFactory.getNextSeed());
+	}
+
+	/**
+	 * Loops until a proposed state is accepted; there is no iteration limit.
+	 *
+	 * @return the first accepted state
+	 * @throws Exception if the underlying state sampler fails
+	 */
+	@Override
+	public T sample() throws Exception
+	{
+		T candidate;
+		double acceptance;
+		do
+		{
+			candidate = stateSampler.sample();
+			acceptance = acceptancePDF.probability(candidate);
+		}
+		// Negated "<" keeps the loop going for NaN acceptance values too.
+		while(!(rng.nextDouble() < acceptance));
+
+		return candidate;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
new file mode 100644
index 0000000..72681f8
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.DiscretePDF;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Maps;
+
+public class RouletteWheelSampler<T> implements Sampler<T>
+{
+	Random rng;
+	final ImmutableList<Pair<T, Double>> wheel;
+	
+	public static <T> RouletteWheelSampler<T> create(Map<T, Double> domainWeights, SeedFactory factory)
+	{
+		return new RouletteWheelSampler<T>(domainWeights, factory);
+	}
+	
+	public static <T> RouletteWheelSampler<T> create(DiscretePDF<T> pdf, SeedFactory factory)
+	{
+		return new RouletteWheelSampler<T>(pdf.getData(), pdf, factory);
+	}
+	
+	public static <T> RouletteWheelSampler<T> create(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory)
+	{
+		return new RouletteWheelSampler<T>(data, pdf, factory);
+	}
+	
+	public static <T> RouletteWheelSampler<T> createUniform(Collection<T> data, SeedFactory factory)
+	{
+		Map<T, Double> pdf = Maps.newHashMap();
+		for(T datum : data)
+		{
+			pdf.put(datum, 1.0);
+		}
+		
+		return create(pdf, factory);
+	}
+	
+	public RouletteWheelSampler(Map<T, Double> domainWeights, SeedFactory factory)
+	{
+		this.rng = new Random(factory.getNextSeed());
+		this.wheel = this.normalize(domainWeights);
+	}
+	
+	public RouletteWheelSampler(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory)
+	{
+		this.rng = new Random(factory.getNextSeed());
+		
+		Map<T, Double> domainWeights = Maps.newHashMap();
+		for(T datum : data)
+		{
+			double prob = pdf.probability(datum);
+			domainWeights.put(datum, prob);
+		}
+		
+		this.wheel = this.normalize(domainWeights);
+	}
+	
+	private ImmutableList<Pair<T, Double>> normalize(Map<T, Double> domainWeights)
+	{
+		double weightSum = 0.0;
+		for(Map.Entry<T, Double> entry : domainWeights.entrySet())
+		{
+			weightSum += entry.getValue();
+		}
+		
+		double cumProb = 0.0;
+		ImmutableList.Builder<Pair<T, Double>> builder = ImmutableList.builder();
+		for(Map.Entry<T, Double> entry : domainWeights.entrySet())
+		{
+			double prob = entry.getValue() / weightSum;
+			cumProb += prob;
+			
+			builder.add(Pair.create(entry.getKey(), cumProb));
+		}
+		
+		return builder.build();
+	}
+	
+	public T sample()
+	{
+		double r = rng.nextDouble();
+		for(Pair<T, Double> cumProbPair : wheel)
+			if(r < cumProbPair.getSecond())
+				return cumProbPair.getFirst();
+		
+		throw new IllegalStateException("Invalid state -- RouletteWheelSampler should never fail to sample!");
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
new file mode 100644
index 0000000..08af7e0
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Source of sampled values.
+ *
+ * @param <T> type of the sampled values
+ */
+public interface Sampler<T>
+{
+	/**
+	 * @return the next sampled value
+	 * @throws Exception if a value cannot be produced
+	 */
+	T sample() throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
new file mode 100644
index 0000000..a81c846
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/SequenceSampler.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Produces the integer sequence start, start + step, start + 2 * step, ...
+ * while the next value is below the optional (exclusive) end.
+ */
+public class SequenceSampler implements Sampler<Integer>
+{
+	Integer start;
+	Integer end;
+	Integer step;
+	Integer next;
+
+	/** Unbounded sequence starting at 0 with a step of 1. */
+	public SequenceSampler()
+	{
+		this(0, null, 1);
+	}
+
+	/** Unbounded sequence starting at {@code start} with a step of 1. */
+	public SequenceSampler(Integer start)
+	{
+		this(start, null, 1);
+	}
+
+	/** Sequence from {@code start} up to (excluding) {@code end}, step 1. */
+	public SequenceSampler(Integer start, Integer end)
+	{
+		this(start, end, 1);
+	}
+
+	/**
+	 * @param start first value returned
+	 * @param end exclusive upper limit, or {@code null} for no limit
+	 * @param step increment added after every sample
+	 */
+	public SequenceSampler(Integer start, Integer end, Integer step)
+	{
+		this.start = start;
+		this.end = end;
+		this.step = step;
+		this.next = start;
+	}
+
+	/**
+	 * @return the current value of the sequence, which is then advanced by
+	 *         one step
+	 * @throws Exception once the next value is no longer below the end
+	 */
+	@Override
+	public Integer sample() throws Exception
+	{
+		if(end != null && next >= end)
+		{
+			throw new Exception("All values have been sampled");
+		}
+
+		Integer current = next;
+		next = current + step;
+		return current;
+	}
+
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
new file mode 100644
index 0000000..c447692
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ConditionalProbabilityDensityFunction;
+
+
+/**
+ * Repeatedly proposes states from an underlying sampler and returns the
+ * first accepted one. The acceptance probability of a proposal is
+ * evaluated against the most recently accepted state, which is replaced
+ * by every newly accepted proposal.
+ */
+public class StatefulMonteCarloSampler<T> implements Sampler<T>
+{
+	private final Sampler<T> stateSampler;
+	private final Random rng;
+	private final ConditionalProbabilityDensityFunction<T, T> acceptancePDF;
+	private T currentState;
+
+	/**
+	 * @param stateGenerator sampler that proposes candidate states
+	 * @param acceptancePDF gives the acceptance probability of a candidate
+	 *        together with the current state
+	 * @param initialState state used as "current" for the first sample
+	 * @param seedFactory source of the seed for the acceptance draws
+	 */
+	public StatefulMonteCarloSampler(Sampler<T> stateGenerator,
+			ConditionalProbabilityDensityFunction<T, T> acceptancePDF,
+			T initialState,
+			SeedFactory seedFactory)
+	{
+		this.stateSampler = stateGenerator;
+		this.acceptancePDF = acceptancePDF;
+
+		this.rng = new Random(seedFactory.getNextSeed());
+
+		this.currentState = initialState;
+	}
+
+	/**
+	 * Loops until a proposed state is accepted; there is no iteration limit.
+	 *
+	 * @return the accepted state, which also becomes the current state
+	 * @throws Exception if the underlying state sampler fails
+	 */
+	@Override
+	public T sample() throws Exception
+	{
+		for(;;)
+		{
+			T candidate = stateSampler.sample();
+			double acceptance = acceptancePDF.probability(candidate, currentState);
+
+			// Negated "<" so that NaN acceptance values are rejected.
+			if(!(rng.nextDouble() < acceptance))
+			{
+				continue;
+			}
+
+			currentState = candidate;
+			return candidate;
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
new file mode 100644
index 0000000..3fdf550
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+/**
+ * Samples integers uniformly from a closed interval.
+ */
+public class UniformIntSampler implements Sampler<Integer>
+{
+	int lowerbound;
+	int upperbound;
+	Random rng;
+
+	/**
+	 * Creates a sampler over {@code [lowerbound, upperbound]}; the upper
+	 * bound is inclusive.
+	 *
+	 * @param lowerbound smallest value that may be returned
+	 * @param upperbound largest value that may be returned
+	 * @param seedFactory supplies the seed of the internal random generator
+	 * @throws IllegalArgumentException if {@code upperbound < lowerbound}
+	 */
+	public UniformIntSampler(int lowerbound, int upperbound, SeedFactory seedFactory)
+	{
+		this.lowerbound = lowerbound;
+		this.upperbound = upperbound;
+		// Reject inverted bounds here instead of failing later inside sample().
+		range();
+		rng = new Random(seedFactory.getNextSeed());
+	}
+
+	/**
+	 * @return a value between the lower and upper bound, both inclusive
+	 * @throws IllegalArgumentException if the bounds are inverted
+	 */
+	@Override
+	public Integer sample()
+	{
+		long range = range();
+		if(range <= Integer.MAX_VALUE)
+		{
+			// Same draw as the original implementation, so seeded output is unchanged.
+			return rng.nextInt((int) range) + lowerbound;
+		}
+
+		// The span does not fit Random.nextInt(int); scale a double in [0, 1)
+		// instead. The min() guards against rounding up to the span itself.
+		long offset = Math.min((long) (rng.nextDouble() * range), range - 1L);
+		return (int) (lowerbound + offset);
+	}
+
+	/**
+	 * Number of distinct values in the interval, computed in {@code long}
+	 * because {@code upperbound + 1 - lowerbound} can overflow {@code int}.
+	 */
+	private long range()
+	{
+		long range = (long) upperbound - (long) lowerbound + 1L;
+		if(range < 1L)
+		{
+			throw new IllegalArgumentException("upperbound (" + upperbound
+					+ ") must not be less than lowerbound (" + lowerbound + ")");
+		}
+		return range;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
new file mode 100644
index 0000000..3f78471
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class UniformSampler implements Sampler<Double>
+{
+	final Random rng;
+	final double lowerbound;
+	final double upperbound;
+	
+	public UniformSampler(SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		lowerbound = 0.0;
+		upperbound = 1.0;
+	}
+	
+	public UniformSampler(double lowerbound, double upperbound, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.lowerbound = lowerbound;
+		this.upperbound = upperbound;
+	}
+	
+	public Double sample()
+	{
+		return (upperbound - lowerbound) * rng.nextDouble() + lowerbound;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
new file mode 100644
index 0000000..21d0109
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.wfs;
+
+/**
+ * A weight function whose result depends on both the value being weighed
+ * and a separate conditioning value.
+ *
+ * @param <T> type of the value being weighed
+ * @param <S> type of the conditioning value
+ */
+public interface ConditionalWeightFunction<T, S>
+{
+	/**
+	 * @param datum value to weigh
+	 * @param given conditioning value
+	 * @return weight assigned to {@code datum} for the supplied {@code given}
+	 */
+	double weight(T datum, S given);
+
+	/**
+	 * @param given conditioning value to fix
+	 * @return a single-argument weight function for {@code given};
+	 *         presumably it agrees with {@code weight(datum, given)} --
+	 *         confirm against the implementations
+	 */
+	WeightFunction<T> fixConditional(S given);
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
new file mode 100644
index 0000000..1145043
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.wfs;
+
+/**
+ * Assigns a numeric weight to a value.
+ *
+ * @param <T> type of the value being weighed
+ */
+public interface WeightFunction<T>
+{
+	/**
+	 * @param datum value to weigh
+	 * @return weight assigned to {@code datum}
+	 */
+	double weight(T datum);
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
new file mode 100644
index 0000000..bb97a60
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerLocationPDF.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+/**
+ * Discrete probability table over zipcodes for one store. Each zipcode is
+ * weighted by an exponential decay of its distance from the store's
+ * location, and the weights are divided by their total.
+ */
+public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRecord>
+{
+	private final Map<ZipcodeRecord, Double> pdf;
+
+	/**
+	 * @param zipcodes zipcodes that receive a probability
+	 * @param store store whose location the distances are measured from
+	 * @param averageDistance reciprocal of the decay rate applied to distance
+	 */
+	public CustomerLocationPDF(List<ZipcodeRecord> zipcodes, Store store, double averageDistance)
+	{
+		this.pdf = build(zipcodes, store, averageDistance);
+	}
+
+	/**
+	 * Computes the normalized probability table.
+	 *
+	 * @param zipcodeTable zipcodes that receive a probability
+	 * @param store store whose location the distances are measured from
+	 * @param averageDistance reciprocal of the decay rate applied to distance
+	 * @return immutable map from zipcode to its share of the total weight
+	 */
+	protected ImmutableMap<ZipcodeRecord, Double> build(List<ZipcodeRecord> zipcodeTable,
+			Store store, double averageDistance)
+	{
+		final double rate = 1.0 / averageDistance;
+
+		// First pass: raw weight per zipcode plus the running total.
+		Map<ZipcodeRecord, Double> rawWeights = Maps.newHashMap();
+		double weightSum = 0.0;
+		for(ZipcodeRecord zipcode : zipcodeTable)
+		{
+			double distance = zipcode.distance(store.getLocation());
+			double rawWeight = rate * Math.exp(-1.0 * rate * distance);
+			rawWeights.put(zipcode, rawWeight);
+			weightSum += rawWeight;
+		}
+
+		// Second pass: divide every weight by the total.
+		Map<ZipcodeRecord, Double> normalized = Maps.newHashMap();
+		for(ZipcodeRecord zipcode : zipcodeTable)
+		{
+			double share = rawWeights.get(zipcode) / weightSum;
+			normalized.put(zipcode, share);
+		}
+
+		return ImmutableMap.copyOf(normalized);
+	}
+
+	/**
+	 * @param record zipcode to look up
+	 * @return the stored probability, or 0.0 when the zipcode is not in the table
+	 */
+	public double probability(ZipcodeRecord record)
+	{
+		Double known = this.pdf.get(record);
+		if(known == null)
+		{
+			return 0.0;
+		}
+		return known;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
new file mode 100644
index 0000000..4e5689c
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+/**
+ * Produces {@link Customer} instances by combining independent samplers for
+ * id, first name, last name and store with a location sampler that is
+ * conditioned on the sampled store.
+ */
+public class CustomerSampler implements Sampler<Customer>
+{
+	private final Sampler<Integer> idSampler;
+	private final Sampler<String> firstNameSampler;
+	private final Sampler<String> lastNameSampler;
+	private final Sampler<Store> storeSampler;
+	private final ConditionalSampler<ZipcodeRecord, Store> locationSampler;
+
+	/**
+	 * @param idSampler source of customer ids
+	 * @param firstNameSampler source of first names
+	 * @param lastNameSampler source of last names
+	 * @param storeSampler source of the store assigned to each customer
+	 * @param locationSampler source of a zipcode given the sampled store
+	 */
+	public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler,
+			Sampler<String> lastNameSampler, Sampler<Store> storeSampler,
+			ConditionalSampler<ZipcodeRecord, Store> locationSampler)
+	{
+		this.idSampler = idSampler;
+		this.firstNameSampler = firstNameSampler;
+		this.lastNameSampler = lastNameSampler;
+		this.storeSampler = storeSampler;
+		this.locationSampler = locationSampler;
+	}
+
+	/**
+	 * Samples id, first name, last name, store and location, in that order,
+	 * and assembles them into a customer.
+	 *
+	 * @return the newly created customer
+	 * @throws Exception if any of the underlying samplers fails
+	 */
+	public Customer sample() throws Exception
+	{
+		Integer customerId = idSampler.sample();
+
+		String firstName = firstNameSampler.sample();
+		String lastName = lastNameSampler.sample();
+		Pair<String, String> fullName = Pair.create(firstName, lastName);
+
+		// The location depends on the store, so the store is drawn first.
+		Store assignedStore = storeSampler.sample();
+		ZipcodeRecord home = locationSampler.sample(assignedStore);
+
+		return new Customer(customerId, fullName, assignedStore, home);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
new file mode 100644
index 0000000..209b099
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.Constants;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.SequenceSampler;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Assembles a {@link Customer} sampler from a list of stores, the input
+ * data tables and a seed factory.
+ */
+public class CustomerSamplerBuilder
+{
+	private final List<Store> stores;
+	private final InputData inputData;
+	private final SeedFactory seedFactory;
+
+	/**
+	 * @param stores stores customers can be assigned to
+	 * @param inputData provides the zipcode table and the name lists
+	 * @param seedFactory passed to every sampler this builder creates
+	 */
+	public CustomerSamplerBuilder(List<Store> stores, InputData inputData, SeedFactory seedFactory)
+	{
+		this.stores = stores;
+		this.inputData = inputData;
+		this.seedFactory = seedFactory;
+	}
+
+	/**
+	 * Creates one zipcode sampler per store and exposes them through a
+	 * conditional sampler keyed by store. A store that is not in the
+	 * builder's list has no entry: {@code fixConditional} returns null for
+	 * it and {@code sample} fails with a NullPointerException.
+	 *
+	 * @return conditional sampler that picks a zipcode for a given store
+	 */
+	protected ConditionalSampler<ZipcodeRecord, Store> buildLocationSampler()
+	{
+		final Map<Store, Sampler<ZipcodeRecord>> samplersByStore = Maps.newHashMap();
+		for(Store candidate : stores)
+		{
+			ProbabilityDensityFunction<ZipcodeRecord> zipcodePDF = new CustomerLocationPDF(
+					inputData.getZipcodeTable(),
+					candidate,
+					Constants.AVERAGE_CUSTOMER_STORE_DISTANCE);
+			Sampler<ZipcodeRecord> zipcodeSampler = RouletteWheelSampler.create(
+					inputData.getZipcodeTable(), zipcodePDF, seedFactory);
+			samplersByStore.put(candidate, zipcodeSampler);
+		}
+
+		return new ConditionalSampler<ZipcodeRecord, Store>()
+		{
+			public ZipcodeRecord sample(Store store) throws Exception
+			{
+				Sampler<ZipcodeRecord> forStore = samplersByStore.get(store);
+				return forStore.sample();
+			}
+
+			public Sampler<ZipcodeRecord> fixConditional(Store store)
+			{
+				return samplersByStore.get(store);
+			}
+		};
+	}
+
+	/**
+	 * Creates the id, name, store and location samplers and wires them into
+	 * a {@link CustomerSampler}. The samplers are created in a fixed order
+	 * (first names, last names, stores, then locations); presumably that
+	 * order determines which seed each one receives from the shared seed
+	 * factory -- confirm before reordering.
+	 *
+	 * @return sampler producing customers
+	 */
+	public Sampler<Customer> build()
+	{
+		ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores);
+
+		Sampler<Integer> ids = new SequenceSampler();
+		Sampler<String> firstNames = RouletteWheelSampler.create(
+				inputData.getNames().getFirstNames(), seedFactory);
+		Sampler<String> lastNames = RouletteWheelSampler.create(
+				inputData.getNames().getLastNames(), seedFactory);
+		Sampler<Store> storeChoice = RouletteWheelSampler.create(stores, storePDF, seedFactory);
+		ConditionalSampler<ZipcodeRecord, Store> locations = buildLocationSampler();
+
+		return new CustomerSampler(ids, firstNames, lastNames, storeChoice, locations);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
new file mode 100644
index 0000000..400b02a
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.customer;
+
+import java.util.List;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+public class CustomerStorePDF implements ProbabilityDensityFunction<Store>
+{
+	double populationSum = 0.0;
+	
+	public CustomerStorePDF(List<Store> stores)
+	{
+		for(Store store : stores)
+		{
+			populationSum += (double) store.getLocation().getPopulation();
+		}
+	}
+	
+	@Override
+	public double probability(Store store)
+	{
+		return ((double) store.getLocation().getPopulation()) / populationSum;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
new file mode 100644
index 0000000..cae8794
--- /dev/null
+++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.generators.purchase;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovModel;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovProcess;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
+
+/**
+ * Purchasing model that keeps one Markov model of products per product
+ * category and turns each of them into a Markov process on request.
+ */
+public class MarkovPurchasingModel implements PurchasingModel
+{
+
+	private static final long serialVersionUID = 3098355461347511619L;
+	// Immutable snapshot of the profiles handed to the constructor.
+	ImmutableMap<String, MarkovModel<Product>> productCategoryProfiles;
+
+	/**
+	 * @param productCategoryProfiles Markov model per product category; the
+	 *        map is copied, so later changes to it are not seen by this model
+	 */
+	public MarkovPurchasingModel(Map<String, MarkovModel<Product>> productCategoryProfiles)
+	{
+		this.productCategoryProfiles = ImmutableMap.copyOf(productCategoryProfiles);
+	}
+
+	/**
+	 * @return the product categories that have a profile
+	 */
+	@Override
+	public ImmutableSet<String> getProductCategories()
+	{
+		return productCategoryProfiles.keySet();
+	}
+
+	/**
+	 * @param productCategory category to look up
+	 * @return the Markov model stored for the category, or null if there is none
+	 */
+	public MarkovModel<Product> getProfile(String productCategory)
+	{
+		return productCategoryProfiles.get(productCategory);
+	}
+
+	/**
+	 * Creates one Markov process per product category, visiting the
+	 * categories in the order returned by {@link #getProductCategories()}.
+	 *
+	 * @param seedFactory passed to every process that is created
+	 * @return the processes keyed by product category
+	 */
+	@Override
+	public PurchasingProcesses buildProcesses(SeedFactory seedFactory)
+	{
+		Map<String, Sampler<Product>> samplersByCategory = Maps.newHashMap();
+		for(String productCategory : getProductCategories())
+		{
+			MarkovModel<Product> profile = getProfile(productCategory);
+			Sampler<Product> process = new MarkovProcess<Product>(profile, seedFactory);
+			samplersByCategory.put(productCategory, process);
+		}
+
+		return new PurchasingProcesses(samplersByCategory);
+	}
+}