You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/04/02 06:11:42 UTC
[02/50] [abbrv] git commit: A few cleanup items when cleaning up and
standardizing the benchmark config file to look similar to the server config
file.
A few cleanup items when cleaning up and standardizing the benchmark config file to look similar to the server config file.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/fda09916
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/fda09916
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/fda09916
Branch: refs/heads/prestonc/hash_join
Commit: fda0991687db8719a4a105b9db20337fa58a03ec
Parents: b43fe33
Author: Preston Carman <pr...@apache.org>
Authored: Mon Feb 24 14:02:29 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Apr 1 20:56:23 2014 -0700
----------------------------------------------------------------------
.../scripts/weather_benchmark.py | 26 --------------
.../noaa-ghcn-daily/scripts/weather_cli.py | 11 ++----
.../noaa-ghcn-daily/scripts/weather_config.py | 4 +--
.../scripts/weather_data_files.py | 36 ++------------------
.../scripts/weather_download_files.py | 2 +-
.../noaa-ghcn-daily/scripts/weather_example.xml | 6 ++--
.../scripts/weather_example_cluster.xml | 22 ++++++------
.../src/main/resources/scripts/cluster_cli.py | 2 +-
8 files changed, 22 insertions(+), 87 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 68c93b3..6d9301e 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -23,7 +23,6 @@ import socket
from weather_config import *
from weather_data_files import *
-from collections import OrderedDict
# Weather data files created to manage the conversion process.
# Allows partition and picking up where you left off.
@@ -143,29 +142,6 @@ class WeatherBenchmark:
def get_partition_folders(self, base_path):
glob.glob(base_path + "partitions/d*_p*_i*")
-# test_data_path = self.base_path + "/" + self.test + "/data"
-# if not os.path.isdir(test_data_path):
-# os.makedirs(test_data_path)
-#
-# if self.test == "local_speed_up":
-# for i in range(virtual_partitions):
-# # one virtual partition per disk
-# split = 0
-# for j in range(len(base_paths)):
-# # for each disk look at each partition
-# for index, path in enumerate(partition_list):
-# offset = partitions * j
-# group = partitions / (i + 1)
-#
-# if (group) * split + offset <= index and index < (group) * (split + 1) + offset:
-# split += 1
-#
-# test_partition_path = test_data_path + "/p" + str(i + 1) + ".i" + str(split) + ".d" + str(j + 1)
-# if not os.path.isdir(test_partition_path):
-# os.makedirs(test_partition_path)
-# os.symlink(path, test_partition_path + "/index" + str(index))
-
-
def copy_query_files(self):
for test in self.dataset.get_tests():
if test in self.BENCHMARK_LOCAL_TESTS:
@@ -215,12 +191,10 @@ class WeatherBenchmark:
sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
def get_number_of_slices(self):
- print self.dataset
if len(self.dataset.get_tests()) == 0:
print "No test has been defined in config file."
else:
for test in self.dataset.get_tests():
- print "test = " + test
if test in self.BENCHMARK_LOCAL_TESTS:
return get_local_virtual_partitions(self.partitions)
elif test in self.BENCHMARK_CLUSTER_TESTS:
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 92145a2..0f529f2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -34,7 +34,6 @@ COMPRESSED = False
def main(argv):
append = False
max_records = 0
- package = "ghcnd_gsn"
process_file_name = ""
reset = False
section = "all"
@@ -79,7 +78,7 @@ def main(argv):
print 'Error: Argument must be a file name for --file (-f).'
sys.exit()
elif opt in ('-l', "--locality"):
- if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links", "queries", "statistics"):
+ if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links", "queries", "statistics"):
section = arg
else:
print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
@@ -90,12 +89,6 @@ def main(argv):
else:
print 'Error: Argument must be an integer for --max_station_files (-m).'
sys.exit()
- elif opt in ('-p', "--package"):
- if arg in ("all", "gsn", "hcn"):
- package = "ghcnd_" + arg
- else:
- print 'Error: Argument must be an string for one of the known weather packages: "all", "gsn", "hcn"'
- sys.exit()
elif opt == '-r':
reset = True
elif opt == '-u':
@@ -137,7 +130,7 @@ def main(argv):
download.download_all_files(reset)
# Unzip the required file.
- download.unzip_package(package, reset)
+ download.unzip_package(config.get_package(), reset)
# Create some basic paths for save files and references.
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
index 9d2e289..a6513c2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
@@ -51,10 +51,10 @@ class WeatherConfig:
# Node Specific Functions
# --------------------------------------------------------------------------
def get_node_ip(self, node):
- return self.get_text(node.getElementsByTagName("ip_address")[0])
+ return self.get_text(node.getElementsByTagName("cluster_ip")[0])
def get_node_name(self, node):
- return self.get_text(node.getElementsByTagName("name")[0])
+ return self.get_text(node.getElementsByTagName("id")[0])
# --------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 42dea81..64b86d6 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -92,38 +92,6 @@ class WeatherDataFiles:
self.close_progress_data(True)
self.reset()
- def create_test_links(self, save_path, xml_save_path, test, node, partitions, virtual_partitions, base_paths=[]):
- if (len(base_paths) == 0):
- base_paths.append(os.path.dirname(save_path))
- partition_list = sorted(get_partition_paths(partitions, base_paths))
-
- test_path = save_path + "/" + test
- if not os.path.isdir(test_path):
- os.makedirs(test_path)
- for i in range(virtual_partitions):
- # one virtual partition per disk
- for j in range(len(base_paths)):
- for index, path in enumerate(partition_list):
- offset = partitions * j
- test_partition_path = test_path + "/partition" + str(i + 1) + "_disk" + str(j + 1)
- if not os.path.isdir(test_partition_path):
- os.makedirs(test_partition_path)
- if (node <= i):
- if test == "speed_up":
- group = partitions / (i + 1)
- elif test == "batch_scale_up":
- group = partitions / virtual_partitions
- else:
- group = -1
- # link
- if (group) * node + offset <= index and index < (group) * (node + 1) + offset:
- os.symlink(path, test_partition_path + "/index" + str(index))
- else:
- # fake directories
- os.makedirs(test_partition_path + "/sensors")
- os.makedirs(test_partition_path + "/stations")
-
-
# Once the initial data has been generated, the data can be copied into a set number of partitions.
def copy_to_n_partitions(self, save_path, partitions, base_paths=[]):
if (len(base_paths) == 0):
@@ -347,7 +315,7 @@ class WeatherDataFiles:
break
return columns[self.INDEX_DATA_FILE_NAME]
-def get_partition_paths(partitions, base_paths, key = "partitions"):
+def get_partition_paths(partitions, base_paths, key="partitions"):
partition_paths = []
for i in range(0, partitions):
for j in range(0, len(base_paths)):
@@ -356,6 +324,6 @@ def get_partition_paths(partitions, base_paths, key = "partitions"):
return partition_paths
def get_partition_folder(disks, partitions, index):
- return "d" + str(disks) +"_p" + str(partitions) + "_i" + str(index)
+ return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
index 72f66bb..87adb11 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -65,7 +65,7 @@ def report_download_status(count, block, size):
line_size = 50
erase = "\b" * line_size
sys.stdout.write(erase)
- report = get_report_line( (float(count) * block / size), line_size)
+ report = get_report_line((float(count) * block / size), line_size)
sys.stdout.write(report)
# Creates a string to be used in reporting the percentage done.
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
index 7af1e9d..4f31dff 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
@@ -17,10 +17,10 @@
<data xmlns="data">
<name>Local Example</name>
<save_path>/data</save_path>
- <package>all</package>
+ <package>ghcnd_all</package>
<node>
- <name>localhost</name>
- <ip_address>127.0.0.1</ip_address>
+ <id>localhost</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
</node>
<dataset>
<name>tiny</name>
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
index 34be0df..87be4e3 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
@@ -17,26 +17,26 @@
<data xmlns="data">
<name>Cluster Example</name>
<save_path>/data</save_path>
- <package>all</package>
+ <package>ghcnd_all</package>
<node>
- <name>machine1</name>
- <ip_address>127.0.0.1</ip_address>
+ <id>machine1</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
</node>
<node>
- <name>machine2</name>
- <ip_address>127.0.0.2</ip_address>
+ <id>machine2</id>
+ <cluster_ip>127.0.0.2</cluster_ip>
</node>
<node>
- <name>machine3</name>
- <ip_address>127.0.0.3</ip_address>
+ <id>machine3</id>
+ <cluster_ip>127.0.0.3</cluster_ip>
</node>
<node>
- <name>machine4</name>
- <ip_address>127.0.0.4</ip_address>
+ <id>machine4</id>
+ <cluster_ip>127.0.0.4</cluster_ip>
</node>
<node>
- <name>machine5</name>
- <ip_address>127.0.0.5</ip_address>
+ <id>machine5</id>
+ <cluster_ip>127.0.0.5</cluster_ip>
</node>
<dataset>
<name>tiny-1drive</name>
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-server/src/main/resources/scripts/cluster_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py
index 370e77f..089ad08 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_cli.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py
@@ -59,7 +59,7 @@ def main(argv):
else:
deploy_path = arg
else:
- print 'Error: Argument must be a file name for --folder (-f).'
+ print 'Error: Argument must be a file name for --deploy_folder (-d).'
sys.exit()
# Required fields to run the script.