You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/04/02 06:11:42 UTC

[02/50] [abbrv] git commit: A few clean up items when cleaning up and standardizing the benchmark config file to look similar to the server config file.

A few clean up items when cleaning up and standardizing the benchmark config file to look similar to the server config file.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/fda09916
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/fda09916
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/fda09916

Branch: refs/heads/prestonc/hash_join
Commit: fda0991687db8719a4a105b9db20337fa58a03ec
Parents: b43fe33
Author: Preston Carman <pr...@apache.org>
Authored: Mon Feb 24 14:02:29 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Apr 1 20:56:23 2014 -0700

----------------------------------------------------------------------
 .../scripts/weather_benchmark.py                | 26 --------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      | 11 ++----
 .../noaa-ghcn-daily/scripts/weather_config.py   |  4 +--
 .../scripts/weather_data_files.py               | 36 ++------------------
 .../scripts/weather_download_files.py           |  2 +-
 .../noaa-ghcn-daily/scripts/weather_example.xml |  6 ++--
 .../scripts/weather_example_cluster.xml         | 22 ++++++------
 .../src/main/resources/scripts/cluster_cli.py   |  2 +-
 8 files changed, 22 insertions(+), 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 68c93b3..6d9301e 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -23,7 +23,6 @@ import socket
 
 from weather_config import *
 from weather_data_files import *
-from collections import OrderedDict
 
 # Weather data files created to manage the conversion process.
 # Allows partition and picking up where you left off.
@@ -143,29 +142,6 @@ class WeatherBenchmark:
     def get_partition_folders(self, base_path):
         glob.glob(base_path + "partitions/d*_p*_i*")
             
-#         test_data_path = self.base_path + "/" + self.test + "/data"
-#         if not os.path.isdir(test_data_path):
-#             os.makedirs(test_data_path)
-#     
-#         if self.test == "local_speed_up":
-#             for i in range(virtual_partitions):
-#                 # one virtual partition per disk
-#                 split = 0
-#                 for j in range(len(base_paths)):
-#                     # for each disk look at each partition
-#                     for index, path in enumerate(partition_list):
-#                         offset = partitions * j
-#                         group = partitions / (i + 1)
-#                         
-#                         if (group) * split + offset <= index and index < (group) * (split + 1) + offset:
-#                             split += 1
-#                         
-#                         test_partition_path = test_data_path + "/p" + str(i + 1) + ".i" + str(split) + ".d" + str(j + 1)
-#                         if not os.path.isdir(test_partition_path):
-#                             os.makedirs(test_partition_path)
-#                         os.symlink(path, test_partition_path + "/index" + str(index))
-        
-
     def copy_query_files(self):
         for test in self.dataset.get_tests():
             if test in self.BENCHMARK_LOCAL_TESTS:
@@ -215,12 +191,10 @@ class WeatherBenchmark:
                     sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
                     
     def get_number_of_slices(self):
-        print self.dataset
         if len(self.dataset.get_tests()) == 0:
             print "No test has been defined in config file."
         else:
             for test in self.dataset.get_tests():
-                print "test = " + test
                 if test in self.BENCHMARK_LOCAL_TESTS:
                     return get_local_virtual_partitions(self.partitions)
                 elif test in self.BENCHMARK_CLUSTER_TESTS:

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 92145a2..0f529f2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -34,7 +34,6 @@ COMPRESSED = False
 def main(argv):
     append = False
     max_records = 0
-    package = "ghcnd_gsn"
     process_file_name = ""
     reset = False
     section = "all"
@@ -79,7 +78,7 @@ def main(argv):
                 print 'Error: Argument must be a file name for --file (-f).'
                 sys.exit()
         elif opt in ('-l', "--locality"):
-            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links",  "queries", "statistics"):
+            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links", "queries", "statistics"):
                 section = arg
             else:
                 print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
@@ -90,12 +89,6 @@ def main(argv):
             else:
                 print 'Error: Argument must be an integer for --max_station_files (-m).'
                 sys.exit()
-        elif opt in ('-p', "--package"):
-            if arg in ("all", "gsn", "hcn"):
-                package = "ghcnd_" + arg
-            else:
-                print 'Error: Argument must be an string for one of the known weather packages: "all", "gsn", "hcn"'
-                sys.exit()
         elif opt == '-r':
             reset = True
         elif opt == '-u':
@@ -137,7 +130,7 @@ def main(argv):
         download.download_all_files(reset)
 
         # Unzip the required file.
-        download.unzip_package(package, reset)
+        download.unzip_package(config.get_package(), reset)
 
 
     # Create some basic paths for save files and references.

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
index 9d2e289..a6513c2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
@@ -51,10 +51,10 @@ class WeatherConfig:
     # Node Specific Functions
     # --------------------------------------------------------------------------
     def get_node_ip(self, node):
-        return self.get_text(node.getElementsByTagName("ip_address")[0])
+        return self.get_text(node.getElementsByTagName("cluster_ip")[0])
 
     def get_node_name(self, node):
-        return self.get_text(node.getElementsByTagName("name")[0])
+        return self.get_text(node.getElementsByTagName("id")[0])
 
     
     # --------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 42dea81..64b86d6 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -92,38 +92,6 @@ class WeatherDataFiles:
             self.close_progress_data(True)
         self.reset()
         
-    def create_test_links(self, save_path, xml_save_path, test, node, partitions, virtual_partitions, base_paths=[]):
-        if (len(base_paths) == 0):
-            base_paths.append(os.path.dirname(save_path))
-        partition_list = sorted(get_partition_paths(partitions, base_paths))
-        
-        test_path = save_path + "/" + test
-        if not os.path.isdir(test_path):
-            os.makedirs(test_path)
-        for i in range(virtual_partitions):
-            # one virtual partition per disk
-            for j in range(len(base_paths)):
-                for index, path in enumerate(partition_list):
-                    offset = partitions * j
-                    test_partition_path = test_path + "/partition" + str(i + 1) + "_disk" + str(j + 1)
-                    if not os.path.isdir(test_partition_path):
-                        os.makedirs(test_partition_path)
-                    if (node <= i):
-                        if test == "speed_up":
-                            group = partitions / (i + 1)
-                        elif test == "batch_scale_up":
-                            group = partitions / virtual_partitions
-                        else:
-                            group = -1
-                        # link
-                        if (group) * node + offset <= index and index < (group) * (node + 1) + offset:
-                            os.symlink(path, test_partition_path + "/index" + str(index))
-                    else:
-                        # fake directories
-                        os.makedirs(test_partition_path + "/sensors")
-                        os.makedirs(test_partition_path + "/stations")
-            
-        
     # Once the initial data has been generated, the data can be copied into a set number of partitions. 
     def copy_to_n_partitions(self, save_path, partitions, base_paths=[]):
         if (len(base_paths) == 0):
@@ -347,7 +315,7 @@ class WeatherDataFiles:
                 break
         return columns[self.INDEX_DATA_FILE_NAME]
     
-def get_partition_paths(partitions, base_paths, key = "partitions"):        
+def get_partition_paths(partitions, base_paths, key="partitions"):        
     partition_paths = []
     for i in range(0, partitions):
         for j in range(0, len(base_paths)):
@@ -356,6 +324,6 @@ def get_partition_paths(partitions, base_paths, key = "partitions"):
     return partition_paths
 
 def get_partition_folder(disks, partitions, index):        
-    return "d" + str(disks) +"_p" + str(partitions) + "_i" + str(index)
+    return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
 
 

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
index 72f66bb..87adb11 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -65,7 +65,7 @@ def report_download_status(count, block, size):
     line_size = 50
     erase = "\b" * line_size
     sys.stdout.write(erase)
-    report = get_report_line( (float(count) * block / size), line_size)
+    report = get_report_line((float(count) * block / size), line_size)
     sys.stdout.write(report)
 
 # Creates a string to be used in reporting the percentage done.

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
index 7af1e9d..4f31dff 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
@@ -17,10 +17,10 @@
 <data xmlns="data">
     <name>Local Example</name>
     <save_path>/data</save_path>
-    <package>all</package>
+    <package>ghcnd_all</package>
     <node>
-        <name>localhost</name>
-        <ip_address>127.0.0.1</ip_address>
+        <id>localhost</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
     </node>
     <dataset>
         <name>tiny</name>

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
index 34be0df..87be4e3 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
@@ -17,26 +17,26 @@
 <data xmlns="data">
     <name>Cluster Example</name>
     <save_path>/data</save_path>
-    <package>all</package>
+    <package>ghcnd_all</package>
     <node>
-        <name>machine1</name>
-        <ip_address>127.0.0.1</ip_address>
+        <id>machine1</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
     </node>
     <node>
-        <name>machine2</name>
-        <ip_address>127.0.0.2</ip_address>
+        <id>machine2</id>
+        <cluster_ip>127.0.0.2</cluster_ip>
     </node>
     <node>
-        <name>machine3</name>
-        <ip_address>127.0.0.3</ip_address>
+        <id>machine3</id>
+        <cluster_ip>127.0.0.3</cluster_ip>
     </node>
     <node>
-        <name>machine4</name>
-        <ip_address>127.0.0.4</ip_address>
+        <id>machine4</id>
+        <cluster_ip>127.0.0.4</cluster_ip>
     </node>
     <node>
-        <name>machine5</name>
-        <ip_address>127.0.0.5</ip_address>
+        <id>machine5</id>
+        <cluster_ip>127.0.0.5</cluster_ip>
     </node>
     <dataset>
         <name>tiny-1drive</name>

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fda09916/vxquery-server/src/main/resources/scripts/cluster_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py
index 370e77f..089ad08 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_cli.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py
@@ -59,7 +59,7 @@ def main(argv):
                 else:
                     deploy_path = arg
             else:
-                print 'Error: Argument must be a file name for --folder (-f).'
+                print 'Error: Argument must be a file name for --deploy_folder (-d).'
                 sys.exit()
 
     # Required fields to run the script.