Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/08/14 19:00:50 UTC

[09/16] git commit: Removed the custom frame size from the benchmark scripts and included some previously uncommitted testing changes.

Removed the custom frame size from the benchmark scripts and included some previously uncommitted testing changes.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/cee27a70
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/cee27a70
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/cee27a70

Branch: refs/heads/master
Commit: cee27a704805c40dba5a0d778fea0aa1397cb2a9
Parents: a08b97f
Author: Preston Carman <pr...@apache.org>
Authored: Thu Jul 17 14:23:27 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Jul 17 14:23:27 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |   7 +-
 .../scripts/run_benchmark_cluster.sh            |   7 +-
 .../scripts/weather_benchmark.py                |  15 +--
 .../scripts/weather_data_files.py               | 103 +++++++++----------
 4 files changed, 66 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index ff2d761..b82f0be 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -27,7 +27,7 @@
 #
 REPEAT=5
 FRAME_SIZE=10000
-BUFFER_SIZE=$((8*1024*1024))
+BUFFER_SIZE=$((32*1024*1024))
 
 if [ -z "${1}" ]
 then
@@ -46,9 +46,10 @@ do
         log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
         log_base_path=$(dirname ${j/queries/query_logs})
         mkdir -p ${log_base_path}
-        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+        #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
         echo "Buffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-        echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+        #echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
     fi;
 done
 

http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
index 67b7ca7..6c19713 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -27,7 +27,7 @@
 #
 REPEAT=5
 FRAME_SIZE=10000
-BUFFER_SIZE=1*1024*1024
+BUFFER_SIZE=$((32*1024*1024))
 
 if [ -z "${1}" ]
 then
@@ -63,9 +63,10 @@ do
             log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
             log_base_path=$(dirname ${j/queries/query_logs})
             mkdir -p ${log_base_path}
-            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+            #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
             echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-            echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+            #echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
         fi;
     fi;
 done

http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index f3c9e68..3b0f9b3 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -47,6 +47,7 @@ class WeatherBenchmark:
                        "q07.xq"
                        ] 
     QUERY_UTILITY_LIST = [
+                          "no_result.xq",
                           "sensor_count.xq",
                           "station_count.xq",
                           "q04_join_count.xq",
@@ -92,6 +93,7 @@ class WeatherBenchmark:
     def print_local_partition_schemes(self, test):
         node_index = 0
         virtual_partitions = get_local_virtual_partitions(self.partitions)
+        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
         for p in self.partitions:
             scheme = self.get_local_partition_scheme(test, p)
             self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
@@ -99,6 +101,7 @@ class WeatherBenchmark:
     def print_cluster_partition_schemes(self, test):
         node_index = self.get_current_node_index()
         virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
+        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
         for p in self.partitions:
             scheme = self.get_cluster_partition_scheme(test, p)
             self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
@@ -112,7 +115,7 @@ class WeatherBenchmark:
         print "    Partitions: " + str(partitions)
         print "    Node Id: " + str(node_id)
         
-        if len(scheme) > 0:
+        if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
             folder_length = len(scheme[0][3]) + 5
             row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
             HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
@@ -127,7 +130,7 @@ class WeatherBenchmark:
         scheme = []
         virtual_partitions = get_local_virtual_partitions(self.partitions)
         data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
-        link_base_schemes = get_partition_scheme(0, partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+        link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
 
         # Match link paths to real data paths.
         group_size = len(data_schemes) / len(link_base_schemes)
@@ -155,20 +158,20 @@ class WeatherBenchmark:
         scheme = []
         local_virtual_partitions = get_local_virtual_partitions(self.partitions)
         virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
+        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
         data_schemes = get_partition_scheme(node_index, virtual_partitions, self.base_paths)
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
 
         # Match link paths to real data paths.
         for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
             # Prep
             if test == "speed_up":
-                group_size = virtual_partitions / (link_node + 1)
+                group_size = virtual_partitions_per_disk / (link_node + 1)
             elif test == "batch_scale_out":
-                group_size = virtual_partitions / len(self.nodes)
+                group_size = virtual_partitions_per_disk / len(self.nodes)
             else:
                 print "Unknown test."
                 return
-            group_size = group_size / link_virtual
             node_offset = group_size * (node_index * partition)
             node_offset += group_size * link_index
             has_data = True
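
For reference, a minimal standalone sketch of the per-disk grouping arithmetic introduced above. The function and variable names below are illustrative only (they are not part of weather_benchmark.py), and integer division is assumed, matching the Python 2 script's behaviour on ints:

    # Sketch of the revised grouping logic in get_cluster_partition_scheme
    # (names here are illustrative, not the benchmark's API).
    def group_size_for(test, virtual_partitions, base_paths, nodes, link_node):
        # Virtual partitions are first split evenly across the disks (base paths).
        virtual_partitions_per_disk = virtual_partitions // len(base_paths)
        if test == "speed_up":
            # More nodes means a smaller group per link.
            return virtual_partitions_per_disk // (link_node + 1)
        elif test == "batch_scale_out":
            # Group size is fixed by the total node count.
            return virtual_partitions_per_disk // len(nodes)
        raise ValueError("Unknown test.")

    # Example: 16 virtual partitions, 2 disks, 4 nodes.
    print(group_size_for("speed_up", 16, ["/d0/", "/d1/"], ["n0", "n1", "n2", "n3"], 1))         # 8 // 2 = 4
    print(group_size_for("batch_scale_out", 16, ["/d0/", "/d1/"], ["n0", "n1", "n2", "n3"], 1))  # 8 // 4 = 2

Dividing by the number of base paths up front appears to take over the role of the group_size / link_virtual step removed in the hunk above.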

http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 1c9f129..b39f934 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -150,62 +150,56 @@ class WeatherDataFiles:
         
         XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
         
-        # Initialize the partition paths.
         partition_paths = get_partition_paths(0, partitions, base_paths)
-        sensors_partition_files = []
-        stations_partition_files = []
-        for path in partition_paths:
-            # Make sure the xml folder is available.
-            prepare_path(path, reset)
-            prepare_path(path + "sensors/", False)
-            prepare_path(path + "stations/", False)
-            sensors_partition_files.append(open(path + "sensors/partition.xml", 'w'))
-            stations_partition_files.append(open(path + "stations/partition.xml", 'w'))
-    
-        for row in range(0, len(partition_paths)):
-            sensors_partition_files[row].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
-            stations_partition_files[row].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
 
         import fnmatch
         import os
         
-        # copy stations and sensors into each partition
-        current_sensor_partition = 0
-        current_station_partition = 0
-        self.open_progress_data()
-        row_count = len(self.progress_data)
-        for row in range(0, row_count):
-            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-            station_id = os.path.basename(file_name).split('.')[0]
-               
-            # Copy sensor files
-            type = "sensors"
-            file_path = build_base_save_folder(save_path, station_id, type) + station_id
-            for root, dirnames, filenames in os.walk(file_path):
-                for filename in fnmatch.filter(filenames, '*.xml'):
-                    xml_path = os.path.join(root, filename)
-                    xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
-                    sensors_partition_files[current_sensor_partition].write(xml_data)
-                    current_sensor_partition += 1
-                    if current_sensor_partition >= len(sensors_partition_files):
-                        current_sensor_partition = 0
-            
-            # Copy station files
-            type = "stations"
-            file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
-            xml_path = os.path.join(root, file_path)
-            xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
-            stations_partition_files[current_station_partition].write(xml_data)
-            current_station_partition += 1
-            if current_station_partition >= len(partition_paths):
-                current_station_partition = 0
+        for path in partition_paths:
+            prepare_path(path, reset)
+
+        # Initialize the partition paths.
+        types = ["sensors", "stations"]
+        for type in types:
+            partition_files = []
+            for path in partition_paths:
+                # Make sure the xml folder is available.
+                prepare_path(path + type + "/", False)
+                partition_files.append(open(path + type + "/partition.xml", 'w'))
+                partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
+
+            # copy into each partition
+            current_partition = 0
+            self.open_progress_data()
+            row_count = len(self.progress_data)
+            for row in range(0, row_count):
+                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+                station_id = os.path.basename(file_name).split('.')[0]
                 
-        for row in range(0, len(partition_paths)):
-            sensors_partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
-            sensors_partition_files[row].close()
-            stations_partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
-            stations_partition_files[row].close()
+                # Copy files
+                if type == "sensors":
+                    file_path = build_base_save_folder(save_path, station_id, type) + station_id
+                    for root, dirnames, filenames in os.walk(file_path):
+                        for filename in fnmatch.filter(filenames, '*.xml'):
+                            xml_path = os.path.join(root, filename)
+                            xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+                            partition_files[current_partition].write(xml_data)
+                            current_partition += 1
+                            if current_partition >= len(partition_files):
+                                current_partition = 0
+                elif type == "stations":
+                    file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
+                    xml_path = os.path.join(root, file_path)
+                    xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+                    partition_files[current_partition].write(xml_data)
+                    current_partition += 1
+                    if current_partition >= len(partition_paths):
+                        current_partition = 0
+
+            for row in range(0, len(partition_paths)):
+                partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
+                partition_files[row].close()
 
     def get_file_row(self, file_name):
         for i in range(0, len(self.progress_data)):
@@ -388,12 +382,13 @@ def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
         partition_paths.append(scheme[PARTITION_INDEX_PATH])
     return partition_paths
 
-def get_partition_scheme(node_id, partitions, base_paths, key="partitions"):        
+def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):        
     partition_scheme = []
-    for i in range(0, partitions):
+    partitions_per_disk = virtual_partitions / len(base_paths)
+    for i in range(0, partitions_per_disk):
         for j in range(0, len(base_paths)):
-            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions, i) + "/"
-            partition_scheme.append((node_id, j, partitions, i, new_partition_path))
+            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions_per_disk, i) + "/"
+            partition_scheme.append((node_id, j, partitions_per_disk, i, new_partition_path))
     return partition_scheme
 
 def get_partition_folder(disks, partitions, index):
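
As a rough illustration of the reworked get_partition_scheme, a self-contained sketch follows. get_partition_folder is stubbed here with a made-up naming scheme purely so the example runs on its own; the stub's folder names are an assumption and need not match the real helper in weather_data_files.py:

    # Self-contained sketch of the per-disk split in get_partition_scheme.
    # get_partition_folder_stub is a stand-in; the real helper's folder
    # naming in weather_data_files.py may differ.
    def get_partition_folder_stub(disk, partitions_per_disk, index):
        return "d%d_p%d_of_%d" % (disk, index, partitions_per_disk)

    def get_partition_scheme_sketch(node_id, virtual_partitions, base_paths, key="partitions"):
        partition_scheme = []
        # Divide the virtual partitions evenly over the available disks.
        partitions_per_disk = virtual_partitions // len(base_paths)
        for i in range(0, partitions_per_disk):
            for j in range(0, len(base_paths)):
                path = base_paths[j] + key + "/" + get_partition_folder_stub(j, partitions_per_disk, i) + "/"
                partition_scheme.append((node_id, j, partitions_per_disk, i, path))
        return partition_scheme

    # Example: 4 virtual partitions over 2 base paths -> 2 partitions per disk.
    for entry in get_partition_scheme_sketch(0, 4, ["/data/disk0/", "/data/disk1/"]):
        print(entry)

With 4 virtual partitions over 2 base paths this yields 2 partitions per disk, i.e. four (node, disk, partitions_per_disk, index, path) tuples.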