You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/08/14 19:00:50 UTC
[09/16] git commit: Removed custom frame size from benchmark and found some uncommitted testing changes.
Removed the custom frame size from the benchmark scripts and included some previously uncommitted testing changes.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/cee27a70
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/cee27a70
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/cee27a70
Branch: refs/heads/master
Commit: cee27a704805c40dba5a0d778fea0aa1397cb2a9
Parents: a08b97f
Author: Preston Carman <pr...@apache.org>
Authored: Thu Jul 17 14:23:27 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Jul 17 14:23:27 2014 -0700
----------------------------------------------------------------------
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 7 +-
.../scripts/run_benchmark_cluster.sh | 7 +-
.../scripts/weather_benchmark.py | 15 +--
.../scripts/weather_data_files.py | 103 +++++++++----------
4 files changed, 66 insertions(+), 66 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index ff2d761..b82f0be 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -27,7 +27,7 @@
#
REPEAT=5
FRAME_SIZE=10000
-BUFFER_SIZE=$((8*1024*1024))
+BUFFER_SIZE=$((32*1024*1024))
if [ -z "${1}" ]
then
@@ -46,9 +46,10 @@ do
log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
log_base_path=$(dirname ${j/queries/query_logs})
mkdir -p ${log_base_path}
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
echo "Buffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+ #echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
fi;
done
http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
index 67b7ca7..6c19713 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -27,7 +27,7 @@
#
REPEAT=5
FRAME_SIZE=10000
-BUFFER_SIZE=1*1024*1024
+BUFFER_SIZE=$((32*1024*1024))
if [ -z "${1}" ]
then
@@ -63,9 +63,10 @@ do
log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
log_base_path=$(dirname ${j/queries/query_logs})
mkdir -p ${log_base_path}
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+ #echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
fi;
fi;
done
http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index f3c9e68..3b0f9b3 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -47,6 +47,7 @@ class WeatherBenchmark:
"q07.xq"
]
QUERY_UTILITY_LIST = [
+ "no_result.xq",
"sensor_count.xq",
"station_count.xq",
"q04_join_count.xq",
@@ -92,6 +93,7 @@ class WeatherBenchmark:
def print_local_partition_schemes(self, test):
node_index = 0
virtual_partitions = get_local_virtual_partitions(self.partitions)
+ virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
for p in self.partitions:
scheme = self.get_local_partition_scheme(test, p)
self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
@@ -99,6 +101,7 @@ class WeatherBenchmark:
def print_cluster_partition_schemes(self, test):
node_index = self.get_current_node_index()
virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
+ virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
for p in self.partitions:
scheme = self.get_cluster_partition_scheme(test, p)
self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
@@ -112,7 +115,7 @@ class WeatherBenchmark:
print " Partitions: " + str(partitions)
print " Node Id: " + str(node_id)
- if len(scheme) > 0:
+ if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
folder_length = len(scheme[0][3]) + 5
row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
@@ -127,7 +130,7 @@ class WeatherBenchmark:
scheme = []
virtual_partitions = get_local_virtual_partitions(self.partitions)
data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
- link_base_schemes = get_partition_scheme(0, partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
# Match link paths to real data paths.
group_size = len(data_schemes) / len(link_base_schemes)
@@ -155,20 +158,20 @@ class WeatherBenchmark:
scheme = []
local_virtual_partitions = get_local_virtual_partitions(self.partitions)
virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
+ virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
data_schemes = get_partition_scheme(node_index, virtual_partitions, self.base_paths)
- link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ link_base_schemes = get_cluster_link_scheme(len(self.nodes), virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
# Match link paths to real data paths.
for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
# Prep
if test == "speed_up":
- group_size = virtual_partitions / (link_node + 1)
+ group_size = virtual_partitions_per_disk / (link_node + 1)
elif test == "batch_scale_out":
- group_size = virtual_partitions / len(self.nodes)
+ group_size = virtual_partitions_per_disk / len(self.nodes)
else:
print "Unknown test."
return
- group_size = group_size / link_virtual
node_offset = group_size * (node_index * partition)
node_offset += group_size * link_index
has_data = True
http://git-wip-us.apache.org/repos/asf/vxquery/blob/cee27a70/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 1c9f129..b39f934 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -150,62 +150,56 @@ class WeatherDataFiles:
XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
- # Initialize the partition paths.
partition_paths = get_partition_paths(0, partitions, base_paths)
- sensors_partition_files = []
- stations_partition_files = []
- for path in partition_paths:
- # Make sure the xml folder is available.
- prepare_path(path, reset)
- prepare_path(path + "sensors/", False)
- prepare_path(path + "stations/", False)
- sensors_partition_files.append(open(path + "sensors/partition.xml", 'w'))
- stations_partition_files.append(open(path + "stations/partition.xml", 'w'))
-
- for row in range(0, len(partition_paths)):
- sensors_partition_files[row].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
- stations_partition_files[row].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
import fnmatch
import os
- # copy stations and sensors into each partition
- current_sensor_partition = 0
- current_station_partition = 0
- self.open_progress_data()
- row_count = len(self.progress_data)
- for row in range(0, row_count):
- row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
- file_name = row_contents[self.INDEX_DATA_FILE_NAME]
- station_id = os.path.basename(file_name).split('.')[0]
-
- # Copy sensor files
- type = "sensors"
- file_path = build_base_save_folder(save_path, station_id, type) + station_id
- for root, dirnames, filenames in os.walk(file_path):
- for filename in fnmatch.filter(filenames, '*.xml'):
- xml_path = os.path.join(root, filename)
- xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
- sensors_partition_files[current_sensor_partition].write(xml_data)
- current_sensor_partition += 1
- if current_sensor_partition >= len(sensors_partition_files):
- current_sensor_partition = 0
-
- # Copy station files
- type = "stations"
- file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
- xml_path = os.path.join(root, file_path)
- xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
- stations_partition_files[current_station_partition].write(xml_data)
- current_station_partition += 1
- if current_station_partition >= len(partition_paths):
- current_station_partition = 0
+ for path in partition_paths:
+ prepare_path(path, reset)
+
+ # Initialize the partition paths.
+ types = ["sensors", "stations"]
+ for type in types:
+ partition_files = []
+ for path in partition_paths:
+ # Make sure the xml folder is available.
+ prepare_path(path + type + "/", False)
+ partition_files.append(open(path + type + "/partition.xml", 'w'))
+ partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
+
+ # copy into each partition
+ current_partition = 0
+ self.open_progress_data()
+ row_count = len(self.progress_data)
+ for row in range(0, row_count):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+ station_id = os.path.basename(file_name).split('.')[0]
- for row in range(0, len(partition_paths)):
- sensors_partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
- sensors_partition_files[row].close()
- stations_partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
- stations_partition_files[row].close()
+ # Copy files
+ if type == "sensors":
+ file_path = build_base_save_folder(save_path, station_id, type) + station_id
+ for root, dirnames, filenames in os.walk(file_path):
+ for filename in fnmatch.filter(filenames, '*.xml'):
+ xml_path = os.path.join(root, filename)
+ xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+ partition_files[current_partition].write(xml_data)
+ current_partition += 1
+ if current_partition >= len(partition_files):
+ current_partition = 0
+ elif type == "stations":
+ file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
+ xml_path = os.path.join(root, file_path)
+ xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+ partition_files[current_partition].write(xml_data)
+ current_partition += 1
+ if current_partition >= len(partition_paths):
+ current_partition = 0
+
+ for row in range(0, len(partition_paths)):
+ partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
+ partition_files[row].close()
def get_file_row(self, file_name):
for i in range(0, len(self.progress_data)):
@@ -388,12 +382,13 @@ def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
partition_paths.append(scheme[PARTITION_INDEX_PATH])
return partition_paths
-def get_partition_scheme(node_id, partitions, base_paths, key="partitions"):
+def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
partition_scheme = []
- for i in range(0, partitions):
+ partitions_per_disk = virtual_partitions / len(base_paths)
+ for i in range(0, partitions_per_disk):
for j in range(0, len(base_paths)):
- new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions, i) + "/"
- partition_scheme.append((node_id, j, partitions, i, new_partition_path))
+ new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions_per_disk, i) + "/"
+ partition_scheme.append((node_id, j, partitions_per_disk, i, new_partition_path))
return partition_scheme
def get_partition_folder(disks, partitions, index):