You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/08/14 19:00:51 UTC
[10/16] git commit: Updated with minor changes to improve the
benchmark process.
Updated with minor changes to improve the benchmark process.
- Next version of Hyracks.
- Updated MRQL queries.
- Benchmark frame and buffer sizes updated.
- Conditionally send out the benchmark finish e-mail.
- Added CLI argument for ignoring queries when producing timing results.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/61b22a9c
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/61b22a9c
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/61b22a9c
Branch: refs/heads/master
Commit: 61b22a9c77930a8b69f308de9d001e743cd318eb
Parents: cee27a7
Author: Preston Carman <pr...@apache.org>
Authored: Sun Aug 3 15:34:21 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Sun Aug 3 15:34:21 2014 -0700
----------------------------------------------------------------------
pom.xml | 2 +-
vxquery-benchmark/pom.xml | 21 +++++++++
.../other_systems/mrql_gsn/q01.mrql | 2 +-
.../other_systems/mrql_gsn/q04.mrql | 4 +-
.../other_systems/mrql_gsn/q06.mrql | 2 -
.../other_systems/mrql_test/q00.mrql | 7 +++
.../other_systems/mrql_test/q01.mrql | 5 ++
.../other_systems/mrql_test/q02.mrql | 8 ++++
.../other_systems/mrql_test/q03.mrql | 6 +++
.../other_systems/mrql_test/q04.mrql | 8 ++++
.../other_systems/mrql_test/q05.mrql | 11 +++++
.../other_systems/mrql_test/q06.mrql | 11 +++++
.../other_systems/mrql_test/q07.mrql | 10 ++++
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 23 +++++----
.../scripts/run_benchmark_cluster.sh | 28 ++++++-----
.../noaa-ghcn-daily/scripts/run_group_test.sh | 23 +++++----
.../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 16 +++++--
.../scripts/weather_benchmark.py | 49 +++++++++-----------
.../noaa-ghcn-daily/scripts/weather_cli.py | 4 +-
.../scripts/weather_data_files.py | 21 ++++++---
.../src/main/resources/util/merge_xml_files.py | 2 +-
.../java/org/apache/vxquery/cli/VXQuery.java | 24 +++++-----
.../VXQueryCollectionOperatorDescriptor.java | 2 +-
.../metadata/VXQueryMetadataProvider.java | 26 ++++++-----
.../org/apache/vxquery/xmlparser/XMLParser.java | 18 +++++--
vxquery-xtest/pom.xml | 25 ++++++----
.../apache/vxquery/xtest/TestRunnerFactory.java | 3 --
27 files changed, 247 insertions(+), 114 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 752a633..cf17361 100644
--- a/pom.xml
+++ b/pom.xml
@@ -594,7 +594,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
- <hyracks.version>0.2.12-SNAPSHOT</hyracks.version>
+ <hyracks.version>0.2.13-SNAPSHOT</hyracks.version>
</properties>
<modules>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/pom.xml b/vxquery-benchmark/pom.xml
index da981d7..e08da38 100644
--- a/vxquery-benchmark/pom.xml
+++ b/vxquery-benchmark/pom.xml
@@ -47,6 +47,27 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.5</version>
+ <executions>
+ <execution>
+ <id>copy-scripts</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/scripts</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
index 5e8de9b..1712cfe 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
@@ -1,5 +1,5 @@
select (r)
from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
where text(r.dataType) = "AWND"
- and toInt(text(r.value)) > 491.744
+ and toFloat(text(r.value)) > 491.744
;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
index 938f6d8..2929478 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
@@ -1,8 +1,8 @@
-select (r)
+select (sensors)
from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
l in stations.locationLabels
where text(stations.id) = text(sensors.station)
and text(sensors.date) = "1976-07-04T00:00:00.000"
- and text(l.displayName) = "WASHINGTON"
+ and text(l.displayName) = "Washington"
;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
index c4ab3da..bef3413 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
@@ -4,9 +4,7 @@ from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
v in sensors.value,
stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
n in stations.displayName,
- l in stations.locationLabels
where text(stations.id) = text(sensors.station)
and toInt(substring(text(d), 0, 4)) = 2000
and text(sensors.dataType) = "TMAX"
- and text(l.displayName) = "WASHINGTON"
;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
new file mode 100644
index 0000000..2e606d8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
@@ -0,0 +1,7 @@
+select (r)
+from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+where text(r.station) = "GHCND:AS000000003"
+ and toInt(substring(text(r.date), 0, 4)) >= 2000
+ and toInt(substring(text(r.date), 5, 7)) = 3
+ and toInt(substring(text(r.date), 8, 10)) = 3
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
new file mode 100644
index 0000000..1e41c2c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
@@ -0,0 +1,5 @@
+select (r)
+from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+where text(r.dataType) = "AWND"
+ and toFloat(text(r.value)) > 491.744
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
new file mode 100644
index 0000000..818ad31
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
@@ -0,0 +1,8 @@
+sum(
+ select (toFloat(text(r.value)))
+ from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+ where text(r.station) = "GHCND:US000000002"
+ and toInt(substring(text(r.date), 0, 4)) = 2002
+ and text(r.dataType) = "PRCP"
+) / 10
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
new file mode 100644
index 0000000..68fa926
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
@@ -0,0 +1,6 @@
+max(
+ select (toFloat(text(r.value)))
+ from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+ where text(r.dataType) = "TMAX"
+) / 10
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
new file mode 100644
index 0000000..467d318
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
@@ -0,0 +1,8 @@
+select (sensors)
+from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+ stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+ l in stations.locationLabels
+where text(stations.id) = text(sensors.station)
+ and text(sensors.date) = "2002-02-02T00:00:00.000"
+ and text(l.displayName) = "State 1"
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
new file mode 100644
index 0000000..c95d7d8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
@@ -0,0 +1,11 @@
+min(
+ select (toFloat(text(sensors.value)))
+ from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+ stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+ l in stations.locationLabels
+ where text(stations.id) = text(sensors.station)
+ and toInt(substring(text(sensors.date), 0, 4)) = 2001
+ and text(sensors.dataType) = "TMIN"
+ and text(l.id) = "FIPS:US"
+) / 10
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
new file mode 100644
index 0000000..8989e48
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
@@ -0,0 +1,11 @@
+select (n, d, v)
+from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+ d in sensors.date,
+ v in sensors.value,
+ stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+ n in stations.displayName,
+ l in stations.locationLabels
+where text(stations.id) = text(sensors.station)
+ and toInt(substring(text(d), 0, 4)) = 2002
+ and text(sensors.dataType) = "TMAX"
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
new file mode 100644
index 0000000..ac28716
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
@@ -0,0 +1,10 @@
+avg(
+ select (toFloat(text(rtmax.value))-toFloat(text(rtmin.value)))
+ from rtmax in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+ rtmin in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+ where text(rtmax.date) = text(rtmin.date)
+ and text(rtmax.station) = text(rtmin.station)
+ and text(rtmax.dataType) = "TMAX"
+ and text(rtmin.dataType) = "TMIN"
+) / 10
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index b82f0be..0852d86 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -26,7 +26,7 @@
# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
#
REPEAT=5
-FRAME_SIZE=10000
+FRAME_SIZE=$((8*1024))
BUFFER_SIZE=$((32*1024*1024))
if [ -z "${1}" ]
@@ -46,15 +46,20 @@ do
log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
log_base_path=$(dirname ${j/queries/query_logs})
mkdir -p ${log_base_path}
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
- #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
echo "Buffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- #echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+ echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
fi;
done
-SUBJECT="Benchmark Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in folder ${1}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="Benchmark Tests Finished"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all tests in folder ${1}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
old mode 100644
new mode 100755
index 6c19713..5c27266
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -25,8 +25,9 @@
# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
#
+CLUSTER="uci"
REPEAT=5
-FRAME_SIZE=10000
+FRAME_SIZE=$((8*1024))
BUFFER_SIZE=$((32*1024*1024))
if [ -z "${1}" ]
@@ -43,7 +44,7 @@ fi
# Run queries for the specified number of nodes.
echo "Starting ${2} cluster nodes"
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a start
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
# wait for cluster to finish setting up
sleep 5
@@ -63,19 +64,24 @@ do
log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
log_base_path=$(dirname ${j/queries/query_logs})
mkdir -p ${log_base_path}
- #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- #echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+ echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
fi;
fi;
done
# Stop cluster.
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a stop
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
-SUBJECT="Benchmark Cluster Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in folder ${1} for a ${2} node cluster.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="Benchmark Cluster Tests Finished"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
old mode 100644
new mode 100755
index 7bef3cb..d5b8dc5
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
@@ -19,26 +19,33 @@
DATASET="dataset-hcn-d2"
cluster_ip=${1}
+base_weather_folder=${2}
-for n in 4
+for n in 7 6 5 3 4 2 1 0
do
#for t in "batch_scale_out" "speed_up"
for t in "batch_scale_out"
#for t in "speed_up"
do
- for p in 0
+ for p in 2 1 0
do
for c in 4
do
echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
- sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh weather_data/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
+ sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
done
done
done
done
-SUBJECT="Benchmark Group Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in the predefined group for ${DATASET}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="Benchmark Group Tests Finished"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all tests in the predefined group for ${DATASET}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
old mode 100644
new mode 100755
index 1fa58dd..a6788be
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
@@ -29,8 +29,14 @@ do
time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done;
done
-SUBJECT="MRQL Tests Finished (${DATASET})"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all MRQL tests on ${DATASET}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="MRQL Tests Finished (${DATASET})"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all MRQL tests on ${DATASET}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 3b0f9b3..c013d8e 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -92,19 +92,17 @@ class WeatherBenchmark:
def print_local_partition_schemes(self, test):
node_index = 0
- virtual_partitions = get_local_virtual_partitions(self.partitions)
- virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
+ virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
for p in self.partitions:
scheme = self.get_local_partition_scheme(test, p)
- self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
+ self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
def print_cluster_partition_schemes(self, test):
node_index = self.get_current_node_index()
- virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
- virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
+ virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
for p in self.partitions:
scheme = self.get_cluster_partition_scheme(test, p)
- self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
+ self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
print
@@ -128,7 +126,7 @@ class WeatherBenchmark:
def get_local_partition_scheme(self, test, partition):
scheme = []
- virtual_partitions = get_local_virtual_partitions(self.partitions)
+ virtual_partitions = get_local_virtual_disk_partitions(self.partitions)
data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
@@ -156,28 +154,27 @@ class WeatherBenchmark:
return
scheme = []
- local_virtual_partitions = get_local_virtual_partitions(self.partitions)
- virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
- virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
- data_schemes = get_partition_scheme(node_index, virtual_partitions, self.base_paths)
- link_base_schemes = get_cluster_link_scheme(len(self.nodes), virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+ data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
+ link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
# Match link paths to real data paths.
for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
# Prep
if test == "speed_up":
- group_size = virtual_partitions_per_disk / (link_node + 1)
+ group_size = virtual_disk_partitions / (link_node + 1) / partition
elif test == "batch_scale_out":
- group_size = virtual_partitions_per_disk / len(self.nodes)
+ group_size = virtual_disk_partitions / len(self.nodes) / partition
else:
print "Unknown test."
return
- node_offset = group_size * (node_index * partition)
+
+ node_offset = group_size * node_index * partition
node_offset += group_size * link_index
has_data = True
if link_node < node_index:
has_data = False
-
+
# Make links
for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
if has_data and data_disk == link_disk \
@@ -295,7 +292,7 @@ class WeatherBenchmark:
prepare_path(query_path, reset)
# Copy query files.
- partition_paths = get_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+ partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
self.copy_and_replace_query(query_path, partition_paths)
def copy_local_query_files(self, test, reset):
@@ -313,7 +310,7 @@ class WeatherBenchmark:
prepare_path(query_path, reset)
# Copy query files.
- partition_paths = get_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
self.copy_and_replace_query(query_path, partition_paths)
def copy_and_replace_query(self, query_path, replacement_list):
@@ -339,15 +336,15 @@ class WeatherBenchmark:
for line in fileinput.input(query_path + query_file, True):
sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
- def get_number_of_slices(self):
+ def get_number_of_slices_per_disk(self):
if len(self.dataset.get_tests()) == 0:
print "No test has been defined in config file."
else:
for test in self.dataset.get_tests():
if test in self.BENCHMARK_LOCAL_TESTS:
- return get_local_virtual_partitions(self.partitions)
+ return get_local_virtual_disk_partitions(self.partitions)
elif test in self.BENCHMARK_CLUSTER_TESTS:
- return get_cluster_virtual_partitions(self.nodes, self.partitions)
+ return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
else:
print "Unknown test."
exit()
@@ -355,7 +352,7 @@ class WeatherBenchmark:
def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):
link_paths = []
for n in range(0, nodes):
- new_link_path = get_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
+ new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
link_paths.extend(new_link_path)
return link_paths
@@ -368,12 +365,12 @@ def get_local_query_folder(disks, partitions):
def get_cluster_query_path(base_paths, test, partition, nodes):
return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/"
-def get_cluster_virtual_partitions(nodes, partitions):
- vp = get_local_virtual_partitions(partitions)
- vn = calculate_partitions(range(len(nodes), 0, -1))
+def get_cluster_virtual_disk_partitions(nodes, partitions):
+ vp = get_local_virtual_disk_partitions(partitions)
+ vn = calculate_partitions(range(1, len(nodes)+1, 1))
return vp * vn
-def get_local_virtual_partitions(partitions):
+def get_local_virtual_disk_partitions(partitions):
return calculate_partitions(partitions)
def calculate_partitions(list):
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 8ac6d17..eeae25c 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -53,7 +53,7 @@ def main(argv):
print ' -a Append the results to the progress file.'
print ' -f (str) The file name of a specific station to process.'
print ' * Helpful when testing a single stations XML file output.'
- print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, inventory, statistics).'
+ print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).'
print ' -m (int) Limits the number of files created for each station.'
print ' * Helpful when testing to make sure all elements are supported for each station.'
print ' Alternate form: --max_station_files=(int)'
@@ -203,7 +203,7 @@ def main(argv):
benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())
if section in ("all", "partition", "partition_scheme"):
- slices = benchmark.get_number_of_slices()
+ slices = benchmark.get_number_of_slices_per_disk()
print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
data.reset()
if section == "partition_scheme":
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index b39f934..4877120 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -376,19 +376,28 @@ PARTITION_INDEX = 3
PARTITION_INDEX_PATH = 4
PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
-def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
+def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
partition_paths = []
for scheme in get_partition_scheme(node_id, partitions, base_paths, key):
partition_paths.append(scheme[PARTITION_INDEX_PATH])
return partition_paths
-def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
- partition_scheme = []
+def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
partitions_per_disk = virtual_partitions / len(base_paths)
- for i in range(0, partitions_per_disk):
+ return get_disk_partition_scheme(node_id, partitions_per_disk, base_paths, key)
+
+def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
+ partition_paths = []
+ for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key):
+ partition_paths.append(scheme[PARTITION_INDEX_PATH])
+ return partition_paths
+
+def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
+ partition_scheme = []
+ for i in range(0, virtual_disk_partitions):
for j in range(0, len(base_paths)):
- new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions_per_disk, i) + "/"
- partition_scheme.append((node_id, j, partitions_per_disk, i, new_partition_path))
+ new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
+ partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
return partition_scheme
def get_partition_folder(disks, partitions, index):
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
index 8a6952b..9238a19 100644
--- a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -43,7 +43,7 @@ def main(argv):
except getopt.GetoptError:
print 'The file options for list_xml_files.py were not correctly specified.'
print 'To see a full list of options try:'
- print ' $ python list_xml_files.py -h'
+ print ' $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors'
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index a0a5c5a..c513a72 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -83,7 +83,6 @@ public class VXQuery {
private static long sumSquaredTiming;
private static long minTiming = Long.MAX_VALUE;
private static long maxTiming = Long.MIN_VALUE;
- private static byte TIMING_QUERIES_TO_IGNORE = 2;
/**
* Constructor to use command line options passed.
@@ -124,9 +123,9 @@ public class VXQuery {
if (opts.timing) {
Date end = new Date();
timingMessage("Execution time: " + (end.getTime() - start.getTime()) + " ms");
- if (opts.repeatExec > TIMING_QUERIES_TO_IGNORE) {
- long mean = sumTiming / (opts.repeatExec - TIMING_QUERIES_TO_IGNORE);
- double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Byte(TIMING_QUERIES_TO_IGNORE).doubleValue()) - mean * mean);
+ if (opts.repeatExec > opts.timingIgnoreQueries) {
+ long mean = sumTiming / (opts.repeatExec - opts.timingIgnoreQueries);
+ double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Integer(opts.timingIgnoreQueries).doubleValue()) - mean * mean);
timingMessage("Average execution time: " + mean + " ms");
timingMessage("Standard deviation: " + String.format( "%.4f", sd));
timingMessage("Coefficient of variation: " + String.format( "%.4f", (sd / mean)));
@@ -290,7 +289,7 @@ public class VXQuery {
if (opts.timing) {
end = new Date();
long currentRun = end.getTime() - start.getTime();
- if ((i + 1) > TIMING_QUERIES_TO_IGNORE) {
+ if ((i + 1) > opts.timingIgnoreQueries) {
sumTiming += currentRun;
sumSquaredTiming += currentRun * currentRun;
if (currentRun < minTiming) {
@@ -434,22 +433,22 @@ public class VXQuery {
*/
private static class CmdLineOptions {
@Option(name = "-available-processors", usage = "Number of available processors. (default java's available processors)")
- public int availableProcessors = -1;
+ private int availableProcessors = -1;
@Option(name = "-client-net-ip-address", usage = "IP Address of the ClusterController")
- public String clientNetIpAddress = null;
+ private String clientNetIpAddress = null;
@Option(name = "-client-net-port", usage = "Port of the ClusterController (default 1098)")
- public int clientNetPort = 1098;
+ private int clientNetPort = 1098;
@Option(name = "-local-node-controllers", usage = "Number of local node controllers (default 1)")
- public int localNodeControllers = 1;
+ private int localNodeControllers = 1;
@Option(name = "-frame-size", usage = "Frame size in bytes. (default 65536)")
- public int frameSize = 65536;
+ private int frameSize = 65536;
@Option(name = "-buffer-size", usage = "Disk read buffer size in bytes.")
- public int bufferSize = -1;
+ private int bufferSize = -1;
@Option(name = "-O", usage = "Optimization Level. Default: Full Optimization")
private int optimizationLevel = Integer.MAX_VALUE;
@@ -478,6 +477,9 @@ public class VXQuery {
@Option(name = "-timing", usage = "Produce timing information")
private boolean timing;
+ @Option(name = "-timing-ignore-queries", usage = "Ignore the first X number of queries.")
+ private int timingIgnoreQueries = 2;
+
@Option(name = "-x", usage = "Bind an external variable")
private Map<String, String> bindings = new HashMap<String, String>();
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
index 89968d5..8fdd1ec 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
@@ -105,7 +105,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
}
}
} else {
- throw new HyracksDataException("Invalid directory parameter ("
+ throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
+ collectionDirectory.getAbsolutePath() + ") passed to collection.");
}
}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
index 40a02ae..bc92ffc 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
@@ -68,8 +68,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource,
List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
- IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context,
- JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
+ List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
+ IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
+ throws AlgebricksException {
VXQueryCollectionDataSource ds = (VXQueryCollectionDataSource) dataSource;
if (sourceFileMap != null) {
final int len = ds.getPartitions().length;
@@ -123,23 +124,24 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(
IDataSource<String> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys,
- LogicalVariable payLoadVar, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
+ LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, JobGenContext context,
+ JobSpecification jobSpec) throws AlgebricksException {
return null;
}
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<String> dataSource,
IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
- LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
+ LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc,
+ JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
return null;
}
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(IDataSource<String> dataSource,
IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
- LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
+ LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc,
+ JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
return null;
}
@@ -147,8 +149,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema,
IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc,
- JobGenContext context, JobSpecification spec) throws AlgebricksException {
+ List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
return null;
}
@@ -156,8 +159,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema,
IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc,
- JobGenContext context, JobSpecification spec) throws AlgebricksException {
+ List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
return null;
}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
index 1d979b5..81b9191 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
@@ -20,6 +20,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.io.Reader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
@@ -53,7 +54,7 @@ public class XMLParser {
public XMLParser(boolean attachTypes, ITreeNodeIdProvider idProvider, String nodeId, ByteBuffer frame,
FrameTupleAppender appender, List<Integer> childSeq, StaticContext staticContext)
throws HyracksDataException {
- bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size"));
+ bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size", "-1"));
this.nodeId = nodeId;
try {
parser = XMLReaderFactory.createXMLReader();
@@ -76,13 +77,16 @@ public class XMLParser {
public void parseDocument(File file, ArrayBackedValueStorage abvs) throws HyracksDataException {
try {
+ Reader input;
if (bufferSize > 0) {
- in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize));
+ input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize);
} else {
- in.setCharacterStream(new InputStreamReader(new FileInputStream(file)));
+ input = new InputStreamReader(new FileInputStream(file));
}
+ in.setCharacterStream(input);
parser.parse(in);
handler.writeDocument(abvs);
+ input.close();
} catch (FileNotFoundException e) {
HyracksDataException hde = new VXQueryFileNotFoundException(e, file);
hde.setNodeId(nodeId);
@@ -101,13 +105,17 @@ public class XMLParser {
public void parseElements(File file, IFrameWriter writer, FrameTupleAccessor fta, int tupleIndex)
throws HyracksDataException {
try {
+ Reader input;
if (bufferSize > 0) {
- in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize));
+ input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize);
+// System.err.println("buffer size: " + bufferSize);
} else {
- in.setCharacterStream(new InputStreamReader(new FileInputStream(file)));
+ input = new InputStreamReader(new FileInputStream(file));
}
+ in.setCharacterStream(input);
handler.setupElementWriter(writer, fta, tupleIndex);
parser.parse(in);
+ input.close();
} catch (FileNotFoundException e) {
HyracksDataException hde = new VXQueryFileNotFoundException(e, file);
hde.setNodeId(nodeId);
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-xtest/pom.xml b/vxquery-xtest/pom.xml
index 320950b..762aa0b 100644
--- a/vxquery-xtest/pom.xml
+++ b/vxquery-xtest/pom.xml
@@ -67,11 +67,9 @@
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
- <version>2.5</version>
<executions>
<execution>
- <id>copy-scripts</id>
- <!-- here the phase you need -->
+ <id>copy-xtest-scripts</id>
<phase>package</phase>
<goals>
<goal>copy-resources</goal>
@@ -85,14 +83,23 @@
</resources>
</configuration>
</execution>
+ <execution>
+ <id>copy-xtest-conf</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/conf</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/conf</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
</executions>
</plugin>
- <!--
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-site-plugin</artifactId>
- </plugin>
- -->
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
----------------------------------------------------------------------
diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
index b9f1d70..6b3fb4b 100644
--- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
+++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
@@ -23,12 +23,9 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.List;
-import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import javax.xml.namespace.QName;
-
import org.apache.vxquery.compiler.CompilerControlBlock;
import org.apache.vxquery.compiler.algebricks.VXQueryGlobalDataFactory;
import org.apache.vxquery.context.DynamicContext;