You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/08/14 19:00:51 UTC

[10/16] git commit: Updated with little changes to improve the benchmark process.

Minor updates to improve the benchmark process.

 - Moved to the next version of Hyracks (0.2.13-SNAPSHOT).
 - Updated MRQL queries.
 - Benchmark frame and buffer sizes updated.
 - Conditionally send out benchmark finish e-mail.
 - Added CLI argument for ignoring queries when producing timing results.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/61b22a9c
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/61b22a9c
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/61b22a9c

Branch: refs/heads/master
Commit: 61b22a9c77930a8b69f308de9d001e743cd318eb
Parents: cee27a7
Author: Preston Carman <pr...@apache.org>
Authored: Sun Aug 3 15:34:21 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Sun Aug 3 15:34:21 2014 -0700

----------------------------------------------------------------------
 pom.xml                                         |  2 +-
 vxquery-benchmark/pom.xml                       | 21 +++++++++
 .../other_systems/mrql_gsn/q01.mrql             |  2 +-
 .../other_systems/mrql_gsn/q04.mrql             |  4 +-
 .../other_systems/mrql_gsn/q06.mrql             |  2 -
 .../other_systems/mrql_test/q00.mrql            |  7 +++
 .../other_systems/mrql_test/q01.mrql            |  5 ++
 .../other_systems/mrql_test/q02.mrql            |  8 ++++
 .../other_systems/mrql_test/q03.mrql            |  6 +++
 .../other_systems/mrql_test/q04.mrql            |  8 ++++
 .../other_systems/mrql_test/q05.mrql            | 11 +++++
 .../other_systems/mrql_test/q06.mrql            | 11 +++++
 .../other_systems/mrql_test/q07.mrql            | 10 ++++
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    | 23 +++++----
 .../scripts/run_benchmark_cluster.sh            | 28 ++++++-----
 .../noaa-ghcn-daily/scripts/run_group_test.sh   | 23 +++++----
 .../noaa-ghcn-daily/scripts/run_mrql_tests.sh   | 16 +++++--
 .../scripts/weather_benchmark.py                | 49 +++++++++-----------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |  4 +-
 .../scripts/weather_data_files.py               | 21 ++++++---
 .../src/main/resources/util/merge_xml_files.py  |  2 +-
 .../java/org/apache/vxquery/cli/VXQuery.java    | 24 +++++-----
 .../VXQueryCollectionOperatorDescriptor.java    |  2 +-
 .../metadata/VXQueryMetadataProvider.java       | 26 ++++++-----
 .../org/apache/vxquery/xmlparser/XMLParser.java | 18 +++++--
 vxquery-xtest/pom.xml                           | 25 ++++++----
 .../apache/vxquery/xtest/TestRunnerFactory.java |  3 --
 27 files changed, 247 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 752a633..cf17361 100644
--- a/pom.xml
+++ b/pom.xml
@@ -594,7 +594,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <hyracks.version>0.2.12-SNAPSHOT</hyracks.version>
+    <hyracks.version>0.2.13-SNAPSHOT</hyracks.version>
   </properties>
 
   <modules>

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/pom.xml b/vxquery-benchmark/pom.xml
index da981d7..e08da38 100644
--- a/vxquery-benchmark/pom.xml
+++ b/vxquery-benchmark/pom.xml
@@ -47,6 +47,27 @@
           </execution>          
         </executions>
       </plugin>
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>2.5</version>
+        <executions>
+          <execution>
+            <id>copy-scripts</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/appassembler/scripts</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>src/main/resources</directory>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
index 5e8de9b..1712cfe 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
@@ -1,5 +1,5 @@
 select (r)
 from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
 where text(r.dataType) = "AWND"
-    and toInt(text(r.value)) > 491.744
+    and toFloat(text(r.value)) > 491.744
 ;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
index 938f6d8..2929478 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
@@ -1,8 +1,8 @@
-select (r)
+select (sensors)
 from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
     stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
     l in stations.locationLabels
 where text(stations.id) = text(sensors.station) 
     and text(sensors.date) = "1976-07-04T00:00:00.000"
-    and text(l.displayName) = "WASHINGTON"
+    and text(l.displayName) = "Washington"
 ;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
index c4ab3da..bef3413 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
@@ -4,9 +4,7 @@ from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
     v in sensors.value,
     stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
     n in stations.displayName,
-    l in stations.locationLabels
 where text(stations.id) = text(sensors.station) 
     and toInt(substring(text(d), 0, 4)) = 2000
     and text(sensors.dataType) = "TMAX"
-    and text(l.displayName) = "WASHINGTON"
 ;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
new file mode 100644
index 0000000..2e606d8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
@@ -0,0 +1,7 @@
+select (r)
+from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+where text(r.station) = "GHCND:AS000000003"
+    and toInt(substring(text(r.date), 0, 4)) >= 2000
+    and toInt(substring(text(r.date), 5, 7)) = 3
+    and toInt(substring(text(r.date), 8, 10)) = 3
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
new file mode 100644
index 0000000..1e41c2c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
@@ -0,0 +1,5 @@
+select (r)
+from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+where text(r.dataType) = "AWND"
+    and toFloat(text(r.value)) > 491.744
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
new file mode 100644
index 0000000..818ad31
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
@@ -0,0 +1,8 @@
+sum(
+    select (toFloat(text(r.value)))
+    from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+    where text(r.station) = "GHCND:US000000002"
+        and toInt(substring(text(r.date), 0, 4)) = 2002
+        and text(r.dataType) = "PRCP"
+) / 10
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
new file mode 100644
index 0000000..68fa926
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
@@ -0,0 +1,6 @@
+max(
+    select (toFloat(text(r.value)))
+    from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+    where text(r.dataType) = "TMAX"
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
new file mode 100644
index 0000000..467d318
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
@@ -0,0 +1,8 @@
+select (sensors)
+from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+    stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+    l in stations.locationLabels
+where text(stations.id) = text(sensors.station) 
+    and text(sensors.date) = "2002-02-02T00:00:00.000"
+    and text(l.displayName) = "State 1"
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
new file mode 100644
index 0000000..c95d7d8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
@@ -0,0 +1,11 @@
+min(
+    select (toFloat(text(sensors.value)))
+    from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+        stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+        l in stations.locationLabels
+    where text(stations.id) = text(sensors.station) 
+        and toInt(substring(text(sensors.date), 0, 4)) = 2001
+        and text(sensors.dataType) = "TMIN"
+        and text(l.id) = "FIPS:US"
+) / 10
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
new file mode 100644
index 0000000..8989e48
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
@@ -0,0 +1,11 @@
+select (n, d, v)
+from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+    d in sensors.date,
+    v in sensors.value,
+    stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
+    n in stations.displayName,
+    l in stations.locationLabels
+where text(stations.id) = text(sensors.station) 
+    and toInt(substring(text(d), 0, 4)) = 2002
+    and text(sensors.dataType) = "TMAX"
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
new file mode 100644
index 0000000..ac28716
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
@@ -0,0 +1,10 @@
+avg(
+    select (toFloat(text(rtmax.value))-toFloat(text(rtmin.value)))
+    from rtmax in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
+        rtmin in source(xml, "sample_xml/nano_sensors.xml", {"data"})
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and text(rtmin.dataType) = "TMIN"
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index b82f0be..0852d86 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -26,7 +26,7 @@
 # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
 #
 REPEAT=5
-FRAME_SIZE=10000
+FRAME_SIZE=$((8*1024))
 BUFFER_SIZE=$((32*1024*1024))
 
 if [ -z "${1}" ]
@@ -46,15 +46,20 @@ do
         log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
         log_base_path=$(dirname ${j/queries/query_logs})
         mkdir -p ${log_base_path}
-        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
-        #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
         echo "Buffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-        #echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+        echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
     fi;
 done
 
-SUBJECT="Benchmark Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in folder ${1}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1}.
+    EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
old mode 100644
new mode 100755
index 6c19713..5c27266
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -25,8 +25,9 @@
 # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
 # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
 #
+CLUSTER="uci"
 REPEAT=5
-FRAME_SIZE=10000
+FRAME_SIZE=$((8*1024))
 BUFFER_SIZE=$((32*1024*1024))
 
 if [ -z "${1}" ]
@@ -43,7 +44,7 @@ fi
 
 # Run queries for the specified number of nodes.
 echo "Starting ${2} cluster nodes"
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a start
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
 
 # wait for cluster to finish setting up  
 sleep 5
@@ -63,19 +64,24 @@ do
             log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
             log_base_path=$(dirname ${j/queries/query_logs})
             mkdir -p ${log_base_path}
-            #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
-            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
             echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-            #echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+            echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
         fi;
     fi;
 done
     
 # Stop cluster.
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a stop
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
 
-SUBJECT="Benchmark Cluster Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in folder ${1} for a ${2} node cluster.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Cluster Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
+    EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
old mode 100644
new mode 100755
index 7bef3cb..d5b8dc5
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
@@ -19,26 +19,33 @@
 
 DATASET="dataset-hcn-d2"
 cluster_ip=${1}
+base_weather_folder=${2}
 
-for n in 4 
+for n in 7 6 5 3 4 2 1 0
 do
     #for t in "batch_scale_out" "speed_up"
     for t in "batch_scale_out"
     #for t in "speed_up"
     do 
-        for p in 0  
+        for p in 2 1 0 
         do 
             for c in 4
             do 
                 echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
-                sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh weather_data/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
+                sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
             done
         done
     done
 done
 
-SUBJECT="Benchmark Group Tests Finished"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all tests in the predefined group for ${DATASET}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Group Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in the predefined group for ${DATASET}.
+    EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
old mode 100644
new mode 100755
index 1fa58dd..a6788be
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
@@ -29,8 +29,14 @@ do
     time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; 
 done
 
-SUBJECT="MRQL Tests Finished (${DATASET})"
-EMAIL="ecarm002@ucr.edu"
-/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-Completed all MRQL tests on ${DATASET}.
-EOM
\ No newline at end of file
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="MRQL Tests Finished (${DATASET})"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all MRQL tests on ${DATASET}.
+    EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 3b0f9b3..c013d8e 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -92,19 +92,17 @@ class WeatherBenchmark:
             
     def print_local_partition_schemes(self, test):
         node_index = 0
-        virtual_partitions = get_local_virtual_partitions(self.partitions)
-        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
+        virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
         for p in self.partitions:
             scheme = self.get_local_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
         
     def print_cluster_partition_schemes(self, test):
         node_index = self.get_current_node_index()
-        virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
-        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
+        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
         for p in self.partitions:
             scheme = self.get_cluster_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
         
     def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
         print
@@ -128,7 +126,7 @@ class WeatherBenchmark:
 
     def get_local_partition_scheme(self, test, partition):
         scheme = []
-        virtual_partitions = get_local_virtual_partitions(self.partitions)
+        virtual_partitions = get_local_virtual_disk_partitions(self.partitions)
         data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
         link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
 
@@ -156,28 +154,27 @@ class WeatherBenchmark:
             return 
         
         scheme = []
-        local_virtual_partitions = get_local_virtual_partitions(self.partitions)
-        virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions)
-        virtual_partitions_per_disk = virtual_partitions / len(self.base_paths)
-        data_schemes = get_partition_scheme(node_index, virtual_partitions, self.base_paths)
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
+        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
 
         # Match link paths to real data paths.
         for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
             # Prep
             if test == "speed_up":
-                group_size = virtual_partitions_per_disk / (link_node + 1)
+                group_size = virtual_disk_partitions / (link_node + 1) / partition
             elif test == "batch_scale_out":
-                group_size = virtual_partitions_per_disk / len(self.nodes)
+                group_size = virtual_disk_partitions / len(self.nodes) / partition
             else:
                 print "Unknown test."
                 return
-            node_offset = group_size * (node_index * partition)
+            
+            node_offset = group_size * node_index * partition
             node_offset += group_size * link_index
             has_data = True
             if link_node < node_index:
                 has_data = False
-                
+    
             # Make links
             for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
                 if has_data and data_disk == link_disk \
@@ -295,7 +292,7 @@ class WeatherBenchmark:
                 prepare_path(query_path, reset)
             
                 # Copy query files.
-                partition_paths = get_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+                partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
                 self.copy_and_replace_query(query_path, partition_paths)
 
     def copy_local_query_files(self, test, reset):
@@ -313,7 +310,7 @@ class WeatherBenchmark:
             prepare_path(query_path, reset)
     
             # Copy query files.
-            partition_paths = get_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
+            partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
             self.copy_and_replace_query(query_path, partition_paths)
 
     def copy_and_replace_query(self, query_path, replacement_list):
@@ -339,15 +336,15 @@ class WeatherBenchmark:
                 for line in fileinput.input(query_path + query_file, True):
                     sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
                     
-    def get_number_of_slices(self):
+    def get_number_of_slices_per_disk(self):
         if len(self.dataset.get_tests()) == 0:
             print "No test has been defined in config file."
         else:
             for test in self.dataset.get_tests():
                 if test in self.BENCHMARK_LOCAL_TESTS:
-                    return get_local_virtual_partitions(self.partitions)
+                    return get_local_virtual_disk_partitions(self.partitions)
                 elif test in self.BENCHMARK_CLUSTER_TESTS:
-                    return get_cluster_virtual_partitions(self.nodes, self.partitions)
+                    return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
                 else:
                     print "Unknown test."
                     exit()
@@ -355,7 +352,7 @@ class WeatherBenchmark:
 def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):        
     link_paths = []
     for n in range(0, nodes):
-        new_link_path = get_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
+        new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
         link_paths.extend(new_link_path)
     return link_paths
 
@@ -368,12 +365,12 @@ def get_local_query_folder(disks, partitions):
 def get_cluster_query_path(base_paths, test, partition, nodes):        
     return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/"
 
-def get_cluster_virtual_partitions(nodes, partitions):
-    vp = get_local_virtual_partitions(partitions)
-    vn = calculate_partitions(range(len(nodes), 0, -1))
+def get_cluster_virtual_disk_partitions(nodes, partitions):
+    vp = get_local_virtual_disk_partitions(partitions)
+    vn = calculate_partitions(range(1, len(nodes)+1, 1))
     return vp * vn
 
-def get_local_virtual_partitions(partitions):
+def get_local_virtual_disk_partitions(partitions):
     return calculate_partitions(partitions)
 
 def calculate_partitions(list):

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 8ac6d17..eeae25c 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -53,7 +53,7 @@ def main(argv):
             print '    -a        Append the results to the progress file.'
             print '    -f (str)  The file name of a specific station to process.'
             print '              * Helpful when testing a single stations XML file output.'
-            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, inventory, statistics).'
+            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).'
             print '    -m (int)  Limits the number of files created for each station.'
             print '              * Helpful when testing to make sure all elements are supported for each station.'
             print '              Alternate form: --max_station_files=(int)'
@@ -203,7 +203,7 @@ def main(argv):
         benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())
         
         if section in ("all", "partition", "partition_scheme"):
-            slices = benchmark.get_number_of_slices()
+            slices = benchmark.get_number_of_slices_per_disk()
             print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
             data.reset()
             if section == "partition_scheme":

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index b39f934..4877120 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -376,19 +376,28 @@ PARTITION_INDEX = 3
 PARTITION_INDEX_PATH = 4
 PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
             
-def get_partition_paths(node_id, partitions, base_paths, key="partitions"):        
+def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
     partition_paths = []
     for scheme in get_partition_scheme(node_id, partitions, base_paths, key):
         partition_paths.append(scheme[PARTITION_INDEX_PATH])
     return partition_paths
 
-def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):        
-    partition_scheme = []
+def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
     partitions_per_disk = virtual_partitions / len(base_paths)
-    for i in range(0, partitions_per_disk):
+    return get_disk_partition_scheme(node_id, partitions_per_disk, base_paths, key)
+
+def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
+    partition_paths = []
+    for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key):
+        partition_paths.append(scheme[PARTITION_INDEX_PATH])
+    return partition_paths
+
+def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
+    partition_scheme = []
+    for i in range(0, virtual_disk_partitions):
         for j in range(0, len(base_paths)):
-            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions_per_disk, i) + "/"
-            partition_scheme.append((node_id, j, partitions_per_disk, i, new_partition_path))
+            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
+            partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
     return partition_scheme
 
 def get_partition_folder(disks, partitions, index):        

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
index 8a6952b..9238a19 100644
--- a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -43,7 +43,7 @@ def main(argv):
     except getopt.GetoptError:
         print 'The file options for list_xml_files.py were not correctly specified.'
         print 'To see a full list of options try:'
-        print '  $ python list_xml_files.py -h'
+        print '  $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors'
         sys.exit(2)
     for opt, arg in opts:
         if opt == '-h':

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index a0a5c5a..c513a72 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -83,7 +83,6 @@ public class VXQuery {
     private static long sumSquaredTiming;
     private static long minTiming = Long.MAX_VALUE;
     private static long maxTiming = Long.MIN_VALUE;
-    private static byte TIMING_QUERIES_TO_IGNORE = 2;
 
     /**
      * Constructor to use command line options passed.
@@ -124,9 +123,9 @@ public class VXQuery {
         if (opts.timing) {
             Date end = new Date();
             timingMessage("Execution time: " + (end.getTime() - start.getTime()) + " ms");
-            if (opts.repeatExec > TIMING_QUERIES_TO_IGNORE) {
-                long mean = sumTiming / (opts.repeatExec - TIMING_QUERIES_TO_IGNORE);
-                double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Byte(TIMING_QUERIES_TO_IGNORE).doubleValue()) - mean * mean);
+            if (opts.repeatExec > opts.timingIgnoreQueries) {
+                long mean = sumTiming / (opts.repeatExec - opts.timingIgnoreQueries);
+                double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Integer(opts.timingIgnoreQueries).doubleValue()) - mean * mean);
                 timingMessage("Average execution time: " + mean + " ms");
                 timingMessage("Standard deviation: " + String.format( "%.4f", sd));
                 timingMessage("Coefficient of variation: " + String.format( "%.4f", (sd / mean)));
@@ -290,7 +289,7 @@ public class VXQuery {
                 if (opts.timing) {
                     end = new Date();
                     long currentRun = end.getTime() - start.getTime();
-                    if ((i + 1) > TIMING_QUERIES_TO_IGNORE) {
+                    if ((i + 1) > opts.timingIgnoreQueries) {
                         sumTiming += currentRun;
                         sumSquaredTiming += currentRun * currentRun;
                         if (currentRun < minTiming) {
@@ -434,22 +433,22 @@ public class VXQuery {
      */
     private static class CmdLineOptions {
         @Option(name = "-available-processors", usage = "Number of available processors. (default java's available processors)")
-        public int availableProcessors = -1;
+        private int availableProcessors = -1;
 
         @Option(name = "-client-net-ip-address", usage = "IP Address of the ClusterController")
-        public String clientNetIpAddress = null;
+        private String clientNetIpAddress = null;
 
         @Option(name = "-client-net-port", usage = "Port of the ClusterController (default 1098)")
-        public int clientNetPort = 1098;
+        private int clientNetPort = 1098;
 
         @Option(name = "-local-node-controllers", usage = "Number of local node controllers (default 1)")
-        public int localNodeControllers = 1;
+        private int localNodeControllers = 1;
 
         @Option(name = "-frame-size", usage = "Frame size in bytes. (default 65536)")
-        public int frameSize = 65536;
+        private int frameSize = 65536;
 
         @Option(name = "-buffer-size", usage = "Disk read buffer size in bytes.")
-        public int bufferSize = -1;
+        private int bufferSize = -1;
 
         @Option(name = "-O", usage = "Optimization Level. Default: Full Optimization")
         private int optimizationLevel = Integer.MAX_VALUE;
@@ -478,6 +477,9 @@ public class VXQuery {
         @Option(name = "-timing", usage = "Produce timing information")
         private boolean timing;
 
+        @Option(name = "-timing-ignore-queries", usage = "Ignore the first X number of queries.")
+        private int timingIgnoreQueries = 2;
+
         @Option(name = "-x", usage = "Bind an external variable")
         private Map<String, String> bindings = new HashMap<String, String>();
 

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
index 89968d5..8fdd1ec 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
@@ -105,7 +105,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                         }
                     }
                 } else {
-                    throw new HyracksDataException("Invalid directory parameter ("
+                    throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                             + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                 }
             }

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
index 40a02ae..bc92ffc 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
@@ -68,8 +68,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource,
             List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
-            IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context,
-            JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
+            List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
+            IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
+            throws AlgebricksException {
         VXQueryCollectionDataSource ds = (VXQueryCollectionDataSource) dataSource;
         if (sourceFileMap != null) {
             final int len = ds.getPartitions().length;
@@ -123,23 +124,24 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(
             IDataSource<String> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys,
-            LogicalVariable payLoadVar, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
+            LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, JobGenContext context,
+            JobSpecification jobSpec) throws AlgebricksException {
         return null;
     }
 
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<String> dataSource,
             IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
-            LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec)
-            throws AlgebricksException {
+            LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc,
+            JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
         return null;
     }
 
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(IDataSource<String> dataSource,
             IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
-            LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec)
-            throws AlgebricksException {
+            LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc,
+            JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
         return null;
     }
 
@@ -147,8 +149,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
             IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema,
             IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
-            List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc,
-            JobGenContext context, JobSpecification spec) throws AlgebricksException {
+            List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
+            ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+            throws AlgebricksException {
         return null;
     }
 
@@ -156,8 +159,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
             IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema,
             IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
-            List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc,
-            JobGenContext context, JobSpecification spec) throws AlgebricksException {
+            List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
+            ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+            throws AlgebricksException {
         return null;
     }
 

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
index 1d979b5..81b9191 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
@@ -20,6 +20,7 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.Reader;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
@@ -53,7 +54,7 @@ public class XMLParser {
     public XMLParser(boolean attachTypes, ITreeNodeIdProvider idProvider, String nodeId, ByteBuffer frame,
             FrameTupleAppender appender, List<Integer> childSeq, StaticContext staticContext)
             throws HyracksDataException {
-        bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size"));
+        bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size", "-1"));
         this.nodeId = nodeId;
         try {
             parser = XMLReaderFactory.createXMLReader();
@@ -76,13 +77,16 @@ public class XMLParser {
 
     public void parseDocument(File file, ArrayBackedValueStorage abvs) throws HyracksDataException {
         try {
+            Reader input;
             if (bufferSize > 0) {
-                in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize));
+                input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize);
             } else {
-                in.setCharacterStream(new InputStreamReader(new FileInputStream(file)));
+                input = new InputStreamReader(new FileInputStream(file));
             }
+            in.setCharacterStream(input);
             parser.parse(in);
             handler.writeDocument(abvs);
+            input.close();
         } catch (FileNotFoundException e) {
             HyracksDataException hde = new VXQueryFileNotFoundException(e, file);
             hde.setNodeId(nodeId);
@@ -101,13 +105,17 @@ public class XMLParser {
     public void parseElements(File file, IFrameWriter writer, FrameTupleAccessor fta, int tupleIndex)
             throws HyracksDataException {
         try {
+            Reader input;
             if (bufferSize > 0) {
-                in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize));
+                input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize);
+//                System.err.println("buffer size: " + bufferSize);
             } else {
-                in.setCharacterStream(new InputStreamReader(new FileInputStream(file)));
+                input = new InputStreamReader(new FileInputStream(file));
             }
+            in.setCharacterStream(input);
             handler.setupElementWriter(writer, fta, tupleIndex);
             parser.parse(in);
+            input.close();
         } catch (FileNotFoundException e) {
             HyracksDataException hde = new VXQueryFileNotFoundException(e, file);
             hde.setNodeId(nodeId);

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-xtest/pom.xml b/vxquery-xtest/pom.xml
index 320950b..762aa0b 100644
--- a/vxquery-xtest/pom.xml
+++ b/vxquery-xtest/pom.xml
@@ -67,11 +67,9 @@
       </plugin>
       <plugin>
         <artifactId>maven-resources-plugin</artifactId>
-        <version>2.5</version>
         <executions>
           <execution>
-            <id>copy-scripts</id>
-            <!-- here the phase you need -->
+            <id>copy-xtest-scripts</id>
             <phase>package</phase>
             <goals>
               <goal>copy-resources</goal>
@@ -85,14 +83,23 @@
               </resources>
             </configuration>
           </execution>
+          <execution>
+            <id>copy-xtest-conf</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/appassembler/conf</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>src/main/resources/conf</directory>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
-      <!--
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-site-plugin</artifactId>
-      </plugin>
-      -->
       <plugin>
         <artifactId>maven-antrun-plugin</artifactId>
         <executions>

http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
----------------------------------------------------------------------
diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
index b9f1d70..6b3fb4b 100644
--- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
+++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java
@@ -23,12 +23,9 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.List;
-import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import javax.xml.namespace.QName;
-
 import org.apache.vxquery.compiler.CompilerControlBlock;
 import org.apache.vxquery.compiler.algebricks.VXQueryGlobalDataFactory;
 import org.apache.vxquery.context.DynamicContext;