You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/10/21 21:36:14 UTC

[13/14] git commit: Continuing to tweak the MRQL scripts.

Continuing to tweak the MRQL scripts.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/9e0133ad
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/9e0133ad
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/9e0133ad

Branch: refs/heads/master
Commit: 9e0133adc899f580d89a47765da92b53c6d3ee17
Parents: 7f06298
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 11:08:28 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 11:08:28 2014 -0700

----------------------------------------------------------------------
 .../mrql_scripts/load_node_file.sh              | 29 +++++---
 .../mrql_scripts/run_group_test.sh              | 25 ++++---
 .../mrql_scripts/run_mrql_tests.sh              |  2 +-
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |  2 +-
 .../RemoveUnusedSortDistinctNodesRule.java      | 19 +-----
 .../rules/util/CardinalityRuleToolbox.java      | 13 ----
 .../rewriter/rules/util/OperatorToolbox.java    | 72 --------------------
 .../vxquery/functions/builtin-functions.xml     |  1 +
 .../xmlquery/query/XMLQueryCompiler.java        |  2 +-
 .../src/main/resources/conf/cluster_example.xml | 12 ++--
 .../src/main/resources/conf/local.xml           | 18 ++---
 11 files changed, 57 insertions(+), 138 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index ead0902..206c38b 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -19,20 +19,29 @@
 
 if [ -z "${1}" ]
 then
-    echo "Please enter the node number."
+    echo "Please enter the data set as the first argument."
     exit
 fi
 
-echo "Loading node ${1} data file in to cluster."
+if [ -z "${2}" ]
+then
+    echo "Please enter the node number as the second argument."
+    exit
+fi
+
+DATASET=${1}
+NODES=${2}
+
+echo "Loading ${NODES} node ${DATASET} data file in to cluster."
 
 # Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/all_sensors_${1}.xml
+cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors
+rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml
 
 # Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/all_stations_${1}.xml
+cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations
+rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index f42a451..0208beb 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -19,11 +19,18 @@
 
 if [ -z "${1}" ]
 then
-    echo "Please enter the number of nodes."
+    echo "Please enter the data set as the first argument."
     exit
 fi
 
-NODES=${1}
+if [ -z "${2}" ]
+then
+    echo "Please enter the node number as the second argument."
+    exit
+fi
+
+DATASET=${1}
+NODES=${2}
 REPEAT=1
 
 # Start Hadoop
@@ -32,24 +39,26 @@ sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
 sleep 10
 
 # Prepare hadoop file system
-hadoop fs -mkdir all
+hadoop fs -mkdir ${DATASET}
 hadoop fs -ls 
-hadoop fs -mkdir all/sensors
-hadoop fs -mkdir all/stations
-hadoop fs -ls all
+hadoop fs -mkdir ${DATASET}/sensors
+hadoop fs -mkdir ${DATASET}/stations
+hadoop fs -ls ${DATASET}
+
+hadoop balancer
 
 
 # Upload test data
 COUNTER=0
 while [ ${COUNTER} -lt ${NODES} ];
 do
-    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER}
     let COUNTER=COUNTER+1 
 done
 
 
 # Start test
-sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET}
 
 
 # Stop Hadoop

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 1e512e1..d6bc9ab 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -22,7 +22,7 @@
 
 NODES=${2}
 REPEAT=${3}
-DATASET="all"
+DATASET=${4}
 
 
 # Make log folder

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index 88339bd..5146586 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -52,7 +52,7 @@ do
         echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
         echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
         echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
-        fi;
+    fi;
 done
 
 if which programname >/dev/null;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
index 43d636b..43e2603 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
@@ -384,8 +384,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 // Find the last operator to set a variable and call this function again.
                 SubplanOperator subplan = (SubplanOperator) op;
                 for (int index = 0; index < subplan.getNestedPlans().size(); index++) {
-                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index)
-                            .getRoots().get(0).getValue();
+                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans()
+                            .get(index).getRoots().get(0).getValue();
                     updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables,
                             vxqueryContext);
                 }
@@ -437,21 +437,6 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 break;
 
             // The following operators' analysis has not yet been implemented.
-            case CLUSTER:
-            case DISTINCT:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case LIMIT:
-            case PARTITIONINGSPLIT:
-            case REPLICATE:
-            case RUNNINGAGGREGATE:
-            case SCRIPT:
-            case SINK:
-            case UNIONALL:
-            case UNNEST_MAP:
-            case UPDATE:
             default:
                 throw new RuntimeException("Operator (" + op.getOperatorTag()
                         + ") has not been implemented in rewrite rule.");

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
index a586c06..5b4594e 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
@@ -81,19 +81,6 @@ public class CardinalityRuleToolbox {
                 break;
 
             // The following operators' analysis has not yet been implemented.
-            case CLUSTER:
-            case DISTINCT:
-            case EXTENSION_OPERATOR:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case PARTITIONINGSPLIT:
-            case REPLICATE:
-            case RUNNINGAGGREGATE:
-            case SCRIPT:
-            case SINK:
-            case UNIONALL:
-            case UNNEST_MAP:
-            case UPDATE:
             default:
                 throw new RuntimeException("Operator (" + op.getOperatorTag()
                         + ") has not been implemented in rewrite rule.");

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
index da85f2d..725a082 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
@@ -84,29 +84,6 @@ public class OperatorToolbox {
                 AbstractUnnestOperator auo = (AbstractUnnestOperator) op;
                 result.add(auo.getExpressionRef());
                 break;
-            case CLUSTER:
-            case DATASOURCESCAN:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EMPTYTUPLESOURCE:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case LIMIT:
-            case NESTEDTUPLESOURCE:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // TODO Not yet implemented.
                 break;
@@ -129,32 +106,6 @@ public class OperatorToolbox {
             case UNNEST_MAP:
                 AbstractUnnestOperator ano = (AbstractUnnestOperator) op;
                 return ano.getExpressionRef();
-            case CLUSTER:
-            case DATASOURCESCAN:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EMPTYTUPLESOURCE:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INNERJOIN:
-            case INSERT_DELETE:
-            case LEFTOUTERJOIN:
-            case LIMIT:
-            case NESTEDTUPLESOURCE:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SELECT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // TODO Not yet implemented.
                 break;
@@ -196,29 +147,6 @@ public class OperatorToolbox {
             case EMPTYTUPLESOURCE:
             case NESTEDTUPLESOURCE:
                 return null;
-            case CLUSTER:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INNERJOIN:
-            case INSERT_DELETE:
-            case LEFTOUTERJOIN:
-            case LIMIT:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SELECT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // Skip operators and go look at input.
                 for (Mutable<ILogicalOperator> input : op.getInputs()) {

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index b439a83..38f03a4 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -125,6 +125,7 @@
     <function name="fn:collection">
         <param name="arg" type="xs:string?"/>
         <return type="node()*"/>
+        <!-- Collection operator is added during the rewrite rules phase.  -->
     </function>
     
     <!-- fn:compare($comparand1  as xs:string?, $comparand2 as xs:string?)  as xs:integer?  -->

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 966bd87..3cdc492 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -113,7 +113,7 @@ public class XMLQueryCompiler {
                 });
         builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize);
         if (joinHashSize > 0) {
-            builder.getPhysicalOptimizationConfig().setInMemHashJoinTableSize(joinHashSize);
+            builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize);
         }
         builder.setLogicalRewrites(buildDefaultLogicalRewrites());
         builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/cluster_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/cluster_example.xml b/vxquery-server/src/main/resources/conf/cluster_example.xml
index 41d642d..18d9173 100644
--- a/vxquery-server/src/main/resources/conf/cluster_example.xml
+++ b/vxquery-server/src/main/resources/conf/cluster_example.xml
@@ -15,13 +15,13 @@
   limitations under the License.
 -->
 <cluster xmlns="cluster">
-	<name>local</name>
+    <name>local</name>
     <username>joe</username>
-	<master_node>
-		<id>master</id>
-		<client_ip>128.195.52.177</client_ip>
-    	<cluster_ip>192.168.100.0</cluster_ip>
-	</master_node>
+    <master_node>
+        <id>master</id>
+        <client_ip>128.195.52.177</client_ip>
+        <cluster_ip>192.168.100.0</cluster_ip>
+    </master_node>
     <node>
         <id>nodeA</id>
         <cluster_ip>192.168.100.1</cluster_ip>

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/local.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/local.xml b/vxquery-server/src/main/resources/conf/local.xml
index 5c27e11..4a48620 100644
--- a/vxquery-server/src/main/resources/conf/local.xml
+++ b/vxquery-server/src/main/resources/conf/local.xml
@@ -15,14 +15,14 @@
   limitations under the License.
 -->
 <cluster xmlns="cluster">
-	<name>local</name>
-	<master_node>
-		<id>master</id>
-		<client_ip>127.0.0.1</client_ip>
-    	<cluster_ip>127.0.0.1</cluster_ip>
-	</master_node>
-	<node>
-		<id>node1</id>
-		<cluster_ip>127.0.0.1</cluster_ip>
+    <name>local</name>
+    <master_node>
+        <id>master</id>
+        <client_ip>127.0.0.1</client_ip>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </master_node>
+    <node>
+        <id>node1</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
     </node>
 </cluster>