You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/10/21 21:36:14 UTC
[13/14] git commit: Continuing to tweak the MRQL scripts.
Continuing to tweak the MRQL scripts.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/9e0133ad
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/9e0133ad
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/9e0133ad
Branch: refs/heads/master
Commit: 9e0133adc899f580d89a47765da92b53c6d3ee17
Parents: 7f06298
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 11:08:28 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 11:08:28 2014 -0700
----------------------------------------------------------------------
.../mrql_scripts/load_node_file.sh | 29 +++++---
.../mrql_scripts/run_group_test.sh | 25 ++++---
.../mrql_scripts/run_mrql_tests.sh | 2 +-
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 2 +-
.../RemoveUnusedSortDistinctNodesRule.java | 19 +-----
.../rules/util/CardinalityRuleToolbox.java | 13 ----
.../rewriter/rules/util/OperatorToolbox.java | 72 --------------------
.../vxquery/functions/builtin-functions.xml | 1 +
.../xmlquery/query/XMLQueryCompiler.java | 2 +-
.../src/main/resources/conf/cluster_example.xml | 12 ++--
.../src/main/resources/conf/local.xml | 18 ++---
11 files changed, 57 insertions(+), 138 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index ead0902..206c38b 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -19,20 +19,29 @@
if [ -z "${1}" ]
then
- echo "Please enter the node number."
+ echo "Please enter the data set as the first argument."
exit
fi
-echo "Loading node ${1} data file in to cluster."
+if [ -z "${2}" ]
+then
+ echo "Please enter the node number as the second argument."
+ exit
+fi
+
+DATASET=${1}
+NODES=${2}
+
+echo "Loading ${NODES} node ${DATASET} data file in to cluster."
# Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/all_sensors_${1}.xml
+cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors
+rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml
# Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/all_stations_${1}.xml
+cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations
+rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index f42a451..0208beb 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -19,11 +19,18 @@
if [ -z "${1}" ]
then
- echo "Please enter the number of nodes."
+ echo "Please enter the data set as the first argument."
exit
fi
-NODES=${1}
+if [ -z "${2}" ]
+then
+ echo "Please enter the node number as the second argument."
+ exit
+fi
+
+DATASET=${1}
+NODES=${2}
REPEAT=1
# Start Hadoop
@@ -32,24 +39,26 @@ sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
sleep 10
# Prepare hadoop file system
-hadoop fs -mkdir all
+hadoop fs -mkdir ${DATASET}
hadoop fs -ls
-hadoop fs -mkdir all/sensors
-hadoop fs -mkdir all/stations
-hadoop fs -ls all
+hadoop fs -mkdir ${DATASET}/sensors
+hadoop fs -mkdir ${DATASET}/stations
+hadoop fs -ls ${DATASET}
+
+hadoop balancer
# Upload test data
COUNTER=0
while [ ${COUNTER} -lt ${NODES} ];
do
- sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+ sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER}
let COUNTER=COUNTER+1
done
# Start test
-sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET}
# Stop Hadoop
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 1e512e1..d6bc9ab 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -22,7 +22,7 @@
NODES=${2}
REPEAT=${3}
-DATASET="all"
+DATASET=${4}
# Make log folder
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index 88339bd..5146586 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -52,7 +52,7 @@ do
echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
- fi;
+ fi;
done
if which programname >/dev/null;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
index 43d636b..43e2603 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
@@ -384,8 +384,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
// Find the last operator to set a variable and call this function again.
SubplanOperator subplan = (SubplanOperator) op;
for (int index = 0; index < subplan.getNestedPlans().size(); index++) {
- AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index)
- .getRoots().get(0).getValue();
+ AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans()
+ .get(index).getRoots().get(0).getValue();
updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables,
vxqueryContext);
}
@@ -437,21 +437,6 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
break;
// The following operators' analysis has not yet been implemented.
- case CLUSTER:
- case DISTINCT:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case LIMIT:
- case PARTITIONINGSPLIT:
- case REPLICATE:
- case RUNNINGAGGREGATE:
- case SCRIPT:
- case SINK:
- case UNIONALL:
- case UNNEST_MAP:
- case UPDATE:
default:
throw new RuntimeException("Operator (" + op.getOperatorTag()
+ ") has not been implemented in rewrite rule.");
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
index a586c06..5b4594e 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
@@ -81,19 +81,6 @@ public class CardinalityRuleToolbox {
break;
// The following operators' analysis has not yet been implemented.
- case CLUSTER:
- case DISTINCT:
- case EXTENSION_OPERATOR:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case PARTITIONINGSPLIT:
- case REPLICATE:
- case RUNNINGAGGREGATE:
- case SCRIPT:
- case SINK:
- case UNIONALL:
- case UNNEST_MAP:
- case UPDATE:
default:
throw new RuntimeException("Operator (" + op.getOperatorTag()
+ ") has not been implemented in rewrite rule.");
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
index da85f2d..725a082 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
@@ -84,29 +84,6 @@ public class OperatorToolbox {
AbstractUnnestOperator auo = (AbstractUnnestOperator) op;
result.add(auo.getExpressionRef());
break;
- case CLUSTER:
- case DATASOURCESCAN:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EMPTYTUPLESOURCE:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case LIMIT:
- case NESTEDTUPLESOURCE:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// TODO Not yet implemented.
break;
@@ -129,32 +106,6 @@ public class OperatorToolbox {
case UNNEST_MAP:
AbstractUnnestOperator ano = (AbstractUnnestOperator) op;
return ano.getExpressionRef();
- case CLUSTER:
- case DATASOURCESCAN:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EMPTYTUPLESOURCE:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INNERJOIN:
- case INSERT_DELETE:
- case LEFTOUTERJOIN:
- case LIMIT:
- case NESTEDTUPLESOURCE:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SELECT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// TODO Not yet implemented.
break;
@@ -196,29 +147,6 @@ public class OperatorToolbox {
case EMPTYTUPLESOURCE:
case NESTEDTUPLESOURCE:
return null;
- case CLUSTER:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INNERJOIN:
- case INSERT_DELETE:
- case LEFTOUTERJOIN:
- case LIMIT:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SELECT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// Skip operators and go look at input.
for (Mutable<ILogicalOperator> input : op.getInputs()) {
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index b439a83..38f03a4 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -125,6 +125,7 @@
<function name="fn:collection">
<param name="arg" type="xs:string?"/>
<return type="node()*"/>
+ <!-- Collection operator is added during the rewrite rules phase. -->
</function>
<!-- fn:compare($comparand1 as xs:string?, $comparand2 as xs:string?) as xs:integer? -->
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 966bd87..3cdc492 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -113,7 +113,7 @@ public class XMLQueryCompiler {
});
builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize);
if (joinHashSize > 0) {
- builder.getPhysicalOptimizationConfig().setInMemHashJoinTableSize(joinHashSize);
+ builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize);
}
builder.setLogicalRewrites(buildDefaultLogicalRewrites());
builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/cluster_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/cluster_example.xml b/vxquery-server/src/main/resources/conf/cluster_example.xml
index 41d642d..18d9173 100644
--- a/vxquery-server/src/main/resources/conf/cluster_example.xml
+++ b/vxquery-server/src/main/resources/conf/cluster_example.xml
@@ -15,13 +15,13 @@
limitations under the License.
-->
<cluster xmlns="cluster">
- <name>local</name>
+ <name>local</name>
<username>joe</username>
- <master_node>
- <id>master</id>
- <client_ip>128.195.52.177</client_ip>
- <cluster_ip>192.168.100.0</cluster_ip>
- </master_node>
+ <master_node>
+ <id>master</id>
+ <client_ip>128.195.52.177</client_ip>
+ <cluster_ip>192.168.100.0</cluster_ip>
+ </master_node>
<node>
<id>nodeA</id>
<cluster_ip>192.168.100.1</cluster_ip>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/local.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/local.xml b/vxquery-server/src/main/resources/conf/local.xml
index 5c27e11..4a48620 100644
--- a/vxquery-server/src/main/resources/conf/local.xml
+++ b/vxquery-server/src/main/resources/conf/local.xml
@@ -15,14 +15,14 @@
limitations under the License.
-->
<cluster xmlns="cluster">
- <name>local</name>
- <master_node>
- <id>master</id>
- <client_ip>127.0.0.1</client_ip>
- <cluster_ip>127.0.0.1</cluster_ip>
- </master_node>
- <node>
- <id>node1</id>
- <cluster_ip>127.0.0.1</cluster_ip>
+ <name>local</name>
+ <master_node>
+ <id>master</id>
+ <client_ip>127.0.0.1</client_ip>
+ <cluster_ip>127.0.0.1</cluster_ip>
+ </master_node>
+ <node>
+ <id>node1</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
</node>
</cluster>