You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mrql.apache.org by fe...@apache.org on 2013/10/08 01:45:54 UTC
git commit: MRQL-20: Fix maven problems to prepare a new release candidate

Updated Branches:
  refs/heads/master 0c9cc0f8f -> f91ce385d


MRQL-20: Fix maven problems to prepare a new release candidate


Project: http://git-wip-us.apache.org/repos/asf/incubator-mrql/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mrql/commit/f91ce385
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mrql/tree/f91ce385
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mrql/diff/f91ce385

Branch: refs/heads/master
Commit: f91ce385de592c7b1b66f01332eea4fe040d716c
Parents: 0c9cc0f
Author: fegaras <fe...@cse.uta.edu>
Authored: Mon Oct 7 18:45:27 2013 -0500
Committer: fegaras <fe...@cse.uta.edu>
Committed: Mon Oct 7 18:45:27 2013 -0500

----------------------------------------------------------------------
 .gitignore                               |  6 +-
 BSP/pom.xml                              |  6 +-
 Gen/pom.xml                              |  2 +-
 MapReduce/pom.xml                        |  6 +-
 README                                   | 22 ++++++-
 RELEASE_NOTES                            | 24 ++++++-
 Spark/pom.xml                            |  8 +--
 conf/mrql-env.sh                         |  8 +--
 core/pom.xml                             |  2 +-
 dist/pom.xml                             |  2 +-
 pom.xml                                  |  5 +-
 src/main/java/core/DataSource.java       |  2 +-
 src/main/java/core/Interpreter.gen       |  6 --
 src/main/java/core/JSON.cup              |  6 +-
 src/main/java/core/JSON.lex              |  2 +-
 src/main/java/core/JsonFormatParser.java | 91 +++++++++++++++++++++++++++
 src/main/java/core/JsonParser.java       | 90 --------------------------
 src/main/java/core/JsonSplitter.java     | 28 +++++++--
 src/main/java/core/Main.java             |  2 +-
 src/main/java/core/PlanGeneration.gen    |  8 +--
 src/main/java/core/TypeInference.gen     | 28 ++++-----
 src/main/java/core/XMLParser.java        |  2 +-
 src/main/java/core/XMLSplitter.java      |  4 +-
 src/main/java/spark/MR_rdd.java          |  3 +-
 src/main/java/spark/RDDDataSource.java   |  2 +-
 src/main/java/spark/SparkEvaluator.gen   | 54 +++++++++++-----
 src/site/xdoc/getting_started.xml        | 20 +++---
 tests/queries/joins_1.mrql               |  6 ++
 28 files changed, 256 insertions(+), 189 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 1061b15..297a9e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,3 @@
 *~
 tmp/*
-classes/*
-mrql/*
-mrql-bsp.jar
-mrql-spark.jar
-mrql.jar
+lib/*

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/BSP/pom.xml
----------------------------------------------------------------------
diff --git a/BSP/pom.xml b/BSP/pom.xml
index 55f99a2..14d6680 100644
--- a/BSP/pom.xml
+++ b/BSP/pom.xml
@@ -29,7 +29,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
   
   <dependencies>
@@ -127,12 +127,12 @@
 	  </execution>
           <execution>
 	    <id>validate</id>
-            <phase>verify</phase>
+            <phase>test</phase>
 	    <goals>
 	      <goal>run</goal>
 	    </goals>
 	    <configuration>
-	      <target name="validate_hama" description="Validate all test queries on Apache Hama">
+	      <target name="validate_hama" if="tests" description="Validate all test queries on Apache Hama">
 		<property name="runtime_classpath" refid="maven.runtime.classpath" />
 		<echo message="Evaluating test queries in memory (BSP mode):" />
 		<java classname="org.apache.mrql.Test" classpath="${runtime_classpath}" dir=".." fork="yes">

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/Gen/pom.xml
----------------------------------------------------------------------
diff --git a/Gen/pom.xml b/Gen/pom.xml
index bb90718..3a423e4 100644
--- a/Gen/pom.xml
+++ b/Gen/pom.xml
@@ -29,7 +29,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
 
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/MapReduce/pom.xml
----------------------------------------------------------------------
diff --git a/MapReduce/pom.xml b/MapReduce/pom.xml
index 51d51a2..0ae7d10 100644
--- a/MapReduce/pom.xml
+++ b/MapReduce/pom.xml
@@ -29,7 +29,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
   
   <dependencies>
@@ -130,12 +130,12 @@
 	  </execution>
           <execution>
 	    <id>validate</id>
-            <phase>verify</phase>
+            <phase>test</phase>
 	    <goals>
 	      <goal>run</goal>
 	    </goals>
 	    <configuration>
-	      <target name="validate_hadoop" description="Validate all test queries on Apache Hadoop">
+	      <target name="validate_hadoop" if="tests" description="Validate all test queries on Apache Hadoop">
 		<property name="runtime_classpath" refid="maven.runtime.classpath" />
 		<echo message="Evaluating test queries in memory (Map-Reduce mode):" />
 		<java classname="org.apache.mrql.Test" classpath="${runtime_classpath}" dir=".." fork="yes">

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/README
----------------------------------------------------------------------
diff --git a/README b/README
index a5fff46..766764b 100644
--- a/README
+++ b/README
@@ -1,7 +1,27 @@
+***************************************************************************
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+***************************************************************************
+
 Apache MRQL 0.9.0
 =================
 
-MRQL (pronounced miracle) is a query processing and optimization
+Apache MRQL (pronounced miracle) is a query processing and optimization
 system for large-scale, distributed data analysis. MRQL (the MapReduce
 Query Language) is an SQL-like query language for large-scale data
 analysis on a cluster of computers. The MRQL query processing system

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/RELEASE_NOTES
----------------------------------------------------------------------
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 3ccb2ec..56eeefa 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,4 +1,22 @@
-Release Notes - MRQL 0.9.0
+***************************************************************************
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+***************************************************************************
+
+Release Notes - Apache MRQL 0.9.0
 
 New Features
 
@@ -27,4 +45,6 @@ Tasks
 [MRQL-14] Add the "-SNAPSHOT" suffix
 [MRQL-15] Setup svnpubsub for the MRQL dist directory
 [MRQL-16] correct source files. ASF licenses, and POMs for release
-[MRQL-18]: correct NOTICE, DISCLAIMER, xdoc
+[MRQL-17] Update the MRQL status page
+[MRQL-18] correct NOTICE, DISCLAIMER, xdoc
+[MRQL-19] Fix license issues to prepare a new release candidate

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/Spark/pom.xml
----------------------------------------------------------------------
diff --git a/Spark/pom.xml b/Spark/pom.xml
index 1917f15..ce1fa38 100644
--- a/Spark/pom.xml
+++ b/Spark/pom.xml
@@ -29,7 +29,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
   
   <dependencies>
@@ -44,7 +44,7 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_2.9.3</artifactId>
       <version>${spark.version}</version>
     </dependency>
@@ -127,12 +127,12 @@
 	  </execution>
           <execution>
 	    <id>validate</id>
-            <phase>verify</phase>
+            <phase>test</phase>
 	    <goals>
 	      <goal>run</goal>
 	    </goals>
 	    <configuration>
-	      <target name="validate_spark" description="Validate all test queries on Apache Spark">
+	      <target name="validate_spark" if="tests" description="Validate all test queries on Apache Spark">
 		<property name="runtime_classpath" refid="maven.runtime.classpath" />
 		<echo message="Evaluating test queries in Apache Spark local mode:" />
 		<java classname="org.apache.mrql.Test" classpath="../lib/mrql-spark-${project.version}.jar:${runtime_classpath}" dir=".." fork="yes" error="/dev/null">

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/conf/mrql-env.sh
----------------------------------------------------------------------
diff --git a/conf/mrql-env.sh b/conf/mrql-env.sh
index d73aaeb..4c2af45 100644
--- a/conf/mrql-env.sh
+++ b/conf/mrql-env.sh
@@ -29,7 +29,7 @@
 JAVA_HOME=/root/jdk
 
 # Required: The CUP parser library
-# You may install it as a linux package or download it from http://www2.cs.tum.edu/projects/cup/
+# You may download it from http://www2.cs.tum.edu/projects/cup/
 CUP_JAR=${HOME}/.m2/repository/net/sf/squirrel-sql/thirdparty/non-maven/java-cup/11a/java-cup-11a.jar
 
 # Required: The JLine library
@@ -57,9 +57,7 @@ HAMA_ZOOKEEPER_QUORUM=localhost
 
 
 # Optional: Spark configuration
-SPARK_HOME=${HOME}/spark-0.7.3
-# Location of the Scala libs
-SCALA_LIB=/usr/share/java
+SPARK_HOME=${HOME}/spark-0.8.0-incubating-bin-hadoop1
 # URI of the Spark master node
 SPARK_MASTER=spark://crete:7077
 # Spark memory per node
@@ -72,4 +70,4 @@ HADOOP_JARS=${HADOOP_HOME}/hadoop-core-${HADOOP_VERSION}.jar:${HADOOP_HOME}/lib/
 
 HAMA_JAR=${HAMA_HOME}/hama-core-${HAMA_VERSION}.jar
 
-SPARK_JARS=${SCALA_LIB}/scala-library.jar:${SCALA_LIB}/scala-compiler.jar:${SPARK_HOME}/core/target/scala-2.9.3/classes:${SPARK_HOME}/lib_managed/jars/*:${SPARK_HOME}/lib_managed/bundles/*
+SPARK_JARS=${SPARK_HOME}/assembly/target/scala-2.9.3/*

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 097fe50..f3f70e5 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -28,7 +28,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
   
   <dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/dist/pom.xml
----------------------------------------------------------------------
diff --git a/dist/pom.xml b/dist/pom.xml
index 21b95ba..c0971ec 100644
--- a/dist/pom.xml
+++ b/dist/pom.xml
@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.mrql</groupId>
     <artifactId>mrql-parent</artifactId>
-    <version>0.9.1-incubating-SNAPSHOT</version>
+    <version>0.9.0-incubating-SNAPSHOT</version>
   </parent>
 
   <build>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2ffb6d1..8e8f334 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
 
   <groupId>org.apache.mrql</groupId>
   <artifactId>mrql-parent</artifactId>
-  <version>0.9.1-incubating-SNAPSHOT</version>
+  <version>0.9.0-incubating-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache MRQL parent POM</name>
   <description>Apache MRQL is a query processing and optimization system for large-scale, distributed data analysis, built on top of Apache Hadoop, Hama, and Spark</description>
@@ -40,12 +40,13 @@
     <groupId>org.apache</groupId>
     <artifactId>apache</artifactId>
     <version>13</version>
+    <relativePath/>
   </parent>
 
   <properties>
     <hadoop.version>1.0.3</hadoop.version>
     <hama.version>0.6.2</hama.version>
-    <spark.version>0.7.3</spark.version>
+    <spark.version>0.8.0-incubating</spark.version>
   </properties>
 
   <modules>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/DataSource.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/DataSource.java b/src/main/java/core/DataSource.java
index 7a88741..ffd5f30 100644
--- a/src/main/java/core/DataSource.java
+++ b/src/main/java/core/DataSource.java
@@ -98,7 +98,7 @@ public class DataSource {
     public static void loadParsers() {
         if (!loaded) {
             DataSource.parserDirectory.put("xml",XMLParser.class);
-            DataSource.parserDirectory.put("json",JsonParser.class);
+            DataSource.parserDirectory.put("json",JsonFormatParser.class);
             DataSource.parserDirectory.put("line",LineParser.class);
             loaded = true;
         }

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/Interpreter.gen
----------------------------------------------------------------------
diff --git a/src/main/java/core/Interpreter.gen b/src/main/java/core/Interpreter.gen
index a93db7d..bc8346c 100644
--- a/src/main/java/core/Interpreter.gen
+++ b/src/main/java/core/Interpreter.gen
@@ -457,12 +457,6 @@ public class Interpreter extends TypeInference {
             MRData z = evalE(x,env);
             System.err.println("*** "+x+": "+z);
             return z;
-        case BinarySource(...,`file,`tp):
-            if (Config.hadoop_mode)
-                if (collection_type(tp))
-                    return Plan.collect(Plan.binarySource(file.stringValue()));
-                else return Plan.collect(Plan.binarySource(file.stringValue())).get(0);
-            else return MapReduceAlgebra.read_binary(file.stringValue());
         case _:
             try {
                 if (Config.hadoop_mode)

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/JSON.cup
----------------------------------------------------------------------
diff --git a/src/main/java/core/JSON.cup b/src/main/java/core/JSON.cup
index 0897169..c045437 100644
--- a/src/main/java/core/JSON.cup
+++ b/src/main/java/core/JSON.cup
@@ -19,10 +19,6 @@ package org.apache.mrql;
 
 import java_cup.runtime.*;
 
-parser code {:
-       public static MRData top_level;
-:}
-
 terminal TRUE, FALSE, NULL, COLON, COMMA, O_BEGIN, O_END, A_BEGIN, A_END;
 
 terminal String STRING;
@@ -38,7 +34,7 @@ precedence left COLON;
 
 start with top;
 
-top ::= json:e                        {: JSONParser.top_level = e; :}
+top ::= json:e                        {: RESULT = e; :}
     ;
 json ::= O_BEGIN O_END                {: RESULT = new Union((byte)0,new Bag()); :}
      |   O_BEGIN members:m O_END      {: RESULT = new Union((byte)0,m); :}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/JSON.lex
----------------------------------------------------------------------
diff --git a/src/main/java/core/JSON.lex b/src/main/java/core/JSON.lex
index cbccc4c..c75d1bb 100644
--- a/src/main/java/core/JSON.lex
+++ b/src/main/java/core/JSON.lex
@@ -27,7 +27,7 @@ import java_cup.runtime.Symbol;
 %char
 %cup
 %eofval{
-  return symbol(sym.EOF);
+  return symbol(jsym.EOF);
 %eofval}
 %{
   public String text () { return yytext(); }

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/JsonFormatParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/JsonFormatParser.java b/src/main/java/core/JsonFormatParser.java
new file mode 100644
index 0000000..77aa891
--- /dev/null
+++ b/src/main/java/core/JsonFormatParser.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+import java.io.StringReader;
+import java.nio.ByteBuffer;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.DataOutputBuffer;
+import java_cup.runtime.Symbol;
+
+
+/** The JSON parser */
+public class JsonFormatParser implements Parser {
+    String[] tags;          // split tags
+    JsonSplitter splitter;
+
+    public void initialize ( Trees args ) {
+        try {
+            if (args.length() > 0) {
+                if (!(args.nth(0) instanceof Node)
+                    || !(((Node)args.nth(0)).name().equals("list")
+                         || ((Node)args.nth(0)).name().equals("bag")))
+                    throw new Error("Must provide a bag of synchronization property names to split the JSON source: "+args.nth(0));
+                Trees ts = ((Node)args.nth(0)).children();
+                if (ts.length() == 0)
+                    throw new Error("Expected at least one synchronization tagname in JSON source: "+ts);
+                tags = new String[ts.length()];
+                for ( int i = 0; i < tags.length; i++ )
+                    if (ts.nth(i) instanceof StringLeaf)
+                        tags[i] = ((StringLeaf)(ts.nth(i))).value();
+                    else throw new Error("Expected a synchronization tagname in JSON source: "+ts.nth(i));
+            }
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    public void open ( String file ) {
+        try {
+            splitter = new JsonSplitter(tags,file,new DataOutputBuffer());
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    public void open ( FSDataInputStream fsin, long start, long end ) {
+        try {
+            splitter = new JsonSplitter(tags,fsin,start,end,new DataOutputBuffer());
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    public Tree type () { return new VariableLeaf("JSON"); }
+
+    public String slice () {
+        if (splitter.hasNext()) {
+            DataOutputBuffer b = splitter.next();
+            return new String(b.getData(),0,b.getLength());
+        } else return null;
+    }
+
+    public Bag parse ( String s ) {
+        try {
+            JSONLex scanner = new JSONLex(new StringReader(s));
+            JSONParser parser = new JSONParser(scanner);
+            Symbol sym = parser.parse();
+            return new Bag((MRData)sym.value);
+        } catch (Exception e) {
+            System.err.println(e);
+            return new Bag();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/JsonParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/JsonParser.java b/src/main/java/core/JsonParser.java
deleted file mode 100644
index b5376dd..0000000
--- a/src/main/java/core/JsonParser.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mrql;
-
-import org.apache.mrql.gen.*;
-import java.io.StringReader;
-import java.nio.ByteBuffer;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.DataOutputBuffer;
-
-
-/** The JSON parser */
-public class JsonParser implements Parser {
-    String[] tags;          // split tags
-    JsonSplitter splitter;
-
-    public void initialize ( Trees args ) {
-        try {
-            if (args.length() > 0) {
-                if (!(args.nth(0) instanceof Node)
-                    || !(((Node)args.nth(0)).name().equals("list")
-                         || ((Node)args.nth(0)).name().equals("bag")))
-                    throw new Error("Expected a bag of synchronization tagnames in JSON source: "+args.nth(0));
-                Trees ts = ((Node)args.nth(0)).children();
-                if (ts.length() == 0)
-                    throw new Error("Expected at least one synchronization tagname in JSON source: "+ts);
-                tags = new String[ts.length()];
-                for ( int i = 0; i < tags.length; i++ )
-                    if (ts.nth(i) instanceof StringLeaf)
-                        tags[i] = ((StringLeaf)(ts.nth(i))).value();
-                    else throw new Error("Expected a synchronization tagname in JSON source: "+ts.nth(i));
-            }
-        } catch (Exception e) {
-            throw new Error(e);
-        }
-    }
-
-    public void open ( String file ) {
-        try {
-            splitter = new JsonSplitter(tags,file,new DataOutputBuffer());
-        } catch (Exception e) {
-            throw new Error(e);
-        }
-    }
-
-    public void open ( FSDataInputStream fsin, long start, long end ) {
-        try {
-            splitter = new JsonSplitter(tags,fsin,start,end,new DataOutputBuffer());
-        } catch (Exception e) {
-            throw new Error(e);
-        }
-    }
-
-    public Tree type () { return new VariableLeaf("JSON"); }
-
-    public String slice () {
-        if (splitter.hasNext()) {
-            DataOutputBuffer b = splitter.next();
-            return new String(b.getData(),0,b.getLength());
-        } else return null;
-    }
-
-    public Bag parse ( String s ) {
-        try {
-            JSONLex scanner = new JSONLex(new StringReader(s));
-            JSONParser parser = new JSONParser(scanner);
-            parser.parse();
-            return new Bag(parser.top_level);
-        } catch (Exception e) {
-            System.err.println(e);
-            return new Bag();
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/JsonSplitter.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/JsonSplitter.java b/src/main/java/core/JsonSplitter.java
index 0e76390..7017e72 100644
--- a/src/main/java/core/JsonSplitter.java
+++ b/src/main/java/core/JsonSplitter.java
@@ -37,16 +37,17 @@ final public class JsonSplitter implements Iterator<DataOutputBuffer> {
     final DataOutputBuffer buffer;
 
     JsonSplitter ( String[] tags, FSDataInputStream fsin, long start, long end,
-                  DataOutputBuffer buffer ) {
+                   DataOutputBuffer buffer ) {
         in_memory = false;
         this.tags = tags;
         this.fsin = fsin;
-        this.start = start;
         this.end = end;
         this.buffer = buffer;
-        scanner = new JSONLex(fsin);
         try {
             fsin.seek(start);
+            this.start = (start == 0) ? start : sync(start);
+            fsin.seek(this.start);
+            scanner = new JSONLex(fsin);
         } catch ( IOException e ) {
             System.err.println("*** Cannot parse the data split: "+fsin);
         }
@@ -64,9 +65,26 @@ final public class JsonSplitter implements Iterator<DataOutputBuffer> {
         scanner = new JSONLex(in);
     }
 
+    private long sync ( long start ) {
+        try {
+            long first_quote = -1;
+            for ( long offset = 0; ; offset++ ) {
+                char c = (char)fsin.read();
+                if (c == '\"') {
+                    if (first_quote >= 0)
+                        if ((char)fsin.read() == ':')
+                            return start+first_quote;
+                    first_quote = offset;
+                }
+            }
+        } catch (IOException ex) {
+            return (long)0;
+        }
+    }
+
     public boolean hasNext () {
         try {
-            if (in_memory || fsin.getPos() < end)
+            if (in_memory || start+scanner.char_pos() < end)
                 if (skip())
                     return store();
             return false;
@@ -95,7 +113,7 @@ final public class JsonSplitter implements Iterator<DataOutputBuffer> {
     boolean skip () throws IOException {
         while (true) {
             Symbol s = scanner.next_token();
-            if (s.sym == jsym.EOF || (!in_memory && fsin.getPos() >= end))
+            if (s.sym == jsym.EOF || (!in_memory && start+scanner.char_pos() >= end))
                 return false;
             if (s.sym == jsym.STRING && is_start_tag((String)s.value)) {
                 String tag = (String)s.value;

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/Main.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/Main.java b/src/main/java/core/Main.java
index e3f7eba..6e0f527 100644
--- a/src/main/java/core/Main.java
+++ b/src/main/java/core/Main.java
@@ -95,7 +95,7 @@ final public class Main {
             else if (Config.distributed_mode)
                 System.out.print("distributed ");
             if (Config.spark_mode)
-                System.out.println("Spark mode using "+Config.nodes+" workers)");
+                System.out.println("Spark mode using "+Config.nodes+" tasks)");
             else if (Config.bsp_mode)
                 System.out.println("Hama BSP mode over "+Config.nodes+" BSP tasks)");
             else if (Config.nodes > 0)

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/PlanGeneration.gen
----------------------------------------------------------------------
diff --git a/src/main/java/core/PlanGeneration.gen b/src/main/java/core/PlanGeneration.gen
index 234ba07..59b8cb8 100644
--- a/src/main/java/core/PlanGeneration.gen
+++ b/src/main/java/core/PlanGeneration.gen
@@ -550,13 +550,7 @@ final public class PlanGeneration extends AlgebraicOptimization {
        case call(plus,`x,`y):
            if (!is_dataset_expr(x) || !is_dataset_expr(y))
                fail;
-           match TypeInference.type_inference2(x) {
-           case `T(_):
-               if (is_collection(T))
-                   return #<Merge(`(makePlan(x)),
-                                  `(makePlan(y)))>;
-           };
-           fail
+           return #<Merge(`(makePlan(x)),`(makePlan(y)))>;
        case call(`f,...el):
            if (!f.is_variable())
                fail;

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/TypeInference.gen
----------------------------------------------------------------------
diff --git a/src/main/java/core/TypeInference.gen b/src/main/java/core/TypeInference.gen
index c51a8ab..5276ae1 100644
--- a/src/main/java/core/TypeInference.gen
+++ b/src/main/java/core/TypeInference.gen
@@ -154,13 +154,6 @@ public class TypeInference extends Translator {
         return -1;
     }
 
-    private static int collection_order ( String T ) {
-        return (T.equals("List") ? 0
-                : (T.equals("list") ? 1
-                : (T.equals("Bag") ? 2
-                : (T.equals("bag") ? 3 : -1))));
-    }
-
     /** type equality in MRQL is structured equality, not named equality */
     public static boolean equal_types ( Tree tx, Tree ty ) {
         tx = expand(tx);
@@ -182,9 +175,15 @@ public class TypeInference extends Translator {
         return false;
     }
 
-    /** is the collection type name S a subtype of that of T? */
+    /** is the collection type name S a subtype of that of T?
+        List \lt Bag \lt bag and List \lt list \lt bag
+     */
     public static boolean subtype ( String S, String T ) {
-        return collection_order(S) <= collection_order(T);
+        return S.equals(T)
+            || (S.equals("List") && T.equals("list"))
+            || (S.equals("List") && T.equals("Bag"))
+            || (S.equals("list") && T.equals("bag"))
+            || (S.equals("Bag") && T.equals("bag"));
     }
 
     /** is the type tx a subtype of type ty? */
@@ -340,12 +339,13 @@ public class TypeInference extends Translator {
         case `T(`t):
             match t2 {
             case `S(`s):
-                if (!is_collection(T) || !is_collection(S))
+                if (!T.equals(S))
+                    fail;
+                if (!is_collection(T))
                     fail;
-                String R = max_collection(T,S);
-                Tree r = unify(t,s);
-                if (r != null)
-                    return #<`R(`r)>;
+                Tree ts = unify(t,s);
+                if (ts != null)
+                    return #<`T(`ts)>;
             }
         case `f(...ts1):
             match t2 {

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/XMLParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/XMLParser.java b/src/main/java/core/XMLParser.java
index 8f39f98..a25acbd 100644
--- a/src/main/java/core/XMLParser.java
+++ b/src/main/java/core/XMLParser.java
@@ -45,7 +45,7 @@ final public class XMLParser implements Parser {
                 if (!(args.nth(0) instanceof Node)
                     || !(((Node)args.nth(0)).name().equals("list")
                          || ((Node)args.nth(0)).name().equals("bag")))
-                    throw new Error("Expected a bag of synchronization tagnames in XML source: "+args.nth(0));
+                    throw new Error("Expected a bag of synchronization tagnames to split the XML source: "+args.nth(0));
                 Trees ts = ((Node)args.nth(0)).children();
                 if (ts.length() == 0)
                     throw new Error("Expected at least one synchronization tagname in XML source: "+ts);

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/core/XMLSplitter.java
----------------------------------------------------------------------
diff --git a/src/main/java/core/XMLSplitter.java b/src/main/java/core/XMLSplitter.java
index cea51fa..e407d85 100644
--- a/src/main/java/core/XMLSplitter.java
+++ b/src/main/java/core/XMLSplitter.java
@@ -86,6 +86,8 @@ final public class XMLSplitter implements Iterator<DataOutputBuffer> {
     public void remove () { }
 
     boolean is_start_tag () {
+        if (tags == null)
+            return true;
         for (String tag: tags)
             if (tag.contentEquals(tagname))
                 return true;
@@ -131,7 +133,7 @@ final public class XMLSplitter implements Iterator<DataOutputBuffer> {
             int b = in_memory ? in.read() : fsin.read();
             if (b == -1)
                 return false;
-            if (b == '&') {  // don't validate external XML references
+            if (b == '&') {  // don't validate external XML entities
                 buffer.write('&');buffer.write('a');buffer.write('m');buffer.write('p');buffer.write(';');
             } else buffer.write(b);
             if (b == '<') {

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/spark/MR_rdd.java
----------------------------------------------------------------------
diff --git a/src/main/java/spark/MR_rdd.java b/src/main/java/spark/MR_rdd.java
index 4b3eff5..48f4f92 100644
--- a/src/main/java/spark/MR_rdd.java
+++ b/src/main/java/spark/MR_rdd.java
@@ -22,8 +22,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.fs.*;
-
-import spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaRDD;
 
 
 /** a wrapper of a JavaRDD<MRData> (stored in HDFS) as an MRData */

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/spark/RDDDataSource.java
----------------------------------------------------------------------
diff --git a/src/main/java/spark/RDDDataSource.java b/src/main/java/spark/RDDDataSource.java
index 609c753..0e59a29 100644
--- a/src/main/java/spark/RDDDataSource.java
+++ b/src/main/java/spark/RDDDataSource.java
@@ -18,8 +18,8 @@
 package org.apache.mrql;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.spark.api.java.JavaRDD;
 
-import spark.api.java.JavaRDD;
 
 final public class RDDDataSource extends DataSource {
     JavaRDD<MRData> rdd;

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/main/java/spark/SparkEvaluator.gen
----------------------------------------------------------------------
diff --git a/src/main/java/spark/SparkEvaluator.gen b/src/main/java/spark/SparkEvaluator.gen
index 54d4da0..0f0be0f 100644
--- a/src/main/java/spark/SparkEvaluator.gen
+++ b/src/main/java/spark/SparkEvaluator.gen
@@ -26,20 +26,21 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import scala.Tuple2;
-import spark.TaskContext;
-import spark.Partition;
-import spark.Accumulator;
-import spark.broadcast.Broadcast;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
-import spark.api.java.function.FlatMapFunction;
-import spark.api.java.function.PairFlatMapFunction;
-import spark.api.java.function.Function;
-import spark.api.java.function.VoidFunction;
+import org.apache.spark.TaskContext;
+import org.apache.spark.Partition;
+import org.apache.spark.Accumulator;
+import org.apache.spark.broadcast.Broadcast;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.VoidFunction;
 
 
 /** Evaluates physical plans in Apache Spark mode */
@@ -114,6 +115,31 @@ final public class SparkEvaluator extends Evaluator implements Serializable {
         throw new Error("You can not run a BSP task in Spark mode");
     }
 
+    /** materialize the entire dataset into a Bag
+     * @param x the DataSet in HDFS to collect values from
+     * @param strip is not used in Spark mode
+     * @return the Bag that contains the collected values
+     */
+    final public Bag collect ( final DataSet x, boolean strip ) throws Exception {
+        Bag res = new Bag();
+        final MRContainer zero = new MRContainer(new MR_byte((byte)0));
+        for ( DataSource s: x.source )
+            if (s instanceof RDDDataSource) {
+		JavaPairRDD<MRContainer,MRContainer> rd
+		    = ((RDDDataSource)s).rdd.map(new PairFunction<MRData,MRContainer,MRContainer>() {
+			    public Tuple2<MRContainer,MRContainer> call ( final MRData value ) {
+				return new Tuple2<MRContainer,MRContainer>(zero,new MRContainer(value));
+			    }
+			});
+		String path = Plan.new_path(Plan.conf);
+		rd.saveAsHadoopFile(path,MRContainer.class,MRContainer.class,SequenceFileOutputFormat.class);
+		res = res.union(collect(new DataSet(new BinaryDataSource(path,Plan.conf),0,0),strip));
+            } else if (s.to_be_merged)
+                res = res.union(Plan.merge(s));
+            else res = res.union(s.inputFormat.newInstance().materialize(new Path(s.path)));
+        return res;
+    }
+
     /** return the FileInputFormat for parsed files (CSV, XML, JSON, etc) */
     final public Class<? extends MRQLFileInputFormat> parsedInputFormat () {
         return SparkParsedInputFormat.class;
@@ -239,7 +265,7 @@ final public class SparkEvaluator extends Evaluator implements Serializable {
             });
     }
 
-    final static TaskContext context = new TaskContext(0,0,0,null);
+    final static TaskContext context = new TaskContext(0,0,(long)0,Config.local_mode,null);
 
     /** Convert a Spark RDD into a lazy bag
      * @param rdd the Spark RDD

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/src/site/xdoc/getting_started.xml
----------------------------------------------------------------------
diff --git a/src/site/xdoc/getting_started.xml b/src/site/xdoc/getting_started.xml
index c02b65b..bacf589 100644
--- a/src/site/xdoc/getting_started.xml
+++ b/src/site/xdoc/getting_started.xml
@@ -28,29 +28,28 @@ xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd
 Apache MRQL can run in three modes: in Map-Reduce mode using Apache Hadoop, in BSP mode (Bulk Synchronous Parallel mode) using Apache Hama, and in Spark mode using Apache Spark.
 </p>
 <p>
-The MRQL MapReduce mode has been tested on Hadoop MapReduce releases 1.0.1, and 1.0.3. You can download the latest tarball from Apache Hadoop. The BSP and Spark modes are optional. The BSP mode has been tested on Hama 0.5.0 and 0.6.2. You can download the latest tarball from Apache Hama. The Spark mode has been tested on Spark 0.7.2 and 0.7.3 in both local and standalone deploy modes.
+The MRQL MapReduce mode has been tested on Apache Hadoop MapReduce releases 1.0.1 and 1.0.3. You can download the latest tarball from Apache Hadoop. The BSP and Spark modes are optional. The BSP mode has been tested on Apache Hama 0.5.0 and 0.6.2. You can download the latest tarball from Apache Hama. The Spark mode has been tested on Apache Spark 0.8.0-incubating in both local and standalone deploy modes.
 </p>
 <p>
-The following instructions assume that you have already installed Hadoop MapReduce and you have deployed it on your cluster successfully. Otherwise, follow the directions in <a href="http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-multi-node-cluster/">Running Hadoop On Ubuntu Linux</a>.
+The following instructions assume that you have already installed Apache Hadoop MapReduce and you have deployed it on your cluster successfully.
 </p>
     <subsection name="How to install MRQL"></subsection>
 <p>
-Download the latest stable MRQL binary release from <a href="http://www.apache.org/dist/incubator/mrql/">http://www.apache.org/dist/incubator/mrql/</a> and extract the files. The jar files <code>mrql-mr-*.jar</code>, <code>mrql-bsp-*.jar</code>, and <code>mrql-spark-*.jar</code> in the <code>lib</code> directory contain the libraries for evaluating MRQL queries in Hadoop, Hama, and Spark modes, respectively. They can be run using the scripts <code>bin/mrql</code>, <code>bin/mrql.bsp</code>, and <code>bin/mrql.spark</code>.
+Download the latest stable MRQL binary release from <a href="http://www.apache.org/dist/incubator/mrql/">http://www.apache.org/dist/incubator/mrql/</a> and extract the files. The scripts <code>bin/mrql</code>, <code>bin/mrql.bsp</code>, and <code>bin/mrql.spark</code> evaluate MRQL queries in Hadoop, Hama, and Spark modes, respectively.
 </p>
     <subsection name="How to run MRQL on a Hadoop MapReduce cluster"></subsection>
 <p>
-Change the first lines of <code>conf/mrql-env.sh</code> to point to your directories. One of these directories must be your existing Hadoop configuration directory. For a test, run the <a href="https://wiki.apache.org/mrql/Pagerank">PageRank example</a> or the <a href="https://wiki.apache.org/mrql/Kmeans">k-means clustering example</a> on a small Hadoop MapReduce cluster.
+Change the configuration file <code>conf/mrql-env.sh</code> to match your Hadoop installation. For a test, run the <a href="https://wiki.apache.org/mrql/Pagerank">PageRank example</a> or the <a href="https://wiki.apache.org/mrql/Kmeans">k-means clustering example</a> on a small Hadoop MapReduce cluster.
 </p>
     <subsection name="How to run MRQL on a Hama cluster"></subsection>
 <p>
 Follow the instructions in <a href="http://hama.apache.org/getting_started_with_hama.html">Getting Started with Hama</a> to set up and start Hama.
-Change the first lines of <code>conf/mrql-env.sh</code> to point to your directories. One of these directories must be your existing Hama configuration directory.
-For a test, run the <a href="https://wiki.apache.org/mrql/Pagerank">PageRank example</a> or the <a href="https://wiki.apache.org/mrql/Kmeans">k-means clustering example</a> on a Hama cluster.
+Change the configuration file <code>conf/mrql-env.sh</code> to match your Hama installation. For a test, run the <a href="https://wiki.apache.org/mrql/Pagerank">PageRank example</a> or the <a href="https://wiki.apache.org/mrql/Kmeans">k-means clustering example</a> on a Hama cluster.
 </p>
     <subsection name="How to run MRQL on a Spark standalone cluster"></subsection>
 <p>
-Follow the instructions in <a href="http://spark-project.org/docs/latest/spark-standalone.html">Spark Standalone Mode</a> to set up and start Spark in standalone deploy mode.
-Change the first lines of <code>conf/mrql-env.sh</code> to point to the Scala and Spark installation directories.
+Follow the instructions in <a href="http://spark.incubator.apache.org/docs/latest/spark-standalone.html">Spark Standalone Mode</a> to set up and start Apache Spark in standalone deploy mode.
+Change the configuration file <code>conf/mrql-env.sh</code> to match your Spark installation. 
 For a test, run the <a href="https://wiki.apache.org/mrql/Pagerank">PageRank example</a> or the <a href="https://wiki.apache.org/mrql/Kmeans">k-means clustering example</a> on a Spark cluster.
 </p>
     <section name="How to Recompile MRQL"></section>
@@ -59,10 +58,7 @@ Download the latest stable MRQL source release from <a href="http://www.apache.o
 <pre>
 git clone https://git-wip-us.apache.org/repos/asf/incubator-mrql.git
 </pre>
-To build MRQL using maven, use <code>mvn package</code>. To validate the installation use <code>mvn verify</code>, which runs the queries in <code>tests/queries</code> in memory, local Hadoop mode, local Hama mode, and Spark standalone mode.
-</p>
-<p>
-Alternatively, you may use <code>make</code> or <code>ant</code> to rebuild MRQL. Change the first lines of <code>conf/mrql-env.sh</code> and <code>conf/mrql-ant-env.sh</code> to point to your directories. <code>make</code> or <code>ant</code> rebuilds the <code>lib/mrql.jar</code>. <code>make bsp</code> or <code>ant bsp</code> rebuilds the <code>lib/mrql-bsp.jar</code>. <code>make spark</code> or <code>ant spark</code> rebuilds the <code>lib/mrql-spark.jar</code>.
+To build MRQL using maven, use <code>mvn install</code>. To validate the installation use <code>mvn install -Dtests</code>, which runs the queries in <code>tests/queries</code> in memory, local Hadoop mode, local Hama mode, and local Spark mode.
 </p>
 
   </body>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/f91ce385/tests/queries/joins_1.mrql
----------------------------------------------------------------------
diff --git a/tests/queries/joins_1.mrql b/tests/queries/joins_1.mrql
index ac17ccd..3230ce0 100644
--- a/tests/queries/joins_1.mrql
+++ b/tests/queries/joins_1.mrql
@@ -56,3 +56,9 @@ select (k,e) from e in E group by k: e.dno order by inv(k);
 select (k,count(e)) from e in E group by k: e.dno order by count(e);
 
 all e in E: e.dno > 1;
+
+(select e.dno from e in E)+{9,8};
+
+(select e.dno from e in E order by e.name)+[9,8];
+
+count((select e.dno from e in E)+{9,8}+(select e.dno from e in E));