You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mrql.apache.org by fe...@apache.org on 2014/03/13 15:24:44 UTC

[19/26] MRQL-32: Refactoring directory structure for Eclipse

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/SystemFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/SystemFunctions.java b/core/src/main/java/org/apache/mrql/SystemFunctions.java
new file mode 100644
index 0000000..5c0ed4b
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/SystemFunctions.java
@@ -0,0 +1,467 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import java.util.*;
+import java.lang.Math;
+
+
+/** System functions must be from MRData to MRData */
+final public class SystemFunctions {
+
+    // used for shortcutting sync in bsp supersteps
+    final public static Bag bsp_empty_bag = new Bag();
+    final public static MR_bool bsp_true_value = new MR_bool(true);
+    final public static MR_bool bsp_false_value = new MR_bool(false);
+
+    final static MRData null_value = new Tuple(0);
+    final static MR_bool true_value = new MR_bool(true);
+    final static MR_bool false_value = new MR_bool(false);
+
+    /** Print the message s on System.err, prefixed with "*** ", and then abort by throwing an Error carrying s */
+    private static void error ( String s ) {
+        System.err.println("*** "+s);
+        throw new Error(s);
+    }
+
+    static Random random = new Random();
+
+    /** Forward both arguments unchanged to the synchronize method of the current Evaluator.evaluator and return its answer */
+    public static MR_bool synchronize ( MR_string peerName, MR_bool mr_exit ) {
+        return Evaluator.evaluator.synchronize(peerName,mr_exit);
+    }
+
+    /** Forward both arguments unchanged to the distribute method of the current Evaluator.evaluator and return its answer */
+    public static Bag distribute ( MR_string peerName, Bag s ) {
+        return Evaluator.evaluator.distribute(peerName,s);
+    }
+
+    /** x < y on the unwrapped values of each numeric wrapper; answers with the shared true_value/false_value constants */
+    public static MR_bool lt ( MR_short x, MR_short y ) { return (x.get() < y.get()) ? true_value : false_value; }
+    public static MR_bool lt ( MR_int x, MR_int y ) { return (x.get() < y.get()) ? true_value : false_value; }
+    public static MR_bool lt ( MR_long x, MR_long y ) { return (x.get() < y.get()) ? true_value : false_value; }
+    public static MR_bool lt ( MR_float x, MR_float y ) { return (x.get() < y.get()) ? true_value : false_value; }
+    public static MR_bool lt ( MR_double x, MR_double y ) { return (x.get() < y.get()) ? true_value : false_value; }
+
+    /** x > y on the unwrapped values of each numeric wrapper; answers with the shared true_value/false_value constants */
+    public static MR_bool gt ( MR_short x, MR_short y ) { return (x.get() > y.get()) ? true_value : false_value; }
+    public static MR_bool gt ( MR_int x, MR_int y ) { return (x.get() > y.get()) ? true_value : false_value; }
+    public static MR_bool gt ( MR_long x, MR_long y ) { return (x.get() > y.get()) ? true_value : false_value; }
+    public static MR_bool gt ( MR_float x, MR_float y ) { return (x.get() > y.get()) ? true_value : false_value; }
+    public static MR_bool gt ( MR_double x, MR_double y ) { return (x.get() > y.get()) ? true_value : false_value; }
+
+    /** x <= y on the unwrapped values of each numeric wrapper; answers with the shared true_value/false_value constants */
+    public static MR_bool leq ( MR_short x, MR_short y ) { return (x.get() <= y.get()) ? true_value : false_value; }
+    public static MR_bool leq ( MR_int x, MR_int y ) { return (x.get() <= y.get()) ? true_value : false_value; }
+    public static MR_bool leq ( MR_long x, MR_long y ) { return (x.get() <= y.get()) ? true_value : false_value; }
+    public static MR_bool leq ( MR_float x, MR_float y ) { return (x.get() <= y.get()) ? true_value : false_value; }
+    public static MR_bool leq ( MR_double x, MR_double y ) { return (x.get() <= y.get()) ? true_value : false_value; }
+
+    /** x >= y on the unwrapped values of each numeric wrapper; answers with the shared true_value/false_value constants */
+    public static MR_bool geq ( MR_short x, MR_short y ) { return (x.get() >= y.get()) ? true_value : false_value; }
+    public static MR_bool geq ( MR_int x, MR_int y ) { return (x.get() >= y.get()) ? true_value : false_value; }
+    public static MR_bool geq ( MR_long x, MR_long y ) { return (x.get() >= y.get()) ? true_value : false_value; }
+    public static MR_bool geq ( MR_float x, MR_float y ) { return (x.get() >= y.get()) ? true_value : false_value; }
+    public static MR_bool geq ( MR_double x, MR_double y ) { return (x.get() >= y.get()) ? true_value : false_value; }
+
+    /** Equality decided by the == operator on the unwrapped values (not by object identity of the wrappers);
+     *  answers with the shared true_value/false_value constants */
+    public static MR_bool eq ( MR_short x, MR_short y ) { return (x.get() == y.get()) ? true_value : false_value; }
+    public static MR_bool eq ( MR_int x, MR_int y ) { return (x.get() == y.get()) ? true_value : false_value; }
+    public static MR_bool eq ( MR_long x, MR_long y ) { return (x.get() == y.get()) ? true_value : false_value; }
+    public static MR_bool eq ( MR_float x, MR_float y ) { return (x.get() == y.get()) ? true_value : false_value; }
+    public static MR_bool eq ( MR_double x, MR_double y ) { return (x.get() == y.get()) ? true_value : false_value; }
+
+    /** Inequality decided by the != operator on the unwrapped values; answers with the shared true_value/false_value constants */
+    public static MR_bool neq ( MR_short x, MR_short y ) { return (x.get() != y.get()) ? true_value : false_value; }
+    public static MR_bool neq ( MR_int x, MR_int y ) { return (x.get() != y.get()) ? true_value : false_value; }
+    public static MR_bool neq ( MR_long x, MR_long y ) { return (x.get() != y.get()) ? true_value : false_value; }
+    public static MR_bool neq ( MR_float x, MR_float y ) { return (x.get() != y.get()) ? true_value : false_value; }
+    public static MR_bool neq ( MR_double x, MR_double y ) { return (x.get() != y.get()) ? true_value : false_value; }
+
+    /** Comparisons on arbitrary MRData values: each one is decided by the sign of x.compareTo(y)
+     *  and answers with the shared true_value/false_value constants */
+    public static MR_bool eq ( MRData x, MRData y ) { return (x.compareTo(y) == 0) ? true_value : false_value; }
+    public static MR_bool neq ( MRData x, MRData y ) { return (x.compareTo(y) != 0) ? true_value : false_value; }
+    public static MR_bool lt ( MRData x, MRData y ) { return (x.compareTo(y) < 0) ? true_value : false_value; }
+    public static MR_bool leq ( MRData x, MRData y ) { return (x.compareTo(y) <= 0) ? true_value : false_value; }
+    public static MR_bool gt ( MRData x, MRData y ) { return (x.compareTo(y) > 0) ? true_value : false_value; }
+    public static MR_bool geq ( MRData x, MRData y ) { return (x.compareTo(y) >= 0) ? true_value : false_value; }
+
+    /** Binary plus on strings: a new MR_string built from x.get()+y.get() */
+    public static MR_string plus ( MR_string x, MR_string y ) { return new MR_string(x.get()+y.get()); }
+
+    /** Binary plus on bags: delegates to x.union(y) */
+    public static Bag plus ( Bag x, Bag y ) { return x.union(y); }
+
+    /** Binary plus on numbers: a new wrapper holding x.get()+y.get(); the MR_short sum is narrowed back with a (short) cast */
+    public static MR_short plus ( MR_short x, MR_short y ) { return new MR_short((short)(x.get()+y.get())); }
+    public static MR_int plus ( MR_int x, MR_int y ) { return new MR_int(x.get()+y.get()); }
+    public static MR_long plus ( MR_long x, MR_long y ) { return new MR_long(x.get()+y.get()); }
+    public static MR_float plus ( MR_float x, MR_float y ) { return new MR_float(x.get()+y.get()); }
+    public static MR_double plus ( MR_double x, MR_double y ) { return new MR_double(x.get()+y.get()); }
+
+    /** Binary minus: a new wrapper holding x.get()-y.get(); the MR_short difference is narrowed back with a (short) cast */
+    public static MR_short minus ( MR_short x, MR_short y ) { return new MR_short((short)(x.get()-y.get())); }
+    public static MR_int minus ( MR_int x, MR_int y ) { return new MR_int(x.get()-y.get()); }
+    public static MR_long minus ( MR_long x, MR_long y ) { return new MR_long(x.get()-y.get()); }
+    public static MR_float minus ( MR_float x, MR_float y ) { return new MR_float(x.get()-y.get()); }
+    public static MR_double minus ( MR_double x, MR_double y ) { return new MR_double(x.get()-y.get()); }
+
+    /** Multiplication: a new wrapper holding x.get()*y.get(); the MR_short product is narrowed back with a (short) cast */
+    public static MR_short times ( MR_short x, MR_short y ) { return new MR_short((short)(x.get()*y.get())); }
+    public static MR_int times ( MR_int x, MR_int y ) { return new MR_int(x.get()*y.get()); }
+    public static MR_long times ( MR_long x, MR_long y ) { return new MR_long(x.get()*y.get()); }
+    public static MR_float times ( MR_float x, MR_float y ) { return new MR_float(x.get()*y.get()); }
+    public static MR_double times ( MR_double x, MR_double y ) { return new MR_double(x.get()*y.get()); }
+
+    /** Division: a new wrapper holding x.get()/y.get(), using the Java / operator on the unwrapped values
+     *  (no check for a zero divisor is made here); the MR_short quotient is narrowed back with a (short) cast */
+    public static MR_short div ( MR_short x, MR_short y ) { return new MR_short((short)(x.get()/y.get())); }
+    public static MR_int div ( MR_int x, MR_int y ) { return new MR_int(x.get()/y.get()); }
+    public static MR_long div ( MR_long x, MR_long y ) { return new MR_long(x.get()/y.get()); }
+    public static MR_float div ( MR_float x, MR_float y ) { return new MR_float(x.get()/y.get()); }
+    public static MR_double div ( MR_double x, MR_double y ) { return new MR_double(x.get()/y.get()); }
+
+    /** Remainder: a new wrapper holding x.get() % y.get(); only MR_int and MR_long overloads are provided */
+    public static MR_int mod ( MR_int x, MR_int y ) { return new MR_int(x.get() % y.get()); }
+    public static MR_long mod ( MR_long x, MR_long y ) { return new MR_long(x.get() % y.get()); }
+
+    /** The smaller of the two values, computed with Math.min and returned in a new wrapper of the argument type */
+    public static MR_short min ( MR_short x, MR_short y ) { return new MR_short((short)(Math.min(x.get(),y.get()))); }
+    public static MR_int min ( MR_int x, MR_int y ) { return new MR_int(Math.min(x.get(),y.get())); }
+    public static MR_long min ( MR_long x, MR_long y ) { return new MR_long(Math.min(x.get(),y.get())); }
+    public static MR_float min ( MR_float x, MR_float y ) { return new MR_float(Math.min(x.get(),y.get())); }
+    public static MR_double min ( MR_double x, MR_double y ) { return new MR_double(Math.min(x.get(),y.get())); }
+
+    /** The larger of the two values, computed with Math.max and returned in a new wrapper of the argument type */
+    public static MR_short max ( MR_short x, MR_short y ) { return new MR_short((short)(Math.max(x.get(),y.get()))); }
+    public static MR_int max ( MR_int x, MR_int y ) { return new MR_int(Math.max(x.get(),y.get())); }
+    public static MR_long max ( MR_long x, MR_long y ) { return new MR_long(Math.max(x.get(),y.get())); }
+    public static MR_float max ( MR_float x, MR_float y ) { return new MR_float(Math.max(x.get(),y.get())); }
+    public static MR_double max ( MR_double x, MR_double y ) { return new MR_double(Math.max(x.get(),y.get())); }
+
+    /** Thin wrappers over the java.lang.Math function of the same name: the argument is unwrapped,
+     *  passed to Math, and the result is wrapped again. round yields an MR_int for an MR_float
+     *  argument and an MR_long for an MR_double argument. */
+    public static MR_double sin ( MR_double x ) { return new MR_double(Math.sin(x.get())); }
+    public static MR_double cos ( MR_double x ) { return new MR_double(Math.cos(x.get())); }
+    public static MR_double tan ( MR_double x ) { return new MR_double(Math.tan(x.get())); }
+    public static MR_double asin ( MR_double x ) { return new MR_double(Math.asin(x.get())); }
+    public static MR_double acos ( MR_double x ) { return new MR_double(Math.acos(x.get())); }
+    public static MR_double atan ( MR_double x ) { return new MR_double(Math.atan(x.get())); }
+    public static MR_double pow ( MR_double x, MR_double y ) { return new MR_double(Math.pow(x.get(),y.get())); }
+    public static MR_double sqrt ( MR_double x ) { return new MR_double(Math.sqrt(x.get())); }
+    public static MR_double ceil ( MR_double x ) { return new MR_double(Math.ceil(x.get())); }
+    public static MR_double floor ( MR_double x ) { return new MR_double(Math.floor(x.get())); }
+    public static MR_double rint ( MR_double x ) { return new MR_double(Math.rint(x.get())); }
+    public static MR_int round ( MR_float x ) { return new MR_int((int)Math.round(x.get())); }
+    public static MR_long round ( MR_double x ) { return new MR_long(Math.round(x.get())); }
+    /** Unary plus: the identity, returns the very same object x */
+    public static MR_short plus ( MR_short x ) { return x; }
+    public static MR_int plus ( MR_int x ) { return x; }
+    public static MR_long plus ( MR_long x ) { return x; }
+    public static MR_float plus ( MR_float x ) { return x; }
+    public static MR_double plus ( MR_double x ) { return x; }
+
+    /** Unary minus: a new wrapper holding -x.get(); the MR_short result is narrowed back with a (short) cast */
+    public static MR_short minus ( MR_short x ) { return new MR_short((short)-x.get()); }
+    public static MR_int minus ( MR_int x ) { return new MR_int(-x.get()); }
+    public static MR_long minus ( MR_long x ) { return new MR_long(-x.get()); }
+    public static MR_float minus ( MR_float x ) { return new MR_float(-x.get()); }
+    public static MR_double minus ( MR_double x ) { return new MR_double(-x.get()); }
+
+    /** Absolute value computed with Math.abs and returned in a new wrapper of the argument type */
+    public static MR_short abs ( MR_short x ) { return new MR_short((short) Math.abs(x.get())); }
+    public static MR_int abs ( MR_int x ) { return new MR_int(Math.abs(x.get())); }
+    public static MR_long abs ( MR_long x ) { return new MR_long(Math.abs(x.get())); }
+    public static MR_float abs ( MR_float x ) { return new MR_float(Math.abs(x.get())); }
+    public static MR_double abs ( MR_double x ) { return new MR_double(Math.abs(x.get())); }
+
+    /** Wrap x in a new Inv object (presumably to invert its sort order -- confirm against the Inv class) */
+    public static Inv inv ( MRData x ) { return new Inv(x); }
+
+    /** Boolean connectives on already-evaluated arguments: and/or hand back y itself when x does not
+     *  decide the result, otherwise (and for not) one of the shared true_value/false_value constants */
+    public static MR_bool and ( MR_bool x, MR_bool y ) { return (x.get()) ? y : false_value; }
+    public static MR_bool or ( MR_bool x, MR_bool y ) { return (x.get()) ? true_value : y; }
+    public static MR_bool not ( MR_bool x ) { return (x.get()) ? false_value : true_value; }
+
+    /** Conversions from a string. toBool is true_value only when the text equals "true" exactly (case-sensitive)
+     *  and false_value for anything else; the numeric conversions delegate to the matching
+     *  Short/Integer/Long/Float/Double parse method, so malformed text surfaces as that method's exception */
+    public static MR_bool toBool ( MR_string s ) { return (s.get().equals("true")) ? true_value : false_value; }
+    public static MR_short toShort ( MR_string s ) { return new MR_short(Short.parseShort(s.get())); }
+    public static MR_int toInt ( MR_string s ) { return new MR_int(Integer.parseInt(s.get())); }
+    public static MR_long toLong ( MR_string s ) { return new MR_long(Long.parseLong(s.get())); }
+    public static MR_float toFloat ( MR_string s ) { return new MR_float(Float.parseFloat(s.get())); }
+    public static MR_double toDouble ( MR_string s ) { return new MR_double(Double.parseDouble(s.get())); }
+
+    /** A pseudo-random MR_int obtained from the shared generator through nextInt, with the bound taken from n */
+    public static MR_int random ( MR_int n ) {
+        return new MR_int(random.nextInt(n.get()));
+    }
+
+    /** Natural logarithm (Math.log) and exponential (Math.exp); the MR_float overloads also compute
+     *  through these Math functions and wrap the result in an MR_float */
+    public static MR_float log ( MR_float n ) { return new MR_float(Math.log(n.get())); }
+    public static MR_double log ( MR_double n ) { return new MR_double(Math.log(n.get())); }
+    public static MR_float exp ( MR_float n ) { return new MR_float(Math.exp(n.get())); }
+    public static MR_double exp ( MR_double n ) { return new MR_double(Math.exp(n.get())); }
+
+    /** The textual form of any value: a new MR_string holding x.toString() */
+    public static MR_string string ( MRData x ) { return new MR_string(x.toString()); }
+
+    /** true_value when the text of x contains the text of y, false_value otherwise; like the other
+     *  predicates of this class it answers with the shared constants instead of allocating a new MR_bool */
+    public static MR_bool contains ( MR_string x, MR_string y ) { return (x.get().contains(y.get())) ? true_value : false_value; }
+    /** length: the length() of the wrapped text. substring: delegates to substring(b,e) of the wrapped text,
+     *  with b and e passed through unchanged as the begin and end arguments */
+    public static MR_int length ( MR_string x ) { return new MR_int(x.get().length()); }
+    public static MR_string substring ( MR_string x, MR_int b, MR_int e ) { return new MR_string(x.get().substring(b.get(),e.get())); }
+
+    /** true_value when the iterator of s yields at least one element, false_value otherwise */
+    public static MR_bool exists ( Bag s ) {
+        if (s.iterator().hasNext())
+            return true_value;
+        return false_value;
+    }
+
+    /** true_value as soon as one MR_bool element of x is true; elements of any other type are skipped,
+     *  and a bag without a true element (including the empty bag) gives false_value */
+    public static MR_bool some ( Bag x ) {
+        for ( MRData item: x ) {
+            if (item instanceof MR_bool && ((MR_bool)item).get())
+                return true_value;
+        };
+        return false_value;
+    }
+
+    /** false_value as soon as one MR_bool element of x is false; elements of any other type are skipped,
+     *  and a bag without a false element (including the empty bag) gives true_value */
+    public static MR_bool all ( Bag x ) {
+        for ( MRData item: x ) {
+            if (item instanceof MR_bool && !((MR_bool)item).get())
+                return false_value;
+        };
+        return true_value;
+    }
+
+    /** Membership test: true_value when s.contains(e) holds, false_value otherwise */
+    public static MR_bool member ( MRData e, Bag s ) {
+        if (s.contains(e))
+            return true_value;
+        return false_value;
+    }
+
+    /** Number of elements in s: taken from s.size() when the bag is materialized,
+     *  otherwise obtained by walking the bag once */
+    public static MR_long count ( Bag s ) {
+        if (!s.materialized()) {
+            long n = 0;
+            for ( MRData ignored: s )
+                n++;
+            return new MR_long(n);
+        };
+        return new MR_long(s.size());
+    }
+
+    /** The hashCode() of x, wrapped in a new MR_long */
+    public static MR_long hash_code ( MRData x ) {
+        return new MR_long(x.hashCode());
+    }
+
+    public static MRData index ( Bag b, MR_int mi ) {
+        int i = mi.get();
+        if (i < 0)
+            throw new Error("wrong index: "+i);
+        if (b.materialized())
+            return b.get(i);
+        int k = 0;
+        for ( MRData e: b )
+            if (k++ == i)
+                return e;
+        throw new Error("wrong index: "+i);
+    }
+
+    /** The elements of b at the zero-based positions mi..mj (both inclusive), in iteration order.
+     *  Positions beyond the end of b are silently ignored; mj < mi raises an Error.
+     *  The walk stops as soon as position mj has been passed, so the rest of b is not consumed. */
+    public static Bag range ( Bag b, MR_int mi, MR_int mj ) {
+        int i = mi.get();
+        int j = mj.get();
+        if (j < i)
+            throw new Error("wrong range indexes: "+i+","+j);
+        Bag bag = new Bag(j-i+1);
+        int k = 0;
+        for ( MRData e: b ) {
+            // nothing after position j can qualify: leave early, as index() does
+            if (k > j)
+                break;
+            if (k >= i)
+                bag.add(e);
+            k++;
+        };
+        return bag;
+    }
+
+    /** Bag union: delegates to x.union(y) */
+    public static Bag union ( Bag x, Bag y ) {
+        return x.union(y);
+    }
+
+    /** A new bag with every element of y for which x.contains holds (so the result follows the iteration
+     *  order and multiplicities of y); x is materialized first because it is probed once per element */
+    public static Bag intersect ( Bag x, Bag y ) {
+        x.materialize();
+        Bag common = new Bag();
+        for ( MRData candidate: y ) {
+            if (!x.contains(candidate))
+                continue;
+            common.add(candidate);
+        };
+        return common;
+    }
+
+    /** A new bag with every element of x for which y.contains does not hold;
+     *  y is materialized first because it is probed once per element */
+    public static Bag except ( Bag x, Bag y ) {
+        y.materialize();
+        Bag remaining = new Bag();
+        for ( MRData candidate: x ) {
+            if (y.contains(candidate))
+                continue;
+            remaining.add(candidate);
+        };
+        return remaining;
+    }
+
+    /** Call x.materialize() and return the same bag object x */
+    public static Bag materialize ( Bag x ) {
+        x.materialize();
+        return x;
+    }
+
+    /** coerce a basic type to a new type indicated by the basic type number */
+    public static MRData coerce ( MRData from, MR_int type ) {
+        byte tp = (byte)type.get();
+        if (from instanceof MR_short) {
+            if (tp == MRContainer.BYTE)
+                return new MR_byte((byte)((MR_short)from).get());
+            else if (tp == MRContainer.SHORT)
+                return from;
+            else if (tp == MRContainer.INT)
+                return new MR_int((int)((MR_short)from).get());
+            else if (tp == MRContainer.LONG)
+                return new MR_long((long)((MR_short)from).get());
+            else if (tp == MRContainer.FLOAT)
+                return new MR_float((float)((MR_short)from).get());
+            else if (tp == MRContainer.DOUBLE)
+                return new MR_double((double)((MR_short)from).get());
+        } else if (from instanceof MR_int) {
+            if (tp == MRContainer.BYTE)
+                return new MR_byte((byte)((MR_int)from).get());
+            else if (tp == MRContainer.SHORT)
+                return new MR_short((short)((MR_int)from).get());
+            else if (tp == MRContainer.INT)
+                return from;
+            else if (tp == MRContainer.LONG)
+                return new MR_long((long)((MR_int)from).get());
+            else if (tp == MRContainer.FLOAT)
+                return new MR_float((float)((MR_int)from).get());
+            else if (tp == MRContainer.DOUBLE)
+                return new MR_double((double)((MR_int)from).get());
+        } else if (from instanceof MR_long) {
+            if (tp == MRContainer.BYTE)
+                return new MR_byte((byte)((MR_long)from).get());
+            else if (tp == MRContainer.SHORT)
+                return new MR_short((short)((MR_long)from).get());
+            else if (tp == MRContainer.INT)
+                return new MR_int((int)((MR_long)from).get());
+            else if (tp == MRContainer.LONG)
+                return from;
+            else if (tp == MRContainer.FLOAT)
+                return new MR_float((float)((MR_long)from).get());
+            else if (tp == MRContainer.DOUBLE)
+                return new MR_double((double)((MR_long)from).get());
+        } else if (from instanceof MR_float) {
+            if (tp == MRContainer.BYTE)
+                return new MR_byte((byte)((MR_float)from).get());
+            else if (tp == MRContainer.SHORT)
+                return new MR_short((short)((MR_float)from).get());
+            else if (tp == MRContainer.INT)
+                return new MR_int((int)((MR_float)from).get());
+            else if (tp == MRContainer.LONG)
+                return new MR_long((long)((MR_float)from).get());
+            if (tp == MRContainer.FLOAT)
+                return from;
+            else if (tp == MRContainer.DOUBLE)
+                return new MR_double((double)((MR_float)from).get());
+        } else if (from instanceof MR_double) {
+            if (tp == MRContainer.BYTE)
+                return new MR_byte((byte)((MR_double)from).get());
+            else if (tp == MRContainer.SHORT)
+                return new MR_short((short)((MR_double)from).get());
+            else if (tp == MRContainer.INT)
+                return new MR_int((int)((MR_double)from).get());
+            else if (tp == MRContainer.LONG)
+                return new MR_long((long)((MR_double)from).get());
+            if (tp == MRContainer.FLOAT)
+                return new MR_float((float)((MR_double)from).get());
+            if (tp == MRContainer.DOUBLE)
+                return from;
+        };
+        error("Cannot up-coerce the numerical value "+from);
+        return null;
+    }
+ 
+    /** used in avg: t is cast to a Tuple whose first component is cast to MR_double (the sum) and whose
+     *  second component is cast to MR_long (the count); the result is a new MR_double holding sum/count */
+    public static MR_double avg_value ( MRData t ) {
+        Tuple pair = (Tuple)t;
+        MR_double sum = (MR_double)pair.first();
+        MR_long count = (MR_long)pair.second();
+        return new MR_double(sum.get()/count.get());
+    }
+
+    /** The text of an XML node. A node with tag 1 holds its text directly and that MR_string is returned
+     *  as is. For any other node the texts of the tag-1 entries in the bag at component 2 of its tuple are
+     *  concatenated in iteration order; entries with another tag are skipped (no recursion). */
+    public static MR_string text ( Union node ) {
+        if (node.tag() == 1)
+            return (MR_string)(node.value());
+        Bag b = (Bag)((Tuple)node.value()).get(2);
+        // accumulate in a builder instead of re-copying the string for every child
+        StringBuilder s = new StringBuilder();
+        for ( MRData e: b )
+            if (((Union)e).tag() == 1)
+                s.append(((MR_string)(((Union)e).value())).get());
+        return new MR_string(s.toString());
+    }
+
+    /** The concatenation, in iteration order, of text(node) for every node of the bag
+     *  (each element is cast to Union); an empty bag gives the empty string */
+    public static MR_string text ( Bag nodes ) {
+        // one builder for the whole bag instead of a fresh MR_string per element
+        StringBuilder b = new StringBuilder();
+        for ( MRData e: nodes )
+            b.append(text((Union)e).get());
+        return new MR_string(b.toString());
+    }
+
+    public static MR_string tag ( Union node ) {
+        if (node.tag() == 1)
+            error("Cannot extract the tagname of a CData: "+node);
+        return (MR_string)((Tuple) node.value()).get(0);
+    }
+
+    /** The value of the attribute called tagname on a single XML element. The attributes are the
+     *  (name,value) tuples in the bag at component 1 of the node's tuple; the first one whose name
+     *  equals tagname wins and its value is returned as a new MR_string. A node with tag 1, or an
+     *  element without such an attribute, is reported through error(). */
+    public static MR_string XMLattribute ( MR_string tagname, Union node ) {
+        if (node.tag() == 1)
+            error("Element "+node+" does not have attributes");
+        final Tuple element = (Tuple)node.value();
+        final String wanted = tagname.get();
+        final Bag attributes = (Bag)element.get(1);
+        for ( MRData a: attributes ) {
+            Tuple binding = (Tuple)a;
+            String name = ((MR_string)(binding.get(0))).get();
+            if (wanted.equals(name))
+                return new MR_string(((MR_string)binding.get(1)).get());
+        };
+        error("Element "+node+" does not have attribute "+tagname);
+        return null;
+    }
+
+    public static Bag XMLattributes ( MR_string tagname, Union node ) {
+        if (node.tag() == 1)
+            return new Bag();
+        Tuple t = (Tuple)node.value();
+        Bag b = new Bag();
+        String tag = tagname.get();
+        for ( MRData c: (Bag)t.get(1) ) {
+            Tuple p = (Tuple)c;
+            if (tag.equals("*") || tag.equals(((MR_string)(p.get(0))).get()))
+                b.add(p.get(1));
+        };
+        return b;
+    }
+
+    /** Apply the single-node XMLattributes to every node of the bag (each element is cast to Union)
+     *  and gather all the resulting values, in order, into one new bag */
+    public static Bag XMLattributes ( MR_string tagname, Bag nodes ) {
+        Bag collected = new Bag();
+        for ( MRData node: nodes ) {
+            Bag found = XMLattributes(tagname,(Union)node);
+            for ( MRData value: found )
+                collected.add(value);
+        };
+        return collected;
+    }
+
+    /** Bag variant of XMLattribute. It gathers its values through XMLattributes, so, unlike the
+     *  single-node XMLattribute, nodes that lack the attribute simply contribute nothing and "*" selects
+     *  every attribute; the result is exactly that of XMLattributes(tagname,nodes). */
+    public static Bag XMLattribute ( MR_string tagname, Bag nodes ) {
+        return XMLattributes(tagname,nodes);
+    }
+
+    /** The children of a single XML element that are themselves elements (tag 0) and whose tag name
+     *  (component 0 of the child's tuple) equals tagname, or all element children when tagname is "*".
+     *  The children are looked up in the bag at component 2 of the node's tuple; the child nodes
+     *  themselves are returned. A node with tag 1 gives an empty bag. */
+    public static Bag XMLchildren ( MR_string tagname, Union node ) {
+        if (node.tag() == 1)
+            return new Bag();
+        Tuple element = (Tuple)node.value();
+        Bag matches = new Bag();
+        String wanted = tagname.get();
+        for ( MRData child: (Bag)element.get(2) ) {
+            Union u = (Union)child;
+            if (u.tag() != 0)
+                continue;
+            Tuple inner = (Tuple)(u.value());
+            if (wanted.equals("*") || (((MR_string)(inner.get(0))).get()).equals(wanted))
+                matches.add(child);
+        };
+        return matches;
+    }
+
+    /** Apply the single-node XMLchildren to every node of the bag (each element is cast to Union)
+     *  and gather all the resulting children, in order, into one new bag */
+    public static Bag XMLchildren ( MR_string tagname, Bag nodes ) {
+        Bag collected = new Bag();
+        for ( MRData node: nodes ) {
+            Bag found = XMLchildren(tagname,(Union)node);
+            for ( MRData child: found )
+                collected.add(child);
+        };
+        return collected;
+    }
+
+    /** Left fold over a bag: starting from z, the accumulator is replaced for every element e of s
+     *  (in iteration order) by the result of evaluating the lambda c on the pair (accumulator,e).
+     *  Returns the final accumulator, which is z itself for an empty bag. */
+    public static MRData fold ( Lambda c, MRData z, Bag s ) {
+        MRData acc = z;
+        for ( MRData e: s )
+            acc = c.lambda().eval(new Tuple(acc,e));
+        return acc;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/Test.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/Test.java b/core/src/main/java/org/apache/mrql/Test.java
new file mode 100644
index 0000000..eca272c
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/Test.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import java.io.*;
+import org.apache.hadoop.util.*;
+import org.apache.hadoop.conf.*;
+
+
+/** Test all the MRQL test queries */
+final public class Test {
+    public static PrintStream print_stream;
+    public static Configuration conf;
+    static MRQLParser parser = new MRQLParser();
+    static String result_directory;
+    static PrintStream test_out;
+    static PrintStream error_stream;
+
+    /** Compare the contents of two files byte by byte.
+     *  Both streams are closed before returning, also when opening or reading fails.
+     *  @param file1 path of the first file
+     *  @param file2 path of the second file
+     *  @return 0 when the files are identical, otherwise the 1-based position of the first byte
+     *          that differs (or at which exactly one of the files ends)
+     */
+    private static int compare ( String file1, String file2 ) throws Exception {
+        InputStream s1 = new BufferedInputStream(new FileInputStream(file1));
+        try {
+            InputStream s2 = new BufferedInputStream(new FileInputStream(file2));
+            try {
+                int b1, b2;
+                int i = 1;
+                // equal bytes and not yet at the end (b1 == b2, so testing b1 covers both)
+                while ((b1 = s1.read()) == (b2 = s2.read()) && b1 != -1)
+                    i++;
+                return (b1 == -1 && b2 == -1) ? 0 : i;
+            } finally {
+                s2.close();
+            }
+        } finally {
+            s1.close();
+        }
+    }
+
+    /** Log a failed query on error_stream (message and stack trace) and mark it FAILED on test_out */
+    private static void report_failure ( String qname, Throwable ex ) {
+        error_stream.println(qname+": "+ex);
+        ex.printStackTrace(error_stream);
+        test_out.println("FAILED");
+    }
+
+    /** Run one query file and report the outcome on test_out. Files whose path does not end in
+     *  ".mrql" are ignored. While the query runs, System.out is redirected to a file in
+     *  result_directory: to NAME.txt when no such file exists yet (it becomes the expected result,
+     *  "OK created"), otherwise to result.txt, which is then compared with NAME.txt ("OK matched" or
+     *  "MISMATCH AT position"). An Error or Exception raised by the query is logged and reported as
+     *  FAILED, and a freshly created NAME.txt is removed again. Afterwards System.out is set back to
+     *  test_out and the files opened for the query are closed. */
+    private static void query ( File query ) throws Exception {
+        String path = query.getPath();
+        if (!path.endsWith(".mrql"))
+            return;
+        Translator.global_reset();
+        String qname = query.getName();
+        qname = qname.substring(0,qname.length()-5);
+        test_out.print("   Testing "+qname+" ... ");
+        String result_file = result_directory+"/"+qname+".txt";
+        boolean exists = new File(result_file).exists();
+        // with an expected result present, capture into a scratch file and compare afterwards
+        String output_file = (exists) ? result_directory+"/result.txt" : result_file;
+        PrintStream query_out = new PrintStream(output_file);
+        System.setOut(query_out);
+        FileInputStream query_in = null;
+        boolean failed = false;
+        try {
+            query_in = new FileInputStream(query);
+            parser = new MRQLParser(new MRQLLex(query_in));
+            Main.parser = parser;
+            MRQLLex.reset();
+            parser.parse();
+            query_out.flush();
+            int i;
+            if (exists && (i = compare(result_file,output_file)) > 0)
+                test_out.println("MISMATCH AT "+(i-1));
+            else if (exists)
+                test_out.println("OK matched");
+            else test_out.println("OK created");
+        } catch (Error ex) {
+            failed = true;
+            report_failure(qname,ex);
+        } catch (Exception ex) {
+            failed = true;
+            report_failure(qname,ex);
+        } finally {
+            try {
+                if (Config.hadoop_mode)
+                    Plan.clean();
+                if (Config.compile_functional_arguments)
+                    Compiler.clean();
+            } finally {
+                // release the redirected output before touching the file it was writing to
+                System.setOut(test_out);
+                query_out.close();
+                if (failed && !exists)
+                    new File(result_file).delete();
+                if (query_in != null)
+                    query_in.close();
+            }
+        }
+    }
+
+    public static void main ( String[] args ) throws Exception {
+        boolean hadoop = false;
+        for ( String arg: args ) {
+            hadoop |= arg.equals("-local") || arg.equals("-dist");
+            Config.bsp_mode |= arg.equals("-bsp");
+            Config.spark_mode |= arg.equals("-spark");
+        };
+        Config.map_reduce_mode = !Config.bsp_mode && !Config.spark_mode;
+        if (Config.map_reduce_mode)
+            Evaluator.evaluator = (Evaluator)Class.forName("org.apache.mrql.MapReduceEvaluator").newInstance();
+        if (Config.bsp_mode)
+            Evaluator.evaluator = (Evaluator)Class.forName("org.apache.mrql.BSPEvaluator").newInstance();
+        if (Config.spark_mode)
+            Evaluator.evaluator = (Evaluator)Class.forName("org.apache.mrql.SparkEvaluator").newInstance();
+        Config.quiet_execution = true;
+        if (hadoop) {
+            conf = Evaluator.evaluator.new_configuration();
+            GenericOptionsParser gop = new GenericOptionsParser(conf,args);
+            conf = gop.getConfiguration();
+            args = gop.getRemainingArgs();
+        };
+        Config.parse_args(args,conf);
+        Config.hadoop_mode = Config.local_mode || Config.distributed_mode;
+        Evaluator.evaluator.init(conf);
+        new TopLevel();
+        Config.testing = true;
+        if (Config.hadoop_mode && Config.bsp_mode)
+            Config.write(Plan.conf);
+        if (Main.query_file.equals("") || Config.extra_args.size() != 2)
+            throw new Error("Must provide a query directory, a result directory, and an error log file");
+        File query_dir = new File(Main.query_file);
+        result_directory = Config.extra_args.get(0);
+        (new File(result_directory)).mkdirs();
+        error_stream = new PrintStream(Config.extra_args.get(1));
+        System.setErr(error_stream);
+        test_out = System.out;
+        for ( File f: query_dir.listFiles() )
+            query(f);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/TopLevel.gen
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/TopLevel.gen b/core/src/main/java/org/apache/mrql/TopLevel.gen
new file mode 100644
index 0000000..0ed3f49
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/TopLevel.gen
@@ -0,0 +1,293 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+
+
+/** Provides the API for compilation/code-generation */
+final public class TopLevel extends Interpreter {
+    static Tree xml_type;
+
+    public TopLevel () {   // constructing a TopLevel registers the built-in data types and constants below
+        // XML and JSON are defined through datadef, like user-defined data types:
+        datadef("XML",#<union(Node(tuple(string,bag(tuple(string,string)),list(XML))),
+                              CData(string))>);
+        datadef("JSON",#<union(JObject(bag(tuple(string,JSON))),
+                               JArray(list(JSON)),
+                               Jstring(string),
+                               Jlong(long),
+                               Jdouble(double),
+                               Jbool(bool),
+                               Jnull(tuple()))>);
+        constant(#<PI>,#<double>,new MR_double(Math.PI));   // predefined global constant PI of type double
+        xml_type = global_datatype_env.lookup("XML");   // keep the XML type, as stored by datadef, in the static field
+        DataSource.loadParsers();
+    }
+
+    private static boolean memory_parsed_source ( Tree e ) {   // true iff e is a ParsedSource, or a Merge whose two arguments both qualify
+        match e {
+        case ParsedSource(...): return true;
+        case Merge(`x,`y): return memory_parsed_source(x) && memory_parsed_source(y);
+        };
+        return false;
+    }
+
+    /** translate an MRQL expression into a plan and evaluate the plan into MRData
+     * @param e MRQL query to be evaluated
+     * @param print do we want to print the result on System.out?
+     * @return the result, or null if there is no plan or the evaluation failed (in testing mode a failure is rethrown as an Error)
+     */
+    public static MRData expression ( Tree e, boolean print ) {
+        try {
+            Tree plan = translate_expression(e);
+            query_plan = plan;   // kept so that callers such as store() can return it
+            tab_count = -3;
+            if (plan == null)
+                return null;
+            if (Config.hadoop_mode)
+                Evaluator.evaluator.initialize_query();
+            MRData res = evalE(plan,null);
+            if (print) {
+                if (!Config.quiet_execution)
+                    System.out.println("Result:");
+                if (!Config.hadoop_mode && Config.bsp_mode && memory_parsed_source(plan))
+                    System.out.println(print(((Tuple)((Bag)res).get(0)).second(),query_type));   // only the second component of the first element is printed
+                else System.out.println(print(res,query_type));
+            } return res;
+        } catch (Exception x) {
+            if (x.getMessage() != null)   // a throwable without a message is not reported here
+                System.err.println("*** MRQL System Error at line "+Main.parser.line_pos()+": "+x);
+            if (Config.trace)
+                x.printStackTrace(System.err);
+            if (Config.testing)
+                throw new Error(x);
+            return null;
+        } catch (Error x) {   // handled exactly like an Exception above
+            if (x.getMessage() != null)
+                System.err.println("*** MRQL System Error at line "+Main.parser.line_pos()+": "+x);
+            if (Config.trace)
+                x.printStackTrace(System.err);
+            if (Config.testing)
+                throw new Error(x);
+            return null;
+        }
+    }
+
+    /** call reset(), then translate, evaluate, and print the results of an MRQL expression e
+     * @param e MRQL query to be evaluated
+     * @return the result of evaluation (MRData), exactly as returned by expression(e,true)
+     */
+    public final static MRData expression ( Tree e ) {
+        reset();
+        return expression(e,true);
+    }
+
+    /** handle the assignment v=e: record the expression e, unevaluated, under the name v in global_vars */
+    public final static void assign ( String v, Tree e ) {
+        if (variable_lookup(v,global_env) != null) {   // the lookup of v in global_env succeeded: drop its type and its binding first
+            global_type_env.remove(v);
+            remove_global_binding(v);
+        };
+        global_vars.insert(v,e);
+    }
+
+    private final static boolean is_function ( Tree e ) {   // true iff the root of e is a function(...) node
+        match e {
+        case function(...): return true;
+        };
+        return false;
+    }
+
+    /** handle the assignment v:=e: evaluate e now, bind v globally to the result and its type, and return query_plan */
+    public final static Tree store ( String v, Tree e ) {
+        reset();
+        if (global_vars.lookup(v) != null)   // an earlier v=e definition of the same name is replaced
+            global_vars.remove(v);
+        MRData res = expression(e,false);   // evaluated without printing; res is null if there is no plan or evaluation failed
+        global_type_env.insert(v,query_type);
+        if (res instanceof Bag)
+            ((Bag)res).materialize();
+        new_global_binding(v,res);
+        return query_plan;   // query_plan is assigned inside expression()
+    }
+
+    /** define an MRQL constant, such as PI: bind the printed name of v globally to the given type and value */
+    private final static void constant ( Tree v, Tree type, MRData value ) {
+        String var = v.toString();
+        if (global_vars.lookup(var) != null)   // an expression definition of the same name is removed
+            global_vars.remove(var);
+        global_type_env.insert(var,type);
+        new_global_binding(var,value);
+    }
+
+    /** define a new function and bind it globally under its name (through store)
+     * @param fnc function name
+     * @param params parameter list; each parameter must have the form bind(variable,type)
+     * @param out_type output type
+     * @param body function body
+     */
+    public final static void functiondef ( String fnc, Trees params, Tree out_type, Tree body ) {
+        reset();
+        Trees as = #[];
+        Trees ps = #[];
+        for ( Tree param: params )
+            match param {
+            case bind(`v,`tp):
+                Tree ntp = normalize_type(tp);
+                as = as.append(ntp);
+                ps = ps.append(#<bind(`v,`ntp)>);
+            case _: type_error(param,"Ill-formed function parameter: "+param);
+            };
+        out_type = normalize_type(out_type);
+        // needed for recursive functions
+        global_type_env.insert(fnc,#<arrow(tuple(...as),`out_type)>);
+        Tree fname = #<`fnc>;
+        if (!is_pure(body))   // a function with an impure body is itself added to impure_functions
+            impure_functions = impure_functions.append(fname);
+        Tree plan = store(fnc,#<function(tuple(...ps),`out_type,`body)>);
+        if (plan != null)
+            Translator.global_functions.insert(fnc,plan);
+        if (Config.hadoop_mode && plan != null)   // also keep the closed plan, as a string, in Plan.conf under "mrql.global."+fnc
+            Plan.conf.set("mrql.global."+fnc,
+                          closure(plan,global_env).toString());
+    }
+
+    /** evaluate the MRQL query e without printing it and dump the result, if there is one, to a binary file */
+    private final static void dump ( String file, Tree e ) {
+        MRData res = expression(e,false);
+        try {
+            query_type = make_persistent_type(query_type);
+            if (res != null)   // a null result (no plan or failed evaluation) is skipped silently
+                if (Config.hadoop_mode) // the else below belongs to this inner if
+                    Evaluator.evaluator.dump(file,query_type,res);
+                else MapReduceAlgebra.dump(file,query_type,res);
+        } catch (Exception x) {
+            throw new Error(x);
+        }
+    }
+
+    /** evaluate the MRQL query e without printing it and dump the result, if there is one, to a text CSV file */
+    private final static void dump_text ( String file, Tree e ) {
+        MRData res = expression(e,false);
+        if (res != null)   // a null result (no plan or failed evaluation) is skipped silently
+            try {
+		Evaluator.evaluator.dump_text(file,query_type,res);
+            } catch (Exception x) {
+                throw new Error(x);
+            }
+    }
+
+    /** define a new named type (typedef): bind name to the normalized type in type_names */
+    private final static void typedef ( String name, Tree type ) {
+        type_names.insert(name,normalize_type(type));
+    }
+
+    /** define a new data type, such as XML and JSON, and register each of its data constructors */
+    private final static void datadef ( String name, Tree type ) {
+        int i = 0;   // position of the current constructor within the union
+        Trees as = #[];
+        match type {
+        case union(...nl):
+            // needed for recursive datatypes
+            global_datatype_env.insert(name,#<union(...nl)>);
+            for ( Tree n: nl )
+                match n {
+                case `c(`t):
+                    if (data_constructors.lookup(c.toString()) == null)
+                        data_constructors.insert(c.toString(),#<`name(`i,`t)>);
+                    else type_error(type,"Data constructor "+c+" has already been defined");
+                    as = as.append(#<`c(`(normalize_type(t)))>);
+                    i++;
+                }
+        };
+        global_datatype_env.remove(name);   // replace the provisional entry by the union with normalized component types
+        global_datatype_env.insert(name,#<union(...as)>);
+    }
+
+    /** define a user aggregation name(type,plus,zero,unit); the result type of unit must unify with the type of zero */
+    private static void aggregation ( String name, Tree type, Tree plus, Tree zero, Tree unit ) {
+        reset();
+        zero = Simplification.rename(zero);
+        plus = Simplification.rename(plus);
+        unit = Simplification.rename(unit);
+        type = normalize_type(type);
+        Tree ztp = TypeInference.type_inference2(zero);
+        Tree v1 = new_var();
+        type_env.insert(v1.toString(),ztp);
+        TypeInference.type_inference2(Normalization.normalize_all(#<apply(`plus,tuple(`v1,`v1))>));   // plus is type-checked on a pair of zero-typed variables
+        Tree v2 = new_var();
+        type_env.insert(v2.toString(),type);
+        Tree utp = TypeInference.type_inference2(Normalization.normalize_all(#<apply(`unit,`v2)>));
+        if (unify(utp,ztp) == null)
+            type_error(unit,"Wrong type in unit result (expected "+ztp+" found "+utp+")");
+        monoids = monoids.append(#<`name(`type,`plus,`zero,`unit)>);
+    }
+
+    /** the MRQL top-level interface to evaluate a single MRQL expression or command (a null expr is ignored) */
+    public final static void evaluate_top_level ( Tree expr ) {
+        if (expr == null)
+            return;
+        match expr {
+        case expression(`e):
+            long t = System.currentTimeMillis();
+            if (expression(e) != null && !Config.quiet_execution)
+                System.out.println("Run time: "+(System.currentTimeMillis()-t)/1000.0+" secs");
+        case assign(`v,`e): assign(v.toString(),e);
+        case store(`v,`e):
+            long t = System.currentTimeMillis();
+            if (store(v.toString(),e) != null && !Config.quiet_execution)
+                System.out.println("Run time: "+(System.currentTimeMillis()-t)/1000.0+" secs");
+        case dump(`s,`e):
+            long t = System.currentTimeMillis();
+            dump(s.stringValue(),e);
+            if (!Config.quiet_execution)
+                System.out.println("Run time: "+(System.currentTimeMillis()-t)/1000.0+" secs");
+        case dump_text(`s,`e):
+            long t = System.currentTimeMillis();
+            dump_text(s.stringValue(),e);
+            if (!Config.quiet_execution)
+                System.out.println("Run time: "+(System.currentTimeMillis()-t)/1000.0+" secs");
+        case typedef(`v,`t): typedef(v.toString(),t);
+        case datadef(`v,`t): datadef(v.toString(),t);
+        case functiondef(`f,params(...p),`tp,`e):
+            functiondef(f.toString(),p,tp,e);
+        case macrodef(`name,params(...p),`e):
+            Translator.global_macros.insert(name.toString(),#<macro(params(...p),`e)>);
+        case aggregation(`aggr,`type,`plus,`zero,`unit):
+            aggregation(aggr.toString(),type,plus,zero,unit);
+        case import(`c):
+            ClassImporter.importClass(c.variableValue());
+        case import(`c,...l):
+            for (Tree m: l)
+                ClassImporter.importMethod(c.variableValue(),m.variableValue());
+        case include(`file):
+            Main.include_file(file.toString());
+        case parser(`n,`p):
+            try {
+                Class<? extends Parser> c = Class.forName(p.toString()).asSubclass(Parser.class);
+                DataSource.parserDirectory.put(n.toString(),c);
+            } catch (ClassNotFoundException e) {
+                throw new Error("Class "+p.toString()+" not found",e);   // same message as before; the cause is kept for stack traces
+            }
+        case impure(`fn):    // not used
+            impure_functions = impure_functions.append(fn);
+        case _: throw new Error("Unknown statement: "+expr);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/Translator.gen
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/Translator.gen b/core/src/main/java/org/apache/mrql/Translator.gen
new file mode 100644
index 0000000..47b34ac
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/Translator.gen
@@ -0,0 +1,445 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+import java.util.*;
+import java.io.*;
+
+
+/** contains useful methods used by all stages of compilation and code generation */
+public class Translator extends Printer {
+    static Trees functions = #[ ];
+
+    static {
+        ClassImporter.load_classes();
+        DataSource.loadParsers();
+    }
+
+    /** type environment that binds local variables to types */
+    static SymbolTable type_env = new SymbolTable();
+
+    /** type environment that binds global variables to types */
+    static SymbolTable global_type_env = new SymbolTable();
+
+    /** type environment that binds datatype names to types */
+    static SymbolTable global_datatype_env = new SymbolTable();
+
+    /** used in pattern compilation and variable renaming */
+    static SymbolTable st = new SymbolTable();
+
+    /** binds macro names to MRQL expressions (used for 'name = expr;' syntax) */
+    static SymbolTable global_vars = new SymbolTable();
+
+    /** binds a UDF name to its plan */
+    static SymbolTable global_functions = new SymbolTable();
+
+    /** binds a macro name to its body */
+    static SymbolTable global_macros = new SymbolTable();
+
+    /** used in typedefs */
+    static SymbolTable type_names = new SymbolTable();
+
+    /** binds a data constructor name to its type */
+    static SymbolTable data_constructors = new SymbolTable();
+
+    static Trees repeat_variables = #[];
+
+    static {
+        global_type_env.insert("args",#<list(string)>);
+    }
+
+    /** expressions with impure functions cannot be factored out */
+    static Trees impure_functions = #[random];
+
+    private static int var_count = 0;
+
+    static void reset () {   // restore the per-query translator state: variable counter, st, type_env, repeat_variables
+        var_count = 0;
+        // type_env always restarts as an empty table (assigned below); it is never aliased to global_type_env
+        st = new SymbolTable();
+        type_env = new SymbolTable();
+        repeat_variables = #[];
+    }
+
+    static void global_reset () {   // reset() plus fresh global tables, then re-register the built-ins through new TopLevel()
+        reset();
+        global_type_env = new SymbolTable();   // NOTE(review): the "args" type inserted by the static initializer is not re-inserted here -- confirm it is restored elsewhere
+        global_datatype_env = new SymbolTable();
+        global_vars = new SymbolTable();
+        global_functions = new SymbolTable();
+        global_macros = new SymbolTable();
+        type_names = new SymbolTable();
+        data_constructors = new SymbolTable();
+        new TopLevel();   // the constructor defines XML, JSON, and PI again
+    }
+
+    static void error ( String msg ) {   // report msg with the current parser line on System.err, then abort by throwing
+        System.err.println("*** MRQL error at line "+Main.parser.line_pos()+": "+msg);
+        throw new Error();   // the Error carries no message; msg has already been printed above
+    }
+
+    final static Tree identity = #<lambda(x,x)>;   // the identity function as a Tree constant
+
+    static Tree identity () {   // note: unlike the constant above, this returns a renamed lambda(x,bag(x)), which wraps its argument in a bag
+        return Normalization.rename(#<lambda(x,bag(x))>);
+    }
+
+    /** is x the name of a collection type, i.e. exactly one of Bag, bag, List, list? (x must not be null) */
+    public static boolean is_collection ( String x ) {
+        return x.equals("Bag") || x.equals("bag") || x.equals("List") || x.equals("list");
+    }
+
+    /** is tp a type with exactly one argument whose constructor name is a collection name (see is_collection)? */
+    public static boolean collection_type ( Tree tp ) {
+        match tp {
+        case `T(`t1): return is_collection(T);
+        };
+        return false;
+    }
+
+    /** is this type a collection type for values stored in HDFS, i.e. one of the capitalized names Bag or List? */
+    public static boolean is_persistent_collection ( String x ) {
+        return x.equals("Bag") || x.equals("List");
+    }
+
+    /** make this collection type a persistent type that is stored in HDFS: list becomes List, bag becomes Bag, any other name is returned as is */
+    public static String persistent_collection ( String x ) {
+        return (x.equals("list")) ? "List" : (x.equals("bag")) ? "Bag" : x;
+    }
+
+    /** make this collection type a transient type stored in memory: List becomes list, Bag becomes bag, any other name is returned as is */
+    public static String transient_collection ( String x ) {
+        return (x.equals("List")) ? "list" : (x.equals("Bag")) ? "bag" : x;
+    }
+
+    /** An aggregation must be based on a commutative monoid (plus,zero) with a unit:
+     *    name(type,plus,zero,unit)
+     * plus: function from (b,b) to b, zero: b (the identity of plus, e.g. the most negative value for max), unit: function from a to b
+     */
+    static Trees monoids =
+        #[ count(any,lambda(x,call(plus,nth(x,0),nth(x,1))),typed(0,long),lambda(x,typed(1,long))),
+           sum(int,lambda(x,call(plus,nth(x,0),nth(x,1))),typed(0,long),lambda(x,typed(x,long))),
+           sum(long,lambda(x,call(plus,nth(x,0),nth(x,1))),typed(0,long),`identity),
+           sum(float,lambda(x,call(plus,nth(x,0),nth(x,1))),typed(0.0,double),lambda(x,typed(x,double))),
+           sum(double,lambda(x,call(plus,nth(x,0),nth(x,1))),typed(0.0,double),`identity),
+           max(int,lambda(x,call(max,nth(x,0),nth(x,1))),typed(`(Integer.MIN_VALUE),int),`identity),
+           max(long,lambda(x,call(max,nth(x,0),nth(x,1))),typed(`(Long.MIN_VALUE),long),`identity),
+           max(float,lambda(x,call(max,nth(x,0),nth(x,1))),typed(`(-Float.MAX_VALUE),float),`identity),
+           max(double,lambda(x,call(max,nth(x,0),nth(x,1))),typed(`(-Double.MAX_VALUE),double),`identity),
+           min(int,lambda(x,call(min,nth(x,0),nth(x,1))),typed(`(Integer.MAX_VALUE),int),`identity),
+           min(long,lambda(x,call(min,nth(x,0),nth(x,1))),typed(`(Long.MAX_VALUE),long),`identity),
+           min(float,lambda(x,call(min,nth(x,0),nth(x,1))),typed(`(Float.MAX_VALUE),float),`identity),
+           min(double,lambda(x,call(min,nth(x,0),nth(x,1))),typed(`(Double.MAX_VALUE),double),`identity),
+           avg_aggr(int,lambda(x,tuple(call(plus,nth(nth(x,0),0),nth(nth(x,1),0)),
+                                       call(plus,nth(nth(x,0),1),nth(nth(x,1),1)))),
+                    tuple(typed(0.0,double),typed(0,long)),
+                    lambda(x,tuple(typed(x,double),typed(1,long)))),
+           avg_aggr(long,lambda(x,tuple(call(plus,nth(nth(x,0),0),nth(nth(x,1),0)),
+                                        call(plus,nth(nth(x,0),1),nth(nth(x,1),1)))),
+                    tuple(typed(0.0,double),typed(0,long)),
+                    lambda(x,tuple(typed(x,double),typed(1,long)))),
+           avg_aggr(float,lambda(x,tuple(call(plus,nth(nth(x,0),0),nth(nth(x,1),0)),
+                                         call(plus,nth(nth(x,0),1),nth(nth(x,1),1)))),
+                    tuple(typed(0.0,double),typed(0,long)),
+                    lambda(x,tuple(typed(x,double),typed(1,long)))),
+           avg_aggr(double,lambda(x,tuple(call(plus,nth(nth(x,0),0),nth(nth(x,1),0)),
+                                          call(plus,nth(nth(x,0),1),nth(nth(x,1),1)))),
+                    tuple(typed(0.0,double),typed(0,long)),
+                    lambda(x,tuple(typed(x,double),typed(1,long)))),
+           all(bool,lambda(x,call(and,nth(x,0),nth(x,1))),true,`identity),
+           some(bool,lambda(x,call(or,nth(x,0),nth(x,1))),false,`identity)
+           ];
+
+    static void print_aggregates () {   // print " name:type" for every entry of monoids on one line of System.out
+        for ( Tree m: monoids )
+            match m {
+            case `f(`tp,...):
+                System.out.print(" "+f+":"+print_type(tp));
+            }
+        System.out.println();
+    }
+
+    static Trees plans_with_distributed_lambdas
+        = #[MapReduce,MapAggregateReduce,MapCombineReduce,Aggregate,
+            MapReduce2,MapCombineReduce2,MapAggregateReduce2,MapJoin,MapAggregateJoin,
+            CrossProduct,CrossAggregateProduct,cMap,AggregateMap,BSP,GroupByJoin,repeat,closure];   // names of plan nodes, each listed once
+
+    static Trees algebraic_operators
+        = #[mapReduce,mapReduce2,cmap,join,groupBy,orderBy,aggregate,map,filter];
+
+    static Trees plan_names = plans_with_distributed_lambdas.append(algebraic_operators)
+                                   .append(#[Repeat,Closure,Generator,Let,If]);   // the two lists above plus five more names
+
+    /** generates new variable names of the form x_N, where N is a counter that reset() sets back to 0 */
+    public static Tree new_var () {
+        return new VariableLeaf("x_"+(Integer.toString(var_count++)));
+    }
+
+    /** is this expression pure? (false iff it contains, at any depth, a call to a function listed in impure_functions) */
+    static boolean is_pure ( Tree expr ) {
+        match expr {
+        case call(`f,...al):
+            if (impure_functions.member(f))
+                return false;
+            else fail   // presumably Gen's fail continues with the next case, so the arguments of a pure call are still checked
+        case `f(...al):
+            for ( Tree a: al )
+                if (!is_pure(a))
+                    return false;
+        };
+        return true;
+    }
+
+    public static Trees union ( Trees xs, Trees ys ) {   // xs followed by the elements of ys that are not yet in the result
+        Trees res = xs;
+        for ( Tree y: ys )
+            if (!res.member(y))   // test against the result so far, so that repeats inside ys are added only once
+                res = res.append(y);
+        return res;
+    }
+
+    /** return the variables of a pattern: tuple, record, and typed patterns are traversed; a variable yields itself */
+    static Trees pattern_variables ( Tree pattern ) {
+        Trees args = #[];
+        match pattern {
+        case tuple(...pl):
+            for ( Tree p: pl )
+                args = union(args,pattern_variables(p));
+        case record(...bl):
+            for ( Tree b: bl )
+                match b {
+                case bind(`n,`p):
+                    args = union(args,pattern_variables(p));
+                };
+        case typed(`p,_):
+            args = pattern_variables(p);
+        case `v:
+            if (v.is_variable())
+                args = #[`v];
+        };
+        return args;   // empty for anything that is none of the above
+    }
+
+    /** replace all occurrences of from_expr in expr with to_expr
+     * @param from_expr target
+     * @param to_expr replacement
+     * @param expr input
+     * @return equal to expr but with all occurrences of from_expr replaced with to_expr
+     */
+    public static Tree subst ( Tree from_expr, Tree to_expr, Tree expr ) {
+        if (expr.equals(from_expr))
+            return to_expr;
+        match expr {
+        case lambda(`v,_):
+            if (pattern_variables(v).member(from_expr))
+                return expr;   // the lambda pattern binds from_expr itself: the lambda is left untouched
+            else fail
+        case bind(`a,`u):
+            return #<bind(`a,`(subst(from_expr,to_expr,u)))>;   // only the second component of a bind is rewritten
+        case `f(...al):
+            return #<`f(...(subst_list(from_expr,to_expr,al)))>;
+        };
+        return expr;
+    }
+
+    /** replace all occurrences of from_expr in el with to_expr by applying subst to every element
+     * @param from_expr target
+     * @param to_expr replacement
+     * @param el list of input expressions
+     * @return equal to el but with all occurrences of from_expr replaced with to_expr
+     */
+    public static Trees subst_list ( Tree from_expr, Tree to_expr, Trees el ) {
+        Trees bl = #[];
+        for ( Tree e: el )
+            bl = bl.append(subst(from_expr,to_expr,e));
+        return bl;
+    }
+
+    /** replace all occurrences of var in expr with to_expr, unless to_expr is impure and var occurs more than once
+     * @param var target
+     * @param to_expr replacement
+     * @param expr input
+     * @return expr with var replaced by to_expr, or let(var,to_expr,expr) when to_expr is impure and var occurs more than once
+     */
+    public static Tree subst_var ( Tree var, Tree to_expr, Tree expr ) {
+        if (!is_pure(to_expr) && occurences(var,expr) > 1)
+            return #<let(`var,`to_expr,`expr)>;
+        else return subst(var,to_expr,expr);
+    }
+
+    /** used in the MRQL parser to handle templates: parse the template text with the named parser and splice the embedded expressions back in */
+    public static Tree template ( Tree s ) {
+        match s {
+        case template(`parser,...as):
+            try {
+                Trees args = #[];
+                String tx = "";   // the template text, with every embedded expression replaced by a {{i}} placeholder
+                int i = 0;
+                for ( Tree a: as )
+                    match a {
+                    case text(`t): tx += t;
+                    case _: args = args.append(a);
+                            tx += "{{"+(i++)+"}}";
+                    };
+                Class<? extends Parser> pc = DataSource.parserDirectory.get(parser.toString());
+                if (pc == null)
+                    throw new Error("Unrecognized parser: "+parser);   // an Error: not intercepted by the catch (Exception) below
+                Parser p = pc.newInstance();
+                p.initialize(#[]);
+                Bag e = p.parse(tx);
+                Tree res = Interpreter.reify(e.get(0),p.type());   // only the first parsed value is used
+                for ( int j = 0; j < i; j++ )
+                    res = subst(new VariableLeaf("t_"+j),args.nth(j),res);   // presumably placeholder {{j}} shows up as variable t_j in the reified tree -- TODO confirm
+                return res;
+            } catch (Exception e) {
+                throw new Error("Wrong template: "+s+"\n"+e);
+            }
+        };
+        throw new Error("Wrong template: "+s);
+    }
+
+    /** convert a Tree to the text of a Java expression that constructs this Tree (used in the Compiler) */
+    public static String reify ( Tree e ) {
+        if (e instanceof LongLeaf)
+            return "new org.apache.mrql.gen.LongLeaf(" + e + ")";
+        else if (e instanceof DoubleLeaf)
+            return "new org.apache.mrql.gen.DoubleLeaf(" + e + ")";
+        else if (e instanceof VariableLeaf)
+            return "new org.apache.mrql.gen.VariableLeaf(\"" + e.variableValue() + "\")";
+        else if (e instanceof StringLeaf)
+            return "new org.apache.mrql.gen.StringLeaf(" + e.toString().replace("\\","\\\\") + ")";   // backslashes are doubled; no quotes are added here
+        else {
+            Node n = (Node) e;   // anything that is not one of the four leaf kinds above must be a Node
+            return "new org.apache.mrql.gen.Node(\""+n.name()+"\","+reify(n.children())+")";
+        }
+    }
+
+    /** convert a list of Trees to the text of a Java expression: Trees.nil followed by one .append(...) per element */
+    public static String reify ( Trees ts ) {
+        String s = "org.apache.mrql.gen.Trees.nil";
+        for ( Tree c: ts )
+            s += ".append("+reify(c)+")";
+        return s;
+    }
+
+    /** return the list of free variables in e that are not in exclude list (only variables named x_... count; the result may contain repeats) */
+    public static Trees free_variables ( Tree e, Trees exclude ) {
+        if (e == null)
+            return #[];
+        match e {
+        case lambda(`x,`b):
+            return free_variables(b,exclude.append(pattern_variables(x)));
+        case let(`x,`u,`b):
+            return free_variables(b,exclude.append(pattern_variables(x)))
+                     .append(free_variables(u,exclude));
+        case Let(`x,`u,`b):
+            return free_variables(b,exclude.append(pattern_variables(x)))
+                     .append(free_variables(u,exclude));
+        case select(`u,from(...bs),`p):
+            Trees ex = exclude;   // grows binding by binding: each domain sees only the variables bound before it
+            Trees fs = #[];
+            for ( Tree b: bs )
+                match b {
+                case bind(`v,`x):
+                    fs = fs.append(free_variables(x,ex));
+                    ex = ex.append(pattern_variables(v));
+                };
+            return free_variables(p,ex).append(free_variables(u,ex)).append(fs);
+        case `f(...as):
+            Trees res = #[];
+            for ( Tree a: as )
+                res = res.append(free_variables(a,exclude));
+            return res;
+        case `v:
+            if (v.is_variable() && v.toString().startsWith("x_") && !exclude.member(v))
+                return #[`v];
+        };
+        return #[];
+    }
+
+    /** count the occurrences of x in e (a subtree equal to x counts once and is not searched any further) */
+    public static int occurences ( Tree x, Tree e ) {
+        if (x.equals(e))
+            return 1;
+        match e {
+        case `f(...as):
+            int i = 0;
+            for ( Tree a: as )
+                i += occurences(x,a);
+            return i;
+        };
+        return 0;
+    }
+
+    /** return true if x is equal to y modulo variable substitution; st collects the lambda parameters of x mapped to those of y */
+    public static boolean alpha_equivalent ( Tree x, Tree y, SymbolTable st ) {
+        match #<T(`x,`y)> {
+        case T(lambda(`vx,`bx),lambda(`vy,`by)):
+            if (!vx.equals(vy))
+                st.insert(vx.toString(),vy);   // remember that parameter vx of x corresponds to vy of y
+            return alpha_equivalent(bx,by,st);
+        case T(`f(...xs),`g(...ys)):
+            if (!f.equals(g) || xs.length() != ys.length())
+                return false;
+            for ( ; !xs.is_empty(); xs = xs.tail(), ys = ys.tail() )
+                if (!alpha_equivalent(xs.head(),ys.head(),st))
+                    return false;
+            return true;
+        case T(`v,`w):
+            if (v.is_variable() && w.is_variable())
+                return v.equals(w) || (st.lookup(v.toString()) != null
+                                       && st.lookup(v.toString()).equals(w));
+        };
+        return x.equals(y);   // non-variable leaves must be equal
+    }
+
+    private static SymbolTable alpha_symbol_table = new SymbolTable();
+
+    /** return true if x is equal to y modulo variable substitution, using a fresh scope of the shared alpha_symbol_table */
+    public static boolean alpha_equivalent ( Tree x, Tree y ) {
+        alpha_symbol_table.begin_scope();
+        boolean b = alpha_equivalent(x,y,alpha_symbol_table);
+        alpha_symbol_table.end_scope();   // NOTE(review): not reached if the comparison throws -- confirm that this is acceptable
+        return b;
+    }
+
+    /** translate a simplified select MRQL query to an algebraic form: one nested cmap per from-binding around a bag or an if */
+    public static Tree translate_select ( Tree e ) {
+       match e {
+       case select(`u,from(),where(true)):
+           return #<bag(`(translate_select(u)))>;
+       case select(`u,from(),where(`c)):
+           return #<if(`(translate_select(c)),bag(`(translate_select(u))),bag())>;
+       case select(`u,from(bind(`v,`d),...bl),where(`c)):
+           Tree n = translate_select(#<select(`u,from(...bl),where(`c))>);   // translate the select over the remaining bindings first
+           return #<cmap(lambda(`v,`n),`(translate_select(d)))>;
+       case `f(...al):
+           Trees bl = #[];
+           for ( Tree a: al )
+               bl = bl.append(translate_select(a));
+           return #<`f(...bl)>;
+       };
+       return e;   // leaves are returned unchanged
+    }
+
+    /** the MRQL top-level interface to evaluate a single MRQL expression or command (delegates to TopLevel.evaluate_top_level) */
+    public static void top_level ( Tree expr ) {
+        TopLevel.evaluate_top_level(expr);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/Tuple.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/Tuple.java b/core/src/main/java/org/apache/mrql/Tuple.java
new file mode 100644
index 0000000..bced3dd
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/Tuple.java
@@ -0,0 +1,202 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import java.io.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+
+
+/** a container for Tuples */
+final public class Tuple extends MRData {
+    private final static long serialVersionUID = 723385754575L;
+
+    MRData[] tuple;
+
+    public Tuple ( int size ) {
+        tuple = new MRData[size];
+    }
+
+    public Tuple ( final MRData ...as ) {
+        tuple = as;
+    }
+
+    /** the number of elements in the tuple */
+    public short size () { return (short)tuple.length; }
+
+    public void materializeAll () {
+        for (MRData e: tuple)
+            e.materializeAll();
+    };
+
+    /** the i'th element of the tuple */
+    public MRData get ( int i ) {
+        return tuple[i];
+    }
+
+    /** the first element of the tuple */
+    public MRData first () { return tuple[0]; }
+
+    /** the second element of the tuple */
+    public MRData second () { return tuple[1]; }
+
+    /** replace the i'th element of a tuple with new data and return a new value */
+    public MRData set ( int i, MRData data, MRData ret ) {
+        tuple[i] = data;
+        return ret;
+    }
+
+    /** replace the i'th element of a tuple with new data */
+    public Tuple set ( int i, MRData data ) {
+        tuple[i] = data;
+        return this;
+    }
+
+    final public void write ( DataOutput out ) throws IOException {
+        if (tuple.length == 0)
+            out.writeByte(MRContainer.NULL);
+        else if (tuple.length == 2) {
+            out.writeByte(MRContainer.PAIR);
+            tuple[0].write(out);
+            tuple[1].write(out);
+        } else if (tuple.length == 3) {
+            out.writeByte(MRContainer.TRIPLE);
+            tuple[0].write(out);
+            tuple[1].write(out);
+            tuple[2].write(out);
+        } else {
+            out.writeByte(MRContainer.TUPLE);
+            WritableUtils.writeVInt(out,tuple.length);
+            for (short i = 0; i < tuple.length; i++)
+                tuple[i].write(out);
+        }
+    }
+
+    final public static Tuple read ( DataInput in ) throws IOException {
+        int n = WritableUtils.readVInt(in);
+        Tuple t = new Tuple(n);
+        for ( short i = 0; i < n; i++ )
+            t.tuple[i] = MRContainer.read(in);
+        return t;
+    }
+
+    final public static Tuple read2 ( DataInput in ) throws IOException {
+        return new Tuple(MRContainer.read(in),MRContainer.read(in));
+    }
+
+    final public static Tuple read3 ( DataInput in ) throws IOException {
+        return new Tuple(MRContainer.read(in),MRContainer.read(in),MRContainer.read(in));
+    }
+
+    public void readFields ( DataInput in ) throws IOException {
+        int n = WritableUtils.readVInt(in);
+        tuple = new Tuple[n];
+        for ( short i = 0; i < n; i++ )
+            tuple[i] = MRContainer.read(in);
+    }
+
+    public int compareTo ( MRData x ) {
+        assert(x instanceof Tuple);
+        Tuple t = (Tuple) x;
+        for ( short i = 0; i < tuple.length && i < t.tuple.length; i++ ) {
+            int c = get(i).compareTo(t.get(i));
+            if (c < 0)
+                return -1;
+            else if (c > 0)
+                return 1;
+        };
+        if (tuple.length > t.tuple.length)
+            return 1;
+        else if (tuple.length < t.tuple.length)
+            return -1;
+        else return 0;
+    }
+
+    final public static int compare ( byte[] x, int xs, int xl, byte[] y, int ys, int yl, int[] size ) {
+        try {
+            int n = WritableComparator.readVInt(x,xs);
+            int s = WritableUtils.decodeVIntSize(x[xs]);
+            for ( short i = 0; i < n; i++ ) {
+                int k = MRContainer.compare(x,xs+s,xl-s,y,ys+s,yl-s,size);
+                if (k != 0)
+                    return k;
+                s += size[0];
+            };
+            size[0] = s+1;
+            return 0;
+        } catch (IOException e) {
+            throw new Error(e);
+        }
+    }
+
+    final public static int compare2 ( byte[] x, int xs, int xl, byte[] y, int ys, int yl, int[] size ) {
+        int k = MRContainer.compare(x,xs,xl,y,ys,yl,size);
+        if (k != 0)
+            return k;
+        int s = size[0];
+        k = MRContainer.compare(x,xs+s,xl-s,y,ys+s,yl-s,size);
+        if (k != 0)
+            return k;
+        size[0] += s+1;
+        return 0;
+    }
+
+    final public static int compare3 ( byte[] x, int xs, int xl, byte[] y, int ys, int yl, int[] size ) {
+        int k = MRContainer.compare(x,xs,xl,y,ys,yl,size);
+        if (k != 0)
+            return k;
+        int s = size[0];
+        k = MRContainer.compare(x,xs+s,xl-s,y,ys+s,yl-s,size);
+        if (k != 0)
+            return k;
+        s += size[0];
+        k = MRContainer.compare(x,xs+s,xl-s,y,ys+s,yl-s,size);
+        if (k != 0)
+            return k;
+        size[0] += s+1;
+        return 0;
+    }
+
+    public boolean equals ( Object x ) {
+        if (!(x instanceof Tuple))
+            return false;
+        Tuple xt = (Tuple) x;
+        if (xt.tuple.length != tuple.length)
+            return false;
+        for ( short i = 0; i < tuple.length; i++ )
+            if (!xt.get(i).equals(get(i)))
+                return false;
+        return true;
+    }
+
+    public int hashCode () {
+        int h = 127;
+        for ( short i = 1; i < tuple.length; i++ )
+            h ^= get(i).hashCode();
+        return Math.abs(h);
+    }
+
+    public String toString () {
+        if (size() == 0)
+            return "()";
+        String s = "("+get((short)0);
+        for ( short i = 1; i < tuple.length; i++ )
+            s += ","+get(i);
+        return s+")";
+    }
+}