You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mrql.apache.org by fe...@apache.org on 2014/03/13 15:24:43 UTC
[18/26] MRQL-32: Refactoring directory structure for Eclipse
http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/TypeInference.gen
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/TypeInference.gen b/core/src/main/java/org/apache/mrql/TypeInference.gen
new file mode 100644
index 0000000..5276ae1
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/TypeInference.gen
@@ -0,0 +1,1356 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+import java.util.*;
+
+
+/** the type inference/checker for MRQL expressions and algebraic forms */
+public class TypeInference extends Translator {
+
+ private static Tree make_tuple ( Trees pl ) {
+ if (pl.length() == 1)
+ return pl.head();
+ return #<tuple(...pl)>;
+ }
+
+ public static Tree make_persistent_type ( Tree tp ) {
+ match tp {
+ case `f(...al):
+ Trees bs = #[];
+ for ( Tree a: al )
+ bs = bs.append(make_persistent_type(a));
+ String g = persistent_collection(f);
+ return #<`g(...bs)>;
+ };
+ return tp;
+ }
+
+ private static String max_collection ( String x, String y ) {
+ boolean is_bag = x.equals("Bag") || y.equals("Bag") || x.equals("bag") || y.equals("bag");
+ boolean is_persistent = x.equals("Bag") || y.equals("Bag") || x.equals("List") || y.equals("List");
+ return (is_bag) ? ((is_persistent) ? "Bag" : "bag") : ((is_persistent) ? "List" : "list");
+ }
+
+ private static boolean numerical ( String tp ) {
+ return #[short,int,long,float,double].member(#<`tp>);
+ }
+
+ static void type_error ( Tree e, String msg ) {
+ System.err.println("*** Type error (line: "+e.line+", position: "+e.position+")");
+ if (Config.trace && type_env.iterator().hasNext()) {
+ msg += "\nVariable Types:";
+ for ( String var: type_env )
+ msg += "\n "+var + ": " + print_type(type_env.lookup(var));
+ };
+ System.err.println("*** "+msg);
+ throw new Error("Type Error");
+ }
+
+ /** given that pattern has type tp, bind the pattern variables to types */
+ private static Trees bind_pattern_type ( Tree pattern, Tree tp ) {
+ Trees args = #[];
+ match pattern {
+ case tuple(...pl):
+ match tp {
+ case tuple(...tl):
+ if (tl.length() != pl.length())
+ type_error(pattern,"Tuple pattern "+print_query(pattern)+" must have exactly "
+ +tl.length()+" components");
+ int i = 0;
+ for ( Tree p: pl )
+ args = args.append(bind_pattern_type(p,tl.nth(i++)));
+ case `etp: type_error(pattern,"Wrong pattern: found "+print_query(pattern)
+ +" but expected a pattern that matches the type "+print_type(etp));
+ };
+ case record(...bl):
+ Trees attrs = #[];
+ match tp {
+ case record(...tl):
+ for ( Tree b: bl )
+ match b {
+ case bind(`n,`p):
+ boolean found = false;
+ if (attrs.member(n))
+ type_error(pattern,"Duplicate record attribute name: "+n
+ +" in pattern "+print_query(pattern));
+ attrs = attrs.append(n);
+ for ( Tree t: tl )
+ match t {
+ case bind(`nt,`pt):
+ if (!nt.equals(n))
+ fail;
+ found = true;
+ args = args.append(bind_pattern_type(p,pt));
+ };
+ if (!found)
+ type_error(pattern,"Wrong record component: "+n
+ +" in pattern "+print_query(pattern)
+ +" (expected one from "+print_type(tp)+")");
+ };
+ case `etp: type_error(pattern,"Wrong pattern: found "+print_query(pattern)
+ +" but expected a pattern that matches the type "+print_type(etp));
+ };
+ case typed(`p,`t):
+ if (subtype(t,tp))
+ args = bind_pattern_type(p,t);
+ else type_error(pattern,"Type "+print_type(t)+" in pattern "+print_query(pattern)
+ +" does not match the expected type "+print_type(tp));
+ case list(...pl):
+ match tp {
+ case list(`etp):
+ for ( Tree p: pl )
+ args = args.append(bind_pattern_type(p,etp));
+ case `stp: type_error(pattern,"List pattern "+print_query(pattern)
+ +" can only be used for lists (found "+print_type(tp)+")");
+ };
+ case call(`c,...s):
+ Tree ci = data_constructors.lookup(c.toString());
+ if (ci == null)
+ type_error(pattern,"Undefined data constructor "+c+" in pattern "+print_query(pattern));
+ match ci {
+ case `dname(`n,`dtp):
+ if (!subtype(tp,expand(#<`dname>)))
+ type_error(pattern,"Cannot use the data constructor "+print_query(pattern)
+ +" in a pattern that expects type "+print_type(tp));
+ args = args.append(bind_pattern_type(s.length() == 1 ? s.head() : #<tuple(...s)>,
+ dtp));
+ };
+ case any: ;
+ case `v:
+ if (v.is_variable()) {
+ args = args.append(v);
+ type_env.insert(v.toString(),tp);
+ } else if (!subtype(type_inference2(v),tp))
+ type_error(pattern,"The constant "+v+" in pattern "
+ +print_query(pattern)+" is not of type "+print_type(tp));
+ };
+ return args;
+ }
+
+ private static short needs_coerce ( Tree x, Tree y ) {
+ if (x.is_variable() && numerical(x.toString())
+ && y.is_variable() && numerical(y.toString())) {
+ short cx = MRContainer.type_code(x.toString());
+ short cy = MRContainer.type_code(y.toString());
+ return (cx == cy) ? -1 : (cx > cy) ? cx : cy;
+ };
+ return -1;
+ }
+
+ /** type equality in MRQL is structured equality, not named equality */
+ public static boolean equal_types ( Tree tx, Tree ty ) {
+ tx = expand(tx);
+ ty = expand(ty);
+ if (tx.equals(ty))
+ return true;
+ match tx {
+ case `f(...xs):
+ match ty {
+ case `g(...ys):
+ if (f.equals(g) && xs.length() == ys.length()) {
+ for ( ; !ys.is_empty(); xs = xs.tail(), ys = ys.tail() )
+ if (!equal_types(xs.head(),ys.head()))
+ return false;
+ return true;
+ }
+ }
+ };
+ return false;
+ }
+
+ /** is the collection type name S a subtype of that of T?
+ List \lt Bag \lt bag and List \lt list \lt bag
+ */
+ public static boolean subtype ( String S, String T ) {
+ return S.equals(T)
+ || (S.equals("List") && T.equals("list"))
+ || (S.equals("List") && T.equals("Bag"))
+ || (S.equals("list") && T.equals("bag"))
+ || (S.equals("Bag") && T.equals("bag"));
+ }
+
+ /** is the type tx a subtype of type ty? */
+ public static boolean subtype ( Tree tx, Tree ty ) {
+ tx = expand(tx);
+ ty = expand(ty);
+ if (ty.equals(#<any>))
+ return true;
+ if (equal_types(tx,ty))
+ return true;
+ if (tx.is_variable() && numerical(tx.toString())
+ && ty.is_variable() && numerical(ty.toString()))
+ return MRContainer.type_code(tx.toString()) <= MRContainer.type_code(ty.toString());
+ match tx {
+ case `T(`tex):
+ if (!is_collection(T))
+ fail;
+ match ty {
+ case `S(`tey):
+ if (is_collection(S))
+ return subtype(T,S) && subtype(tex,tey);
+ else fail
+ };
+ case union(...):
+ if (ty.equals(#<union>)) // used for XML functions
+ return true;
+ else fail
+ case `f(...xs):
+ match ty {
+ case `g(...ys):
+ if (f.equals(g))
+ return subtype(xs,ys);
+ }
+ };
+ return false;
+ }
+
+ public static boolean subtype ( Trees ts1, Trees ts2 ) {
+ if (ts1.length() != ts2.length())
+ return false;
+ for ( Trees s1 = ts1, s2 = ts2; !s1.is_empty(); s1=s1.tail(), s2=s2.tail() )
+ if (!subtype(s1.head(),s2.head()))
+ return false;
+ return true;
+ }
+
+ public static int compare_types ( Tree t1, Tree t2 ) {
+ if (t1.equals(#<any>) && t2.equals(#<any>))
+ return 0;
+ else if (t2.equals(#<any>))
+ return -1;
+ else if (t1.equals(#<any>))
+ return 1;
+ else if (t1 instanceof VariableLeaf && t2 instanceof VariableLeaf)
+ return MRContainer.type_code(((VariableLeaf)t1).value())
+ -MRContainer.type_code(((VariableLeaf)t2).value());
+ else if (t1 instanceof VariableLeaf)
+ return -1;
+ else return 1;
+ }
+
+ /** if the expression at loc.head has type tx that is a subtype of ty, coerce it to ty
+ * NOTE: it destructively changes the expression at loc
+ */
+ private static void coerce ( Tree tx, Tree ty, Trees loc ) {
+ tx = expand(tx);
+ ty = expand(ty);
+ if (ty.equals(#<any>) || equal_types(tx,ty))
+ return;
+ if (tx.is_variable() && numerical(tx.toString())
+ && ty.is_variable() && numerical(ty.toString())) {
+ loc.head = #<call(coerce,`(loc.head), // destructive
+ `(MRContainer.type_code(ty.toString())))>;
+ return;
+ };
+ match tx {
+ case `T(`tex):
+ if (!is_collection(T))
+ fail;
+ match ty {
+ case `S(`tey):
+ if (!is_collection(S))
+ fail;
+ if (is_persistent_collection(T) && !is_persistent_collection(S))
+ loc.head = #<Collect(`(loc.head))>; // destructive
+ if (subtype(tex,tey) && unify(tex,tey) == null) {
+ Tree nv = new_var();
+ Tree b = #<bag(`nv)>;
+ coerce(tex,tey,((Node)b).children);
+ loc.head = #<cmap(lambda(`nv,`b),`(loc.head))>; // destructive
+ return;
+ }
+ };
+ case tuple(...xs):
+ match ty {
+ case tuple(...ys):
+ Tree nv = new_var();
+ Trees nt = #[];
+ int i = 0;
+ for ( Trees xl = xs, yl = ys; xl != #[] && yl != #[]; xl = xl.tail, yl = yl.tail ) {
+ Trees dest = #[nth(`nv,`(i++))];
+ coerce(xl.head,yl.head,dest);
+ nt = nt.append(dest);
+ };
+ loc.head = #<let(`nv,`(loc.head),tuple(...nt))>; // destructive
+ }
+ case record(...xs):
+ match ty {
+ case record(...ys):
+ Tree nv = new_var();
+ Trees nt = #[];
+ for ( Tree x: xs )
+ match x {
+ case bind(`ax,`tex):
+ for ( Tree y: ys )
+ match y {
+ case bind(`ay,`tey):
+ if (equal_types(ax,ay)) {
+ Tree b = #<bind(`ax,project(`nv,`ax))>;
+ nt = nt.append(b);
+ coerce(tex,tey,((Node)b).children.tail);
+ }
+ }
+ };
+ loc.head = #<let(`nv,`(loc.head),record(...nt))>; // destructive
+ }
+ }
+ }
+
+ /** pure-mans type-unification (the any type unifies with everything) */
+ private static Trees unify ( Trees ts1, Trees ts2 ) {
+ Trees s = #[];
+ if (ts1.length() != ts2.length())
+ return null;
+ for ( Trees s1 = ts1, s2 = ts2; !s1.is_empty(); s1=s1.tail(), s2=s2.tail() ) {
+ Tree t = unify(s1.head(),s2.head());
+ if (t == null)
+ return null;
+ else s = s.append(t);
+ };
+ return s;
+ }
+
+ /** pure-mans type-unification (the any type unifies with everything) */
+ public static Tree unify ( Tree t1, Tree t2 ) {
+ t1 = expand(t1);
+ t2 = expand(t2);
+ if (t1.equals(#<any>))
+ return t2;
+ if (t2.equals(#<any>) || equal_types(t1,t2))
+ return t1;
+ match t1 {
+ case `T(`t):
+ match t2 {
+ case `S(`s):
+ if (!T.equals(S))
+ fail;
+ if (!is_collection(T))
+ fail;
+ Tree ts = unify(t,s);
+ if (ts != null)
+ return #<`T(`ts)>;
+ }
+ case `f(...ts1):
+ match t2 {
+ case `g(...ts2):
+ Trees s = unify(ts1,ts2);
+ if (f.equals(g) && s != null)
+ return #<`f(...s)>;
+ }
+ };
+ return null;
+ }
+
+ /** if the types tx and ty do not unify, try to coerce them */
+ static Tree unify ( Tree tx, Tree ty, Trees destx, Trees desty ) {
+ Tree tp = unify(tx,ty);
+ if (tp != null)
+ return tp;
+ else if (subtype(tx,ty)) {
+ coerce(tx,ty,destx);
+ return ty;
+ } else if (subtype(ty,tx)) {
+ coerce(ty,tx,desty);
+ return tx;
+ } else return null;
+ }
+
+ /** find a type from the list of types ts that is the supertype of all other types */
+ private static Tree maximum_type ( Trees ts ) {
+ Tree maxt = ts.head;
+ for ( Tree t: ts.tail )
+ if (subtype(maxt,t) || maxt.equals(#<any>))
+ maxt = t;
+ return maxt;
+ }
+
+ /** if the type tp is a named type, expand it using its definition */
+ public static Tree expand ( Tree tp ) {
+ if (!tp.is_variable())
+ return tp;
+ Tree rt = global_datatype_env.lookup(tp.toString());
+ if (rt != null)
+ return expand(rt);
+ rt = type_names.lookup(tp.toString());
+ if (rt == null)
+ return tp;
+ else return expand(rt);
+ }
+
+ /** infer the type of an expression and expand it if necessary
+ * @param e the expression
+ * @return the type of e
+ */
+ public static Tree type_inference2 ( Tree e ) {
+ return expand(type_inference(e));
+ }
+
+ /** infer the type of an expression
+ * @param e the expression
+ * @return the type of e
+ */
+ public static Tree type_inference ( Tree e ) {
+ match e {
+ case select(`opt_dist,`u,from(...bl),where(`c),groupby(...gs),orderby(...os)):
+ type_env.begin_scope();
+ Trees dvs = #[];
+ String max_type = "list";
+ for ( Tree b: bl )
+ match b {
+ case bind(`p,`d):
+ match type_inference2(d) {
+ case `T(`tp):
+ if (!is_collection(T))
+ fail;
+ dvs = dvs.append(bind_pattern_type(p,tp));
+ max_type = max_collection(T,max_type);
+ case `ftp:
+ type_error(e,"The from-binding domain "+print_query(d)+" in "
+ +print_query(e)+" must be a collection (found "
+ +print_type(ftp)+")");
+ }
+ };
+ Tree ctp = type_inference2(c);
+ if (unify(ctp,#<bool>) == null)
+ type_error(e,"The predicate "+print_query(c)+" in "+print_query(e)
+ +" must be a boolean (found "+print_type(ctp)+")");
+ match #<groupby(...gs)> {
+ case groupby(`h,...gl):
+ Trees pvs = #[];
+ for ( Tree g: gl )
+ match g {
+ case bind(`gp,`gd):
+ bind_pattern_type(gp,type_inference2(gd));
+ pvs = pvs.append(pattern_variables(gp));
+ };
+ // find the type of the partition variable
+ Trees partl = #[];
+ for ( Tree dv: pattern_variables(#<tuple(...dvs)>) )
+ partl = partl.append(#<bind(`dv,`(type_env.lookup(dv.toString())))>);
+ type_env.insert("partition",#<bag(record(...partl))>);
+ // lift domain variables to bags
+ for ( Tree dv: dvs )
+ if (!pvs.member(dv))
+ type_env.insert(dv.toString(),
+ #<bag(`(type_env.lookup(dv.toString())))>);
+ Tree htp = type_inference2(h);
+ if (unify(htp,#<bool>) == null)
+ type_error(e,"The group-by predicate "+print_query(h)+" in "+print_query(e)
+ +" must be a boolean (found "+print_type(htp)+")");
+ };
+ match #<orderby(...os)> {
+ case orderby(`l,...ol):
+ if (!l.equals(#<none>)) {
+ Tree ltp = type_inference2(l);
+ if (unify(ltp,#<int>) == null)
+ type_error(e,"The limit "+print_query(l)+" in "+print_query(e)
+ +" must be an int (found "+print_type(ltp)+")");
+ };
+ for ( Tree o: ol)
+ type_inference2(o);
+ Tree rtp = type_inference2(u);
+ type_env.end_scope();
+ return (is_persistent_collection(max_type)) ? #<List(`rtp)> : #<list(`rtp)>;
+ };
+ Tree rtp = type_inference2(u);
+ type_env.end_scope();
+ return (is_persistent_collection(max_type)) ? #<Bag(`rtp)> : #<bag(`rtp)>;
+ case select(`u,from(...bl),where(`c)):
+ String max_type = "list";
+ for ( Tree b: bl )
+ match b {
+ case bind(`v,`d):
+ match type_inference2(d) {
+ case `T(`tp):
+ if (!is_collection(T))
+ fail;
+ type_env.insert(v.toString(),tp);
+ max_type = max_collection(T,max_type);
+ case _: type_error(e,"Expected a collection: "+print_query(d)
+ +" in "+print_query(e));
+ }
+ };
+ if (unify(type_inference2(c),#<bool>) != null) {
+ Tree tp = type_inference(u);
+ return (is_persistent_collection(max_type)) ? #<Bag(`tp)> : #<bag(`tp)>;
+ } else type_error(e,"The select predicate must be boolean: "+print_query(e));
+ case let_bind(`p,`u,`b):
+ type_env.begin_scope();
+ bind_pattern_type(p,type_inference2(u));
+ Tree tp = type_inference2(b);
+ type_env.end_scope();
+ return tp;
+ case let(`p,`u,`b):
+ bind_pattern_type(p,type_inference2(u));
+ return type_inference2(b);
+ case case(`u,...cs):
+ Tree tp = type_inference2(u);
+ Trees ts = #[];
+ for ( Tree c: cs )
+ match c {
+ case case(`p,`b):
+ type_env.begin_scope();
+ bind_pattern_type(p,tp);
+ ts = ts.append(type_inference2(b));
+ type_env.end_scope();
+ };
+ Tree maxt = maximum_type(ts);
+ for ( ; cs != #[] && ts != #[]; cs = cs.tail, ts = ts.tail )
+ match cs.head {
+ case case(`p,`b):
+ if (subtype(ts.head,maxt))
+ coerce(ts.head,maxt,((Node)(cs.head)).children.tail);
+ else type_error(cs.head,"Mismatched case output: "+b);
+ };
+ return maxt;
+ case lambda(`v,`b):
+ if(!v.is_variable())
+ fail;
+ if (type_env.lookup(v.toString()) == null)
+ type_env.insert(v.toString(),#<any>);
+ return #<arrow(`(type_env.lookup(v.toString())),`(type_inference(b)))>;
+ case function(tuple(...params),`outp,`body):
+ Trees as = #[];
+ for ( Tree param: params )
+ match param {
+ case `bind(`v,`tp):
+ type_env.insert(v.toString(),tp);
+ as = as.append(tp);
+ };
+ Tree btp = type_inference(body);
+ if (!subtype(btp,outp))
+ type_error(e,"The type of the function body "+print_type(btp)
+ +" does not match the expected type "+print_type(outp)+"\n"+e);
+ if (unify(btp,outp) == null) { // the body needs coercion
+ Trees b = #[`body];
+ coerce(btp,outp,b);
+ body = b.head;
+ };
+ return #<arrow(tuple(...as),`outp)>;
+ case call(source,binary,`f,`tp):
+ return tp;
+ case call(source,binary,`f):
+ if (!f.is_string())
+ type_error(e,"The source file must be a constant string: "+print_query(f));
+ Tree tp = null;
+ if (Config.hadoop_mode)
+ tp = Plan.get_type(f.stringValue());
+ else tp = MapReduceAlgebra.get_type(f.stringValue());
+ if (tp == null)
+ type_error(e,"Cannot find the type of file "+f);
+ ((Node)e).children = ((Node)e).children.append(tp); // destructive
+ return tp;
+ case call(source,`parser,`f,...args):
+ if (!parser.is_variable())
+ type_error(e,"The parser must be a constant name: "+print_query(parser));
+ if (unify(type_inference(f),#<string>) == null)
+ type_error(e,"The source file must be a string: "+print_query(f));
+ try {
+ Class<? extends Parser> pc = DataSource.parserDirectory.get(parser.toString());
+ if (pc == null)
+ type_error(e,"Unrecognized parser: "+parser);
+ Parser p = pc.newInstance();
+ p.initialize(args);
+ return #<Bag(`(p.type()))>;
+ } catch (Exception x) {
+ type_error(e,"Unrecognized parser type: "+parser);
+ }
+ case typed(null,`tp):
+ return tp;
+ case typed(`f(...),`tp):
+ if (f.equals("tagged_union") || f.equals("union_value"))
+ return tp;
+ else fail
+ case typed(`x,`tp):
+ if (tp.is_variable() && !tp.equals(#<string>)
+ && MRContainer.type_code(tp.toString()) >= 0) {
+ Tree tx = type_inference(x);
+ if (tx.is_variable() && !tx.equals(#<string>)
+ && MRContainer.type_code(tx.toString()) >= 0)
+ return tp;
+ else type_error(e,"Expression "+print_query(x)+" of type "+print_type(tx)
+ +" cannot be coerced to type "+tp);
+ };
+ Tree tx = type_inference(x);
+ if (!subtype(tx,tp))
+ type_error(e,"Expression "+print_query(x)+" of type "+print_type(tx)
+ +" cannot be coerced to type "+print_type(tp));
+ if (unify(tx,tp) == null) // need to coerce x
+ coerce(tx,tp,((Node)e).children);
+ return tp;
+ case tuple(...el):
+ Trees s = #[];
+ for ( Tree a: el )
+ s = s.append(type_inference(a));
+ return #<tuple(...s)>;
+ case call(coerce,`x,`n):
+ return #<`(MRContainer.type_names[(int)n.longValue()])>;
+ case record(...el):
+ Trees s = #[];
+ Trees attrs = #[];
+ for ( Tree a: el )
+ match a {
+ case bind(`v,`b):
+ s = s.append(#<bind(`v,`(type_inference(b)))>);
+ if (attrs.member(v))
+ type_error(e,"Duplicate record attribute name: "+v+" in "+print_query(e));
+ attrs = attrs.append(v);
+ };
+ return #<record(...s)>;
+ case union_tag(`x):
+ return #<int>;
+ case `T(...el):
+ if (!is_collection(T))
+ fail;
+ if (el.is_empty())
+ return #<`T(any)>;
+ Trees ts = #[];
+ for ( Tree t: el )
+ ts = ts.append(type_inference(t));
+ Tree maxt = maximum_type(ts);
+ for ( ; el != #[] && ts != #[]; el = el.tail, ts = ts.tail )
+ if (subtype(ts.head,maxt))
+ coerce(ts.head,maxt,el);
+ else type_error(e,"Incompatible values in "+T+" construction: "+print_query(e));
+ return #<`T(`maxt)>;
+ case nth(`a,`n):
+ if (!n.is_long())
+ type_error(e,"Tuple index must be an integer: "+print_query(e));
+ int i = (int)n.longValue();
+ match type_inference2(a) {
+ case tuple(...ts):
+ if (i < 0 || i >= ts.length())
+ type_error(e,"Tuple index outside bounds: "+print_query(e));
+ return ts.nth(i);
+ case `S(tuple(...ts)):
+ if (!is_collection(S))
+ fail;
+ if (i < 0 || i >= ts.length())
+ type_error(e,"Tuple index outside bounds: "+print_query(e));
+ return #<`S(`(ts.nth(i)))>;
+ case `tp:
+ type_error(e,"Tuple index must be applied to a tuple: "
+ +print_query(a)+" of type: "+print_type(tp)+" in "+print_query(e));
+ };
+ case project(`a,`v): // highly overloaded
+ Tree ta = type_inference(a);
+ match ta {
+ case XML:
+ return #<list(XML)>;
+ case `S(`tp):
+ if (is_collection(S) && (tp.equals(#<XML>) || tp.equals(TopLevel.xml_type)))
+ return #<`S(XML)>;
+ };
+ if (ta.equals(TopLevel.xml_type))
+ return #<list(XML)>;
+ match expand(ta) {
+ case record(...ts):
+ for ( Tree t: ts )
+ match t {
+ case bind(`w,`tp):
+ if (equal_types(w,v))
+ return tp;
+ };
+ type_error(e,"Record "+print_query(a)+" does not have a component "+v);
+ case union(...tl):
+ Trees s = #[];
+ for ( Tree t: tl )
+ match t {
+ case `c(record(...ts)):
+ for ( Tree tn: ts )
+ match tn {
+ case bind(`w,`tp):
+ if (equal_types(w,v))
+ s = s.append(tp);
+ };
+ case `c(bag(tuple(string,`tv))):
+ s = s.append(tv);
+ };
+ if (s.length() == 0)
+ type_error(e,"Wrong record projection "+print_query(e)
+ + " over the union "+print_type(ta));
+ else if (s.length() > 1)
+ type_error(e,"Ambiguous record projection "+print_query(e)
+ + " over the union "+print_type(ta));
+ return s.head();
+ case `S(`ttp):
+ if (!is_collection(S))
+ fail;
+ match expand(ttp) {
+ case record(...ts):
+ for ( Tree t: ts )
+ match t {
+ case bind(`w,`tp):
+ if (equal_types(w,v))
+ return #<`S(`tp)>;
+ };
+ type_error(e,"The record collection "+print_query(a)
+ +" does not have a component "+v);
+ case tuple(string,`tv):
+ return tv;
+ case union(...tl):
+ Trees s = #[];
+ for ( Tree t: tl )
+ match t {
+ case `c(record(...ts)):
+ for ( Tree tn: ts )
+ match tn {
+ case bind(`w,`tp):
+ if (equal_types(w,v))
+ s = s.append(tp);
+ };
+ case `c(bag(tuple(string,`tv))):
+ s = s.append(tv);
+ };
+ if (s.length() == 0)
+ type_error(e,"Wrong record projection "+print_query(e)
+ + " over the union "+print_type(ta));
+ else if (s.length() > 1)
+ type_error(e,"Ambiguous record projection "+print_query(e)
+ + " over the union "+print_type(ta));
+ return #<`S(...s)>;
+ };
+ case `t:
+ type_error(e,"The record projection "+print_query(e)
+ + " cannot apply to the type "+print_type(t));
+ };
+ case index(`a,`i):
+ Tree ti = type_inference2(i);
+ match type_inference2(a) {
+ case bag(tuple(`tk,`tv)):
+ if (subtype(ti,tk)) {
+ coerce(ti,tk,((Node)e).children.tail);
+ return tv;
+ } else fail
+ case Bag(tuple(`tk,`tv)):
+ if (subtype(ti,tk)) {
+ coerce(ti,tk,((Node)e).children.tail);
+ return tv;
+ } else fail
+ case list(`tp):
+ if (unify(ti,#<int>) != null)
+ return tp;
+ else type_error(e,"List index must be an integer: "+print_query(e));
+ case List(`tp):
+ if (unify(ti,#<int>) != null)
+ return tp;
+ else type_error(e,"List index must be an integer: "+print_query(e));
+ case union(...tl):
+ Trees s = #[];
+ for ( Tree t: tl )
+ match expand(t) {
+ case `c(bag(tuple(`tk,`tv))):
+ if (unify(ti,tk) != null)
+ s = s.append(tv);
+ else fail
+ case `c(list(`tp)):
+ if (unify(ti,#<int>) != null)
+ s = s.append(tp);
+ else fail
+ };
+ if (s.length() == 0)
+ type_error(e,"Wrong indexing "+print_query(e)
+ + " in union "+print_type(#<union(...tl)>));
+ else if (s.length() > 1)
+ type_error(e,"Ambiguous indexing "+print_query(e)
+ + " in union "+print_type(#<union(...tl)>));
+ return s.head();
+ case `tp: type_error(e,"Indexing is not allowed for type "+print_type(tp)
+ +" with index "+print_type(ti)+" in "+print_query(e));
+ };
+ case range(`u,`i,`j):
+ if (unify(type_inference2(i),#<int>) == null
+ || unify(type_inference2(j),#<int>) == null)
+ type_error(e,"Range indexes must be integer expressions: "+print_query(e));
+ match type_inference2(u) {
+ case list(`a): return #<list(`a)>;
+ case List(`a): return #<list(`a)>;
+ };
+ type_error(e,"Range indexing must be applied to a list: "+print_query(e));
+ case range(`min,`max):
+ Tree tmin = type_inference(min);
+ Tree tmax = type_inference(max);
+ if (!subtype(tmin,#<long>))
+ type_error(e,"Expected a long integer for min: "+print_query(min));
+ else if (unify(tmin,#<long>) == null) // coerce
+ coerce(tmin,#<long>,((Node)e).children);
+ if (!subtype(tmax,#<long>))
+ type_error(e,"Expected a long integer for max: "+print_query(max));
+ else if (unify(tmax,#<long>) == null) // coerce
+ coerce(tmax,#<long>,((Node)e).children.tail);
+ return #<list(long)>;
+ case call(gen,`min,`max,`size):
+ return type_inference(#<gen(`min,`max,`size)>);
+ case gen(`min,`max,`size):
+ Tree tmin = type_inference(min);
+ Tree tmax = type_inference(max);
+ Tree tsize = type_inference(size);
+ if (!subtype(tmin,#<long>))
+ type_error(e,"Expected a long integer for min: "+print_query(min));
+ else if (unify(tmin,#<long>) == null) // coerce
+ coerce(tmin,#<long>,((Node)e).children);
+ if (!subtype(tmax,#<long>))
+ type_error(e,"Expected a long integer for max: "+print_query(max));
+ else if (unify(tmax,#<long>) == null) // coerce
+ coerce(tmax,#<long>,((Node)e).children.tail);
+ if (!subtype(tsize,#<long>))
+ type_error(e,"Expected a long integer for size: "+print_query(size));
+ else if (unify(tsize,#<long>) == null) // coerce
+ coerce(tsize,#<long>,((Node)e).children.tail.tail);
+ return #<Bag(long)>;
+ case dataset_size(`x):
+ return #<long>;
+ case groupBy(`u):
+ Tree tp = type_inference2(u);
+ match tp {
+ case `T(tuple(`k,`v)):
+ if (is_collection(T))
+ return (is_persistent_collection(T))
+ ? #<Bag(tuple(`k,list(`v)))>
+ : #<bag(tuple(`k,list(`v)))>;
+ };
+ type_error(e,"Wrong groupBy: "+print_query(e)+" of type "+print_type(tp));
+ case orderBy(`u):
+ Tree tp = type_inference2(u);
+ match tp {
+ case `T(tuple(`k,`v)):
+ if (is_collection(T))
+ return (is_persistent_collection(T))
+ ? #<List(tuple(`k,list(`v)))>
+ : #<list(tuple(`k,list(`v)))>;
+ };
+ type_error(e,"Wrong orderBy: "+print_query(e)+" of type "+print_type(tp));
+ case cmap(lambda(`v,`body),`s):
+ match type_inference2(s) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ type_env.insert(v.toString(),a);
+ match type_inference2(body) {
+ case `S(`tp):
+ if (!is_collection(S))
+ fail;
+ return #<`T(`tp)>;
+ case _: type_error(e,"cmap must return a collection: "+print_query(e));
+ };
+ case `t: type_error(e,"cmap must be over a collection: "+print_query(e)
+ +" (found type "+print_type(t)+")");
+ };
+ type_error(e,"Wrong cmap: "+print_query(e));
+ case fold(lambda(`v,`body),`n,`s):
+ match type_inference2(s) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ Tree tp = type_inference(n);
+ type_env.insert(v.toString(),#<tuple(`a,tp)>);
+ if (unify(type_inference2(body),tp) == null)
+ type_error(e,"Wrong types in fold: "+print_query(e));
+ return tp;
+ };
+ case join(lambda(`v1,`b1),lambda(`v2,`b2),lambda(`vr,`br),`x,`y):
+ match type_inference2(x) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ match type_inference2(y) {
+ case `S(`b):
+ if (!is_collection(S))
+ fail;
+ type_env.insert(v1.toString(),a);
+ type_env.insert(v2.toString(),b);
+ T = transient_collection(T);
+ S = transient_collection(S);
+ type_env.insert(vr.toString(),#<tuple(`T(`a),`S(`b))>);
+ if (unify(type_inference2(b1),type_inference2(b2)) == null)
+ type_error(e,"Incompatible keys in join ("+type_inference2(b1)
+ +" and "+type_inference2(b2)+"): "+print_query(e));
+ match type_inference(br) {
+ case `S3(`ntp):
+ if (!is_collection(S3))
+ fail;
+ S3 = persistent_collection(S3);
+ return #<`S3(`ntp)>;
+ };
+ type_error(e,"The join reducer must return a collection: "+print_query(br));
+ case _: type_error(e,"The right join input is not a collection: "+print_query(y));
+ };
+ case _: type_error(e,"The left join input is not a collection: "+print_query(x));
+ };
+ case crossProduct(lambda(`v1,`b1),lambda(`v2,`b2),lambda(`vr,`br),`x,`y):
+ match type_inference2(x) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ match type_inference2(y) {
+ case `S(`b):
+ if (!is_collection(S))
+ fail;
+ type_env.insert(v1.toString(),a);
+ type_env.insert(v2.toString(),b);
+ match type_inference2(b1) {
+ case `S1(`w1):
+ if (!is_collection(S1))
+ fail;
+ match type_inference2(b2) {
+ case `S2(`w2):
+ if (!is_collection(S2))
+ fail;
+ type_env.insert(vr.toString(),#<tuple(`w1,`w2)>);
+ match type_inference(br) {
+ case `S3(`ntp):
+ if (!is_collection(S3))
+ fail;
+ S3 = persistent_collection(S3);
+ return #<`S3(`ntp)>;
+ };
+ type_error(e,"The cross product reducer must return a collection: "+print_query(br));
+ case _: type_error(e,"Wrong right mapper in a cross product: "+print_query(b2));
+ };
+ case _: type_error(e,"Wrong left mapper in a cross product: "+print_query(b1));
+ };
+ case _: type_error(e,"The right cross product input is not a collection: "+print_query(y));
+ };
+ case _: type_error(e,"The left cross product input is not a collection: "+print_query(x));
+ };
+ case mapReduce(lambda(`vm,`bm),lambda(`vr,`br),`X,_):
+ match type_inference2(X) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ type_env.insert(vm.toString(),a);
+ match type_inference2(bm) {
+ case `S(tuple(`k,`b)):
+ if (!is_collection(S))
+ fail;
+ type_env.insert(vr.toString(),#<tuple(`k,list(`b))>);
+ match type_inference(br) {
+ case `S3(`ntp):
+ if (!is_collection(S3))
+ fail;
+ if (is_persistent_collection(T))
+ S3 = persistent_collection(S3);
+ return #<`S3(`ntp)>;
+ };
+ type_error(e,"The MapReduce reducer must return a collection: "+print_query(br));
+ case _: type_error(e,"Wrong mapper in mapReduce: "+print_query(bm));
+ }
+ };
+ type_error(e,"The MapReduce input is not a collection: "+print_query(X));
+ case mapReduce2(lambda(`v1,`b1),lambda(`v2,`b2),lambda(`vr,`br),`X,`Y,_):
+ match type_inference2(X) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ match type_inference2(Y) {
+ case `S(`b):
+ if (!is_collection(S))
+ fail;
+ type_env.insert(v1.toString(),a);
+ type_env.insert(v2.toString(),b);
+ match type_inference2(b1) {
+ case `S1(tuple(`k1,`w1)):
+ if (!is_collection(S1))
+ fail;
+ match type_inference2(b2) {
+ case `S2(tuple(`k2,`w2)):
+ if (!is_collection(S2))
+ fail;
+ if (unify(k1,k2) == null)
+ type_error(e,"incompatible keys in mapReduce2: "+print_query(e));
+ S1 = transient_collection(S1);
+ S2 = transient_collection(S2);
+ type_env.insert(vr.toString(),#<tuple(`S1(`w1),`S2(`w2))>);
+ match type_inference(br) {
+ case `S3(`ntp):
+ if (!is_collection(S3))
+ fail;
+ if (is_persistent_collection(T) && is_persistent_collection(S))
+ S3 = persistent_collection(S3);
+ return #<`S3(`ntp)>;
+ };
+ type_error(e,"The MapReduce2 reducer must return a collection: "+print_query(br));
+ case _: type_error(e,"Wrong right mapper in mapReduce2: "+print_query(b2));
+ };
+ case _: type_error(e,"Wrong left mapper in mapReduce2: "+print_query(b1));
+ };
+ case _: type_error(e,"The right MapReduce2 input is not a collection: "+print_query(Y));
+ };
+ case _: type_error(e,"The left MapReduce2 input is not a collection: "+print_query(X));
+ };
+ case Collect(`x):
+ match type_inference2(x) {
+ case Bag(`tp): return #<bag(`tp)>;
+ case List(`tp): return #<list(`tp)>;
+ };
+ type_error(e,"You can only cache persistent collections: "+e);
+ case aggregate(lambda(`v,`b),`z,`X):
+ match type_inference2(X) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ Tree ztp = type_inference2(z);
+ type_env.insert(v.toString(),#<tuple(`ztp,`a)>);
+ if (!subtype(ztp,type_inference2(b)))
+ type_error(e,"Wrong accumulator: "+print_query(e));
+ return ztp;
+ };
+ type_error(e,"Aggregation input is not a collection: "+print_query(e));
+ case repeat(lambda(`p,`b),`s,...ns):
+ if (!ns.is_empty() && unify(type_inference2(ns.head()),#<int>) == null)
+ type_error(e,"The maximum number of steps in repeat must be an integer: "
+ +print_query(ns.head()));
+ Tree tp = type_inference2(s);
+ bind_pattern_type(p,tp);
+ match tp {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ match type_inference2(b) {
+ case `S(`c): // transitive closure
+ if (!is_collection(S))
+ fail;
+ if (unify(a,c) == null)
+ fail;
+ ((Node)e).name = #<closure>.toString(); // destructive
+ return #<`T(`a)>;
+ case `S(tuple(`w,`c)):
+ if (!is_collection(S))
+ fail;
+ if (unify(c,#<bool>) == null)
+ fail;
+ if (unify(a,w) == null)
+ fail;
+ return #<`T(`a)>;
+ case `t: type_error(e,"The repeat body must return a collection of type "
+ +print_type(tp)+" or "+print_type(#<`T(tuple(`a,bool))>)
+ +" (Found type: "+print_type(t)+")");
+ }
+ };
+ type_error(e,"The repeat source must return a bag: "+print_query(e));
+ case closure(lambda(`v,`b),`s,...ns):
+ if (!ns.is_empty() && unify(type_inference2(ns.head()),#<int>) == null)
+ type_error(e,"The maximum number of steps in closure must be an integer: "
+ +print_query(ns.head()));
+ match type_inference2(s) {
+ case `T(`a):
+ if (!is_collection(T))
+ fail;
+ type_env.insert(v.toString(),#<`T(`a)>);
+ match type_inference2(b) {
+ case `S(`tp):
+ if (!is_collection(S))
+ fail;
+ if (unify(a,tp) == null)
+ fail;
+ return #<`T(`a)>;
+ case `tp: type_error(e,"The closure body must return a collection of type "
+ +print_type(#<`T(`a)>)+" or "+print_type(#<`T(tuple(`a,bool))>)
+ +" (Found type: "+print_type(tp)+")");
+ }
+ };
+ type_error(e,"The closure source must return a bag: "+print_query(e));
+ case loop(lambda(`p,`b),`s,`n):
+ if (unify(type_inference2(n),#<int>) == null)
+ type_error(e,"The number of steps in loop must be an integer: "+print_query(n));
+ Tree tp = type_inference2(s);
+ bind_pattern_type(p,tp);
+ Tree btp = type_inference2(b);
+ if (unify(tp,btp) == null)
+ type_error(e,"The type of the repeat body ("+print_type(btp)
+ + ") and the type of the initial value ("+print_type(tp)+") do not match");
+ return tp;
+ case step(`x): // used in QueryPlan for repeat
+ match type_inference(x) {
+ case `T(`tp): return #<`T(tuple(`tp,bool))>;
+ };
+ case cstep(`x): // used in QueryPlan for closure
+ return type_inference(x);
+ case if(`p,`x,`y):
+ if (unify(type_inference2(p),#<bool>) == null)
+ type_error(e,"Expected a boolean predicate in if-then-else: "+print_query(p));
+ Tree tx = type_inference2(x);
+ Tree ty = type_inference2(y);
+ Tree rt = unify(tx,ty,((Node)e).children.tail,((Node)e).children.tail.tail);
+ if (rt == null)
+ type_error(e,"Incompatible types in if-then-else: "+print_query(e));
+ return rt;
+ case call(inv,`x):
+ return type_inference(x);
+ case call(plus,`x,`y):
+ Tree tx = type_inference2(x);
+ Tree ty = type_inference2(y);
+ match tx {
+ case `T(`xt):
+ if (!is_collection(T))
+ fail;
+ match ty {
+ case `S(`yt):
+ if (!is_collection(S))
+ fail;
+ Tree rt = unify(tx,ty,((Node)e).children.tail,((Node)e).children.tail.tail);
+ if (rt == null)
+ type_error(e,"Incompatible types in union/append: "+print_type(tx)+" and "+print_type(ty));
+ return rt;
+ }
+ };
+ fail
+ case `f(`x,`y):
+ if (! #[union,intersect,except].member(#<`f>))
+ fail;
+ Tree tx = type_inference2(x);
+ Tree ty = type_inference2(y);
+ match tx {
+ case `T(`t1):
+ if (!is_collection(T))
+ fail;
+ Tree t = unify(tx,ty,((Node)e).children,((Node)e).children.tail);
+ if (t != null)
+ return t;
+ };
+ type_error(e,"Incompatible types in "+f+": "+print_type(tx)+" and "+print_type(ty));
+ case call(member,`x,`y):
+ Tree tx = type_inference2(x);
+ Tree ty = type_inference2(y);
+ match ty {
+ case `T(`t1):
+ if (!is_collection(T))
+ fail;
+ if (!subtype(tx,t1))
+ type_error(e,"Incompatible types in member: "+print_type(tx)+" and "+print_type(ty));
+ coerce(tx,t1,((Node)e).children.tail);
+ return #<bool>;
+ };
+ case call(`f,`x,`y):
+ if (! #[eq,neq,lt,leq,gt,geq].member(f))
+ fail;
+ Tree tx = type_inference2(x);
+ Tree ty = type_inference2(y);
+ if (!subtype(tx,ty) && !subtype(ty,tx))
+ type_error(e,"Incompatible types in comparison "+f+": "
+ +print_type(tx)+" and "+print_type(ty));
+ if (unify(tx,ty) != null)
+ return #<bool>;
+ if (subtype(tx,ty))
+ coerce(tx,ty,((Node)e).children.tail);
+ else if (subtype(ty,tx))
+ coerce(ty,tx,((Node)e).children.tail.tail);
+ return #<bool>;
+ case call(`f,`s):
+ for ( Tree monoid: monoids )
+ match monoid {
+ case `aggr(`mtp,`plus,`zero,`unit):
+ if (!aggr.equals(f.toString()))
+ continue;
+ match type_inference2(s) {
+ case `T(`tp):
+ if (!is_collection(T))
+ type_error(e,"Aggregation must be over collections: "+s);
+ if (unify(mtp,tp) == null)
+ continue;
+ Tree nv = new_var();
+ type_env.begin_scope();
+ type_env.insert(nv.toString(),tp);
+ Tree t = type_inference2(#<apply(`unit,`nv)>);
+ Tree tz = type_inference2(zero);
+ type_env.end_scope();
+ if (unify(t,tz) != null)
+ return t;
+ }
+ };
+ fail
+ case call(avg,`s):
+ return type_inference(#<call(div,typed(call(sum,`s),double),call(count,`s))>);
+ case apply(lambda(`v,`b),`arg):
+ type_env.begin_scope();
+ type_env.insert(v.toString(),type_inference(arg));
+ Tree tp = type_inference(b);
+ type_env.end_scope();
+ return tp;
+ case call(`f,...al):
+ Tree macro = global_macros.lookup(f.toString());
+ if (macro == null)
+ fail;
+ match macro {
+ case macro(params(...pl),`body):
+ Tree b = body;
+ if (pl.length() != al.length())
+ fail;
+ for ( ; !pl.is_empty(); pl = pl.tail(), al = al.tail() )
+ b = subst(pl.head(),al.head(),b);
+ return type_inference2(b);
+ };
+ fail
+ case call(`f,...el):
+ if (!f.is_variable() || global_vars.lookup(f.toString()) != null)
+ fail;
+ Tree ret = data_constructors.lookup(f.toString());
+ if (ret != null)
+ match ret {
+ case `v(`n,`tp):
+ if (unify(type_inference(make_tuple(el)),tp) != null)
+ return #<`v>;
+ else type_error(e,"Wrong data construction: "+print_query(e));
+ };
+ ret = type_env.lookup(f.toString());
+ if (ret == null)
+ ret = global_type_env.lookup(f.toString());
+ if (ret != null)
+ match ret {
+ case arrow(`s,`d):
+ Tree tp = type_inference(#<tuple(...el)>);
+ if (!subtype(tp,s))
+ type_error(e,"The domain of the anonymous function "+print_type(s)+" in "+print_query(e)
+ +" does not match the argument types "+print_type(tp));
+ if (unify(tp,s) == null) // coerce args
+ match #<tuple(`tp,`s)> {
+ case tuple(tuple(...txs),tuple(...tys)):
+ for ( ; txs != #[]; txs = txs.tail, tys = tys.tail, el = el.tail )
+ coerce(txs.head,tys.head,el);
+ }
+ return d;
+ case _: type_error(e,"Expected a functional type for "+f+" (found "+print_type(ret)+")");
+ };
+ Trees tps = #[];
+ for ( Tree a: el )
+ tps = tps.append(type_inference(a));
+ for ( Tree fnc: functions )
+ match fnc {
+ case `fn(`otp,...tl):
+ if (!fn.equals(f.toString()) || !subtype(tps,tl))
+ fail;
+ if (f.equals(#<XMLchildren>))
+ return #<list(XML)>;
+ else if (f.equals(#<XMLattributes>))
+ return #<list(string)>;
+ else if (f.equals(#<XMLattribute>) && collection_type(otp))
+ return #<list(string)>;
+ else return otp;
+ };
+ type_error(e,"Undefined function "+f+" over arguments of type "+print_type(#<tuple(...tps)>));
+ case apply(`f,`u):
+ match type_inference2(f) {
+ case arrow(`s,`d):
+ Tree tp = type_inference(u);
+ if (!subtype(tp,s))
+ type_error(e,"The domain of the anonymous function "+print_type(s)+" in "+print_query(e)
+ +" does not match the argument types "+print_type(tp));
+ if (unify(tp,s) == null) // coerce args
+ coerce(tp,s,((Node)e).children.tail);
+ return d;
+ case `tp: type_error(e,"Expected a functional type for "+f+" (found "+print_type(tp)+")");
+ };
+ case callM(`f,_,...el):
+ return type_inference(#<call(`f,...el)>);
+ case true: return #<bool>;
+ case false: return #<bool>;
+ case null: return #<any>;
+ case `v:
+ if (!v.is_variable())
+ fail;
+ Tree ret1 = type_env.lookup(v.toString());
+ Tree ret2 = global_type_env.lookup(v.toString());
+ if (ret1 == null)
+ if (ret2 == null) {
+ Tree ret = global_vars.lookup(v.toString());
+ if (ret == null) {
+ String msg = "";
+ if (!Config.trace && type_env.iterator().hasNext()) {
+ msg += "\nVariable Types:";
+ for ( String var: type_env )
+ msg += "\n "+var + ": " + print_type(type_env.lookup(var));
+ };
+ type_error(e,"Undefined variable: "+v+msg);
+ } else if (!v.equals(ret))
+ return type_inference(ret);
+ } else return ret2;
+ else return ret1;
+ case `n:
+ if (n.is_long())
+ return #<int>;
+ else if (n.is_double())
+ return #<float>;
+ else if (n.is_string())
+ return #<string>;
+ };
+ type_error(e,"Wrong expression: "+print_query(e));
+ throw new Error();
+ }
+
+ /** check the type for inconsistencies and fix the transient/persistent components */
+ public static Tree normalize_type ( Tree type ) {
+ match type {
+ case record(...bs):
+ Trees as = #[];
+ Trees vs = #[];
+ for ( Tree b: bs )
+ match b {
+ case bind(`v,`tp):
+ if (v.is_variable())
+ if (vs.member(v))
+ type_error(type,"Duplicate record attributes: "+print_type(type));
+ else {
+ vs = vs.append(v);
+ as = as.append(#<bind(`v,`(normalize_type(tp)))>);
+ }
+ else type_error(type,"Expected an attribute name: "+v);
+ case _: type_error(type,"Ill-formed record type: "+print_type(type));
+ };
+ return #<record(...as)>;
+ case tuple(...ts):
+ Trees as = #[];
+ for ( Tree t: ts )
+ as = as.append(normalize_type(t));
+ return #<tuple(...as)>;
+ case union(...bs):
+ Trees as = #[];
+ for ( Tree b: bs )
+ match b {
+ case `c(`tp):
+ as = as.append(#<`c(`(normalize_type(tp)))>);
+ case _: type_error(type,"Ill-formed union type: "+print_type(type));
+ };
+ return #<union(...as)>;
+ case persistent(bag(`tp)):
+ Tree ntp = normalize_type(tp);
+ return #<Bag(`ntp)>;
+ case persistent(list(`tp)):
+ Tree ntp = normalize_type(tp);
+ return #<List(`ntp)>;
+ case `T(`tp):
+ if (is_collection(T))
+ return #<`T(`(normalize_type(tp)))>;
+ else fail
+ case `tp:
+ if (!tp.is_variable())
+ fail;
+ if (#[bool,byte,short,int,long,float,double,string].member(tp))
+ return tp;
+ Tree rt = global_datatype_env.lookup(tp.toString());
+ if (rt != null)
+ return tp;
+ rt = type_names.lookup(tp.toString());
+ if (rt != null)
+ return tp;
+ };
+ type_error(type,"Unrecognized type: "+print_type(type));
+ return type;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/Union.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/Union.java b/core/src/main/java/org/apache/mrql/Union.java
new file mode 100644
index 0000000..540b6d9
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/Union.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import java.io.IOException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+
+
+/** union values are tagged values, where tag is the descriminator */
+public class Union extends MRData {
+ private byte tag;
+ private MRData value;
+
+ public Union ( byte tag, MRData value ) {
+ this.tag = tag;
+ this.value = value;
+ }
+
+ public void materializeAll () { value.materializeAll(); };
+
+ public byte tag () { return tag; }
+
+ public MRData value () { return value; }
+
+ final public void write ( DataOutput out ) throws IOException {
+ out.writeByte(MRContainer.UNION);
+ out.writeByte(tag);
+ value.write(out);
+ }
+
+ final public static Union read ( DataInput in ) throws IOException {
+ return new Union(in.readByte(),MRContainer.read(in));
+ }
+
+ public void readFields ( DataInput in ) throws IOException {
+ tag = in.readByte();
+ value = MRContainer.read(in);
+ }
+
+ public int compareTo ( MRData x ) {
+ assert(x instanceof Union);
+ Union p = (Union) x;
+ return (tag == p.tag)
+ ? value.compareTo(p.value)
+ : (tag - p.tag);
+ }
+
+ final public static int compare ( byte[] x, int xs, int xl, byte[] y, int ys, int yl, int[] size ) {
+ int k = (x[xs] == y[ys]) ? MRContainer.compare(x,xs+1,xl-1,y,ys+1,yl-1,size) : (x[xs]-y[ys]);
+ size[0] += 2;
+ return k;
+ }
+
+ public boolean equals ( Object x ) {
+ return x instanceof Union && ((Union)x).tag==tag
+ && ((Union)x).value.equals(value);
+ }
+
+ public int hashCode () {
+ return Math.abs(tag ^ value.hashCode());
+ }
+
+ public String toString () {
+ return "union("+tag+","+value+")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/XMLParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/XMLParser.java b/core/src/main/java/org/apache/mrql/XMLParser.java
new file mode 100644
index 0000000..a25acbd
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/XMLParser.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+import javax.xml.parsers.SAXParserFactory;
+import org.xml.sax.*;
+import java.io.*;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.conf.Configuration;
+
+
+/** An XML parser */
+final public class XMLParser implements Parser {
+ String[] tags; // split tags
+ Tree xpath; // XPath query for fragmentation
+ XMLSplitter splitter;
+ XPathParser parser;
+ XMLReader xmlReader;
+ final static SAXParserFactory factory = SAXParserFactory.newInstance();
+
+ public void initialize ( Trees args ) {
+ try {
+ if (args.length() > 0) {
+ if (!(args.nth(0) instanceof Node)
+ || !(((Node)args.nth(0)).name().equals("list")
+ || ((Node)args.nth(0)).name().equals("bag")))
+ throw new Error("Expected a bag of synchronization tagnames to split the XML source: "+args.nth(0));
+ Trees ts = ((Node)args.nth(0)).children();
+ if (ts.length() == 0)
+ throw new Error("Expected at least one synchronization tagname in XML source: "+ts);
+ tags = new String[ts.length()];
+ for ( int i = 0; i < tags.length; i++ )
+ if (ts.nth(i) instanceof StringLeaf)
+ tags[i] = ((StringLeaf)(ts.nth(i))).value();
+ else throw new Error("Expected a synchronization tagname in XML source: "+ts.nth(i));
+ if (args.length() == 2)
+ xpath = ((Node)args.nth(1)).children().nth(0);
+ else xpath = new VariableLeaf("dot");
+ } else xpath = new VariableLeaf("dot");
+ parser = new XPathParser(xpath);
+ factory.setValidating(false);
+ factory.setNamespaceAware(false);
+ xmlReader = factory.newSAXParser().getXMLReader();
+ } catch (Exception e) {
+ throw new Error(e);
+ }
+ }
+
+ public void open ( String file ) {
+ try {
+ splitter = new XMLSplitter(tags,file,new DataOutputBuffer());
+ } catch (Exception e) {
+ throw new Error(e);
+ }
+ }
+
+ public void open ( FSDataInputStream fsin, long start, long end ) {
+ try {
+ splitter = new XMLSplitter(tags,fsin,start,end,new DataOutputBuffer());
+ } catch (Exception e) {
+ throw new Error(e);
+ }
+ }
+
+ public Tree type () { return new VariableLeaf("XML"); }
+
+ public String slice () {
+ if (splitter.hasNext()) {
+ DataOutputBuffer b = splitter.next();
+ return new String(b.getData(),0,b.getLength());
+ } else return null;
+ }
+
+ public Bag parse ( String s ) {
+ try {
+ parser.dataConstructor.start();
+ xmlReader.setContentHandler(parser.handler);
+ xmlReader.parse(new InputSource(new StringReader(s)));
+ Bag b = new Bag();
+ for ( MRData e: parser.dataConstructor.value() )
+ b.add(e);
+ return b;
+ } catch (Exception e) {
+ System.err.println(e);
+ return new Bag();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/1adaa71c/core/src/main/java/org/apache/mrql/XMLSplitter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mrql/XMLSplitter.java b/core/src/main/java/org/apache/mrql/XMLSplitter.java
new file mode 100644
index 0000000..e407d85
--- /dev/null
+++ b/core/src/main/java/org/apache/mrql/XMLSplitter.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mrql;
+
+import org.apache.mrql.gen.*;
+import java.util.Iterator;
+import javax.xml.parsers.SAXParserFactory;
+import org.xml.sax.*;
+import java.io.*;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.fs.FSDataInputStream;
+
+
+/** Extract the XML elements tagged by tags from a data split of the input stream (fsin)
+ * and store them in a buffer (to be parsed by SAX).
+ */
+final public class XMLSplitter implements Iterator<DataOutputBuffer> {
+ boolean in_memory;
+ FSDataInputStream fsin; // for HDFS processing
+ BufferedReader in; // for in-memory processing
+ String[] tags;
+ long start;
+ long end;
+ StringBuffer tagname = new StringBuffer(100);
+ String start_tagname;
+ final DataOutputBuffer buffer;
+
+ XMLSplitter ( String[] tags, FSDataInputStream fsin, long start, long end,
+ DataOutputBuffer buffer ) {
+ in_memory = false;
+ this.tags = tags;
+ this.fsin = fsin;
+ this.start = start;
+ this.end = end;
+ this.buffer = buffer;
+ try {
+ fsin.seek(start);
+ } catch ( IOException e ) {
+ System.err.println("*** Cannot parse the data split: "+fsin);
+ }
+ }
+
+ XMLSplitter ( String[] tags, String file, DataOutputBuffer buffer ) {
+ in_memory = true;
+ try {
+ in = new BufferedReader(new InputStreamReader(new FileInputStream(file)),
+ 100000);
+ } catch ( Exception e ) {
+ throw new Error("Cannot open the file: "+file);
+ };
+ this.tags = tags;
+ this.buffer = buffer;
+ }
+
+ public boolean hasNext () {
+ try {
+ if (in_memory || fsin.getPos() < end)
+ if (skip())
+ return store();
+ return false;
+ } catch (Exception e) {
+ System.err.println(e);
+ return false;
+ }
+ }
+
+ public DataOutputBuffer next () {
+ return buffer;
+ }
+
+ public void remove () { }
+
+ boolean is_start_tag () {
+ if (tags == null)
+ return true;
+ for (String tag: tags)
+ if (tag.contentEquals(tagname))
+ return true;
+ return false;
+ }
+
+ char read_tag () throws IOException {
+ tagname.setLength(0);
+ while (true) {
+ int b = in_memory ? in.read() : fsin.read();
+ if (b == -1)
+ return ' ';
+ else if (!Character.isLetterOrDigit(b) && b != ':' && b != '_')
+ return (char)b;
+ tagname.append((char)b);
+ }
+ }
+
+ /** skip until the beginning of a split element */
+ boolean skip () throws IOException {
+ while (true) {
+ int b = in_memory ? in.read() : fsin.read();
+ if (b == -1 || (!in_memory && fsin.getPos() >= end))
+ return false;
+ else if (b == '<') {
+ b = read_tag();
+ if (is_start_tag()) {
+ buffer.reset();
+ buffer.write('<');
+ for ( int i = 0; i < tagname.length(); i++ )
+ buffer.write(tagname.charAt(i));
+ buffer.write(b);
+ start_tagname = new String(tagname);
+ return true;
+ }
+ }
+ }
+ }
+
+ /** store one split element into the buffer; may cross split boundaries */
+ boolean store () throws IOException {
+ while (true) {
+ int b = in_memory ? in.read() : fsin.read();
+ if (b == -1)
+ return false;
+ if (b == '&') { // don't validate external XML entities
+ buffer.write('&');buffer.write('a');buffer.write('m');buffer.write('p');buffer.write(';');
+ } else buffer.write(b);
+ if (b == '<') {
+ b = in_memory ? in.read() : fsin.read();
+ buffer.write(b);
+ if (b == '/') {
+ b = read_tag();
+ for ( int i = 0; i < tagname.length(); i++ )
+ buffer.write(tagname.charAt(i));
+ buffer.write(b);
+ if (start_tagname.contentEquals(tagname)) {
+ while (b != '>') {
+ b = fsin.read();
+ buffer.write(b);
+ };
+ return true;
+ }
+ }
+ }
+ }
+ }
+}