You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/02/23 00:44:56 UTC
svn commit: r630357 [1/2] - in /incubator/pig/branches/types: ./
src/org/apache/pig/builtin/ src/org/apache/pig/data/
src/org/apache/pig/impl/builtin/ src/org/apache/pig/impl/eval/
src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/mapreduce...
Author: gates
Date: Fri Feb 22 15:44:37 2008
New Revision: 630357
URL: http://svn.apache.org/viewvc?rev=630357&view=rev
Log:
Changes to make types changes pass unit tests. The following tests still fail:
AlgebraicEval
testGroupCountWithMultipleFields - fails in combiner, ok because we'll rewrite as part of pipeline.
EvalPipeline
testFunctionInsideFunction - fails in assertAtomic, ok because we'll rewrite how functions return their types anyway.
testDistinct - some problem in distinct, may be ok because we'll rewrite this as part of pipeline.
FilterOpNumeric
testBinCond - fails because I commented out comparators, which will be totally rewritten anyway.
testNestedBinCond - fails because I commented out comparators, which will be totally rewritten anyway.
Modified:
incubator/pig/branches/types/build.xml
incubator/pig/branches/types/src/org/apache/pig/builtin/AVG.java
incubator/pig/branches/types/src/org/apache/pig/builtin/MAX.java
incubator/pig/branches/types/src/org/apache/pig/builtin/MIN.java
incubator/pig/branches/types/src/org/apache/pig/builtin/PigStorage.java
incubator/pig/branches/types/src/org/apache/pig/builtin/SUM.java
incubator/pig/branches/types/src/org/apache/pig/data/DataType.java
incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java
incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java
incubator/pig/branches/types/src/org/apache/pig/data/TupleFactory.java
incubator/pig/branches/types/src/org/apache/pig/impl/builtin/ADD.java
incubator/pig/branches/types/src/org/apache/pig/impl/builtin/DIVIDE.java
incubator/pig/branches/types/src/org/apache/pig/impl/builtin/MULTIPLY.java
incubator/pig/branches/types/src/org/apache/pig/impl/builtin/SUBTRACT.java
incubator/pig/branches/types/src/org/apache/pig/impl/eval/GenerateSpec.java
incubator/pig/branches/types/src/org/apache/pig/impl/eval/ProjectSpec.java
incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java
incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/MapReduceLauncher.java
incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigCombine.java
incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigMapReduce.java
incubator/pig/branches/types/test/org/apache/pig/test/TestAlgebraicEval.java
incubator/pig/branches/types/test/org/apache/pig/test/TestBuiltin.java
incubator/pig/branches/types/test/org/apache/pig/test/TestDataBag.java
incubator/pig/branches/types/test/org/apache/pig/test/TestDataModel.java
incubator/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
incubator/pig/branches/types/test/org/apache/pig/test/TestFilterOpNumeric.java
incubator/pig/branches/types/test/org/apache/pig/test/TestFilterOpString.java
incubator/pig/branches/types/test/org/apache/pig/test/TestInfixArithmetic.java
incubator/pig/branches/types/test/org/apache/pig/test/TestLargeFile.java
incubator/pig/branches/types/test/org/apache/pig/test/TestLogicalPlanBuilder.java
incubator/pig/branches/types/test/org/apache/pig/test/TestMapReduce.java
incubator/pig/branches/types/test/org/apache/pig/test/TestOrderBy.java
incubator/pig/branches/types/test/org/apache/pig/test/TestPi.java
incubator/pig/branches/types/test/org/apache/pig/test/TestPigFile.java
incubator/pig/branches/types/test/org/apache/pig/test/TestStore.java
incubator/pig/branches/types/test/org/apache/pig/test/Util.java
Modified: incubator/pig/branches/types/build.xml
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/build.xml?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/build.xml (original)
+++ incubator/pig/branches/types/build.xml Fri Feb 22 15:44:37 2008
@@ -126,6 +126,9 @@
<fileset dir="test">
<include name="**/*Test*.java" />
<exclude name="**/TestLargeFile.java" />
+ <exclude name="**/TestOrderBy.java" />
+ <exclude name="**/TestPi.java" />
+
<exclude name="**/nightly/**" />
</fileset>
</batchtest>
Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/AVG.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/AVG.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/AVG.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/AVG.java Fri Feb 22 15:44:37 2008
@@ -23,6 +23,7 @@
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
@@ -66,8 +67,8 @@
public Tuple exec(Tuple input) throws IOException {
try {
Tuple t = mTupleFactory.newTuple(2);
- t.set(0, sum(input));
- t.set(1, count(input));
+ t.set(0, new Double(sum(input)));
+ t.set(1, new Long(count(input)));
return t;
} catch(RuntimeException t) {
throw new RuntimeException(t.getMessage() + ": " + input);
@@ -128,7 +129,9 @@
double sum = 0;
for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
Tuple t = it.next();
- sum += (Double)t.get(0);
+ Double d = DataType.toDouble(t.get(0));
+ if (d == null) continue;
+ sum += d;
}
return sum;
Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/MAX.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/MAX.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/MAX.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/MAX.java Fri Feb 22 15:44:37 2008
@@ -23,6 +23,7 @@
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
@@ -77,7 +78,9 @@
for (Iterator it = values.iterator(); it.hasNext();) {
Tuple t = (Tuple)it.next();
try {
- curMax = java.lang.Math.max(curMax, (Double)t.get(0));
+ Double d = DataType.toDouble(t.get(0));
+ if (d == null) continue;
+ curMax = java.lang.Math.max(curMax, d);
} catch (RuntimeException exp) {
IOException newE = new IOException("Error processing: " +
t.toString() + exp.getMessage());
Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/MIN.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/MIN.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/MIN.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/MIN.java Fri Feb 22 15:44:37 2008
@@ -23,6 +23,7 @@
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
@@ -73,7 +74,9 @@
for (Iterator it = values.iterator(); it.hasNext();) {
Tuple t = (Tuple) it.next();
try {
- curMin = java.lang.Math.min(curMin, (Double)t.get(0));
+ Double d = DataType.toDouble(t.get(0));
+ if (d == null) continue;
+ curMin = java.lang.Math.min(curMin, d);
} catch (RuntimeException exp) {
IOException newE = new IOException("Error processing: " +
t.toString() + exp.getMessage());
Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/PigStorage.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/PigStorage.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/PigStorage.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/PigStorage.java Fri Feb 22 15:44:37 2008
@@ -48,8 +48,8 @@
long end = Long.MAX_VALUE;
private byte recordDel = '\n';
private byte fieldDel = '\t';
- private ByteArrayOutputStream mBuf;
- private ArrayList<Object> mProtoTuple;
+ private ByteArrayOutputStream mBuf = null;
+ private ArrayList<Object> mProtoTuple = null;
private TupleFactory mTupleFactory = TupleFactory.getInstance();
public PigStorage() {
@@ -64,8 +64,8 @@
*/
public PigStorage(String delimiter) {
this.fieldDel = (byte)delimiter.charAt(0);
- mBuf = new ByteArrayOutputStream(4096);
- mProtoTuple = new ArrayList<Object>();
+ //mBuf = new ByteArrayOutputStream(4096);
+ //mProtoTuple = new ArrayList<Object>();
}
public Tuple getNext() throws IOException {
@@ -73,6 +73,7 @@
return null;
}
+ if (mBuf == null) mBuf = new ByteArrayOutputStream(4096);
mBuf.reset();
while (true) {
// Hadoop's FSDataInputStream (which my input stream is based
@@ -177,6 +178,7 @@
}
private void readField() {
+ if (mProtoTuple == null) mProtoTuple = new ArrayList<Object>();
if (mBuf.size() == 0) {
// NULL value
mProtoTuple.add(null);
Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/SUM.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/SUM.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/SUM.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/SUM.java Fri Feb 22 15:44:37 2008
@@ -23,6 +23,7 @@
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
@@ -74,9 +75,11 @@
Tuple t = null;
for (Iterator it = values.iterator(); it.hasNext();) {
try {
- t = (Tuple) it.next();
- i++;
- sum += (Double)t.get(0);
+ t = (Tuple) it.next();
+ i++;
+ Double d = DataType.toDouble(t.get(0));
+ if (d == null) continue;
+ sum += d;
}catch(RuntimeException exp) {
String msg = "iteration = " + i + "bag size = " +
values.size() + " partial sum = " + sum + "\n";
Modified: incubator/pig/branches/types/src/org/apache/pig/data/DataType.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DataType.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DataType.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DataType.java Fri Feb 22 15:44:37 2008
@@ -17,10 +17,12 @@
*/
package org.apache.pig.data;
+import java.io.IOException;
import java.lang.Class;
import java.lang.reflect.Type;
import java.util.Iterator;
import java.util.Map;
+import java.util.TreeMap;
/**
* A class of static final values used to encode data type. This could be
@@ -31,7 +33,7 @@
// IMPORTANT! This list can be used to record values of data on disk,
// so do not change the values. You may strand user data.
// IMPORTANT! Order matters here, as compare() below uses the order to
- // order unlink datatypes. Don't change this ordering.
+ // order unlike datatypes. Don't change this ordering.
// Spaced unevenly to leave room for new entries without changing
// values or creating order issues.
public static final byte UNKNOWN = 0;
@@ -113,7 +115,15 @@
* @return type name, as a String.
*/
public static String findTypeName(Object o) {
- byte dt = findType(o);
+ return findTypeName(findType(o));
+ }
+
+ /**
+ * Get the type name from the type byte code
+ * @param dt Type byte code
+ * @return type name, as a String.
+ */
+ public static String findTypeName(byte dt) {
switch (dt) {
case NULL: return "NULL";
case BOOLEAN: return "boolean";
@@ -223,17 +233,15 @@
} else if (sz1 > sz2) {
return 1;
} else {
+ // This is bad, but we have to sort the keys of the maps in order
+ // to be commutative.
+ TreeMap<Object, Object> tm1 = new TreeMap<Object, Object>(m1);
+ TreeMap<Object, Object> tm2 = new TreeMap<Object, Object>(m2);
Iterator<Map.Entry<Object, Object> > i1 =
- m1.entrySet().iterator();
+ tm1.entrySet().iterator();
Iterator<Map.Entry<Object, Object> > i2 =
- m2.entrySet().iterator();
+ tm2.entrySet().iterator();
while (i1.hasNext()) {
- // This isn't real meaningful, as there are no
- // guarantees on iteration order in a map. But it
- // makes more sense than iterating through one and
- // probing the other, which will almost always
- // result in missing keys in the second and thus
- // not provide communativity.
Map.Entry<Object, Object> entry1 = i1.next();
Map.Entry<Object, Object> entry2 = i2.next();
int c = compare(entry1.getKey(), entry2.getKey());
@@ -267,4 +275,254 @@
}
}
+ /**
+ * Force a data object to an Integer, if possible. Any numeric type
+ * can be forced to an Integer (though precision may be lost), as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to an Integer. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is an Integer you
+ * should just cast it.
+ * @return The object as a Integer.
+ * @throws IOException if the type can't be forced to an Integer.
+ */
+ public static Integer toInteger(Object o) throws IOException {
+ switch (findType(o)) {
+ case BOOLEAN:
+ if (((Boolean)o) == true) return new Integer(1);
+ else return new Integer(0);
+
+ case INTEGER:
+ return (Integer)o;
+
+ case LONG:
+ return new Integer(((Long)o).intValue());
+
+ case FLOAT:
+ return new Integer(((Float)o).intValue());
+
+ case DOUBLE:
+ return new Integer(((Double)o).intValue());
+
+ case BYTEARRAY:
+ return new Integer(Integer.valueOf(((DataByteArray)o).toString()));
+
+ case CHARARRAY:
+ return new Integer(Integer.valueOf((String)o));
+
+ case NULL:
+ return null;
+
+ case MAP:
+ case TUPLE:
+ case BAG:
+ case UNKNOWN:
+ default:
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to an Integer");
+ }
+ }
+
+ /**
+ * Force a data object to a Long, if possible. Any numeric type
+ * can be forced to a Long (though precision may be lost), as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to a Long. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Long you
+ * should just cast it.
+ * @return The object as a Long.
+ * @throws IOException if the type can't be forced to a Long.
+ */
+ public static Long toLong(Object o) throws IOException {
+ switch (findType(o)) {
+ case BOOLEAN:
+ if (((Boolean)o) == true) return new Long(1);
+ else return new Long(0);
+
+ case INTEGER:
+ return new Long(((Integer)o).longValue());
+
+ case LONG:
+ return (Long)o;
+
+ case FLOAT:
+ return new Long(((Float)o).longValue());
+
+ case DOUBLE:
+ return new Long(((Double)o).longValue());
+
+ case BYTEARRAY:
+ return new Long(Long.valueOf(((DataByteArray)o).toString()));
+
+ case CHARARRAY:
+ return new Long(Long.valueOf((String)o));
+
+ case NULL:
+ return null;
+
+ case MAP:
+ case TUPLE:
+ case BAG:
+ case UNKNOWN:
+ default:
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a Long");
+ }
+ }
+
+ /**
+ * Force a data object to a Float, if possible. Any numeric type
+ * can be forced to a Float (though precision may be lost), as well
+ * as CharArray, ByteArray. Complex types cannot be
+ * forced to a Float. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Float you
+ * should just cast it.
+ * @return The object as a Float.
+ * @throws IOException if the type can't be forced to a Float.
+ */
+ public static Float toFloat(Object o) throws IOException {
+ switch (findType(o)) {
+ case INTEGER:
+ return new Float(((Integer)o).floatValue());
+
+ case LONG:
+ return new Float(((Long)o).floatValue());
+
+ case FLOAT:
+ return (Float)o;
+
+ case DOUBLE:
+ return new Float(((Double)o).floatValue());
+
+ case BYTEARRAY:
+ return new Float(Float.valueOf(((DataByteArray)o).toString()));
+
+ case CHARARRAY:
+ return new Float(Float.valueOf((String)o));
+
+ case NULL:
+ return null;
+
+ case BOOLEAN:
+ case MAP:
+ case TUPLE:
+ case BAG:
+ case UNKNOWN:
+ default:
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a Float");
+ }
+ }
+
+ /**
+ * Force a data object to a Double, if possible. Any numeric type
+ * can be forced to a Double, as well
+ * as CharArray, ByteArray. Complex types cannot be
+ * forced to a Double. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Double you
+ * should just cast it.
+ * @return The object as a Double.
+ * @throws IOException if the type can't be forced to a Double.
+ */
+ public static Double toDouble(Object o) throws IOException {
+ switch (findType(o)) {
+ case INTEGER:
+ return new Double(((Integer)o).doubleValue());
+
+ case LONG:
+ return new Double(((Long)o).doubleValue());
+
+ case FLOAT:
+ return new Double(((Float)o).doubleValue());
+
+ case DOUBLE:
+ return (Double)o;
+
+ case BYTEARRAY:
+ return new Double(Double.valueOf(((DataByteArray)o).toString()));
+
+ case CHARARRAY:
+ return new Double(Double.valueOf((String)o));
+
+ case NULL:
+ return null;
+
+ case BOOLEAN:
+ case MAP:
+ case TUPLE:
+ case BAG:
+ case UNKNOWN:
+ default:
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a Double");
+ }
+ }
+
+ /**
+ * If this object is a map, return it as a map.
+ * This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Map you
+ * should just cast it.
+ * @return The object as a Double.
+ * @throws IOException if the type can't be forced to a Double.
+ */
+ public static Map<Object, Object> toMap(Object o) throws IOException {
+ if (o == null) return null;
+
+ if (o instanceof Map) {
+ return (Map<Object, Object>)o;
+ } else {
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a Map");
+ }
+ }
+
+ /**
+ * If this object is a tuple, return it as a tuple.
+ * This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Tuple you
+ * should just cast it.
+ * @return The object as a Double.
+ * @throws IOException if the type can't be forced to a Double.
+ */
+ public static Tuple toTuple(Object o) throws IOException {
+ if (o == null) return null;
+
+ if (o instanceof Tuple) {
+ return (Tuple)o;
+ } else {
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a Tuple");
+ }
+ }
+
+ /**
+ * If this object is a bag, return it as a bag.
+ * This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a bag you
+ * should just cast it.
+ * @return The object as a Double.
+ * @throws IOException if the type can't be forced to a Double.
+ */
+ public static DataBag toBag(Object o) throws IOException {
+ if (o == null) return null;
+
+ if (o instanceof DataBag) {
+ return (DataBag)o;
+ } else {
+ throw new IOException("Cannot convert a " + findTypeName(o) +
+ " to a DataBag");
+ }
+ }
+
+ /**
+ * Purely for debugging
+ */
+ public static void spillTupleContents(Tuple t, String label) {
+ System.out.print("Tuple " + label + " ");
+ Iterator<Object> i = t.getAll().iterator();
+ for (int j = 0; i.hasNext(); j++) {
+ System.out.print(j + ":" + i.next().getClass().getName() + " ");
+ }
+ System.out.println(t.toString());
+ }
}
Modified: incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java Fri Feb 22 15:44:37 2008
@@ -253,7 +253,6 @@
// using the iterator to write it will guarantee those things come
// correctly. And on the other end there'll be no reason to waste
// time re-sorting or re-applying distinct.
- out.write(DataType.BAG);
out.writeLong(size());
Iterator<Tuple> it = iterator();
while (it.hasNext()) {
Modified: incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java Fri Feb 22 15:44:37 2008
@@ -18,6 +18,7 @@
package org.apache.pig.data;
import java.io.IOException;
+import java.lang.Class;
import java.util.List;
/**
@@ -49,6 +50,10 @@
"allocated tuple of size 1!", e);
}
return t;
+ }
+
+ public Class tupleClass() {
+ return DefaultTuple.class;
}
DefaultTupleFactory() {
Modified: incubator/pig/branches/types/src/org/apache/pig/data/TupleFactory.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/TupleFactory.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/TupleFactory.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/TupleFactory.java Fri Feb 22 15:44:37 2008
@@ -83,7 +83,8 @@
/**
* Create a tuple with size fields. Whenever possible this is prefered
* over the nullary constructor, as the constructor can preallocate the
- * size of the container holding the fields.
+ * size of the container holding the fields. Once this is called, it
+ * is legal to call Tuple.set(x, object), where x < size.
* @param size Number of fields in the tuple.
*/
public abstract Tuple newTuple(int size);
@@ -101,6 +102,16 @@
* @param datum Datum to put in the tuple.
*/
public abstract Tuple newTuple(Object datum);
+
+ /**
+ * Return the actual class representing a tuple that the implementing
+ * factory will be returning. This is needed because hadoop (and
+ * possibly other systems) we use need to know the exact class we will
+ * be using for input and output.
+ * @return Class that implements tuple.
+ */
+ public abstract Class tupleClass();
+
protected TupleFactory() {
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/builtin/ADD.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/builtin/ADD.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/builtin/ADD.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/builtin/ADD.java Fri Feb 22 15:44:37 2008
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -29,8 +30,10 @@
@Override
public Double exec(Tuple input) throws IOException {
- double v1 = (Double)input.get(0);
- double v2 = (Double)input.get(1);
+ Double v1 = DataType.toDouble(input.get(0));
+ Double v2 = DataType.toDouble(input.get(1));
+
+ if (v1 == null || v2 == null) return null;
return new Double(v1 + v2);
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/builtin/DIVIDE.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/builtin/DIVIDE.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/builtin/DIVIDE.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/builtin/DIVIDE.java Fri Feb 22 15:44:37 2008
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
@@ -27,8 +28,9 @@
@Override
public Double exec(Tuple input) throws IOException {
- double v1 = (Double)input.get(0);
- double v2 = (Double)input.get(1);
+ Double v1 = DataType.toDouble(input.get(0));
+ Double v2 = DataType.toDouble(input.get(1));
+ if (v1 == null || v2 == null) return null;
return new Double(v1/v2);
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/builtin/MULTIPLY.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/builtin/MULTIPLY.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/builtin/MULTIPLY.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/builtin/MULTIPLY.java Fri Feb 22 15:44:37 2008
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -30,8 +31,11 @@
@Override
public Double exec(Tuple input) throws IOException {
- double v1 = (Double)input.get(0);
- double v2 = (Double)input.get(1);
+ Double v1 = DataType.toDouble(input.get(0));
+ Double v2 = DataType.toDouble(input.get(1));
+
+ if (v1 == null || v2 == null) return null;
+
return new Double(v1 * v2);
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/builtin/SUBTRACT.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/builtin/SUBTRACT.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/builtin/SUBTRACT.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/builtin/SUBTRACT.java Fri Feb 22 15:44:37 2008
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.AtomSchema;
import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -29,9 +30,11 @@
@Override
public Double exec(Tuple input) throws IOException {
- double v1 = (Double)input.get(0);
- double v2 = (Double)input.get(1);
+ Double v1 = DataType.toDouble(input.get(0));
+ Double v2 = DataType.toDouble(input.get(1));
+ if (v1 == null || v2 == null) return null;
+
return new Double(v1 - v2);
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/eval/GenerateSpec.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/eval/GenerateSpec.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/eval/GenerateSpec.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/eval/GenerateSpec.java Fri Feb 22 15:44:37 2008
@@ -25,6 +25,7 @@
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.FunctionInstantiator;
@@ -40,9 +41,6 @@
protected int driver;
- private TupleFactory mTupleFactory = TupleFactory.getInstance();
-
-
public GenerateSpec(List<EvalSpec> specs){
this.specs = specs;
selectDriver();
@@ -70,7 +68,6 @@
@Override
public void add(Object d) {
-
if (checkDelimiter(d))
throw new RuntimeException("Internal error: not expecting a delimiter tuple");
@@ -108,6 +105,8 @@
}
private class DatumBag extends DataCollector{
+ private TupleFactory tf = TupleFactory.getInstance();
+
DataBag bag;
public DatumBag(){
super(null);
@@ -116,7 +115,7 @@
@Override
public void add(Object d){
- bag.add(mTupleFactory.newTuple(d));
+ bag.add(tf.newTuple(d));
}
public Iterator<Object> content(){
@@ -146,6 +145,8 @@
private class CrossProductItem extends DataCollector{
DatumBag[] toBeCrossed;
Object cpiInput;
+ private TupleFactory tf = TupleFactory.getInstance();
+
public CrossProductItem(Object driverInput, DataCollector successor){
super(successor);
@@ -220,7 +221,7 @@
}
}
- Tuple outTuple = mTupleFactory.newTuple();
+ Tuple outTuple = tf.newTuple();
for (int i=0; i< numItems; i++){
if (specs.get(i).isFlattened() && outData[i] instanceof Tuple){
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/eval/ProjectSpec.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/eval/ProjectSpec.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/eval/ProjectSpec.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/eval/ProjectSpec.java Fri Feb 22 15:44:37 2008
@@ -33,9 +33,6 @@
protected List<Integer> cols;
protected boolean wrapInTuple;
- private TupleFactory mTupleFactory = TupleFactory.getInstance();
-
-
public List<Integer> getCols() {
return cols;
}
@@ -86,7 +83,7 @@
if (!wrapInTuple && cols.size() == 1){
return t.get(cols.get(0));
}else{
- Tuple out = mTupleFactory.newTuple(cols.size());
+ Tuple out = TupleFactory.getInstance().newTuple(cols.size());
for (int i: cols){
out.set(i, t.get(i));
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java Fri Feb 22 15:44:37 2008
@@ -77,7 +77,7 @@
groupAndTuple[0] = output.get(0);
groupAndTuple[1] = output.get(1);
} else {
- Tuple group = TupleFactory.getInstance().newTuple(output.size());
+ Tuple group = TupleFactory.getInstance().newTuple(output.size() - 1);
for (int j = 0; j < output.size() - 1; j++) {
group.set(j, output.get(j));
}
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/MapReduceLauncher.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/MapReduceLauncher.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/MapReduceLauncher.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/MapReduceLauncher.java Fri Feb 22 15:44:37 2008
@@ -30,6 +30,7 @@
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.IndexedTuple;
import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.eval.EvalSpec;
import org.apache.pig.impl.io.PigFile;
import org.apache.pig.impl.physicalLayer.POMapreduce;
@@ -40,7 +41,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskReport;
@@ -67,13 +68,22 @@
}
public static class PigWritableComparator extends WritableComparator {
+ // TupleFactory mTupleFactory = TupleFactory.getInstance();
+
public PigWritableComparator() {
- super(Tuple.class);
+ super(TupleFactory.getInstance().tupleClass());
}
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2){
return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2);
}
+
+ @Override
+ public WritableComparable newKey() {
+ //return mTupleFactory.newTuple();
+ return TupleFactory.getInstance().newTuple();
+ }
+
}
static Random rand = new Random();
@@ -162,7 +172,7 @@
conf.setOutputFormat(PigOutputFormat.class);
// not used starting with 0.15 conf.setInputKeyClass(Text.class);
// not used starting with 0.15 conf.setInputValueClass(Tuple.class);
- conf.setOutputKeyClass(Tuple.class);
+ conf.setOutputKeyClass(TupleFactory.getInstance().tupleClass());
if (pom.userComparator != null)
conf.setOutputKeyComparatorClass(pom.userComparator);
conf.setOutputValueClass(IndexedTuple.class);
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigCombine.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigCombine.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigCombine.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigCombine.java Fri Feb 22 15:44:37 2008
@@ -29,6 +29,7 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.IndexedTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
@@ -55,12 +56,6 @@
throws IOException {
try {
- /*
- tmpdir = new File(job.get("mapred.task.id"));
- tmpdir.mkdirs();
-
- BagFactory.init(tmpdir);
- */
PigContext pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));
if (evalPipe == null) {
inputCount = ((ArrayList<FileSpec>)ObjectSerializer.deserialize(job.get("pig.inputs"))).size();
@@ -70,7 +65,6 @@
EvalSpec esp = (EvalSpec)ObjectSerializer.deserialize(evalSpec);
if(esp != null) esp.instantiateFunc(pigContext);
evalPipe = esp.setupPipe(finalout);
- //throw new RuntimeException("combine spec: " + evalSpec + " combine pipe: " + esp.toString());
bags = new DataBag[inputCount];
for (int i = 0; i < inputCount; i++) {
@@ -81,7 +75,7 @@
PigSplit split = PigInputFormat.PigRecordReader.getPigRecordReader().getPigFileSplit();
index = split.getIndex();
- String groupName = (String)((Tuple) key).get(0);
+ String groupName = ((Tuple) key).get(0).toString();
finalout.group = ((Tuple) key);
finalout.index = index;
@@ -100,9 +94,8 @@
if (((DataBag)t.get(1 + i)).size() == 0)
return;
}
-// throw new RuntimeException("combine input: " + t.toString());
+
evalPipe.add(t);
- // evalPipe.add(null); // EOF marker
} catch (Throwable tr) {
tr.printStackTrace();
RuntimeException exp = new RuntimeException(tr.getMessage());
Modified: incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigMapReduce.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigMapReduce.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigMapReduce.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/mapreduceExec/PigMapReduce.java Fri Feb 22 15:44:37 2008
@@ -51,6 +51,9 @@
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.tools.timer.PerformanceTimerFactory;
+import java.io.FileOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
/**
* This class is a wrapper of sorts for Pig Map/Reduce jobs. Both the Mapper and the Reducer are
@@ -128,20 +131,13 @@
PigMapReduce.reporter = reporter;
try {
- /*
- tmpdir = new File(job.get("mapred.task.id"));
- tmpdir.mkdirs();
-
- BagFactory.init(tmpdir);
- */
-
oc = output;
if (evalPipe == null) {
setupReducePipe();
}
DataBag[] bags = new DataBag[inputCount];
- String groupName = (String)((Tuple) key).get(0);
+ String groupName = ((Tuple) key).get(0).toString();
Tuple t = mTupleFactory.newTuple(1 + inputCount);
t.set(0, groupName);
for (int i = 1; i < 1 + inputCount; i++) {
@@ -298,10 +294,10 @@
}
class MapDataOutputCollector extends DataCollector {
-
+
public MapDataOutputCollector(){
super(null);
- }
+ }
@Override
public void add(Object d){
Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestAlgebraicEval.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestAlgebraicEval.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/test/org/apache/pig/test/TestAlgebraicEval.java (original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestAlgebraicEval.java Fri Feb 22 15:44:37 2008
@@ -29,6 +29,7 @@
import org.apache.pig.PigServer;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
public class TestAlgebraicEval extends TestCase {
@@ -67,7 +68,7 @@
@Test
public void testSimpleCount() throws Exception {
- int LOOP_COUNT = 1024;
+ long LOOP_COUNT = 1024;
PigServer pig = new PigServer(initString);
File tmpFile = File.createTempFile("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
@@ -81,14 +82,13 @@
Iterator it = pig.openIterator("myid");
tmpFile.delete();
Tuple t = (Tuple)it.next();
- DataByteArray a = (DataByteArray)t.get(0);
- Double count = Double.valueOf(a.toString());
- assertEquals(count, (double)LOOP_COUNT);
+ Long count = DataType.toLong(t.get(0));
+ assertEquals(count.longValue(), LOOP_COUNT);
}
@Test
public void testGroupCount() throws Exception {
- int LOOP_COUNT = 1024;
+ long LOOP_COUNT = 1024;
PigServer pig = new PigServer(initString);
File tmpFile = File.createTempFile("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
@@ -102,16 +102,15 @@
Iterator it = pig.openIterator("myid");
tmpFile.delete();
Tuple t = (Tuple)it.next();
- DataByteArray a = (DataByteArray)t.get(1);
- Double count = Double.valueOf(a.toString());
- assertEquals(count, (double)LOOP_COUNT);
+ Long count = DataType.toLong(t.get(1));
+ assertEquals(count.longValue(), LOOP_COUNT);
}
@Test
public void testGroupReorderCount() throws Exception {
- int LOOP_COUNT = 1024;
+ long LOOP_COUNT = 1024;
PigServer pig = new PigServer(initString);
File tmpFile = File.createTempFile("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
@@ -125,9 +124,8 @@
Iterator it = pig.openIterator("myid");
tmpFile.delete();
Tuple t = (Tuple)it.next();
- DataByteArray a = (DataByteArray)t.get(0);
- Double count = Double.valueOf(a.toString());
- assertEquals(count, (double)LOOP_COUNT);
+ Long count = DataType.toLong(t.get(0));
+ assertEquals(count.longValue(), LOOP_COUNT);
}
@@ -138,7 +136,7 @@
PigServer pig = new PigServer(initString);
File tmpFile = File.createTempFile("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
- int groupsize = 0;
+ long groupsize = 0;
for(int i = 0; i < LOOP_COUNT; i++) {
if(i%10 == 0) groupsize++;
ps.println(i%10 + ":" + i);
@@ -151,12 +149,11 @@
tmpFile.delete();
while(it.hasNext()) {
Tuple t = (Tuple)it.next();
- DataByteArray a = (DataByteArray)t.get(0);
+ String a = t.get(0).toString();
Double group = Double.valueOf(a.toString());
if(group == 0.0) {
- DataByteArray b = (DataByteArray)t.get(1);
- Double count = Double.valueOf(b.toString());
- assertEquals(count, (double)groupsize);
+ Long count = DataType.toLong(t.get(1));
+ assertEquals(count.longValue(), groupsize);
break;
}
}
@@ -168,7 +165,7 @@
PigServer pig = new PigServer(initString);
File tmpFile = File.createTempFile("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
- int groupsize = 0;
+ long groupsize = 0;
for(int i = 0; i < LOOP_COUNT; i++) {
if(i%10 == 0) groupsize++;
ps.println(i%10 + ":" + i);
@@ -181,12 +178,11 @@
tmpFile.delete();
while(it.hasNext()) {
Tuple t = (Tuple)it.next();
- DataByteArray a = (DataByteArray)t.get(0);
+ String a = t.get(0).toString();
Double group = Double.valueOf(a.toString());
if(group == 0.0) {
- DataByteArray b = (DataByteArray)t.get(1);
- Double count = Double.valueOf(b.toString());
- assertEquals(count, (double)groupsize);
+ Long count = DataType.toLong(t.get(1));
+ assertEquals(count.longValue(), groupsize);
break;
}
}
Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestBuiltin.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/test/org/apache/pig/test/TestBuiltin.java (original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestBuiltin.java Fri Feb 22 15:44:37 2008
@@ -22,7 +22,9 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.Map;
import junit.framework.TestCase;
@@ -57,7 +59,7 @@
Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
Double output = avg.exec(tup);
- assertTrue(actual == expected);
+ assertTrue(output == expected);
}
@Test
@@ -68,25 +70,23 @@
Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
Tuple output = avg.exec(tup);
- DataByteArray a = output.get(0);
- Double f1 = Double.valueOf(a.toString());
+ Double f1 = DataType.toDouble(output.get(0));
assertEquals("Expected sum to be 55.0", 55.0, f1);
- a = output.get(1);
- Long f2 = Long.valueOf(a.toString());
- assertEquals("Expected count to be 10", 10, f2);
+ Long f2 = DataType.toLong(output.get(1));
+ assertEquals("Expected count to be 10", 10, f2.longValue());
}
@Test
public void testAVGFinal() throws Exception {
Tuple t1 = TupleFactory.getInstance().newTuple(2);
t1.set(0, 55.0);
- t1.set(1, 10);
+ t1.set(1, 10L);
Tuple t2 = TupleFactory.getInstance().newTuple(2);
t2.set(0, 28.0);
- t2.set(1, 7);
+ t2.set(1, 7L);
Tuple t3 = TupleFactory.getInstance().newTuple(2);
t3.set(0, 82.0);
- t3.set(1, 17);
+ t3.set(1, 17L);
DataBag bag = BagFactory.getInstance().newDefaultBag();
bag.add(t1);
bag.add(t2);
@@ -111,12 +111,12 @@
Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
Long output = count.exec(tup);
- assertTrue(actual == expected);
+ assertTrue(output == expected);
}
@Test
public void testCOUNTMap() throws Exception {
- Map<Object, Object> map = new Map<Object, Object>();
+ Map<Object, Object> map = new HashMap<Object, Object>();
Tuple tup = TupleFactory.getInstance().newTuple();
tup.append(map);
@@ -135,11 +135,11 @@
assertTrue(output == 1);
- map.put("b", Tuple.getInstance().newTuple());
+ map.put("b", TupleFactory.getInstance().newTuple());
assertFalse(isEmpty.exec(tup));
output = count.exec(tup);
- assertTrue(output.numval() == 2);
+ assertTrue(output == 2);
}
@@ -151,20 +151,19 @@
Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
Tuple output = count.exec(tup);
- DataByteArray a = output.get(0);
- Long f1 = Long.valueOf(a.toString());
- assertEquals("Expected count to be 10", 10, f1);
+ Long f1 = DataType.toLong(output.get(0));
+ assertEquals("Expected count to be 10", 10, f1.longValue());
}
@Test
public void testCOUNTFinal() throws Exception {
- int input[] = { 23, 38, 39 };
+ long input[] = { 23, 38, 39 };
Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
- EvalFunc<DataAtom> count = new COUNT.Final();
+ EvalFunc<Long> count = new COUNT.Final();
Long output = count.exec(tup);
- assertEquals("Expected count to be 100", 100, output);
+ assertEquals("Expected count to be 100", 100, output.longValue());
}
@Test
@@ -172,14 +171,11 @@
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
double expected = 55;
- EvalFunc<DataAtom> sum = new SUM();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- DataAtom output = new DataAtom();
- sum.exec(tup, output);
-
- double actual = (new Double(output.strval())).doubleValue();
+ EvalFunc<Double> sum = new SUM();
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Double output = sum.exec(tup);
- assertTrue(actual == expected);
+ assertTrue(output == expected);
}
@Test
@@ -187,36 +183,32 @@
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
EvalFunc<Tuple> sum = new SUM.Initial();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- Tuple output = new Tuple();
- sum.exec(tup, output);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Tuple output = sum.exec(tup);
- assertEquals("Expected sum to be 55.0", 55.0,
- output.getAtomField(0).numval());
+ assertEquals("Expected sum to be 55.0", 55.0, DataType.toDouble(output.get(0)));
}
@Test
public void testSUMFinal() throws Exception {
int input[] = { 23, 38, 39 };
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
- EvalFunc<DataAtom> sum = new SUM.Final();
- DataAtom output = new DataAtom();
- sum.exec(tup, output);
+ EvalFunc<Double> sum = new SUM.Final();
+ Double output = sum.exec(tup);
- assertEquals("Expected sum to be 100.0", 100.0, output.numval());
+ assertEquals("Expected sum to be 100.0", 100.0, output);
}
@Test
public void testMIN() throws Exception {
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
- EvalFunc<DataAtom> min = new MIN();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- DataAtom output = new DataAtom();
- min.exec(tup, output);
+ EvalFunc<Double> min = new MIN();
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Double output = min.exec(tup);
- assertEquals("Expected min to be 1.0", 1.0, output.numval());
+ assertEquals("Expected min to be 1.0", 1.0, output);
}
@@ -225,36 +217,32 @@
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
EvalFunc<Tuple> min = new MIN.Initial();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- Tuple output = new Tuple();
- min.exec(tup, output);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Tuple output = min.exec(tup);
- assertEquals("Expected min to be 1.0", 1.0,
- output.getAtomField(0).numval());
+ assertEquals("Expected min to be 1.0", 1.0, DataType.toDouble(output.get(0)));
}
@Test
public void testMINFinal() throws Exception {
int input[] = { 23, 38, 39 };
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
- EvalFunc<DataAtom> min = new MIN.Final();
- DataAtom output = new DataAtom();
- min.exec(tup, output);
+ EvalFunc<Double> min = new MIN.Final();
+ Double output = min.exec(tup);
- assertEquals("Expected sum to be 23.0", 23.0, output.numval());
+ assertEquals("Expected sum to be 23.0", 23.0, output);
}
@Test
public void testMAX() throws Exception {
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
- EvalFunc<DataAtom> max = new MAX();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- DataAtom output = new DataAtom();
- max.exec(tup, output);
+ EvalFunc<Double> max = new MAX();
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Double output = max.exec(tup);
- assertEquals("Expected max to be 10.0", 10.0, output.numval());
+ assertEquals("Expected max to be 10.0", 10.0, output);
}
@@ -263,24 +251,21 @@
int input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
EvalFunc<Tuple> max = new MAX.Initial();
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
- Tuple output = new Tuple();
- max.exec(tup, output);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
+ Tuple output = max.exec(tup);
- assertEquals("Expected max to be 10.0", 10.0,
- output.getAtomField(0).numval());
+ assertEquals("Expected max to be 10.0", 10.0, DataType.toDouble(output.get(0)));
}
@Test
public void testMAXFinal() throws Exception {
int input[] = { 23, 38, 39 };
- Tuple tup = Util.loadNestTuple(new Tuple(1), input);
+ Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
- EvalFunc<DataAtom> max = new MAX.Final();
- DataAtom output = new DataAtom();
- max.exec(tup, output);
+ EvalFunc<Double> max = new MAX.Final();
+ Double output = max.exec(tup);
- assertEquals("Expected sum to be 39.0", 39.0, output.numval());
+ assertEquals("Expected sum to be 39.0", 39.0, output);
}
@@ -301,7 +286,7 @@
FakeFSInputStream ffis1 = new FakeFSInputStream(input1.getBytes());
p1.bindTo(null, new BufferedPositionedInputStream(ffis1), 0, input1.getBytes().length);
Tuple f1 = p1.getNext();
- assertTrue(f1.arity() == arity1);
+ assertTrue(f1.size() == arity1);
String input2 = ":this:has:a:leading:colon\n";
int arity2 = 6;
@@ -310,7 +295,7 @@
FakeFSInputStream ffis2 = new FakeFSInputStream(input2.getBytes());
p2.bindTo(null, new BufferedPositionedInputStream(ffis2), 0, input2.getBytes().length);
Tuple f2 = p2.getNext();
- assertTrue(f2.arity() == arity2);
+ assertTrue(f2.size() == arity2);
String input3 = "this:has:a:trailing:colon:\n";
int arity3 = 6;
@@ -319,7 +304,7 @@
FakeFSInputStream ffis3 = new FakeFSInputStream(input3.getBytes());
p3.bindTo(null, new BufferedPositionedInputStream(ffis3), 0, input1.getBytes().length);
Tuple f3 = p3.getNext();
- assertTrue(f3.arity() == arity3);
+ assertTrue(f3.size() == arity3);
}
/*
@@ -399,7 +384,8 @@
text1.bindTo(null, new BufferedPositionedInputStream(ffis1), 0, input1.getBytes().length);
Tuple f1 = text1.getNext();
Tuple f2 = text1.getNext();
- assertTrue(expected1.equals(f1.getAtomField(0).strval()) && expected2.equals(f2.getAtomField(0).strval()));
+ assertTrue(expected1.equals(f1.get(0).toString()) &&
+ expected2.equals(f2.get(0).toString()));
String input2 = "";
FakeFSInputStream ffis2 = new FakeFSInputStream(input2.getBytes());
@@ -416,8 +402,11 @@
StoreFunc sfunc = new PigStorage("\t");
sfunc.bindTo(os);
- int[] input = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
- Tuple f1 = Util.loadFlatTuple(new Tuple(input.length), input);
+ DataByteArray[] input = { new DataByteArray("amy"),
+ new DataByteArray("bob"), new DataByteArray("charlene"),
+ new DataByteArray("david"), new DataByteArray("erin"),
+ new DataByteArray("frank") };
+ Tuple f1 = Util.loadTuple(TupleFactory.getInstance().newTuple(input.length), input);
sfunc.putNext(f1);
sfunc.finish();
@@ -450,10 +439,10 @@
assertTrue(iter.hasNext());
t = iter.next();
- assertEquals("f00", t.getAtomField(0).strval());
+ assertEquals("{(f00)}", t.get(0).toString());
assertTrue(iter.hasNext());
t = iter.next();
- assertEquals("b00", t.getAtomField(0).strval());
+ assertEquals("{(b00)}", t.get(0).toString());
assertFalse(iter.hasNext());
tempFile.delete();
}
@@ -479,8 +468,12 @@
for (int i=0; i< numTimes; i++){
Tuple t = iter.next();
- assertEquals(i+"AA", t.getBagField(0).iterator().next().getAtomField(0).strval());
- assertEquals(i+"BB", t.getBagField(1).iterator().next().getAtomField(0).strval());
+ DataBag b = DataType.toBag(t.get(0));
+ Tuple t1 = b.iterator().next();
+ assertEquals(i+"AA", t1.get(0).toString());
+ b = DataType.toBag(t.get(1));
+ t1 = b.iterator().next();
+ assertEquals(i+"BB", t1.get(0).toString());
}
assertFalse(iter.hasNext());
Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestDataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestDataBag.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/test/org/apache/pig/test/TestDataBag.java (original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestDataBag.java Fri Feb 22 15:44:37 2008
@@ -97,7 +97,7 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -125,7 +125,7 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -155,7 +155,7 @@
// Write tuples into both
for (int j = 0; j < 3; j++) {
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -185,14 +185,14 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -220,14 +220,14 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -262,7 +262,7 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -290,7 +290,7 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -319,7 +319,7 @@
// Write tuples into both
for (int j = 0; j < 3; j++) {
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -349,14 +349,14 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -383,14 +383,14 @@
// Write tuples into both
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -424,7 +424,7 @@
PriorityQueue<Tuple> rightAnswer = new PriorityQueue<Tuple>(20);
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -461,7 +461,7 @@
// Write tuples into both
for (int j = 0; j < 373; j++) {
for (int i = 0; i < 10; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt()));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt()));
b.add(t);
rightAnswer.add(t);
}
@@ -491,7 +491,7 @@
// Write tuples into both
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
@@ -519,7 +519,7 @@
// Write tuples into both
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
@@ -549,7 +549,7 @@
// Write tuples into both
for (int j = 0; j < 3; j++) {
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
@@ -579,14 +579,14 @@
// Write tuples into both
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -614,14 +614,14 @@
// Write tuples into both
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
mgr.forceSpill();
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(i));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(i));
b.add(t);
rightAnswer.add(t);
}
@@ -658,7 +658,7 @@
// Write tuples into both
for (int j = 0; j < 321; j++) {
for (int i = 0; i < 50; i++) {
- Tuple t = new Tuple(new DataAtom(rand.nextInt() % 5));
+ Tuple t = TupleFactory.getInstance().newTuple(new Integer(rand.nextInt() % 5));
b.add(t);
rightAnswer.add(t);
}
Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestDataModel.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestDataModel.java?rev=630357&r1=630356&r2=630357&view=diff
==============================================================================
--- incubator/pig/branches/types/test/org/apache/pig/test/TestDataModel.java (original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestDataModel.java Fri Feb 22 15:44:37 2008
@@ -17,12 +17,19 @@
*/
package org.apache.pig.test;
+import java.io.DataInput;
import java.io.DataInputStream;
+import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileInputStream;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.Map;
import java.util.Random;
import org.junit.Test;
@@ -44,83 +51,56 @@
}
@Test
- public void testDataAtom() throws Exception {
- // Make sure a DataAtom is still a Datum
- DataAtom da1 = new DataAtom("string");
- assertTrue(da1 instanceof Datum);
-
- // Test basic comparison functions
- DataAtom da2 = new DataAtom("string");
- assertTrue(da1.compareTo(da2) == 0);
- assertTrue(da1.equals(da2));
- assertTrue(da1.strval().equals(da2.toString()));
-
- // Make sure that case sensitivity is maintained
- da2 = new DataAtom("String");
- assertFalse(da1.compareTo(da2) == 0);
- assertFalse(da1.strval().equals(da2));
- assertFalse(da1.strval().equals(da2.toString()));
-
- // Test string/int/double comparison and storage
- da1 = new DataAtom(1);
- da2 = new DataAtom("1");
- assertTrue(da1.equals(da2));
- da2 = new DataAtom(1.0);
- assertTrue(da1.numval().equals(da2.numval()));
- da2 = new DataAtom("2");
- assertTrue(da1.compareTo(da2) < 0);
- assertFalse(da1.compareTo(da2) > 0);
- }
-
- @Test
public void testTuple() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
int arity = 5;
int[] input1 = { 1, 2, 3, 4, 5 };
String[] input2 = { "1", "2", "3", "4", "5" };
String[] input3 = { "1", "2", "3", "4", "5", "6" };
// validate construction and equality
- Tuple f1 = Util.loadFlatTuple(new Tuple(arity), input1);
- Tuple f2 = Util.loadFlatTuple(new Tuple(arity), input1);
- Tuple f3 = new Tuple(arity);
- assertTrue(f1.arity() == arity);
- assertTrue(f1.equals(f2));
-
- // validate string vs. int construction and equality
- f2 = Util.loadTuple(new Tuple(arity), input2);
+ Tuple f1 = Util.loadFlatTuple(tf.newTuple(arity), input1);
+ Tuple f2 = Util.loadFlatTuple(tf.newTuple(arity), input1);
+ Tuple f3 = tf.newTuple(arity);
+ assertTrue(f1.size() == arity);
assertTrue(f1.equals(f2));
// invalid equality
- f2 = Util.loadTuple(new Tuple(input3.length), input3);
+ f2 = Util.loadTuple(tf.newTuple(input3.length), input3);
assertFalse(f1.equals(f2));
// copy equality
+ /*
f2.copyFrom(f1);
assertTrue(f1.equals(f2));
+ */
// append function and equality
int[] input4 = { 1, 2, 3 };
int[] input5 = { 4, 5 };
- f1 = Util.loadFlatTuple(new Tuple(input4.length), input4);
- f2 = Util.loadFlatTuple(new Tuple(input5.length), input5);
- f3 = Util.loadFlatTuple(new Tuple(input1.length), input1);
+ /*
+ f1 = Util.loadFlatTuple(tf.newTuple(input4.length), input4);
+ f2 = Util.loadFlatTuple(tf.newTuple(input5.length), input5);
+ f3 = Util.loadFlatTuple(tf.newTuple(input1.length), input1);
f1.appendTuple(f2);
assertTrue(f3.equals(f1));
+ */
// arity then value comparision behavior
- f1 = Util.loadFlatTuple(new Tuple(input1.length), input1); // 1,2,3,4,5
- f2 = Util.loadFlatTuple(new Tuple(input4.length), input4); // 1,2,3
- assertTrue(f1.greaterThan(f2));
- assertFalse(f1.lessThan(f2));
+ f1 = Util.loadFlatTuple(tf.newTuple(input1.length), input1); // 1,2,3,4,5
+ f2 = Util.loadFlatTuple(tf.newTuple(input4.length), input4); // 1,2,3
+ assertTrue(f1.compareTo(f2) > 0);
+ assertFalse(f1.compareTo(f2) < 0);
int[] input6 = { 1, 2, 3, 4, 6 };
- f2 = Util.loadFlatTuple(new Tuple(input6.length), input6);
- assertTrue(f1.lessThan(f2));
- assertFalse(f1.greaterThan(f2));
+ f2 = Util.loadFlatTuple(tf.newTuple(input6.length), input6);
+ assertTrue(f1.compareTo(f2) < 0);
+ assertFalse(f1.compareTo(f2) > 0);
// delimited export
String expected = "1:2:3:4:5";
- f1 = Util.loadFlatTuple(new Tuple(input1.length), input1);
+ f1 = Util.loadFlatTuple(tf.newTuple(input1.length), input1);
assertTrue(expected.equals(f1.toDelimitedString(":")));
// value read / write & marshalling
@@ -138,159 +118,272 @@
@Test
public void testNestTuple() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
int[][] input1 = { { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 },
{ 1, 2, 3, 4, 5 } };
int[][] input2 = { { 1, 2 }, { 1, 2 } };
- Tuple n1 = Util.loadNestTuple(new Tuple(input1.length), input1);
- Tuple n2 = new Tuple();
+ Tuple n1 = Util.loadNestTuple(tf.newTuple(input1.length), input1);
+ Tuple n2 = tf.newTuple();
- // CompareTo is currently not implemented
- n2.copyFrom(n1);
- // assertTrue(n1.compareTo(n2) == 0);
+ n2 = Util.loadNestTuple(tf.newTuple(input2.length), input2);
+ }
- n2 = Util.loadNestTuple(new Tuple(input2.length), input2);
- // assertTrue(n1.compareTo(n2) == 1);
+ @Test
+ public void testReadWrite() throws Exception {
+ // Create a tuple with every data type in it, and then read and
+ // write it, both via DataReaderWriter and Tuple.readFields
+ TupleFactory tf = TupleFactory.getInstance();
+
+ Tuple t1 = giveMeOneOfEach();
+
+ File file = File.createTempFile("Tuple", "put");
+ FileOutputStream fos = new FileOutputStream(file);
+ DataOutput out = new DataOutputStream(fos);
+ t1.write(out);
+ t1.write(out); // twice in a row on purpose
+ fos.close();
+
+ FileInputStream fis = new FileInputStream(file);
+ DataInput in = new DataInputStream(fis);
+ for (int i = 0; i < 2; i++) {
+ Tuple after = tf.newTuple();
+ after.readFields(in);
+
+ Object o = after.get(0);
+ assertTrue("isa Tuple", o instanceof Tuple);
+ Tuple t3 = (Tuple)o;
+ o = t3.get(0);
+ assertTrue("isa Integer", o instanceof Integer);
+ assertEquals(new Integer(3), (Integer)o);
+ o = t3.get(1);
+ assertTrue("isa Float", o instanceof Float);
+ assertEquals(new Float(3.0), (Float)o);
+
+ o = after.get(1);
+ assertTrue("isa Bag", o instanceof DataBag);
+ DataBag b = (DataBag)o;
+ Iterator<Tuple> j = b.iterator();
+ Tuple[] ts = new Tuple[2];
+ assertTrue("first tuple in bag", j.hasNext());
+ ts[0] = j.next();
+ assertTrue("second tuple in bag", j.hasNext());
+ ts[1] = j.next();
+ o = ts[0].get(0);
+ assertTrue("isa Integer", o instanceof Integer);
+ assertEquals(new Integer(4), (Integer)o);
+ o = ts[1].get(0);
+ assertTrue("isa String", o instanceof String);
+ assertEquals("mary had a little lamb", (String)o);
+
+ o = after.get(2);
+ assertTrue("isa Map", o instanceof Map);
+ Map<Object, Object> m = (Map<Object, Object>)o;
+ assertEquals("world", (String)m.get("hello"));
+ assertEquals("all", (String)m.get("goodbye"));
+ assertNull(m.get("fred"));
+
+ o = after.get(3);
+ assertTrue("isa Integer", o instanceof Integer);
+ Integer ii = (Integer)o;
+ assertEquals(new Integer(42), ii);
+
+ o = after.get(4);
+ assertTrue("isa Long", o instanceof Long);
+ Long l = (Long)o;
+ assertEquals(new Long(5000000000L), l);
+
+ o = after.get(5);
+ assertTrue("isa Float", o instanceof Float);
+ Float f = (Float)o;
+ assertEquals(new Float(3.141592654), f);
+
+ o = after.get(6);
+ assertTrue("isa Double", o instanceof Double);
+ Double d = (Double)o;
+ assertEquals(2.99792458e8, d);
+
+ o = after.get(7);
+ assertTrue("isa Boolean", o instanceof Boolean);
+ Boolean bool = (Boolean)o;
+ assertTrue(bool);
+
+ o = after.get(8);
+ assertTrue("isa DataByteArray", o instanceof DataByteArray);
+ DataByteArray ba = (DataByteArray)o;
+ assertEquals(new DataByteArray("hello"), ba);
+
+ o = after.get(9);
+ assertTrue("isa String", o instanceof String);
+ String s = (String)o;
+ assertEquals("goodbye", s);
+ }
- // basic append test ..
- int n1Arity = n1.arity();
- int n2Arity = n2.arity();
- n1.appendTuple(n2);
- assertTrue(n1.arity() == n1Arity + n2Arity);
+ file.delete();
}
- /*
@Test
- public void testDataBag() throws Exception {
- int[] input1 = { 1, 2, 3, 4, 5 };
- int[] input2 = { 0, 2, 3, 4, 5 };
+ public void testTupleToString() throws Exception {
+ Tuple t = giveMeOneOfEach();
- // Check empty bag errors
- DataBag b = new DataBag();
- boolean caught = false;
- try {
- b.getField(0).strval().equals("1");
- } catch (IOException e) {
- caught = true;
- }
- assertTrue(caught);
- assertTrue(b.isEmpty());
-
- // Check field get for indentical rows
- Tuple f = Util.loadFlatTuple(new Tuple(input1.length), input1);
- for (int i = 0; i < 10; i++) {
- b.add(f);
- }
- assertTrue(b.getField(0).strval().equals("1"));
- assertTrue(b.cardinality() == 10);
-
- // Check field get for heterogenous rows
- f = Util.loadFlatTuple(new Tuple(input2.length), input2);
- b.add(f);
- caught = false;
- try {
- b.getField(0).strval().equals("1");
- } catch (IOException e) {
- caught = true;
- }
- assertTrue(caught);
-
- // check that notifications are sent
- b.clear();
- DataBag.notifyInterval = 2;
- Tuple g = Util.loadFlatTuple(new Tuple(input1.length), input1);
- for (int i = 0; i < 10; i++) {
- b.add(g);
- }
+ assertEquals("toString", "((3, 3.0), {(4), (mary had a little lamb)}, {hello=world, goodbye=all}, 42, 5000000000, 3.1415927, 2.99792458E8, true, hello, goodbye, NULL)", t.toString());
+ }
- Iterator it = b.content();
- while (it.hasNext()) it.next();
- assert(b.numNotifies == 5);
+ @Test
+ public void testTupleHashCode() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
+ Tuple t1 = tf.newTuple(2);
+ t1.set(0, new DataByteArray("hello world"));
+ t1.set(1, new Integer(1));
+
+ Tuple t2 = tf.newTuple(2);
+ t2.set(0, new DataByteArray("hello world"));
+ t2.set(1, new Integer(1));
+
+ assertEquals("same data", t1.hashCode(), t2.hashCode());
+
+ Tuple t3 = tf.newTuple(3);
+ t3.set(0, new DataByteArray("hello world"));
+ t3.set(1, new Integer(1));
+ t3.set(2, new Long(4));
+ assertFalse("different size", t1.hashCode() == t3.hashCode());
+
+ Tuple t4 = tf.newTuple(2);
+ t4.set(0, new DataByteArray("hello world"));
+ t4.set(1, new Integer(2));
+ assertFalse("same size, different data", t1.hashCode() == t4.hashCode());
+
+ // Make sure we can take the hash code of all the types.
+ Tuple t5 = giveMeOneOfEach();
+ t5.hashCode();
}
@Test
+ public void testTupleEquals() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
+ Tuple t1 = tf.newTuple();
+ Tuple t2 = tf.newTuple();
- public void testBigDataBagInMemory() throws Exception{
- testBigDataBag(5*1024*1024, 5000);
+ t1.append(new Integer(3));
+ t2.append(new Integer(3));
+
+ assertFalse("different object", t1.equals(new String()));
+
+ assertTrue("same data", t1.equals(t2));
+
+ t2 = tf.newTuple();
+ t2.append(new Integer(4));
+ assertFalse("different data", t1.equals(t2));
+
+ t2 = tf.newTuple();
+ t2.append(new Integer(3));
+ t2.append(new Integer(3));
+ assertFalse("different size", t1.equals(t2));
+ }
+
+ @Test
+ public void testTupleCompareTo() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
+ Tuple t1 = tf.newTuple();
+ Tuple t2 = tf.newTuple();
+
+ t1.append(new Integer(3));
+ t2.append(new Integer(3));
+
+ assertEquals("same data equal", 0, t1.compareTo(t2));
+
+ t2 = tf.newTuple();
+ t2.append(new Integer(2));
+ assertEquals("greater than tuple with lesser value", 1, t1.compareTo(t2));
+
+ t2 = tf.newTuple();
+ t2.append(new Integer(4));
+ assertEquals("less than tuple with greater value", -1, t1.compareTo(t2));
+
+ t2 = tf.newTuple();
+ t2.append(new Integer(3));
+ t2.append(new Integer(4));
+ assertEquals("less than bigger tuple", -1, t1.compareTo(t2));
+
+ t2 = tf.newTuple();
+ assertEquals("greater than smaller tuple", 1, t1.compareTo(t2));
}
- public void testBigDataBagOnDisk() throws Exception{
- Runtime.getRuntime().gc();
- testBigDataBag(Runtime.getRuntime().maxMemory() - 1*1024*1024, 1000000);
+ @Test
+ public void testByteArrayToString() throws Exception {
+ DataByteArray ba = new DataByteArray("hello world");
+
+ assertEquals("toString", "hello world", ba.toString());
}
- */
- private enum TestType {
- PRE_SORT,
- POST_SORT,
- PRE_DISTINCT,
- POST_DISTINCT,
- NONE
+ @Test
+ public void testByteArrayHashCode() throws Exception {
+ DataByteArray ba1 = new DataByteArray("hello world");
+ DataByteArray ba2 = new DataByteArray("hello world");
+ DataByteArray ba3 = new DataByteArray("goodbye world");
+
+ assertEquals("same data", ba1.hashCode(), ba2.hashCode());
+
+ assertFalse("different data", ba1.hashCode() == ba3.hashCode());
}
-
-
- /*
- private void testBigDataBag(long freeMemoryToMaintain, int numItems) throws Exception {
- BigDataBag.FREE_MEMORY_TO_MAINTAIN = freeMemoryToMaintain;
- Random r = new Random();
-
- for (TestType testType: TestType.values()){
- BigDataBag bag = BagFactory.getInstance().getNewBigBag();
-
- assertTrue(bag.isEmpty());
-
- if (testType == TestType.PRE_SORT)
- bag.sort();
- else if (testType == TestType.PRE_DISTINCT)
- bag.distinct();
-
- //generate data and add it to the bag
- for(int i = 0; i < numItems; i++) {
- Tuple t = new Tuple(1);
- t.setField(0, r.nextInt(numItems));
- bag.add(t);
- }
-
- assertFalse(bag.isEmpty());
-
- if (testType == TestType.POST_SORT)
- bag.sort();
- else if (testType == TestType.POST_DISTINCT)
- bag.distinct();
-
- if (testType == TestType.NONE)
- assertTrue(bag.cardinality() == numItems);
- checkContents(bag, numItems, testType);
- checkContents(bag, numItems, testType);
+ @Test
+ public void testByteArrayEquals() throws Exception {
+ DataByteArray ba1 = new DataByteArray("hello world");
+ DataByteArray ba2 = new DataByteArray("hello world");
+ DataByteArray ba3 = new DataByteArray("goodbye world");
- }
+ assertTrue("same data", ba1.equals(ba2));
+
+ assertFalse("different data", ba1.equals(ba3));
}
-
-
- private void checkContents(DataBag bag, int numItems, TestType testType) throws Exception{
- String last = "";
-
- DataBag.notifyInterval = 100;
-
- Iterator<Tuple> it = bag.content();
- int count = 0;
- while(it.hasNext()) {
- Tuple t = it.next();
- String next = t.getAtomField(0).strval();
- if (testType == TestType.POST_SORT || testType == TestType.PRE_SORT)
- assertTrue(last.compareTo(next)<=0);
- else if (testType == TestType.POST_DISTINCT || testType == TestType.PRE_DISTINCT)
- assertTrue(last.compareTo(next)<0);
- last = next;
- count++;
- }
-
- assertTrue(bag.cardinality() == count);
-
- if (testType != TestType.NONE)
- assertTrue(bag.numNotifies >= count/DataBag.notifyInterval);
+
+ @Test
+ public void testByteArrayCompareTo() throws Exception {
+ DataByteArray ba1 = new DataByteArray("hello world");
+ DataByteArray ba2 = new DataByteArray("hello world");
+ DataByteArray ba3 = new DataByteArray("goodbye world");
+
+ assertTrue("same data", ba1.compareTo(ba2) == 0);
+
+ assertFalse("lexically lower value less than",
+ ba3.compareTo(ba1) < 0);
+ assertFalse("lexically higher value greater than",
+ ba1.compareTo(ba3) > 0);
}
- */
+ private Tuple giveMeOneOfEach() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
+ Tuple t1 = tf.newTuple(11);
+ Tuple t2 = tf.newTuple(2);
+
+ t2.set(0, new Integer(3));
+ t2.set(1, new Float(3.0));
+
+ DataBag bag = BagFactory.getInstance().newDefaultBag();
+ bag.add(tf.newTuple(new Integer(4)));
+ bag.add(tf.newTuple(new String("mary had a little lamb")));
+
+ Map<Object, Object> map = new HashMap<Object, Object>(2);
+ map.put(new String("hello"), new String("world"));
+ map.put(new String("goodbye"), new String("all"));
+
+ t1.set(0, t2);
+ t1.set(1, bag);
+ t1.set(2, map);
+ t1.set(3, new Integer(42));
+ t1.set(4, new Long(5000000000L));
+ t1.set(5, new Float(3.141592654));
+ t1.set(6, new Double(2.99792458e8));
+ t1.set(7, new Boolean(true));
+ t1.set(8, new DataByteArray("hello"));
+ t1.set(9, new String("goodbye"));
+
+ return t1;
+ }
}