You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@pig.apache.org by Mohit Anchlia <mo...@gmail.com> on 2012/09/09 01:14:32 UTC
Understanding evalfunc schema
I am trying to understand how I can add schema such that I get 2 or more
tuples in a bag. Is this the right way of doing this? For some reason the
second tuple_of_token1 always comes out null and both tuples that I add
show up in "tuple_of_tokens" itself.
@Override
public DataBag exec(Tuple input) throws IOException {
DataBag output = mBagFactory.newDefaultBag();
try {
Object o = input.get(1);
if (!(o instanceof String)) {
throw new IOException(
"Expected document input to be chararray, but got "
+ o.getClass().getName());
}
Object o1 = input.get(0);
if (!(o1 instanceof Long)) {
throw new IOException("Expected input to be long, but got "
+ o.getClass().getName());
}
String json = (String) o;
WebEvent we = ModelParser.convertJsonStringToObj(json);
output.add(mTupleFactory.newTuple("1," + we.getAccountOwner() + ","
+ we.getTrackingCodeVersion()));
output.add(mTupleFactory.newTuple("2," + we.getAccountOwner() + ","
+ we.getTrackingCodeVersion()));
} catch (ExecException ee) {
log.error("Failed to Process ", ee);
throw ee;
}
return output;
}
@Override
public Schema outputSchema(Schema input) {
try {
Schema.FieldSchema tokenFs = new Schema.FieldSchema("token",
DataType.CHARARRAY);
Schema tupleSchema = new Schema(tokenFs);
Schema.FieldSchema tupleFs;
tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema,
DataType.TUPLE);
Schema.FieldSchema tokenFs1 = new Schema.FieldSchema("token1",
DataType.CHARARRAY);
Schema tupleSchema1 = new Schema(tokenFs1);
Schema.FieldSchema tupleFs1;
tupleFs1 = new Schema.FieldSchema("tuple_of_tokens1", tupleSchema1,
DataType.TUPLE);
List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
fields.add(tupleFs);
fields.add(tupleFs1);
Schema bagSchema = new Schema(fields);
bagSchema.setTwoLevelAccessRequired(true);
Schema.FieldSchema bagFs = new Schema.FieldSchema(
"bag_of_tokenTuples", bagSchema, DataType.BAG);
return new Schema(bagFs);
} catch (Exception e) {
return null;
}
}