You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pig.apache.org by "Jonathan Coveney (JIRA)" <ji...@apache.org> on 2011/01/26 22:20:43 UTC
[jira] Created: (PIG-1826) Unexpected data type -1 found in stream
error
Unexpected data type -1 found in stream error
---------------------------------------------
Key: PIG-1826
URL: https://issues.apache.org/jira/browse/PIG-1826
Project: Pig
Issue Type: Bug
Affects Versions: 0.8.0
Environment: This is pig 0.8.0 on a linux box
Reporter: Jonathan Coveney
Attachments: numgraph.java
When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
package squeal.fun;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.io.IOException;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.WrappedIOException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.data.DataType;
import squeal.com.MutableInt;
public class numgraph extends EvalFunc<DataBag>{
TupleFactory mTupleFactory = TupleFactory.getInstance();
BagFactory mBagFactory = BagFactory.getInstance();
public DataBag exec(Tuple input) throws IOException {
try {
accumulate(input);
DataBag bag = getValue();
System.out.println(input.get(0).toString());
System.out.println(bag.toString());
return bag;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
public void accumulate(Tuple input) throws IOException {
try {
buildgraph(input);
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
//public void cleanup() { thegraph.clear(); }
public DataBag getValue() throws IOException {
try {
return thegraph.toBag();
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
Graph thegraph = null;
private class Graph {
Map<numpair, MutableInt> graph;
Graph() { graph = null; }
Graph(Map<numpair,MutableInt> gs) { graph = gs; }
Map<numpair,MutableInt> getGraph() { return graph; }
void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
void inc(numpair look) {
MutableInt val = graph.get(look);
if (val == null) {
val = new MutableInt();
graph.put(look,val);
} else {
val.inc();
}
}
void clear() { graph = null; }
@Override
public String toString() { return graph.toString(); }
void addPairsBag(DataBag c2s) throws IOException {
try {
List<String> c2list = new ArrayList<String>();
for (Tuple tup : c2s) {
String cur = (String)tup.get(0);
for (String ne : c2list)
inc(new numpair(ne, cur));
c2list.add(cur);
}
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
//This creates a databag in the form of (c2, c2, hits)
DataBag toBag() throws IOException {
try {
DataBag outBag = mBagFactory.newDefaultBag();
for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
List inList = new ArrayList();
Iterator<String> sIt = pairs.getKey().getPartsIt();
inList.add(sIt.next()); inList.add(sIt.next()); inList.add(pairs.getValue());
outBag.add(mTupleFactory.newTuple(inList));
}
return outBag;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
}
private class numpair {
Set<String> pair;
numpair(String p1, String p2) {
pair = new HashSet<String>(2,1);
pair.add(p1);
pair.add(p2);
}
Set<String> getPair() { return pair; }
Iterator<String> getPartsIt() { return pair.iterator(); }
@Override
public boolean equals(Object p) {
return p instanceof numpair && ((numpair)p).getPair().equals(pair);
}
@Override
public int hashCode() {
return pair.hashCode();
}
public String toString() { return pair.toString(); }
}
private void buildgraph(Tuple input) throws IOException {
if (input == null || input.size() == 0)
return;
try {
if (thegraph == null)
thegraph = new Graph(new HashMap<numpair,MutableInt>());
if (thegraph.getGraph() == null)
thegraph.setGraph(new HashMap<numpair, MutableInt>());
DataBag bag = (DataBag)input.get(0);
for (Tuple ne : bag)
thegraph.addPairsBag((DataBag)ne.get(0));
} catch (ExecException ee) {
throw ee;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
@Override
public Schema outputSchema(Schema input) {
try {
Schema bagSchema = new Schema();
bagSchema.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
bagSchema.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
bagSchema.add(new Schema.FieldSchema("hits",DataType.INTEGER));
return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), bagSchema, DataType.BAG));
} catch (Exception e) {
return null;
}
}
}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] [Resolved] (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Daniel Dai resolved PIG-1826.
-----------------------------
Resolution: Fixed
Hadoop Flags: [Reviewed]
Patch committed to both trunk and 0.9 branch.
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Commented: (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989790#comment-12989790 ]
Jonathan Coveney commented on PIG-1826:
---------------------------------------
(note that it should be 1 tab a, 1 tab b, etc...I hit enter hastily)
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13009232#comment-13009232 ]
Daniel Dai commented on PIG-1826:
---------------------------------
The error message can be improved. We can be more specific and say something like "MutableInt is not supported. Only Pig data types are allowed".
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "jiraposter@reviews.apache.org (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13026017#comment-13026017 ]
jiraposter@reviews.apache.org commented on PIG-1826:
----------------------------------------------------
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/670/
-----------------------------------------------------------
Review request for pig and thejas.
Summary
-------
See PIG-1826
This addresses bug PIG-1826.
https://issues.apache.org/jira/browse/PIG-1826
Diffs
-----
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/BinInterSedes.java 1096629
http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/MiniCluster.java 1096629
http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java 1096629
Diff: https://reviews.apache.org/r/670/diff
Testing
-------
Test-patch:
[exec] +1 overall.
[exec]
[exec] +1 @author. The patch does not contain any @author tags.
[exec]
[exec] +1 tests included. The patch appears to include 6 new or modified tests.
[exec]
[exec] +1 javadoc. The javadoc tool did not generate any warning messages.
[exec]
[exec] +1 javac. The applied patch does not increase the total number of javac compiler warnings.
[exec]
[exec] +1 findbugs. The patch does not introduce any new Findbugs warnings.
[exec]
[exec] +1 release audit. The applied patch does not increase the total number of release audit warnings.
Unit-test:
all pass.
Thanks,
Daniel
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Commented: (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Charles Ferreira Gonçalves (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12994386#comment-12994386 ]
Charles Ferreira Gonçalves commented on PIG-1826:
-------------------------------------------------
Hi Guys,
I'm adding another sample: the code, the script, and the input data.
If any help is needed on my part, count on me, OK?
Thanks!
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream
error
Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jonathan Coveney updated PIG-1826:
----------------------------------
Description: When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such. (was: When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
package squeal.fun;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.io.IOException;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.WrappedIOException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.data.DataType;
import squeal.com.MutableInt;
public class numgraph extends EvalFunc<DataBag>{
TupleFactory mTupleFactory = TupleFactory.getInstance();
BagFactory mBagFactory = BagFactory.getInstance();
public DataBag exec(Tuple input) throws IOException {
try {
accumulate(input);
DataBag bag = getValue();
System.out.println(input.get(0).toString());
System.out.println(bag.toString());
return bag;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
public void accumulate(Tuple input) throws IOException {
try {
buildgraph(input);
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
//public void cleanup() { thegraph.clear(); }
public DataBag getValue() throws IOException {
try {
return thegraph.toBag();
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
Graph thegraph = null;
private class Graph {
Map<numpair, MutableInt> graph;
Graph() { graph = null; }
Graph(Map<numpair,MutableInt> gs) { graph = gs; }
Map<numpair,MutableInt> getGraph() { return graph; }
void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
void inc(numpair look) {
MutableInt val = graph.get(look);
if (val == null) {
val = new MutableInt();
graph.put(look,val);
} else {
val.inc();
}
}
void clear() { graph = null; }
@Override
public String toString() { return graph.toString(); }
void addPairsBag(DataBag c2s) throws IOException {
try {
List<String> c2list = new ArrayList<String>();
for (Tuple tup : c2s) {
String cur = (String)tup.get(0);
for (String ne : c2list)
inc(new numpair(ne, cur));
c2list.add(cur);
}
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
//This creates a databag in the form of (c2, c2, hits)
DataBag toBag() throws IOException {
try {
DataBag outBag = mBagFactory.newDefaultBag();
for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
List inList = new ArrayList();
Iterator<String> sIt = pairs.getKey().getPartsIt();
inList.add(sIt.next()); inList.add(sIt.next()); inList.add(pairs.getValue());
outBag.add(mTupleFactory.newTuple(inList));
}
return outBag;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
}
private class numpair {
Set<String> pair;
numpair(String p1, String p2) {
pair = new HashSet<String>(2,1);
pair.add(p1);
pair.add(p2);
}
Set<String> getPair() { return pair; }
Iterator<String> getPartsIt() { return pair.iterator(); }
@Override
public boolean equals(Object p) {
return p instanceof numpair && ((numpair)p).getPair().equals(pair);
}
@Override
public int hashCode() {
return pair.hashCode();
}
public String toString() { return pair.toString(); }
}
private void buildgraph(Tuple input) throws IOException {
if (input == null || input.size() == 0)
return;
try {
if (thegraph == null)
thegraph = new Graph(new HashMap<numpair,MutableInt>());
if (thegraph.getGraph() == null)
thegraph.setGraph(new HashMap<numpair, MutableInt>());
DataBag bag = (DataBag)input.get(0);
for (Tuple ne : bag)
thegraph.addPairsBag((DataBag)ne.get(0));
} catch (ExecException ee) {
throw ee;
} catch (Exception e) {
int errCode = 31415;
String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
throw new ExecException(msg, errCode, PigException.BUG, e);
}
}
@Override
public Schema outputSchema(Schema input) {
try {
Schema bagSchema = new Schema();
bagSchema.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
bagSchema.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
bagSchema.add(new Schema.FieldSchema("hits",DataType.INTEGER));
return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), bagSchema, DataType.BAG));
} catch (Exception e) {
return null;
}
}
})
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream
error
Posted by "Charles Ferreira Gonçalves (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Charles Ferreira Gonçalves updated PIG-1826:
--------------------------------------------
Attachment: PIG-1826.tar.gz
Here are the files to test with, to help with the issue!
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Updated] (PIG-1826) Unexpected data type -1 found in stream
error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Daniel Dai updated PIG-1826:
----------------------------
Fix Version/s: 0.9.0
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Updated] (PIG-1826) Unexpected data type -1 found in stream
error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Daniel Dai updated PIG-1826:
----------------------------
Attachment: PIG-1826-1.patch
PIG-1826-1.patch fixes the error message. It also piggybacks a change to the number of retries in Hadoop: I decreased this number from 4 to 1 to speed up the unit tests.
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Commented: (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989014#comment-12989014 ]
Daniel Dai commented on PIG-1826:
---------------------------------
What is MutableInt in UDF? What is your input file?
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream
error
Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jonathan Coveney updated PIG-1826:
----------------------------------
Attachment: numgraph.java
This is the UDF I made that it fails on
The form of the script is
-- Make the jar that contains the UDF available to the script.
register /path/to/myudf.jar;
-- Read test.txt as two chararray columns, a and b.
A = LOAD 'test.txt' as (a:chararray, b:chararray);
-- Group the rows by the value of column a.
B = GROUP A BY a;
-- For each group keep only the b values, as one bag per group.
C = FOREACH B GENERATE A.b;
-- Put all of those per-group bags into a single group.
D = GROUP C ALL;
-- Hand the resulting bag of bags to the UDF.
E = FOREACH D GENERATE myudf.fun.udf(C.b);
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
> package squeal.fun;
> import java.util.Iterator;
> import java.util.List;
> import java.util.ArrayList;
> import java.util.Map;
> import java.util.HashMap;
> import java.util.Set;
> import java.util.HashSet;
> import java.io.IOException;
> import org.apache.pig.PigException;
> import org.apache.pig.backend.executionengine.ExecException;
> import org.apache.pig.EvalFunc;
> import org.apache.pig.data.Tuple;
> import org.apache.pig.data.DataBag;
> import org.apache.pig.data.BagFactory;
> import org.apache.pig.data.TupleFactory;
> import org.apache.pig.impl.util.WrappedIOException;
> import org.apache.pig.impl.logicalLayer.schema.Schema;
> import org.apache.pig.data.DataType;
> import squeal.com.MutableInt;
> public class numgraph extends EvalFunc<DataBag>{
>
> TupleFactory mTupleFactory = TupleFactory.getInstance();
> BagFactory mBagFactory = BagFactory.getInstance();
> public DataBag exec(Tuple input) throws IOException {
> try {
> accumulate(input);
> DataBag bag = getValue();
> System.out.println(input.get(0).toString());
> System.out.println(bag.toString());
> return bag;
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> public void accumulate(Tuple input) throws IOException {
> try {
> buildgraph(input);
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> //public void cleanup() { thegraph.clear(); }
> public DataBag getValue() throws IOException {
> try {
> return thegraph.toBag();
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> Graph thegraph = null;
> private class Graph {
> Map<numpair, MutableInt> graph;
> Graph() { graph = null; }
> Graph(Map<numpair,MutableInt> gs) { graph = gs; }
> Map<numpair,MutableInt> getGraph() { return graph; }
> void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
>
> void inc(numpair look) {
> MutableInt val = graph.get(look);
> if (val == null) {
> val = new MutableInt();
> graph.put(look,val);
> } else {
> val.inc();
> }
> }
> void clear() { graph = null; }
> @Override
> public String toString() { return graph.toString(); }
> void addPairsBag(DataBag c2s) throws IOException {
> try {
> List<String> c2list = new ArrayList<String>();
> for (Tuple tup : c2s) {
> String cur = (String)tup.get(0);
> for (String ne : c2list)
> inc(new numpair(ne, cur));
> c2list.add(cur);
> }
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> //This creates a databag in the form of (c2, c2, hits)
> DataBag toBag() throws IOException {
> try {
> DataBag outBag = mBagFactory.newDefaultBag();
> for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
> List inList = new ArrayList();
> Iterator<String> sIt = pairs.getKey().getPartsIt();
> inList.add(sIt.next()); inList.add(sIt.next()); inList.add(pairs.getValue());
> outBag.add(mTupleFactory.newTuple(inList));
> }
> return outBag;
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> }
>
> private class numpair {
> Set<String> pair;
> numpair(String p1, String p2) {
> pair = new HashSet<String>(2,1);
> pair.add(p1);
> pair.add(p2);
> }
> Set<String> getPair() { return pair; }
> Iterator<String> getPartsIt() { return pair.iterator(); }
>
> @Override
> public boolean equals(Object p) {
> return p instanceof numpair && ((numpair)p).getPair().equals(pair);
> }
>
> @Override
> public int hashCode() {
> return pair.hashCode();
> }
> public String toString() { return pair.toString(); }
> }
> private void buildgraph(Tuple input) throws IOException {
> if (input == null || input.size() == 0)
> return;
> try {
> if (thegraph == null)
> thegraph = new Graph(new HashMap<numpair,MutableInt>());
> if (thegraph.getGraph() == null)
> thegraph.setGraph(new HashMap<numpair, MutableInt>());
> DataBag bag = (DataBag)input.get(0);
> for (Tuple ne : bag)
> thegraph.addPairsBag((DataBag)ne.get(0));
> } catch (ExecException ee) {
> throw ee;
> } catch (Exception e) {
> int errCode = 31415;
> String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
> throw new ExecException(msg, errCode, PigException.BUG, e);
> }
> }
> @Override
> public Schema outputSchema(Schema input) {
> try {
> Schema bagSchema = new Schema();
> bagSchema.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
> bagSchema.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
> bagSchema.add(new Schema.FieldSchema("hits",DataType.INTEGER));
> return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), bagSchema, DataType.BAG));
> } catch (Exception e) {
> return null;
> }
> }
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] [Commented] (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "jiraposter@reviews.apache.org (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13026055#comment-13026055 ]
jiraposter@reviews.apache.org commented on PIG-1826:
----------------------------------------------------
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/670/#review594
-----------------------------------------------------------
Ship it!
+1
- thejas
On 2011-04-27 21:28:47, Daniel Dai wrote:
bq.
bq. -----------------------------------------------------------
bq. This is an automatically generated e-mail. To reply, visit:
bq. https://reviews.apache.org/r/670/
bq. -----------------------------------------------------------
bq.
bq. (Updated 2011-04-27 21:28:47)
bq.
bq.
bq. Review request for pig and thejas.
bq.
bq.
bq. Summary
bq. -------
bq.
bq. See PIG-1826
bq.
bq.
bq. This addresses bug PIG-1826.
bq. https://issues.apache.org/jira/browse/PIG-1826
bq.
bq.
bq. Diffs
bq. -----
bq.
bq. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/BinInterSedes.java 1096629
bq. http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/MiniCluster.java 1096629
bq. http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java 1096629
bq.
bq. Diff: https://reviews.apache.org/r/670/diff
bq.
bq.
bq. Testing
bq. -------
bq.
bq. Test-patch:
bq. [exec] +1 overall.
bq. [exec]
bq. [exec] +1 @author. The patch does not contain any @author tags.
bq. [exec]
bq. [exec] +1 tests included. The patch appears to include 6 new or modified tests.
bq. [exec]
bq. [exec] +1 javadoc. The javadoc tool did not generate any warning messages.
bq. [exec]
bq. [exec] +1 javac. The applied patch does not increase the total number of javac compiler warnings.
bq. [exec]
bq. [exec] +1 findbugs. The patch does not introduce any new Findbugs warnings.
bq. [exec]
bq. [exec] +1 release audit. The applied patch does not increase the total number of release audit warnings.
bq.
bq. Unit-test:
bq. all pass.
bq.
bq.
bq. Thanks,
bq.
bq. Daniel
bq.
bq.
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Commented: (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989789#comment-12989789 ]
Jonathan Coveney commented on PIG-1826:
---------------------------------------
MutableInt is defined as such:
package squeal.com;
/**
 * Minimal mutable integer holder.
 *
 * <p>Allows a counter stored as a {@code Map} value to be incremented in place, so the
 * caller does not have to {@code put} the updated value back after each {@code get}.
 */
public class MutableInt {
    /** Current value of the counter (package-private, as originally declared). */
    int value;

    /** Creates a counter whose initial value is 1. */
    public MutableInt() {
        value = 1;
    }

    /**
     * Creates a counter with the given initial value.
     *
     * @param val the initial value
     */
    public MutableInt(int val) {
        value = val;
    }

    /** Increments the value by one. */
    public void inc() {
        ++value;
    }

    /**
     * Adds {@code val} to the current value.
     *
     * @param val the amount to add (may be negative)
     */
    public void inc(int val) {
        value += val;
    }

    /**
     * Returns the current value.
     *
     * @return the current value
     */
    public int get() {
        return value;
    }

    /**
     * Returns the decimal string form of the current value.
     *
     * @return the value formatted as a base-10 string
     */
    @Override
    public String toString() {
        // Integer.toString avoids the deprecated Integer(int) constructor and the needless boxing.
        return Integer.toString(value);
    }
}
Just a dumb wrapper class to avoid having to do another put after getting from a Map.
It is failing on a trivial example:
1 a
1 b
1 c
2 a
2 b
2 c
3 a
3 b
3 c
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] Commented: (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13008743#comment-13008743 ]
Daniel Dai commented on PIG-1826:
---------------------------------
The problem is that Pig doesn't understand MutableInt. All data fed by a UDF should be of Pig-compatible data types.
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Assigned] (PIG-1826) Unexpected data type -1 found in
stream error
Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Daniel Dai reassigned PIG-1826:
-------------------------------
Assignee: Daniel Dai
> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
> Key: PIG-1826
> URL: https://issues.apache.org/jira/browse/PIG-1826
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.8.0
> Environment: This is pig 0.8.0 on a linux box
> Reporter: Jonathan Coveney
> Assignee: Daniel Dai
> Fix For: 0.9.0
>
> Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira