You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pig.apache.org by "Jonathan Coveney (JIRA)" <ji...@apache.org> on 2011/01/26 22:20:43 UTC

[jira] Created: (PIG-1826) Unexpected data type -1 found in stream error

Unexpected data type -1 found in stream error
---------------------------------------------

                 Key: PIG-1826
                 URL: https://issues.apache.org/jira/browse/PIG-1826
             Project: Pig
          Issue Type: Bug
    Affects Versions: 0.8.0
         Environment: This is pig 0.8.0 on a linux box
            Reporter: Jonathan Coveney
         Attachments: numgraph.java

When running the attached UDF I get the error in the title. By inserting printlns extensively, I confirmed that the script functions properly and returns a DataBag, but for whatever reason Pig does not detect it as such.

package squeal.fun;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.io.IOException;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.WrappedIOException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.data.DataType;
import squeal.com.MutableInt;

public class numgraph extends EvalFunc<DataBag>{
	
	TupleFactory mTupleFactory = TupleFactory.getInstance();
	BagFactory mBagFactory = BagFactory.getInstance();

	public DataBag exec(Tuple input) throws IOException {
		try {
			accumulate(input);
			DataBag bag = getValue();
			System.out.println(input.get(0).toString());
			System.out.println(bag.toString());
			return bag;

		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	public void accumulate(Tuple input) throws IOException {
		try {
			buildgraph(input);
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	//public void cleanup() { thegraph.clear(); }

	public DataBag getValue() throws IOException {
		try {
			return thegraph.toBag();
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	Graph thegraph = null;
	private class Graph {
		Map<numpair, MutableInt> graph;

		Graph() { graph = null; }
		Graph(Map<numpair,MutableInt> gs) { graph = gs; }

		Map<numpair,MutableInt> getGraph() { return graph; }
		void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
		
		void inc(numpair look) {
			MutableInt val = graph.get(look);
			if (val == null) {
				val = new MutableInt();
				graph.put(look,val);
			} else {
				val.inc();
			}
		}

		void clear() { graph = null; }

		@Override
		public String toString() { return graph.toString(); }

		void addPairsBag(DataBag c2s) throws IOException {
			try {
				List<String> c2list = new ArrayList<String>();
				for (Tuple tup : c2s) {
					String cur = (String)tup.get(0);
					for (String ne : c2list)
						inc(new numpair(ne, cur));
					c2list.add(cur);
				}
			} catch (Exception e) {
				int errCode = 31415;
				String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
				throw new ExecException(msg, errCode, PigException.BUG, e);
			}
		}

		//This creates a databag in the form of (c2, c2, hits)
		DataBag toBag() throws IOException {
			try {
				DataBag outBag = mBagFactory.newDefaultBag();
				for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
					List inList = new ArrayList();
					Iterator<String> sIt = pairs.getKey().getPartsIt();
					inList.add(sIt.next()); inList.add(sIt.next());	inList.add(pairs.getValue());
					outBag.add(mTupleFactory.newTuple(inList));
				}
				return outBag;
			} catch (Exception e) {
				int errCode = 31415;
				String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
				throw new ExecException(msg, errCode, PigException.BUG, e);
			}
		}
	}
	
	private class numpair {
		Set<String> pair;

		numpair(String p1, String p2) {
			pair = new HashSet<String>(2,1);
			pair.add(p1);
			pair.add(p2);
		}
		Set<String> getPair() { return pair; }
		Iterator<String> getPartsIt() { return pair.iterator(); }
		
		@Override
		public boolean equals(Object p) {
			return p instanceof numpair && ((numpair)p).getPair().equals(pair); 
		}
		
		@Override
		public int hashCode() {
			return pair.hashCode();
		}

		public String toString() { return pair.toString(); }
	}

	private void buildgraph(Tuple input) throws IOException {
		if (input == null || input.size() == 0) 
			return;
		try {
			if (thegraph == null)
				thegraph = new Graph(new HashMap<numpair,MutableInt>());
			if (thegraph.getGraph() == null)
				thegraph.setGraph(new HashMap<numpair, MutableInt>());
			DataBag bag = (DataBag)input.get(0);
			for (Tuple ne : bag)
				thegraph.addPairsBag((DataBag)ne.get(0));
		} catch (ExecException ee) {
			throw ee;
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}


	@Override
	public Schema outputSchema(Schema input) {
		try {
			Schema bagSchema = new Schema();
			bagSchema.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
			bagSchema.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
			bagSchema.add(new Schema.FieldSchema("hits",DataType.INTEGER));
			return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), bagSchema, DataType.BAG)); 
		} catch (Exception e) {
			return null;
		}
	}
}

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] [Resolved] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Daniel Dai resolved PIG-1826.
-----------------------------

      Resolution: Fixed
    Hadoop Flags: [Reviewed]

Patch committed to both trunk and 0.9 branch.

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] Commented: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989790#comment-12989790 ] 

Jonathan Coveney commented on PIG-1826:
---------------------------------------

(note that it should be 1 tab a, 1 tab b, etc...I hit enter hastily)

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] [Commented] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13009232#comment-13009232 ] 

Daniel Dai commented on PIG-1826:
---------------------------------

The error message can be improved. We can be more specific to say something like "MutableInt is not supported. Only Pig data type is allowed".

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] [Commented] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "jiraposter@reviews.apache.org (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13026017#comment-13026017 ] 

jiraposter@reviews.apache.org commented on PIG-1826:
----------------------------------------------------


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/670/
-----------------------------------------------------------

Review request for pig and thejas.


Summary
-------

See PIG-1826


This addresses bug PIG-1826.
    https://issues.apache.org/jira/browse/PIG-1826


Diffs
-----

  http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/BinInterSedes.java 1096629 
  http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/MiniCluster.java 1096629 
  http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java 1096629 

Diff: https://reviews.apache.org/r/670/diff


Testing
-------

Test-patch:
     [exec] +1 overall.  
     [exec] 
     [exec]     +1 @author.  The patch does not contain any @author tags.
     [exec] 
     [exec]     +1 tests included.  The patch appears to include 6 new or modified tests.
     [exec] 
     [exec]     +1 javadoc.  The javadoc tool did not generate any warning messages.
     [exec] 
     [exec]     +1 javac.  The applied patch does not increase the total number of javac compiler warnings.
     [exec] 
     [exec]     +1 findbugs.  The patch does not introduce any new Findbugs warnings.
     [exec] 
     [exec]     +1 release audit.  The applied patch does not increase the total number of release audit warnings.

Unit-test:
    all pass.


Thanks,

Daniel



> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] Commented: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Charles Ferreira Gonçalves (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12994386#comment-12994386 ] 

Charles Ferreira Gonçalves commented on PIG-1826:
-------------------------------------------------

Hi Guys, 

Adding another sample: the code, the script, and the input data.
If any help is needed on my part, count on me!

Thanks!

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

       

[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jonathan Coveney updated PIG-1826:
----------------------------------

    Description: When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.  (was: When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

package squeal.fun;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.io.IOException;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.WrappedIOException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.data.DataType;
import squeal.com.MutableInt;

// Pig EvalFunc that counts pairs of strings found together in the inner bags of
// its input and returns them as a bag of (c2, c2, hits) tuples.
public class numgraph extends EvalFunc<DataBag>{
	
	TupleFactory mTupleFactory = TupleFactory.getInstance();
	BagFactory mBagFactory = BagFactory.getInstance();

	// Accumulates the input into thegraph, prints the first input field and the
	// resulting bag to stdout, and returns the bag. Any failure is rethrown as an
	// ExecException with error code 31415.
	public DataBag exec(Tuple input) throws IOException {
		try {
			accumulate(input);
			DataBag bag = getValue();
			System.out.println(input.get(0).toString());
			System.out.println(bag.toString());
			return bag;

		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	// Adds the pairs found in input to thegraph (delegates to buildgraph).
	public void accumulate(Tuple input) throws IOException {
		try {
			buildgraph(input);
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	//public void cleanup() { thegraph.clear(); }

	// Converts the accumulated graph to a bag. thegraph is still null if
	// buildgraph returned early, in which case the resulting exception is wrapped.
	public DataBag getValue() throws IOException {
		try {
			return thegraph.toBag();
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}

	// State shared by accumulate/getValue; it is not reset anywhere in this class.
	Graph thegraph = null;
	// Map from a pair of strings to its counter.
	private class Graph {
		Map<numpair, MutableInt> graph;

		Graph() { graph = null; }
		Graph(Map<numpair,MutableInt> gs) { graph = gs; }

		Map<numpair,MutableInt> getGraph() { return graph; }
		void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
		
		// First sighting stores a fresh MutableInt; later sightings call inc() on it.
		// NOTE(review): the initial value of a new MutableInt is not visible here.
		void inc(numpair look) {
			MutableInt val = graph.get(look);
			if (val == null) {
				val = new MutableInt();
				graph.put(look,val);
			} else {
				val.inc();
			}
		}

		void clear() { graph = null; }

		@Override
		public String toString() { return graph.toString(); }

		// Pairs the first field of each tuple with the first field of every tuple
		// that came before it in the same bag, and counts each such pair.
		void addPairsBag(DataBag c2s) throws IOException {
			try {
				List<String> c2list = new ArrayList<String>();
				for (Tuple tup : c2s) {
					String cur = (String)tup.get(0);
					for (String ne : c2list)
						inc(new numpair(ne, cur));
					c2list.add(cur);
				}
			} catch (Exception e) {
				int errCode = 31415;
				String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
				throw new ExecException(msg, errCode, PigException.BUG, e);
			}
		}

		//This creates a databag in the form of (c2, c2, hits)
		// NOTE(review): the third field added below is the MutableInt object itself,
		// while outputSchema declares "hits" as DataType.INTEGER.
		// NOTE(review): next() is called twice on the pair's iterator; a pair built
		// from two equal strings holds a single element in its HashSet.
		DataBag toBag() throws IOException {
			try {
				DataBag outBag = mBagFactory.newDefaultBag();
				for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
					List inList = new ArrayList();
					Iterator<String> sIt = pairs.getKey().getPartsIt();
					inList.add(sIt.next()); inList.add(sIt.next());	inList.add(pairs.getValue());
					outBag.add(mTupleFactory.newTuple(inList));
				}
				return outBag;
			} catch (Exception e) {
				int errCode = 31415;
				String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
				throw new ExecException(msg, errCode, PigException.BUG, e);
			}
		}
	}
	
	// Unordered pair of strings backed by a HashSet; equality and hash code are
	// those of the set, so (a, b) and (b, a) are the same key.
	private class numpair {
		Set<String> pair;

		numpair(String p1, String p2) {
			pair = new HashSet<String>(2,1);
			pair.add(p1);
			pair.add(p2);
		}
		Set<String> getPair() { return pair; }
		Iterator<String> getPartsIt() { return pair.iterator(); }
		
		@Override
		public boolean equals(Object p) {
			return p instanceof numpair && ((numpair)p).getPair().equals(pair); 
		}
		
		@Override
		public int hashCode() {
			return pair.hashCode();
		}

		public String toString() { return pair.toString(); }
	}

	// Lazily creates thegraph, then treats field 0 of input as a bag and field 0
	// of each of its tuples as an inner bag whose pairs are counted.
	// Returns without doing anything when input is null or has no fields.
	private void buildgraph(Tuple input) throws IOException {
		if (input == null || input.size() == 0) 
			return;
		try {
			if (thegraph == null)
				thegraph = new Graph(new HashMap<numpair,MutableInt>());
			if (thegraph.getGraph() == null)
				thegraph.setGraph(new HashMap<numpair, MutableInt>());
			DataBag bag = (DataBag)input.get(0);
			for (Tuple ne : bag)
				thegraph.addPairsBag((DataBag)ne.get(0));
		} catch (ExecException ee) {
			throw ee;
		} catch (Exception e) {
			int errCode = 31415;
			String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
			throw new ExecException(msg, errCode, PigException.BUG, e);
		}
	}


	// Declares the output as a bag with fields c2_1 and c2_2 (CHARARRAY) and
	// hits (INTEGER); returns null if building the schema throws.
	@Override
	public Schema outputSchema(Schema input) {
		try {
			Schema bagSchema = new Schema();
			bagSchema.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
			bagSchema.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
			bagSchema.add(new Schema.FieldSchema("hits",DataType.INTEGER));
			return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), bagSchema, DataType.BAG)); 
		} catch (Exception e) {
			return null;
		}
	}
})

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Charles Ferreira Gonçalves (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Charles Ferreira Gonçalves updated PIG-1826:
--------------------------------------------

    Attachment: PIG-1826.tar.gz

Attached are the files needed to reproduce and test the issue.

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

       

[jira] [Updated] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Daniel Dai updated PIG-1826:
----------------------------

    Fix Version/s: 0.9.0

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] [Updated] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Daniel Dai updated PIG-1826:
----------------------------

    Attachment: PIG-1826-1.patch

PIG-1826-1.patch fixes the error message. It also piggybacks a change to the number of retries in Hadoop: I decreased this number from 4 to 1 to speed up the unit tests.

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] Commented: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989014#comment-12989014 ] 

Daniel Dai commented on PIG-1826:
---------------------------------

What is MutableInt in UDF? What is your input file?

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] Updated: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jonathan Coveney updated PIG-1826:
----------------------------------

    Attachment: numgraph.java

This is the UDF I wrote that triggers the failure.

The form of the script is

register /path/to/myudf.jar;
A = LOAD 'test.txt' as (a:chararray, b:chararray);
B = GROUP A BY a;
C = FOREACH B GENERATE A.b;
D = GROUP C ALL;
E = FOREACH D GENERATE myudf.fun.udf(C.b);

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.
> package squeal.fun;
> import java.util.Iterator;
> import java.util.List;
> import java.util.ArrayList;
> import java.util.Map;
> import java.util.HashMap;
> import java.util.Set;
> import java.util.HashSet;
> import java.io.IOException;
> import org.apache.pig.PigException;
> import org.apache.pig.backend.executionengine.ExecException;
> import org.apache.pig.EvalFunc;
> import org.apache.pig.data.Tuple;
> import org.apache.pig.data.DataBag;
> import org.apache.pig.data.BagFactory;
> import org.apache.pig.data.TupleFactory;
> import org.apache.pig.impl.util.WrappedIOException;
> import org.apache.pig.impl.logicalLayer.schema.Schema;
> import org.apache.pig.data.DataType;
> import squeal.com.MutableInt;
> public class numgraph extends EvalFunc<DataBag>{
> 	
> 	TupleFactory mTupleFactory = TupleFactory.getInstance();
> 	BagFactory mBagFactory = BagFactory.getInstance();
> 	public DataBag exec(Tuple input) throws IOException {
> 		try {
> 			accumulate(input);
> 			DataBag bag = getValue();
> 			System.out.println(input.get(0).toString());
> 			System.out.println(bag.toString());
> 			return bag;
> 		} catch (Exception e) {
> 			int errCode = 31415;
> 			String msg = "Error while accumulating graphs (exec) " + this.getClass().getSimpleName();
> 			throw new ExecException(msg, errCode, PigException.BUG, e);
> 		}
> 	}
> 	public void accumulate(Tuple input) throws IOException {
> 		try {
> 			buildgraph(input);
> 		} catch (Exception e) {
> 			int errCode = 31415;
> 			String msg = "Error while accumulating graphs (accumulate) " + this.getClass().getSimpleName();
> 			throw new ExecException(msg, errCode, PigException.BUG, e);
> 		}
> 	}
> 	//public void cleanup() { thegraph.clear(); }
> 	public DataBag getValue() throws IOException {
> 		try {
> 			return thegraph.toBag();
> 		} catch (Exception e) {
> 			int errCode = 31415;
> 			String msg = "Error while accumulating graphs (getValue) " + this.getClass().getSimpleName();
> 			throw new ExecException(msg, errCode, PigException.BUG, e);
> 		}
> 	}
> 	Graph thegraph = null;
> 	private class Graph {
> 		Map<numpair, MutableInt> graph;
> 		Graph() { graph = null; }
> 		Graph(Map<numpair,MutableInt> gs) { graph = gs; }
> 		Map<numpair,MutableInt> getGraph() { return graph; }
> 		void setGraph(Map<numpair,MutableInt> gs) { graph = gs; }
> 		
> 		void inc(numpair look) {
> 			MutableInt val = graph.get(look);
> 			if (val == null) {
> 				val = new MutableInt();
> 				graph.put(look,val);
> 			} else {
> 				val.inc();
> 			}
> 		}
> 		void clear() { graph = null; }
> 		@Override
> 		public String toString() { return graph.toString(); }
> 		void addPairsBag(DataBag c2s) throws IOException {
> 			try {
> 				List<String> c2list = new ArrayList<String>();
> 				for (Tuple tup : c2s) {
> 					String cur = (String)tup.get(0);
> 					for (String ne : c2list)
> 						inc(new numpair(ne, cur));
> 					c2list.add(cur);
> 				}
> 			} catch (Exception e) {
> 				int errCode = 31415;
> 				String msg = "Error while accumulating graphs (addPairsBag) " + this.getClass().getSimpleName();
> 				throw new ExecException(msg, errCode, PigException.BUG, e);
> 			}
> 		}
> 		//This creates a databag in the form of (c2, c2, hits)
> 		DataBag toBag() throws IOException {
> 			try {
> 				DataBag outBag = mBagFactory.newDefaultBag();
> 				for (Map.Entry<numpair,MutableInt> pairs : graph.entrySet()) {
> 					List inList = new ArrayList();
> 					Iterator<String> sIt = pairs.getKey().getPartsIt();
> 					inList.add(sIt.next()); inList.add(sIt.next());	inList.add(pairs.getValue());
> 					outBag.add(mTupleFactory.newTuple(inList));
> 				}
> 				return outBag;
> 			} catch (Exception e) {
> 				int errCode = 31415;
> 				String msg = "Error while accumulating graphs (toBag) " + this.getClass().getSimpleName();
> 				throw new ExecException(msg, errCode, PigException.BUG, e);
> 			}
> 		}
> 	}
> 	
> 	private class numpair {
> 		Set<String> pair;
> 		numpair(String p1, String p2) {
> 			pair = new HashSet<String>(2,1);
> 			pair.add(p1);
> 			pair.add(p2);
> 		}
> 		Set<String> getPair() { return pair; }
> 		Iterator<String> getPartsIt() { return pair.iterator(); }
> 		
> 		@Override
> 		public boolean equals(Object p) {
> 			return p instanceof numpair && ((numpair)p).getPair().equals(pair); 
> 		}
> 		
> 		@Override
> 		public int hashCode() {
> 			return pair.hashCode();
> 		}
> 		public String toString() { return pair.toString(); }
> 	}
> 	/**
> 	 * Feeds the contents of one input tuple into the shared graph.
> 	 *
> 	 * Field 0 of the input is cast to a bag; field 0 of each tuple in that bag
> 	 * is cast to a bag in turn and handed to Graph.addPairsBag. The graph and
> 	 * its backing map are created on first use. A null or empty input is ignored.
> 	 *
> 	 * @param input the tuple to accumulate
> 	 * @throws IOException an ExecException; one raised inside is rethrown as-is,
> 	 *         any other exception is wrapped with error code 31415
> 	 */
> 	private void buildgraph(Tuple input) throws IOException {
> 		boolean nothingToDo = (input == null) || (input.size() == 0);
> 		if (nothingToDo) {
> 			return;
> 		}
> 		try {
> 			// Create the graph and its map lazily.
> 			if (thegraph == null) {
> 				thegraph = new Graph(new HashMap<numpair,MutableInt>());
> 			}
> 			if (thegraph.getGraph() == null) {
> 				thegraph.setGraph(new HashMap<numpair, MutableInt>());
> 			}
> 			DataBag outer = (DataBag) input.get(0);
> 			Iterator<Tuple> rows = outer.iterator();
> 			while (rows.hasNext()) {
> 				Tuple row = rows.next();
> 				DataBag inner = (DataBag) row.get(0);
> 				thegraph.addPairsBag(inner);
> 			}
> 		} catch (ExecException ee) {
> 			// Already in the expected form; do not wrap a second time.
> 			throw ee;
> 		} catch (Exception e) {
> 			int errCode = 31415;
> 			String msg = "Error while accumulating graphs in " + this.getClass().getSimpleName();
> 			throw new ExecException(msg, errCode, PigException.BUG, e);
> 		}
> 	}
> 	/**
> 	 * Describes the UDF output: a bag whose tuples have the fields
> 	 * c2_1 (chararray), c2_2 (chararray) and hits (integer).
> 	 *
> 	 * @param input the input schema, passed on to getSchemaName
> 	 * @return the bag schema, or null if building it throws
> 	 */
> 	@Override
> 	public Schema outputSchema(Schema input) {
> 		try {
> 			Schema tupleFields = new Schema();
> 			tupleFields.add(new Schema.FieldSchema("c2_1",DataType.CHARARRAY));
> 			tupleFields.add(new Schema.FieldSchema("c2_2",DataType.CHARARRAY));
> 			tupleFields.add(new Schema.FieldSchema("hits",DataType.INTEGER));
> 			String bagName = getSchemaName(this.getClass().getName().toLowerCase(), input);
> 			Schema.FieldSchema bagField = new Schema.FieldSchema(bagName, tupleFields, DataType.BAG);
> 			return new Schema(bagField);
> 		} catch (Exception e) {
> 			return null;
> 		}
> 	}
> }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] [Commented] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "jiraposter@reviews.apache.org (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13026055#comment-13026055 ] 

jiraposter@reviews.apache.org commented on PIG-1826:
----------------------------------------------------


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/670/#review594
-----------------------------------------------------------

Ship it!


+1

- thejas


On 2011-04-27 21:28:47, Daniel Dai wrote:
bq.  
bq.  -----------------------------------------------------------
bq.  This is an automatically generated e-mail. To reply, visit:
bq.  https://reviews.apache.org/r/670/
bq.  -----------------------------------------------------------
bq.  
bq.  (Updated 2011-04-27 21:28:47)
bq.  
bq.  
bq.  Review request for pig and thejas.
bq.  
bq.  
bq.  Summary
bq.  -------
bq.  
bq.  See PIG-1826
bq.  
bq.  
bq.  This addresses bug PIG-1826.
bq.      https://issues.apache.org/jira/browse/PIG-1826
bq.  
bq.  
bq.  Diffs
bq.  -----
bq.  
bq.    http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/BinInterSedes.java 1096629 
bq.    http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/MiniCluster.java 1096629 
bq.    http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java 1096629 
bq.  
bq.  Diff: https://reviews.apache.org/r/670/diff
bq.  
bq.  
bq.  Testing
bq.  -------
bq.  
bq.  Test-patch:
bq.       [exec] +1 overall.  
bq.       [exec] 
bq.       [exec]     +1 @author.  The patch does not contain any @author tags.
bq.       [exec] 
bq.       [exec]     +1 tests included.  The patch appears to include 6 new or modified tests.
bq.       [exec] 
bq.       [exec]     +1 javadoc.  The javadoc tool did not generate any warning messages.
bq.       [exec] 
bq.       [exec]     +1 javac.  The applied patch does not increase the total number of javac compiler warnings.
bq.       [exec] 
bq.       [exec]     +1 findbugs.  The patch does not introduce any new Findbugs warnings.
bq.       [exec] 
bq.       [exec]     +1 release audit.  The applied patch does not increase the total number of release audit warnings.
bq.  
bq.  Unit-test:
bq.      all pass.
bq.  
bq.  
bq.  Thanks,
bq.  
bq.  Daniel
bq.  
bq.



> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826-1.patch, PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] Commented: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Jonathan Coveney (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12989789#comment-12989789 ] 

Jonathan Coveney commented on PIG-1826:
---------------------------------------

MutableInt is defined as such:


package squeal.com;

public class MutableInt {
        int value;

        public MutableInt() { value = 1; }
        public MutableInt(int val) { value = val; }
        public void inc() { ++value; }
        public void inc(int val) { value += val; }
        public int get() { return value; }
        public String toString() { return (new Integer(value)).toString(); }
}

Just a dumb wrapper class to avoid having to do another put after getting from a Map.

It is failing on a trivial example:


1       a
1       b
1       c
2       a
2       b
2       c
3       a
3       b
3       c

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] Commented: (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13008743#comment-13008743 ] 

Daniel Dai commented on PIG-1826:
---------------------------------

The problem is that Pig doesn't understand MutableInt. All data fed by a UDF must be of Pig-compatible data types.

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>         Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] [Assigned] (PIG-1826) Unexpected data type -1 found in stream error

Posted by "Daniel Dai (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/PIG-1826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Daniel Dai reassigned PIG-1826:
-------------------------------

    Assignee: Daniel Dai

> Unexpected data type -1 found in stream error
> ---------------------------------------------
>
>                 Key: PIG-1826
>                 URL: https://issues.apache.org/jira/browse/PIG-1826
>             Project: Pig
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>         Environment: This is pig 0.8.0 on a linux box
>            Reporter: Jonathan Coveney
>            Assignee: Daniel Dai
>             Fix For: 0.9.0
>
>         Attachments: PIG-1826.tar.gz, numgraph.java
>
>
> When running the attached udf I get the title error. By inserting printlns extensively, the script is functioning properly and returning a DataBag, but for whatever reason, pig does not detect it as such.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira