You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-user@hadoop.apache.org by "Peter W." <pe...@marketingbrokers.com> on 2007/07/17 23:00:09 UTC
Pig newbie
Hello,
I'm a Pig newbie and want to run my first program
(from the sample) which compiles OK but errors with:
java.lang.RuntimeException: Function tokenize not found.
code:
import java.io.*;
import java.util.*;
import com.yahoo.pig.BagEvalFunc;
import com.yahoo.pig.PigServer;
import com.yahoo.pig.data.Tuple;
import com.yahoo.pig.data.DataAtom;
import com.yahoo.pig.data.DataCollector;
/*******************************
* in.txt: test|,|this|is|a|test
*******************************/
public class pigtest
{
public static void main(String[] args)
{
try
{
PigServer ps=new PigServer(PigServer.ExecType.MAPREDUCE);
ps.registerQuery("input=LOAD 'in.txt' USING StorageText();");
ps.registerQuery("words=FOREACH input GENERATE FLATTEN
(tokenize());");
ps.registerQuery("grouped=GROUP words BY $0;");
ps.registerQuery("counts=FOREACH grouped GENERATE
group,COUNT(words);");
Iterator<Tuple> i=ps.openIterator("counts");
while(i.hasNext())
{
Tuple t=(Tuple)i.next();
System.out.println(t);
}
}
catch(Exception e)
{
System.out.println(e);
}
}
static class tokenize extends BagEvalFunc
{
public void exec(Tuple in_t, DataCollector in_d) throws
IOException
{
String s=(in_t.getTupleField(0)).toString();
StringTokenizer st=new StringTokenizer(s,"|");
while(st.hasMoreTokens())
{
Tuple t=new Tuple(1);
t.setField(0,st.nextToken());
in_d.add(t);
}
}
}
}
How can I get this working, and is this the correct forum
for these Hadoop-related Pig questions?
Thanks!
Regards,
Peter W.