Posted to user@hive.apache.org by Bing Li <sm...@gmail.com> on 2012/01/17 12:39:51 UTC

Cannot create an instance of InputFormat

My Steps:
I defined a class, "public class myInputFormat extends TextInputFormat
implements JobConfigurable", to specify a custom input format.

hive> add jar /home/biadmin/hiveudf/myFileFormat.jar;
Added /home/biadmin/hiveudf/myFileFormat.jar to class path
Added resource: /home/biadmin/hiveudf/myFileFormat.jar

hive> list jars;
/home/biadmin/hiveudf/myFileFormat.jar

hive> create table IOtable(str1 string, str2 string, str3 string) stored as
INPUTFORMAT 'com.mytest.fileformat.myInputFormat' OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' ;
OK
Time taken: 0.081 seconds

hive> load data local inpath '/home/biadmin/hivetbl/IOtable_data.txt' into
table IOtable;
Copying data from file:/home/biadmin/hivetbl/IOtable_data.txt
Copying file: file:/home/biadmin/hivetbl/IOtable_data.txt
Loading data to table default.iotable
OK
Time taken: 0.147 seconds

hive>  select * from IOtable;
OK
Failed with exception java.io.IOException:java.io.IOException: Cannot
create an instance of InputFormat class com.mytest.fileformat.myInputFormat
as specified in mapredWork!
Time taken: 0.059 seconds




*Here is my source code:*
===============================
package com.mytest.fileformat;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

@SuppressWarnings("deprecation")
public class myInputFormat extends TextInputFormat implements
JobConfigurable {
 TextInputFormat format;
    JobConf job;

public myInputFormat() {
        format = new TextInputFormat();
    }

 @Override
public void configure(JobConf job) {
        this.job = job;
        format.configure(job);
}
    public RecordReader<LongWritable, Text> getRecordReader(
            InputSplit genericSplit, JobConf job, Reporter reporter)
            throws IOException {

        reporter.setStatus(genericSplit.toString());
        return new myLineRecordReader(job, (FileSplit) genericSplit);
    }


    public static class myLineRecordReader implements
            RecordReader<LongWritable, Text> {
        LineRecordReader lineReader;
        LongWritable lineKey;
        Text lineValue;

        public myLineRecordReader(JobConf job, FileSplit split) throws IOException {
            lineReader = new LineRecordReader(job, split);
            lineKey = lineReader.createKey();
            lineValue = lineReader.createValue();
        }

        public boolean next(LongWritable key, Text value) throws IOException {
            if (lineReader.next(lineKey, lineValue)) {
                // Lower-case the line and turn the "$$$$" field separator
                // into Hive's default \001 delimiter.
                String strReplace =
                        lineValue.toString().toLowerCase().replace("$$$$", "\001");
                // Propagate the line's byte offset to the caller's key.
                key.set(lineKey.get());
                value.set(strReplace);
                return true;
            }
            // no more data
            return false;
        }  /** end next **/

        public LongWritable createKey() {
            return lineReader.createKey();
        }

        public Text createValue() {
            return lineReader.createValue();
        }

        public long getPos() throws IOException {
            return lineReader.getPos();
        }

        public float getProgress() throws IOException {
            return lineReader.getProgress();
        }

        public void close() throws IOException {
            lineReader.close();
        }
    }  /** end class myLineRecordReader **/
}
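
For reference, given a hypothetical input line such as

    AAA$$$$BBB$$$$CCC

next() hands Hive

    aaa\001bbb\001ccc

(toLowerCase() also folds the text to lower case), and since \001 is Hive's
default field delimiter, the line splits into the three string columns of
IOtable.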

Re: Cannot create an instance of InputFormat

Posted by Aniket Mokashi <an...@gmail.com>.
Everything in auxlib is added to HADOOP_CLASSPATH. However, the paths in
HADOOP_CLASSPATH are added to the class path of the Job Client only; they
are not added to the class path of the Task Trackers. So if you put a JAR
called MyJar.jar on the HADOOP_CLASSPATH and do nothing to make it
available to the Task Trackers as well, calls to MyJar.jar code from the
run() method of your job will work, but calls from your Mapper or Reducer
will fail at runtime.

Thanks,

Aniket

-- 
"...:::Aniket:::... Quetzalco@tl"

Re: Cannot create an instance of InputFormat

Posted by Edward Capriolo <ed...@gmail.com>.
I generally have to put them in my hadoop classpath or hive-0.8.0/auxlib
folder. I never quite understood why, but the classpath that reads the data
seems different from the classpath that processes the data. Hence the
distinction between add jar and auxlib.
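
A minimal sketch of the auxlib route (assuming $HIVE_HOME points at your
hive-0.8.0 install): copy the jar in, then restart the CLI so it lands on
both classpaths:

    $ cp /home/biadmin/hiveudf/myFileFormat.jar $HIVE_HOME/auxlib/
    $ hive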

Edward
