You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hadoop.apache.org by unmesha sreeveni <un...@gmail.com> on 2014/06/11 12:15:34 UTC

Custom FileInputFormat not able to pass value to mapper

Below is my code. What my code does is

I stored the entire file into MyFileInputFormat. And from that I extracted
the line I want.Basically it will be a couple of characters starting from 0
to 6000

*public class MyFileInputFormat extends FileInputFormat<Text, Text> {*

*@Override*
* public RecordReader<Text, Text> createRecordReader(InputSplit split,*
*   TaskAttemptContext context) {*
* return new MyWholeFileReader();*
* }*

* @Override*
* protected boolean isSplitable(JobContext context, Path file) {*
* System.out.println("isSplitable");*
* return false;*

* }*


 * public static class MyWholeFileReader extends RecordReader<Text, Text> {*

* private CompressionCodecFactory compressionCodecs = null;*
* private long start;*
* private long end;*
* private long pos;*
* private LineReader in;*
* int counter = 0;*
* int i = 0;*
* String header = null;*
* int headerIndex = 0;*
* int footerIndex = 0;*
* private Text key = null;*
* private Text value = null;*
* private Text buffer = new Text();*
* StringBuilder sb = new StringBuilder();*

* public void initialize(InputSplit genericSplit,*
* TaskAttemptContext context) throws IOException {*

* FileSplit split = (FileSplit) genericSplit;*
* Configuration job = context.getConfiguration();*
* start = split.getStart();*
* this.end = start + split.getLength();*
* this.pos = start;*
* final Path file = split.getPath();*
* compressionCodecs = new CompressionCodecFactory(job);*
* final CompressionCodec codec = compressionCodecs.getCodec(file);*
* FileSystem fs = file.getFileSystem(job);*
* FSDataInputStream fileIn = fs.open(split.getPath());*
* if (codec != null) {*
* in = new LineReader(codec.createInputStream(fileIn), job);*
* }*
* else {*
* in = new LineReader(fileIn, job);*
* }*
* if (key == null) {*
* key = new Text();*
* }*
* key.set(split.getPath().getName());*
* if (value == null) {*
* value = new Text();*
* }*
* }*
* public boolean nextKeyValue() throws IOException {*
* System.out.println("nextKeyValue");*
* int itr = 0;*

* if(itr == 0){*
* int newSize = 0;*
* newSize = in.readLine(buffer);*
* int index = 0;*
* while (newSize > 0) {*
* String str = buffer.toString();*
* sb.append(str);*
* sb.append("\n");*
* newSize = in.readLine(buffer);*
* if(sb.toString().contains("6000")){*
* counter ++;*
* }*
* }*
* }*
* /**
* * Loop through string builder*
* */*
* String[] lines = sb.toString().split("\\n");*
* for(String s: lines){*
 * if(s.contains("^6000")){*
* i++;*
* }*
* }*
* /**
* * differentiating header,body and footer*
* */*
* String[] lines1 = sb.toString().split("\\n");*
* StringBuilder temp = new StringBuilder();*
* for(String getVal: lines1){*
* if(getVal.startsWith("6000")){*
* temp.append(getVal);*
* i --;*
* break;*
* }*
* else{*
* temp.append(getVal);*
* }*
* temp.append("\n");*
 * }*
* System.out.println("temp = " + temp.toString());*
* value.set(temp.toString());*
* sb.delete(0, temp.toString().length());*
* /**
* * Stopping condition*
* */*
* if(i == -1){*
*                                 sb.delete(0, sb.length());*
* }*
* if (sb.length() == 0) {*
* key = null;*
* value = null;*
* return false;*
* }*
* else {*
* value.set(temp.toString());*
* return true;*
* }*
* }*
* @Override*
* public Text getCurrentKey() {*
* return key;*
* }*
* @Override*
* public Text getCurrentValue() {*
* return value;*
* }*
* /***
* * *
* * Get the progress within the split*
* */*

* public float getProgress() {*
* return 0.0f;*
* }*
* public synchronized void close() throws IOException {*
* if (in != null) {*
* in.close();*
* }*
* }*
* }*
*}*
I am able to get my desired value in MyFileInputFormat, but these values are
not reaching the Mapper. The value I want delivered to the mapper is in
temp.toString(), but it never arrives in my map() method.

Am I doing anything wrong?

Please suggest.


-- 
*Thanks & Regards *


*Unmesha Sreeveni U.B*
*Hadoop, Bigdata Developer*
*Center for Cyber Security | Amrita Vishwa Vidyapeetham*
http://www.unmeshasreeveni.blogspot.in/