You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-issues@hadoop.apache.org by "rulinma (JIRA)" <ji...@apache.org> on 2013/07/31 04:27:48 UTC
[jira] [Commented] (MAPREDUCE-5433) use mapreduce to parse hfiles and output keyvalue

    [ https://issues.apache.org/jira/browse/MAPREDUCE-5433?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13724769#comment-13724769 ] 

rulinma commented on MAPREDUCE-5433:
------------------------------------

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Input format that reads HBase HFiles directly and emits one record per
 * stored {@link KeyValue}: the key is the KeyValue's row, the value is the
 * KeyValue itself.
 *
 * <p>Files are never split (see {@link #isSplitable}); each HFile is scanned
 * from its first entry to its last by a single record reader.
 */
public class HFileInputFormat extends
		FileInputFormat<ImmutableBytesWritable, KeyValue> {

	/**
	 * Record reader that walks every entry of one HFile in stored order.
	 *
	 * <p>Declared static because it does not use any state of the enclosing
	 * input format.
	 */
	private static class HFileRecordReader extends
			RecordReader<ImmutableBytesWritable, KeyValue> {
		private final HFile.Reader reader;
		private final HFileScanner scanner;
		/** Result of the initial {@code seekTo()}; false when the file has no entries. */
		private final boolean hasFirstEntry;
		/** Entry count reported by the reader, captured once for progress reporting. */
		private final long totalEntries;
		/** Number of entries successfully handed out so far. */
		private long entriesRead = 0;
		/** True once {@link #nextKeyValue()} has been called at least once. */
		private boolean started = false;
		/** True while the scanner is positioned on an entry that has been handed out. */
		private boolean hasCurrent = false;

		/**
		 * Opens the HFile named by the split and positions a scanner on its
		 * first entry. Only the split's path is used; offset and length are
		 * ignored because the format is declared non-splittable.
		 *
		 * @param split the file split naming the HFile to read
		 * @param conf  job configuration used to resolve the file system and
		 *              to build the block cache configuration
		 * @throws IOException if the file cannot be opened or positioned
		 */
		public HFileRecordReader(FileSplit split, Configuration conf)
				throws IOException {
			final Path path = split.getPath();
			// Resolve the file system from the path itself so that inputs on a
			// non-default scheme/authority are opened on the correct file system.
			final FileSystem fs = path.getFileSystem(conf);
			final HFile.Reader openedReader = HFile.createReader(fs, path,
					new CacheConfig(conf));
			HFileScanner openedScanner;
			boolean positioned;
			long entries;
			boolean success = false;
			try {
				// Arguments are (cacheBlocks, pread, isCompaction) in the HBase
				// HFile.Reader API this code was written against.
				openedScanner = openedReader.getScanner(false, false, false);
				// seekTo() reports whether there is a first entry; remember it
				// so an empty file yields no records.
				positioned = openedScanner.seekTo();
				entries = openedReader.getEntries();
				success = true;
			} finally {
				if (!success) {
					// Do not leak the open reader when setup fails part-way.
					openedReader.close();
				}
			}
			reader = openedReader;
			scanner = openedScanner;
			hasFirstEntry = positioned;
			totalEntries = entries;
		}

		/** Closes the underlying HFile reader. */
		@Override
		public void close() throws IOException {
			reader.close();
		}

		/**
		 * @return the row of the current KeyValue, or {@code null} when the
		 *         reader is not positioned on an entry
		 */
		@Override
		public ImmutableBytesWritable getCurrentKey() throws IOException,
				InterruptedException {
			if (!hasCurrent) {
				return null;
			}
			return new ImmutableBytesWritable(scanner.getKeyValue().getRow());
		}

		/**
		 * @return the current KeyValue, or {@code null} when the reader is
		 *         not positioned on an entry
		 */
		@Override
		public KeyValue getCurrentValue() throws IOException,
				InterruptedException {
			if (!hasCurrent) {
				return null;
			}
			return scanner.getKeyValue();
		}

		/**
		 * Advances to the next entry. The first call exposes the entry the
		 * constructor already seeked to (if any); later calls advance the
		 * scanner.
		 *
		 * @return true if an entry is available, false at end of file
		 */
		@Override
		public boolean nextKeyValue() throws IOException, InterruptedException {
			if (!started) {
				started = true;
				hasCurrent = hasFirstEntry;
			} else if (hasCurrent) {
				hasCurrent = scanner.next();
			}
			// Once exhausted, hasCurrent stays false and the scanner is not
			// advanced again.
			if (hasCurrent) {
				entriesRead++;
			}
			return hasCurrent;
		}

		/**
		 * @return the fraction of entries read so far, in the range [0, 1];
		 *         1 for a file with no entries
		 */
		@Override
		public float getProgress() throws IOException, InterruptedException {
			if (totalEntries <= 0) {
				return 1.0f;
			}
			// Floating-point division: integer division would report 0 until
			// the very last entry.
			return Math.min(1.0f, (float) entriesRead / (float) totalEntries);
		}

		/**
		 * No-op: the file is already opened and positioned by the constructor.
		 */
		@Override
		public void initialize(InputSplit arg0, TaskAttemptContext arg1)
				throws IOException, InterruptedException {
			// Nothing to do; all setup happens in the constructor.
		}

	}

	/**
	 * Always returns false: the record reader ignores split offsets and scans
	 * each file from its first entry, so a file must be handled by one reader.
	 */
	@Override
	protected boolean isSplitable(JobContext context, Path filename) {
		return false;
	}

	/**
	 * Creates a record reader over the HFile named by the given split.
	 *
	 * @param split   the split to read; must be a {@link FileSplit}
	 * @param context task context supplying the job configuration
	 * @return a reader positioned before the first entry of the file
	 * @throws IOException if the HFile cannot be opened
	 */
	@Override
	public RecordReader<ImmutableBytesWritable, KeyValue> createRecordReader(
			InputSplit split, TaskAttemptContext context) throws IOException,
			InterruptedException {
		return new HFileRecordReader((FileSplit) split,
				context.getConfiguration());
	}

}
                
> use mapreduce to parse hfiles and output keyvalue
> -------------------------------------------------
>
>                 Key: MAPREDUCE-5433
>                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-5433
>             Project: Hadoop Map/Reduce
>          Issue Type: Improvement
>          Components: examples
>            Reporter: rulinma
>            Assignee: rulinma
>


--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira