You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-dev@hadoop.apache.org by Tony Wang <iv...@gmail.com> on 2013/10/26 22:20:53 UTC
Read hadoop output file, question
I'm trying to analyze the output of my hadoop M/R output files. Below is
the code in my Job main(), it still does not work yet, it gives me this
error:
java.io.FileNotFoundException:
hdfs:/Master:9100/user/output/30/part-r-00000 (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:146)
at java.io.FileInputStream.<init>(FileInputStream.java:101)
at java.io.FileReader.<init>(FileReader.java:58)
at distributed.jobStats.main(jobStats.java:122)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:160)
Hashmap size is: 0
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: jobStats <in> <out> <job>");
System.exit(2);
}
conf.set("job", otherArgs[2]);
Job job = new Job(conf, "job count");
job.setJarByClass(jobStats.class);
job.setMapperClass(jobMapper.class);
job.setCombinerClass(jobReducer.class);
job.setReducerClass(jobReducer.class);
// job.setNumReduceTasks(3);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
try { // this block is to save the hadoop output path args[1] and
its file system - either HDFS or S3 Amazon
String fileSys = conf.get("fs.default.name");
File file = new File("/home/ec2-user/hadoopOutput.txt");
// if file doesnt exists, then create it
if (!file.exists())
{
file.createNewFile();
}
FileWriter fw = new FileWriter(file.getAbsoluteFile(), false);
BufferedWriter bw = new BufferedWriter(fw);
bw.write(args[1] + "/part-r-00000 | " + fileSys);
bw.close();
fw.close();
System.out.println("Done");
} catch (IOException e)
{
e.printStackTrace();
}
int jobStatus = job.waitForCompletion(true) ? 0 : 1;
if (jobStatus == 0)
{
HashMap<String, Integer> jobCountMap = new HashMap<String,
Integer>();
FileSystem fs = FileSystem.get(conf);
Path outPath = new Path(args[1]);
Path pathPattern = new Path(outPath, "part-r-[0-9]*");
FileStatus[] list = fs.globStatus(pathPattern);
for (FileStatus status : list)
{
try {
BufferedReader brr = new BufferedReader(new
FileReader(status.getPath().toString()));
String line;
while ((line = brr.readLine()) != null)
{
String[] yearjobCount = line.split("\\|"); //the
reducer output in this format "xxxx | int"
jobCountMap.put(yearjobCount[0].trim(),
Integer.parseInt(yearjobCount[1].trim()));
}
} catch (FileNotFoundException e)
{
e.printStackTrace();
} catch (IOException e)
{
e.printStackTrace();
}
}
System.out.println("Hashmap size is: " + jobCountMap.size());
//
System.exit(job.waitForCompletion(true) ? 0 : 1);
}