You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2016/02/22 03:36:55 UTC
parquet-mr git commit: PARQUET-529: Avoid evoking job.toString() in
ParquetLoader
Repository: parquet-mr
Updated Branches:
refs/heads/master 944291b74 -> c44f982e8
PARQUET-529: Avoid evoking job.toString() in ParquetLoader
When run in a Hadoop 2 environment with the log level set to `DEBUG`, ParquetLoader invokes `job.toString()` in several methods, which can cause the whole application to stop due to:
```
java.lang.IllegalStateException: Job in state DEFINE instead of RUNNING
at org.apache.hadoop.mapreduce.Job.ensureState(Job.java:283)
at org.apache.hadoop.mapreduce.Job.toString(Job.java:452)
at java.lang.String.valueOf(String.java:2847)
at java.lang.StringBuilder.append(StringBuilder.java:128)
at org.apache.parquet.pig.ParquetLoader.getSchema(ParquetLoader.java:260)
at org.apache.parquet.pig.TestParquetLoader.testSchema(TestParquetLoader.java:54)
...
```
The reason is that in the hadoop 2.x branch, `org.apache.hadoop.mapreduce.Job.toString()` has added an `ensureState(JobState.RUNNING)` check; see [map-reduce: Job.java#452](http://grepcode.com/file/repo1.maven.org/maven2/org.apache.hadoop/hadoop-mapreduce-client-core/2.3.0/org/apache/hadoop/mapreduce/Job.java#452). In contrast, the hadoop 1.x branch does not contain such checks, so `ParquetLoader` works well.
This PR simply avoids invoking `job.toString()` in `ParquetLoader`. The debug messages now log a string built from `job.getJobID()` and `job.getJobName()` instead.
Author: proflin <pr...@gmail.com>
Author: Liwei Lin <pr...@gmail.com>
Closes #326 from proflin/PARQUET-529--Avoid-evoking-job.toString()-in-ParquetLoader and squashes the following commits:
f464c7b [proflin] Add jobToString
5d4c750 [proflin] PARQUET-529: Avoid evoking job.toString() in ParquetLoader.java
bb4283a [Liwei Lin] Merge branch 'master' of https://github.com/proflin/parquet-mr
839b458 [proflin] Merge remote-tracking branch 'refs/remotes/apache/master'
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/c44f982e
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/c44f982e
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/c44f982e
Branch: refs/heads/master
Commit: c44f982e89b63a97190638cd12bd8bee2bafb883
Parents: 944291b
Author: proflin <pr...@gmail.com>
Authored: Sun Feb 21 18:36:50 2016 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Sun Feb 21 18:36:50 2016 -0800
----------------------------------------------------------------------
.../org/apache/parquet/pig/ParquetLoader.java | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c44f982e/parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java b/parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java
index 41ce738..0575dce 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java
@@ -137,7 +137,10 @@ public class ParquetLoader extends LoadFunc implements LoadMetadata, LoadPushDow
@Override
public void setLocation(String location, Job job) throws IOException {
- if (DEBUG) LOG.debug("LoadFunc.setLocation(" + location + ", " + job + ")");
+ if (DEBUG) {
+ String jobToString = String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName());
+ LOG.debug("LoadFunc.setLocation(" + location + ", " + jobToString + ")");
+ }
setInput(location, job);
}
@@ -240,14 +243,20 @@ public class ParquetLoader extends LoadFunc implements LoadMetadata, LoadPushDow
@Override
public String[] getPartitionKeys(String location, Job job) throws IOException {
- if (DEBUG) LOG.debug("LoadMetadata.getPartitionKeys(" + location + ", " + job + ")");
+ if (DEBUG) {
+ String jobToString = String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName());
+ LOG.debug("LoadMetadata.getPartitionKeys(" + location + ", " + jobToString + ")");
+ }
setInput(location, job);
return null;
}
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
- if (DEBUG) LOG.debug("LoadMetadata.getSchema(" + location + ", " + job + ")");
+ if (DEBUG) {
+ String jobToString = String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName());
+ LOG.debug("LoadMetadata.getSchema(" + location + ", " + jobToString + ")");
+ }
setInput(location, job);
return new ResourceSchema(schema);
}
@@ -289,7 +298,10 @@ public class ParquetLoader extends LoadFunc implements LoadMetadata, LoadPushDow
@Override
public ResourceStatistics getStatistics(String location, Job job)
throws IOException {
- if (DEBUG) LOG.debug("LoadMetadata.getStatistics(" + location + ", " + job + ")");
+ if (DEBUG) {
+ String jobToString = String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName());
+ LOG.debug("LoadMetadata.getStatistics(" + location + ", " + jobToString + ")");
+ }
/* We need to call setInput since setLocation is not
guaranteed to be called before this */
setInput(location, job);