You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hbase.apache.org by Jan Lukavský <ja...@firma.seznam.cz> on 2011/08/26 13:08:59 UTC

MapReduce configuration issues

Hi,

we have run into problems caused by HBASE-3578 (included in cdh3u1). I 
think this patch introduced a bug. If a job uses GenericOptionsParser 
and its -conf directive to load a configuration file (e.g. for a specific 
job) and changes some HBase (client) configuration options, then after 
the call to initTableMapperJob all these settings are lost (unless they 
are marked final). Another (maybe more severe) problem occurs in the 
following code:

Configuration conf = HBaseConfiguration.create();

// change quorum to connect to different HBase than the default
conf.set("hbase.zookeeper.quorum", "host1,host2,host2");

Job job = new Job(conf, ...);

TableMapReduceUtil.initTableMapperJob(...);

// now the job connects to the default quorum. This is definitely not
// what I wanted.
job.submit();


I think the right way to fix this is the following patch:

Index: src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java    (revision 1162051)
+++ src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java    (working copy)
@@ -54,9 +54,7 @@
    public HBaseConfiguration(final Configuration c) {
      //TODO:replace with private constructor
      this();
-    for (Entry<String, String>e: c) {
-      set(e.getKey(), e.getValue());
-    }
+    merge(this, c);
    }

    private static void checkDefaultsVersion(Configuration conf) {
@@ -109,9 +107,19 @@
     */
    public static Configuration create(final Configuration that) {
      Configuration conf = create();
-    for (Entry<String, String>e: that) {
-      conf.set(e.getKey(), e.getValue());
-    }
+    merge(conf, that);
      return conf;
    }
+
+  /**
+   * Merge two configurations.
+   * @param destConf the configuration that will be overwritten with items
+   *                 from the srcConf
+   * @param srcConf the source configuration
+   **/
+  public static void merge(Configuration destConf, Configuration srcConf) {
+    for (Entry<String, String> e : srcConf) {
+      destConf.set(e.getKey(), e.getValue());
+    }
+  }
  }
Index: src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java    (revision 1162051)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java    (working copy)
@@ -126,10 +126,10 @@
      if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
      if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
      job.setMapperClass(mapper);
-    HBaseConfiguration.addHbaseResources(job.getConfiguration());
-    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
-    job.getConfiguration().set(TableInputFormat.SCAN,
-      convertScanToString(scan));
+    Configuration jobConf = job.getConfiguration();
+    HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));
+    jobConf.set(TableInputFormat.INPUT_TABLE, table);
+    jobConf.set(TableInputFormat.SCAN, convertScanToString(scan));
      if (addDependencyJars) {
        addDependencyJars(job);
      }



Jan



Re: MapReduce configuration issues

Posted by Jan Lukavský <ja...@firma.seznam.cz>.
I overlooked the second call to addHbaseResources in 
initTableReducerJob; the same issue applies there too.

Index: src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java    (revision 1162051)
+++ src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java    (working copy)
@@ -54,9 +54,7 @@
    public HBaseConfiguration(final Configuration c) {
      //TODO:replace with private constructor
      this();
-    for (Entry<String, String>e: c) {
-      set(e.getKey(), e.getValue());
-    }
+    merge(this, c);
    }

    private static void checkDefaultsVersion(Configuration conf) {
@@ -109,9 +107,19 @@
     */
    public static Configuration create(final Configuration that) {
      Configuration conf = create();
-    for (Entry<String, String>e: that) {
-      conf.set(e.getKey(), e.getValue());
-    }
+    merge(conf, that);
      return conf;
    }
+
+  /**
+   * Merge two configurations.
+   * @param destConf the configuration that will be overwritten with items
+   *                 from the srcConf
+   * @param srcConf the source configuration
+   **/
+  public static void merge(Configuration destConf, Configuration srcConf) {
+    for (Entry<String, String> e : srcConf) {
+      destConf.set(e.getKey(), e.getValue());
+    }
+  }
  }
Index: src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java    (revision 1162051)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java    (working copy)
@@ -126,10 +126,10 @@
      if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
      if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
      job.setMapperClass(mapper);
-    HBaseConfiguration.addHbaseResources(job.getConfiguration());
-    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
-    job.getConfiguration().set(TableInputFormat.SCAN,
-      convertScanToString(scan));
+    Configuration jobConf = job.getConfiguration();
+    HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));
+    jobConf.set(TableInputFormat.INPUT_TABLE, table);
+    jobConf.set(TableInputFormat.SCAN, convertScanToString(scan));
      if (addDependencyJars) {
        addDependencyJars(job);
      }
@@ -333,8 +333,8 @@
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

-    Configuration conf = job.getConfiguration();
-    HBaseConfiguration.addHbaseResources(conf);
+    Configuration conf = job.getConfiguration();
+    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
      job.setOutputFormatClass(TableOutputFormat.class);
      if (reducer != null) job.setReducerClass(reducer);
      conf.set(TableOutputFormat.OUTPUT_TABLE, table);


On 26.8.2011 13:08, Jan Lukavský wrote:
> Hi,
>
> we have run into problems caused by HBASE-3578 (included in cdh3u1). I
> think this patch introduced a bug. If a job uses GenericOptionsParser
> and its -conf directive to load a configuration file (eg. for a specific
> job) and changes some hbase (client) configuration options, then after
> call to initTableMapperJob all this settings is lost (or must be final).
> Another (may be more severe) problem occurs in the following code:
>
> Configuration conf = HBaseConfiguration.create();
>
> // change quorum to connect to different HBase than the default
> conf.setInt("hbase.zookeeper.quorum", "host1,host2,host2");
>
> Job job = new Job(conf, ...);
>
> TableMapReduceUtil.initTableMapperJob(...);
>
> // now the job connects to the default quorum. This is definitely not
> what I wanted.
> job.submit();
>
>
> I think the right way to do this is this
>
> Index: src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
> ===================================================================
> --- src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
> (revision 1162051)
> +++ src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
> (working copy)
> @@ -54,9 +54,7 @@
>      public HBaseConfiguration(final Configuration c) {
>        //TODO:replace with private constructor
>        this();
> -    for (Entry<String, String>e: c) {
> -      set(e.getKey(), e.getValue());
> -    }
> +    merge(this, c);
>      }
>
>      private static void checkDefaultsVersion(Configuration conf) {
> @@ -109,9 +107,19 @@
>       */
>      public static Configuration create(final Configuration that) {
>        Configuration conf = create();
> -    for (Entry<String, String>e: that) {
> -      conf.set(e.getKey(), e.getValue());
> -    }
> +    merge(conf, that);
>        return conf;
>      }
> +
> +  /**
> +   * Merge two configurations.
> +   * @param destConf the configuration that will be overwritten with items
> +   *                 from the srcConf
> +   * @param srcConf the source configuration
> +   **/
> +  public static void merge(Configuration destConf, Configuration srcConf) {
> +    for (Entry<String, String>  e : srcConf) {
> +      destConf.set(e.getKey(), e.getValue());
> +    }
> +  }
>    }
> Index:
> src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
> ===================================================================
> ---
> src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
> (revision 1162051)
> +++
> src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
> (working copy)
> @@ -126,10 +126,10 @@
>        if (outputValueClass != null)
> job.setMapOutputValueClass(outputValueClass);
>        if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
>        job.setMapperClass(mapper);
> -    HBaseConfiguration.addHbaseResources(job.getConfiguration());
> -    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
> -    job.getConfiguration().set(TableInputFormat.SCAN,
> -      convertScanToString(scan));
> +    Configuration jobConf = job.getConfiguration();
> +    HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));
> +    jobConf.set(TableInputFormat.INPUT_TABLE, table);
> +    jobConf.set(TableInputFormat.SCAN, convertScanToString(scan));
>        if (addDependencyJars) {
>          addDependencyJars(job);
>        }
>
>
>
> Jan
>
>


-- 

Jan Lukavský
programátor
Seznam.cz, a.s.
Radlická 608/2
15000, Praha 5

jan.lukavsky@firma.seznam.cz
http://www.seznam.cz