Posted to issues@flink.apache.org by fpompermaier <gi...@git.apache.org> on 2014/11/20 00:14:27 UTC

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

GitHub user fpompermaier opened a pull request:

    https://github.com/apache/incubator-flink/pull/220

    Upgraded HBase addon to HBase 0.98.x and new Tuple APIs + fix of ExecutionEnvironment

    Now it should be ok :+1: 

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/fpompermaier/incubator-flink master

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/incubator-flink/pull/220.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #220
    
----
commit dca62ebc1f37f55001b95e231ca5e57f0cb25899
Author: fpompermaier <fp...@github.com>
Date:   2014-11-19T23:08:02Z

    Upgraded HBase addon to HBase 0.98.x and new Tuple APIs + fix of
    ExecutionEnvironment

----


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20678517
  
    --- Diff: flink-addons/flink-hbase/src/main/java/org/apache/flink/addons/hbase/TableInputFormat.java ---
    @@ -23,182 +23,69 @@
     import java.util.ArrayList;
     import java.util.List;
     
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.flink.addons.hbase.common.HBaseKey;
    -import org.apache.flink.addons.hbase.common.HBaseResult;
    -import org.apache.flink.addons.hbase.common.HBaseUtil;
     import org.apache.flink.api.common.io.InputFormat;
     import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
     import org.apache.flink.api.common.io.statistics.BaseStatistics;
    +import org.apache.flink.api.java.tuple.Tuple;
     import org.apache.flink.configuration.Configuration;
     import org.apache.flink.core.io.InputSplitAssigner;
    -import org.apache.flink.types.Record;
    -import org.apache.flink.util.OperatingSystem;
    -import org.apache.hadoop.fs.Path;
     import org.apache.hadoop.hbase.HBaseConfiguration;
     import org.apache.hadoop.hbase.client.HTable;
     import org.apache.hadoop.hbase.client.Result;
    +import org.apache.hadoop.hbase.client.ResultScanner;
     import org.apache.hadoop.hbase.client.Scan;
    -import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    -import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
     import org.apache.hadoop.hbase.util.Bytes;
     import org.apache.hadoop.hbase.util.Pair;
     import org.apache.hadoop.util.StringUtils;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
     
     /**
      * {@link InputFormat} subclass that wraps the access for HTables.
    + * 
    + * @author Flavio Pompermaier <po...@okkam.it>
      */
    -public class TableInputFormat implements InputFormat<Record, TableInputSplit> {
    +public abstract class TableInputFormat<T extends Tuple> implements InputFormat<T, TableInputSplit>{
     
     	private static final long serialVersionUID = 1L;
     
     	private static final Logger LOG = LoggerFactory.getLogger(TableInputFormat.class);
     
    -	/** A handle on an HBase table */
    -	private HTable table;
    -
    -	/** The scanner that performs the actual access on the table. HBase object */
    -	private Scan scan;
    -
    -	/** Hbase' iterator wrapper */
    -	private TableRecordReader tableRecordReader;
    -
     	/** helper variable to decide whether the input is exhausted or not */
     	private boolean endReached = false;
    +	
    +	// TODO table and scan could be serialized when kryo serializer will be the default
    +	private transient HTable table;
    +	private transient Scan scan;
    +	
    +	/** HBase iterator wrapper */
    +	private ResultScanner rs;
     
    -	/** Job parameter that specifies the input table. */
    -	public static final String INPUT_TABLE = "hbase.inputtable";
    -
    -	/** Location of the hbase-site.xml. If set, the HBaseAdmin will build inside */
    -	public static final String CONFIG_LOCATION = "hbase.config.location";
    -
    -	/**
    -	 * Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
    -	 * See TableMapReduceUtil.convertScanToString(Scan) for more details.
    -	 */
    -	public static final String SCAN = "hbase.scan";
    -
    -	/** Column Family to Scan */
    -	public static final String SCAN_COLUMN_FAMILY = "hbase.scan.column.family";
    -
    -	/** Space delimited list of columns to scan. */
    -	public static final String SCAN_COLUMNS = "hbase.scan.columns";
    -
    -	/** The timestamp used to filter columns with a specific timestamp. */
    -	public static final String SCAN_TIMESTAMP = "hbase.scan.timestamp";
    -
    -	/** The starting timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_START = "hbase.scan.timerange.start";
    -
    -	/** The ending timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_END = "hbase.scan.timerange.end";
    -
    -	/** The maximum number of version to return. */
    -	public static final String SCAN_MAXVERSIONS = "hbase.scan.maxversions";
    -
    -	/** Set to false to disable server-side caching of blocks for this scan. */
    -	public static final String SCAN_CACHEBLOCKS = "hbase.scan.cacheblocks";
    -
    -	/** The number of rows for caching that will be passed to scanners. */
    -	public static final String SCAN_CACHEDROWS = "hbase.scan.cachedrows";
    -
    -	/** mutable objects that are used to avoid recreation of wrapper objects */
    -	protected HBaseKey hbaseKey;
    -
    -	protected HBaseResult hbaseResult;
    -
    -	private org.apache.hadoop.conf.Configuration hConf;
    -
    -	@Override
    -	public void configure(Configuration parameters) {
    -		HTable table = createTable(parameters);
    -		setTable(table);
    -		Scan scan = createScanner(parameters);
    -		setScan(scan);
    -	}
    -
    +	// abstract methods allow for multiple table and scanners in the same job
    +	protected abstract Scan getScanner();
    +	protected abstract String getTableName();
    +	protected abstract T mapResultToTuple(Result r);
    --- End diff --
    
    I forgot to say that the Result already contains the key; you just have to call result.getRow().
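    For illustration, a minimal sketch of a concrete subclass (hypothetical class, table, and column names; it assumes the abstract TableInputFormat shown in the diff above and the standard HBase 0.98 client API):

        import org.apache.flink.addons.hbase.TableInputFormat;
        import org.apache.flink.api.java.tuple.Tuple2;
        import org.apache.hadoop.hbase.client.Result;
        import org.apache.hadoop.hbase.client.Scan;
        import org.apache.hadoop.hbase.util.Bytes;

        public class MyTableInputFormat extends TableInputFormat<Tuple2<String, String>> {

            private static final long serialVersionUID = 1L;

            @Override
            protected String getTableName() {
                return "test-table";                  // hypothetical table name
            }

            @Override
            protected Scan getScanner() {
                // scan a single hypothetical column
                return new Scan().addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col"));
            }

            @Override
            protected Tuple2<String, String> mapResultToTuple(Result r) {
                // the row key is part of the Result: getRow() returns it as byte[]
                String key = Bytes.toString(r.getRow());
                String value = Bytes.toString(r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col")));
                return new Tuple2<String, String>(key, value);
            }
        }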


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20641840
  
    --- Diff: flink-addons/flink-hbase/pom.xml ---
    @@ -28,59 +28,60 @@ under the License.
     		<version>0.8-incubating-SNAPSHOT</version>
     		<relativePath>..</relativePath>
     	</parent>
    -	
    -	<repositories>
    -		<repository>
    -			<id>cloudera-releases</id>
    -			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    -			<releases>
    -				<enabled>true</enabled>
    -			</releases>
    -			<snapshots>
    -				<enabled>false</enabled>
    -			</snapshots>
    -		</repository>
    -	</repositories>
    -
    -	<properties>
    - 		<hbase.version>0.96.0-hadoop2</hbase.version>
    -	</properties>
     
     	<artifactId>flink-hbase</artifactId>
     	<name>flink-hbase</name>
     	<packaging>jar</packaging>
     
    +	<properties>
    +		<hbase.hadoop1.version>0.98.6.1-hadoop1</hbase.hadoop1.version>
    +		<hbase.hadoop2.version>0.98.6.1-hadoop2</hbase.hadoop2.version>
    +	</properties>
    +
     	<dependencies>
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-core</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-java</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hbase</groupId>
    -			<artifactId>hbase</artifactId>
    -			<version>0.94.2-cdh4.2.1</version>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-clients</artifactId>
    +			<version>${project.version}</version>
     			<exclusions>
    -				<!-- jruby is used for the hbase shell. -->
     				<exclusion>
    -					<groupId>org.jruby</groupId>
    -					<artifactId>jruby-complete</artifactId>
    +					<groupId>org.apache.hadoop</groupId>
    +					<artifactId>hadoop-core</artifactId>
     				</exclusion>
     			</exclusions>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-hadoop-compatibility</artifactId>
    +			<version>${project.version}</version>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-client</artifactId>
    +			<version>${hbase.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hadoop</groupId>
    -			<artifactId>hadoop-client</artifactId>
    -			<version>${hadoop.version}</version>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-server</artifactId>
    --- End diff --
    
    Unfortunately yes... I'll be more precise about that as soon as possible.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66602944
  
    Now it seems all right!
    
    On 11 December 2014 at 10:57, Robert Metzger <no...@github.com>
    wrote:
    
    > Okay, thank you.
    >
    > This is the commit that I'm going to put into the master: rmetzger@0235f82
    > <https://github.com/rmetzger/incubator-flink/commit/0235f822092d55b99a4773bbb5456ef07803fc2d>
    > (unless there is something wrong with it).
    >
    > —
    > Reply to this email directly or view it on GitHub
    > <https://github.com/apache/incubator-flink/pull/220#issuecomment-66596245>
    > .
    >


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fhueske <gi...@git.apache.org>.
Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20660555
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/execution/RuntimeEnvironment.java ---
    @@ -177,8 +177,18 @@ public RuntimeEnvironment(Task owner, TaskDeploymentDescriptor tdd,
     		this.taskConfiguration = tdd.getTaskConfiguration();
     		
     		this.invokable.setEnvironment(this);
    -		this.invokable.registerInputOutput();
    +		
    +		{ 
    +			//TODO Check if this fix necessary elsewhere..
    --- End diff --
    
    Done


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65076389
  
    Yes, I merged the code, but it seems that the hadoop-1 variant does not work right now; it must have been lost when I merged it with the code that set the hadoop-2 profile as the default.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66541558
  
    I've looked at your branch and it's not a fork of my code rebased onto the
    current master.
    I'll try to rebase my PR onto the current master now.
    
    On 10 December 2014 at 21:02, Robert Metzger <no...@github.com>
    wrote:
    
    > Thanks @fpompermaier <https://github.com/fpompermaier> for describing
    > whats missing. I've triggered a Travis build to verify it:
    > https://travis-ci.org/rmetzger/incubator-flink/builds/43638278
    >
    > —
    > Reply to this email directly or view it on GitHub
    > <https://github.com/apache/incubator-flink/pull/220#issuecomment-66514191>
    > .
    >


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20638301
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/execution/RuntimeEnvironment.java ---
    @@ -177,8 +177,18 @@ public RuntimeEnvironment(Task owner, TaskDeploymentDescriptor tdd,
     		this.taskConfiguration = tdd.getTaskConfiguration();
     		
     		this.invokable.setEnvironment(this);
    -		this.invokable.registerInputOutput();
    +		
    +		{ 
    +			//TODO Check if this fix necessary elsewhere..
    --- End diff --
    
    I will apply this fix in a separate patch, this is actually a very important correction.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20758193
  
    --- Diff: flink-addons/flink-hbase/src/test/resources/log4j.properties ---
    @@ -0,0 +1,6 @@
    +log4j.rootLogger=${hadoop.root.logger}
    --- End diff --
    
    In the other tests, we deactivated logging. The reason is that otherwise the logs flood the screen during testing and exceed the log-length limits on the build servers.
    
    How about commenting the logger out (setting it to OFF) and adding a comment to the example explaining how to activate the logging (by uncommenting the logger config)?
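    One possible way to do that (a sketch only, not necessarily the exact file that ended up in the repository):

        # Logging is turned off by default so that test runs do not exceed the
        # log-length limits on the build servers. To debug locally, comment out
        # the OFF line and uncomment the console configuration below.
        log4j.rootLogger=OFF
        # log4j.rootLogger=INFO, console
        # log4j.appender.console=org.apache.log4j.ConsoleAppender
        # log4j.appender.console.layout=org.apache.log4j.PatternLayout
        # log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n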


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by rmetzger <gi...@git.apache.org>.
Github user rmetzger commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65049293
  
    I'm a bit confused regarding this pull request.
    Has it been merged already in this commit? https://github.com/apache/incubator-flink/commit/a1100af4247f77632f07ee9cea9fc7452104fac6


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65959384
  
    Ah, okay. I think that may have been accidentally removed when we merged the code with Robert's change that made the hadoop2-profile the default.
    
    Any chance you can open a pull request where you re-add the POM entries that are required to make it work with hadoop1?
    
    Sorry for the extra effort...


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fhueske <gi...@git.apache.org>.
Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20646344
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/execution/RuntimeEnvironment.java ---
    @@ -177,8 +177,18 @@ public RuntimeEnvironment(Task owner, TaskDeploymentDescriptor tdd,
     		this.taskConfiguration = tdd.getTaskConfiguration();
     		
     		this.invokable.setEnvironment(this);
    -		this.invokable.registerInputOutput();
    +		
    +		{ 
    +			//TODO Check if this fix necessary elsewhere..
    --- End diff --
    
    I have a branch for that already. So if you haven't fixed it, I can push it.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65088942
  
    Have you merged all the latest commits? Have you checked that the flink-addons/pom.xml compiles the hbase addon with both hadoop1 and hadoop2?
    
    <modules>
    		<module>flink-hbase</module>
    		<module>flink-avro</module>
    		<module>flink-jdbc</module>
    		<module>flink-spargel</module>
    		<module>flink-hadoop-compatibility</module>
    		<module>flink-streaming</module>
    	</modules>
    
    Thus removing <module>flink-hbase</module> from the hadoop2 profile?


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65963393
  
    The next few days are a problem for me because I'll be away... However, I can tell you what the changes to the pom files are: the pom of the hbase addon should be taken as-is, while the flink-addons pom should always build the module (remove the modules section from the hadoop-1 profile and move it to the common modules). This last change is pretty straightforward if you look at its diff. Let me know if this is enough or if you really need me to rebase the code...
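    (A sketch of the intended flink-addons/pom.xml layout, using the module list quoted earlier in this thread; the committed file may differ:)

        <!-- flink-hbase sits with the common modules, so it builds under both profiles -->
        <modules>
            <module>flink-hbase</module>
            <module>flink-avro</module>
            <module>flink-jdbc</module>
            <module>flink-spargel</module>
            <module>flink-hadoop-compatibility</module>
            <module>flink-streaming</module>
        </modules>

        <!-- the hadoop-1 and hadoop-2 profiles no longer carry their own <modules> section -->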


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by rmetzger <gi...@git.apache.org>.
Github user rmetzger commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66586451
  
    Ah, you updated this pull request?
    
    My understanding of the situation is the following:
    - This pull request updated our HBase module as described in the title.
    - Stephan merged the pull request, but due to merge conflicts he merged it wrong (the pom.xml file of flink-addons was basically incorrect). Also, the pull request was not closed after it had been merged.
    - I tried to come up with a commit that describes the missing changes
    - You updated the old pull request, asking to merge it again?
    
    We usually open new pull requests for new changes; that's why I'm a bit confused.
    Give me a quick heads up if my assumptions are correct. I'll review and merge your changes in this pull request today.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20648707
  
    --- Diff: flink-addons/flink-hbase/pom.xml ---
    @@ -28,59 +28,60 @@ under the License.
     		<version>0.8-incubating-SNAPSHOT</version>
     		<relativePath>..</relativePath>
     	</parent>
    -	
    -	<repositories>
    -		<repository>
    -			<id>cloudera-releases</id>
    -			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    -			<releases>
    -				<enabled>true</enabled>
    -			</releases>
    -			<snapshots>
    -				<enabled>false</enabled>
    -			</snapshots>
    -		</repository>
    -	</repositories>
    -
    -	<properties>
    - 		<hbase.version>0.96.0-hadoop2</hbase.version>
    -	</properties>
     
     	<artifactId>flink-hbase</artifactId>
     	<name>flink-hbase</name>
     	<packaging>jar</packaging>
     
    +	<properties>
    +		<hbase.hadoop1.version>0.98.6.1-hadoop1</hbase.hadoop1.version>
    +		<hbase.hadoop2.version>0.98.6.1-hadoop2</hbase.hadoop2.version>
    +	</properties>
    +
     	<dependencies>
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-core</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-java</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hbase</groupId>
    -			<artifactId>hbase</artifactId>
    -			<version>0.94.2-cdh4.2.1</version>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-clients</artifactId>
    +			<version>${project.version}</version>
     			<exclusions>
    -				<!-- jruby is used for the hbase shell. -->
     				<exclusion>
    -					<groupId>org.jruby</groupId>
    -					<artifactId>jruby-complete</artifactId>
    +					<groupId>org.apache.hadoop</groupId>
    +					<artifactId>hadoop-core</artifactId>
     				</exclusion>
     			</exclusions>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-hadoop-compatibility</artifactId>
    +			<version>${project.version}</version>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-client</artifactId>
    +			<version>${hbase.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hadoop</groupId>
    -			<artifactId>hadoop-client</artifactId>
    -			<version>${hadoop.version}</version>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-server</artifactId>
    --- End diff --
    
    You were actually right :)
    Now it is possible to use ResultScanner from the client lib instead of TableRecordReader (which lives in the server module)! Just pushed.
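    As a rough sketch of what the client-only scan loop looks like (hypothetical class and table names, standard hbase-client 0.98 API):

        import java.io.IOException;

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.hbase.HBaseConfiguration;
        import org.apache.hadoop.hbase.client.HTable;
        import org.apache.hadoop.hbase.client.Result;
        import org.apache.hadoop.hbase.client.ResultScanner;
        import org.apache.hadoop.hbase.client.Scan;
        import org.apache.hadoop.hbase.util.Bytes;

        public class ClientOnlyScan {
            public static void main(String[] args) throws IOException {
                Configuration conf = HBaseConfiguration.create();      // picks up hbase-site.xml from the classpath
                HTable table = new HTable(conf, "test-table");          // hypothetical table name
                ResultScanner rs = table.getScanner(new Scan());        // pure hbase-client, no TableRecordReader
                try {
                    for (Result r : rs) {
                        System.out.println(Bytes.toString(r.getRow())); // row key straight from the Result
                    }
                } finally {
                    rs.close();
                    table.close();
                }
            }
        }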


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by asfgit <gi...@git.apache.org>.
Github user asfgit closed the pull request at:

    https://github.com/apache/incubator-flink/pull/220


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fhueske <gi...@git.apache.org>.
Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20647897
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/execution/RuntimeEnvironment.java ---
    @@ -177,8 +177,18 @@ public RuntimeEnvironment(Task owner, TaskDeploymentDescriptor tdd,
     		this.taskConfiguration = tdd.getTaskConfiguration();
     		
     		this.invokable.setEnvironment(this);
    -		this.invokable.registerInputOutput();
    +		
    +		{ 
    +			//TODO Check if this fix necessary elsewhere..
    --- End diff --
    
    Thanks, will push later today.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by rmetzger <gi...@git.apache.org>.
Github user rmetzger commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66596245
  
    Okay, thank you.
    
    This is the commit that I'm going to put into the master: https://github.com/rmetzger/incubator-flink/commit/0235f822092d55b99a4773bbb5456ef07803fc2d (unless there is something wrong with it).


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by rmetzger <gi...@git.apache.org>.
Github user rmetzger commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66671123
  
    Yes, everything is fine. I'll merge it for the 0.8 release
    
    Sent from my iPhone
    
    > On 11.12.2014, at 19:15, Flavio Pompermaier <no...@github.com> wrote:
    > 
    > Any news @rmetzger? Does everything compile correctly in your fork? 
    > 
    > On 11 December 2014 at 11:55, Flavio Pompermaier <f....@gmail.com> 
    > wrote: 
    > 
    > > Now it seems all right! 
    > > 
    > > On 11 December 2014 at 10:57, Robert Metzger <no...@github.com> 
    > > wrote: 
    > > 
    > >> Okay, thank you. 
    > >> 
    > >> This is the commit that I'm going to put into the master: rmetzger@ 
    > >> 0235f82 
    > >> <https://github.com/rmetzger/incubator-flink/commit/0235f822092d55b99a4773bbb5456ef07803fc2d> 
    > >> (unless there is something wrong with it). 
    > >> 
    > >> — 
    > >> Reply to this email directly or view it on GitHub 
    > >> <https://github.com/apache/incubator-flink/pull/220#issuecomment-66596245> 
    > >> . 
    > >> 
    > > 
    > >
    > —
    > Reply to this email directly or view it on GitHub.
    > 


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20676905
  
    --- Diff: flink-addons/flink-hbase/src/main/java/org/apache/flink/addons/hbase/TableInputFormat.java ---
    @@ -23,182 +23,69 @@
     import java.util.ArrayList;
     import java.util.List;
     
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.flink.addons.hbase.common.HBaseKey;
    -import org.apache.flink.addons.hbase.common.HBaseResult;
    -import org.apache.flink.addons.hbase.common.HBaseUtil;
     import org.apache.flink.api.common.io.InputFormat;
     import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
     import org.apache.flink.api.common.io.statistics.BaseStatistics;
    +import org.apache.flink.api.java.tuple.Tuple;
     import org.apache.flink.configuration.Configuration;
     import org.apache.flink.core.io.InputSplitAssigner;
    -import org.apache.flink.types.Record;
    -import org.apache.flink.util.OperatingSystem;
    -import org.apache.hadoop.fs.Path;
     import org.apache.hadoop.hbase.HBaseConfiguration;
     import org.apache.hadoop.hbase.client.HTable;
     import org.apache.hadoop.hbase.client.Result;
    +import org.apache.hadoop.hbase.client.ResultScanner;
     import org.apache.hadoop.hbase.client.Scan;
    -import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    -import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
     import org.apache.hadoop.hbase.util.Bytes;
     import org.apache.hadoop.hbase.util.Pair;
     import org.apache.hadoop.util.StringUtils;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
     
     /**
      * {@link InputFormat} subclass that wraps the access for HTables.
    + * 
    + * @author Flavio Pompermaier <po...@okkam.it>
      */
    -public class TableInputFormat implements InputFormat<Record, TableInputSplit> {
    +public abstract class TableInputFormat<T extends Tuple> implements InputFormat<T, TableInputSplit>{
     
     	private static final long serialVersionUID = 1L;
     
     	private static final Logger LOG = LoggerFactory.getLogger(TableInputFormat.class);
     
    -	/** A handle on an HBase table */
    -	private HTable table;
    -
    -	/** The scanner that performs the actual access on the table. HBase object */
    -	private Scan scan;
    -
    -	/** Hbase' iterator wrapper */
    -	private TableRecordReader tableRecordReader;
    -
     	/** helper variable to decide whether the input is exhausted or not */
     	private boolean endReached = false;
    +	
    +	// TODO table and scan could be serialized when kryo serializer will be the default
    +	private transient HTable table;
    +	private transient Scan scan;
    +	
    +	/** HBase iterator wrapper */
    +	private ResultScanner rs;
     
    -	/** Job parameter that specifies the input table. */
    -	public static final String INPUT_TABLE = "hbase.inputtable";
    -
    -	/** Location of the hbase-site.xml. If set, the HBaseAdmin will build inside */
    -	public static final String CONFIG_LOCATION = "hbase.config.location";
    -
    -	/**
    -	 * Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
    -	 * See TableMapReduceUtil.convertScanToString(Scan) for more details.
    -	 */
    -	public static final String SCAN = "hbase.scan";
    -
    -	/** Column Family to Scan */
    -	public static final String SCAN_COLUMN_FAMILY = "hbase.scan.column.family";
    -
    -	/** Space delimited list of columns to scan. */
    -	public static final String SCAN_COLUMNS = "hbase.scan.columns";
    -
    -	/** The timestamp used to filter columns with a specific timestamp. */
    -	public static final String SCAN_TIMESTAMP = "hbase.scan.timestamp";
    -
    -	/** The starting timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_START = "hbase.scan.timerange.start";
    -
    -	/** The ending timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_END = "hbase.scan.timerange.end";
    -
    -	/** The maximum number of version to return. */
    -	public static final String SCAN_MAXVERSIONS = "hbase.scan.maxversions";
    -
    -	/** Set to false to disable server-side caching of blocks for this scan. */
    -	public static final String SCAN_CACHEBLOCKS = "hbase.scan.cacheblocks";
    -
    -	/** The number of rows for caching that will be passed to scanners. */
    -	public static final String SCAN_CACHEDROWS = "hbase.scan.cachedrows";
    -
    -	/** mutable objects that are used to avoid recreation of wrapper objects */
    -	protected HBaseKey hbaseKey;
    -
    -	protected HBaseResult hbaseResult;
    -
    -	private org.apache.hadoop.conf.Configuration hConf;
    -
    -	@Override
    -	public void configure(Configuration parameters) {
    -		HTable table = createTable(parameters);
    -		setTable(table);
    -		Scan scan = createScanner(parameters);
    -		setScan(scan);
    -	}
    -
    +	// abstract methods allow for multiple table and scanners in the same job
    +	protected abstract Scan getScanner();
    +	protected abstract String getTableName();
    +	protected abstract T mapResultToTuple(Result r);
    --- End diff --
    
    I decided to leave the user the possibility to choose. It depends on the
    user's needs and on the implementation of mapResultToTuple().
    On Nov 20, 2014 9:41 PM, "Fabian Hueske" <no...@github.com> wrote:
    
    > In
    > flink-addons/flink-hbase/src/main/java/org/apache/flink/addons/hbase/TableInputFormat.java:
    >
    > > -	protected HBaseResult hbaseResult;
    > > -
    > > -	private org.apache.hadoop.conf.Configuration hConf;
    > > -
    > > -	@Override
    > > -	public void configure(Configuration parameters) {
    > > -		HTable table = createTable(parameters);
    > > -		setTable(table);
    > > -		Scan scan = createScanner(parameters);
    > > -		setScan(scan);
    > > -	}
    > > -
    > > +	// abstract methods allow for multiple table and scanners in the same job
    > > +	protected abstract Scan getScanner();
    > > +	protected abstract String getTableName();
    > > +	protected abstract T mapResultToTuple(Result r);
    >
    > Are the HBase keys also contained in the Result such that it is possible
    > to put them into the outgoing Tuple?
    >
    > —
    > Reply to this email directly or view it on GitHub
    > <https://github.com/apache/incubator-flink/pull/220/files#r20675793>.
    >


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66662763
  
    Any news @rmetzger? Does everything compile correctly in your fork?
    
    On 11 December 2014 at 11:55, Flavio Pompermaier <f....@gmail.com>
    wrote:
    
    > Now it seems all right!
    >
    > On 11 December 2014 at 10:57, Robert Metzger <no...@github.com>
    > wrote:
    >
    >> Okay, thank you.
    >>
    >> This is the commit that I'm going to put into the master: rmetzger@
    >> 0235f82
    >> <https://github.com/rmetzger/incubator-flink/commit/0235f822092d55b99a4773bbb5456ef07803fc2d>
    >> (unless there is something wrong with it).
    >>
    >> —
    >> Reply to this email directly or view it on GitHub
    >> <https://github.com/apache/incubator-flink/pull/220#issuecomment-66596245>
    >> .
    >>
    >
    >


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20647652
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/execution/RuntimeEnvironment.java ---
    @@ -177,8 +177,18 @@ public RuntimeEnvironment(Task owner, TaskDeploymentDescriptor tdd,
     		this.taskConfiguration = tdd.getTaskConfiguration();
     		
     		this.invokable.setEnvironment(this);
    -		this.invokable.registerInputOutput();
    +		
    +		{ 
    +			//TODO Check if this fix necessary elsewhere..
    --- End diff --
    
    Yes, go ahead. I have not fixed it yet.


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fhueske <gi...@git.apache.org>.
Github user fhueske commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-65059995
  
    I guess it just wasn't closed by the ASF Github Bot?


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fpompermaier <gi...@git.apache.org>.
Github user fpompermaier commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66587110
  
    I merged my fork with the current master. Basically I changed just the addons pom and the whole hbase folder (plus RuntimeEnvironment.java to fix the classpath bug, but I saw that it is now fixed in the master).
    
    So it is sufficient to clone the current master and take the addons pom and the entire hbase directory from my fork (https://github.com/fpompermaier/incubator-flink).


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by rmetzger <gi...@git.apache.org>.
Github user rmetzger commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-66514191
  
    Thanks @fpompermaier for describing whats missing. I've triggered a Travis build to verify it: https://travis-ci.org/rmetzger/incubator-flink/builds/43638278


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on the pull request:

    https://github.com/apache/incubator-flink/pull/220#issuecomment-64086270
  
    I think this looks good to merge now. I suggest changing the log config per my comment above.
    @fhueske has already added the patch for the environment class loader, so we can remove that file change during merging.
    
    +1 For adding this soon


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by fhueske <gi...@git.apache.org>.
Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20675793
  
    --- Diff: flink-addons/flink-hbase/src/main/java/org/apache/flink/addons/hbase/TableInputFormat.java ---
    @@ -23,182 +23,69 @@
     import java.util.ArrayList;
     import java.util.List;
     
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.flink.addons.hbase.common.HBaseKey;
    -import org.apache.flink.addons.hbase.common.HBaseResult;
    -import org.apache.flink.addons.hbase.common.HBaseUtil;
     import org.apache.flink.api.common.io.InputFormat;
     import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
     import org.apache.flink.api.common.io.statistics.BaseStatistics;
    +import org.apache.flink.api.java.tuple.Tuple;
     import org.apache.flink.configuration.Configuration;
     import org.apache.flink.core.io.InputSplitAssigner;
    -import org.apache.flink.types.Record;
    -import org.apache.flink.util.OperatingSystem;
    -import org.apache.hadoop.fs.Path;
     import org.apache.hadoop.hbase.HBaseConfiguration;
     import org.apache.hadoop.hbase.client.HTable;
     import org.apache.hadoop.hbase.client.Result;
    +import org.apache.hadoop.hbase.client.ResultScanner;
     import org.apache.hadoop.hbase.client.Scan;
    -import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    -import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
     import org.apache.hadoop.hbase.util.Bytes;
     import org.apache.hadoop.hbase.util.Pair;
     import org.apache.hadoop.util.StringUtils;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
     
     /**
      * {@link InputFormat} subclass that wraps the access for HTables.
    + * 
    + * @author Flavio Pompermaier <po...@okkam.it>
      */
    -public class TableInputFormat implements InputFormat<Record, TableInputSplit> {
    +public abstract class TableInputFormat<T extends Tuple> implements InputFormat<T, TableInputSplit>{
     
     	private static final long serialVersionUID = 1L;
     
     	private static final Logger LOG = LoggerFactory.getLogger(TableInputFormat.class);
     
    -	/** A handle on an HBase table */
    -	private HTable table;
    -
    -	/** The scanner that performs the actual access on the table. HBase object */
    -	private Scan scan;
    -
    -	/** Hbase' iterator wrapper */
    -	private TableRecordReader tableRecordReader;
    -
     	/** helper variable to decide whether the input is exhausted or not */
     	private boolean endReached = false;
    +	
    +	// TODO table and scan could be serialized when kryo serializer will be the default
    +	private transient HTable table;
    +	private transient Scan scan;
    +	
    +	/** HBase iterator wrapper */
    +	private ResultScanner rs;
     
    -	/** Job parameter that specifies the input table. */
    -	public static final String INPUT_TABLE = "hbase.inputtable";
    -
    -	/** Location of the hbase-site.xml. If set, the HBaseAdmin will build inside */
    -	public static final String CONFIG_LOCATION = "hbase.config.location";
    -
    -	/**
    -	 * Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
    -	 * See TableMapReduceUtil.convertScanToString(Scan) for more details.
    -	 */
    -	public static final String SCAN = "hbase.scan";
    -
    -	/** Column Family to Scan */
    -	public static final String SCAN_COLUMN_FAMILY = "hbase.scan.column.family";
    -
    -	/** Space delimited list of columns to scan. */
    -	public static final String SCAN_COLUMNS = "hbase.scan.columns";
    -
    -	/** The timestamp used to filter columns with a specific timestamp. */
    -	public static final String SCAN_TIMESTAMP = "hbase.scan.timestamp";
    -
    -	/** The starting timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_START = "hbase.scan.timerange.start";
    -
    -	/** The ending timestamp used to filter columns with a specific range of versions. */
    -	public static final String SCAN_TIMERANGE_END = "hbase.scan.timerange.end";
    -
    -	/** The maximum number of version to return. */
    -	public static final String SCAN_MAXVERSIONS = "hbase.scan.maxversions";
    -
    -	/** Set to false to disable server-side caching of blocks for this scan. */
    -	public static final String SCAN_CACHEBLOCKS = "hbase.scan.cacheblocks";
    -
    -	/** The number of rows for caching that will be passed to scanners. */
    -	public static final String SCAN_CACHEDROWS = "hbase.scan.cachedrows";
    -
    -	/** mutable objects that are used to avoid recreation of wrapper objects */
    -	protected HBaseKey hbaseKey;
    -
    -	protected HBaseResult hbaseResult;
    -
    -	private org.apache.hadoop.conf.Configuration hConf;
    -
    -	@Override
    -	public void configure(Configuration parameters) {
    -		HTable table = createTable(parameters);
    -		setTable(table);
    -		Scan scan = createScanner(parameters);
    -		setScan(scan);
    -	}
    -
    +	// abstract methods allow for multiple table and scanners in the same job
    +	protected abstract Scan getScanner();
    +	protected abstract String getTableName();
    +	protected abstract T mapResultToTuple(Result r);
    --- End diff --
    
    Are the HBase keys also contained in the Result such that it is possible to put them into the outgoing Tuple?


---

[GitHub] incubator-flink pull request: Upgraded HBase addon to HBase 0.98.x...

Posted by StephanEwen <gi...@git.apache.org>.
Github user StephanEwen commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/220#discussion_r20638270
  
    --- Diff: flink-addons/flink-hbase/pom.xml ---
    @@ -28,59 +28,60 @@ under the License.
     		<version>0.8-incubating-SNAPSHOT</version>
     		<relativePath>..</relativePath>
     	</parent>
    -	
    -	<repositories>
    -		<repository>
    -			<id>cloudera-releases</id>
    -			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    -			<releases>
    -				<enabled>true</enabled>
    -			</releases>
    -			<snapshots>
    -				<enabled>false</enabled>
    -			</snapshots>
    -		</repository>
    -	</repositories>
    -
    -	<properties>
    - 		<hbase.version>0.96.0-hadoop2</hbase.version>
    -	</properties>
     
     	<artifactId>flink-hbase</artifactId>
     	<name>flink-hbase</name>
     	<packaging>jar</packaging>
     
    +	<properties>
    +		<hbase.hadoop1.version>0.98.6.1-hadoop1</hbase.hadoop1.version>
    +		<hbase.hadoop2.version>0.98.6.1-hadoop2</hbase.hadoop2.version>
    +	</properties>
    +
     	<dependencies>
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-core</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
     			<groupId>org.apache.flink</groupId>
     			<artifactId>flink-java</artifactId>
     			<version>${project.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hbase</groupId>
    -			<artifactId>hbase</artifactId>
    -			<version>0.94.2-cdh4.2.1</version>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-clients</artifactId>
    +			<version>${project.version}</version>
     			<exclusions>
    -				<!-- jruby is used for the hbase shell. -->
     				<exclusion>
    -					<groupId>org.jruby</groupId>
    -					<artifactId>jruby-complete</artifactId>
    +					<groupId>org.apache.hadoop</groupId>
    +					<artifactId>hadoop-core</artifactId>
     				</exclusion>
     			</exclusions>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.flink</groupId>
    +			<artifactId>flink-hadoop-compatibility</artifactId>
    +			<version>${project.version}</version>
    +			<scope>test</scope>
    +		</dependency>
    +		<dependency>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-client</artifactId>
    +			<version>${hbase.version}</version>
     		</dependency>
    -
     		<dependency>
    -			<groupId>org.apache.hadoop</groupId>
    -			<artifactId>hadoop-client</artifactId>
    -			<version>${hadoop.version}</version>
    +			<groupId>org.apache.hbase</groupId>
    +			<artifactId>hbase-server</artifactId>
    --- End diff --
    
    Do we actually need the `hbase-server` dependency? Do we need classes beyond the `hbase-client` dependency?


---