You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by mb...@apache.org on 2014/12/15 23:07:44 UTC

[2/2] incubator-flink git commit: [streaming] Basic support reading from local and distributed file systems in readTextFile methods

[streaming] Basic support reading from local and distributed file systems in readTextFile methods

Conflicts:
	flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java
	flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java


Project: http://git-wip-us.apache.org/repos/asf/incubator-flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-flink/commit/510a8113
Tree: http://git-wip-us.apache.org/repos/asf/incubator-flink/tree/510a8113
Diff: http://git-wip-us.apache.org/repos/asf/incubator-flink/diff/510a8113

Branch: refs/heads/master
Commit: 510a81130a8fbfee4a14abc262299a1611a5eda3
Parents: 283c398
Author: szape <ne...@gmail.com>
Authored: Thu Nov 20 11:49:08 2014 +0100
Committer: mbalassi <mb...@apache.org>
Committed: Mon Dec 15 22:13:38 2014 +0100

----------------------------------------------------------------------
 .../flink/streaming/api/JobGraphBuilder.java    |  43 ++++++
 .../environment/StreamExecutionEnvironment.java |  94 ++++++++++++-
 .../api/function/source/FileSourceFunction.java | 139 +++++++++++++++++--
 .../api/function/source/FileStreamFunction.java |   9 +-
 .../function/source/FromElementsFunction.java   |   2 +-
 .../function/source/GenSequenceFunction.java    |   2 +-
 .../api/function/source/SourceFunction.java     |  16 ++-
 .../api/invokable/SourceInvokable.java          |   5 +
 .../api/invokable/StreamInvokable.java          |   6 +
 .../api/streamvertex/StreamVertex.java          |   3 +
 10 files changed, 292 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java
index e80d86d..01fd1e9 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/JobGraphBuilder.java
@@ -23,6 +23,8 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.operators.util.UserCodeWrapper;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.runtime.jobgraph.AbstractJobVertex;
 import org.apache.flink.runtime.jobgraph.DistributionPattern;
@@ -30,7 +32,10 @@ import org.apache.flink.runtime.jobgraph.JobGraph;
 import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
 import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
 import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
+import org.apache.flink.runtime.operators.util.TaskConfig;
 import org.apache.flink.streaming.api.collector.OutputSelector;
+import org.apache.flink.streaming.api.function.source.SourceFunction;
+import org.apache.flink.streaming.api.invokable.SourceInvokable;
 import org.apache.flink.streaming.api.invokable.StreamInvokable;
 import org.apache.flink.streaming.api.invokable.operator.co.CoInvokable;
 import org.apache.flink.streaming.api.streamrecord.StreamRecordSerializer;
@@ -79,6 +84,7 @@ public class JobGraphBuilder {
 	private Map<String, Integer> iterationTailCount;
 	private Map<String, Long> iterationWaitTime;
 	private Map<String, Map<String, OperatorState<?>>> operatorStates;
+	private Map<String, UserCodeWrapper<? extends InputFormat<String, ?>>> sources;
 
 	/**
 	 * Creates an new {@link JobGraph} with the given name. A JobGraph is a DAG
@@ -111,6 +117,7 @@ public class JobGraphBuilder {
 		iterationTailCount = new HashMap<String, Integer>();
 		iterationWaitTime = new HashMap<String, Long>();
 		operatorStates = new HashMap<String, Map<String, OperatorState<?>>>();
+		sources = new HashMap<String, UserCodeWrapper<? extends InputFormat<String, ?>>>();
 
 		if (LOG.isDebugEnabled()) {
 			LOG.debug("JobGraph created");
@@ -155,6 +162,30 @@ public class JobGraphBuilder {
 		}
 	}
 
+	/**
+	 * Adds a vertex for a {@link SourceFunction} to the {@link JobGraph} and
+	 * registers its type serializers. If the function provides an input format
+	 * wrapper, the wrapper is remembered under the vertex name so the vertex
+	 * can be configured with that format later.
+	 * 
+	 * @param vertexName
+	 *            Name of the vertex
+	 * @param function
+	 *            The source function, wrapped into a {@link SourceInvokable}
+	 * @param inTypeInfo
+	 *            Input type information, may be null
+	 * @param outTypeInfo
+	 *            Output type information, may be null
+	 * @param operatorName
+	 *            Operator name passed on to the vertex
+	 * @param serializedFunction
+	 *            The serialized source function
+	 * @param parallelism
+	 *            Parallelism passed on to the vertex
+	 */
+	public <IN, OUT> void addSourceVertex(String vertexName, SourceFunction<OUT> function,
+			TypeInformation<IN> inTypeInfo, TypeInformation<OUT> outTypeInfo, String operatorName,
+			byte[] serializedFunction, int parallelism) {
+
+		StreamInvokable<OUT, OUT> invokableObject = new SourceInvokable<OUT>(function);
+
+		addVertex(vertexName, StreamVertex.class, invokableObject, operatorName,
+				serializedFunction, parallelism);
+
+		StreamRecordSerializer<IN> inSerializer = inTypeInfo != null ? new StreamRecordSerializer<IN>(
+				inTypeInfo) : null;
+		StreamRecordSerializer<OUT> outSerializer = outTypeInfo != null ? new StreamRecordSerializer<OUT>(
+				outTypeInfo) : null;
+
+		addTypeSerializers(vertexName, inSerializer, null, outSerializer, null);
+
+		// Sources that are not backed by an input format return null here.
+		// Storing that null would make the sources.containsKey(vertexName)
+		// check pass later and then fail when the wrapper is dereferenced.
+		UserCodeWrapper<? extends InputFormat<String, ?>> formatWrapper = function
+				.getFormatWrapper();
+		if (formatWrapper != null) {
+			sources.put(vertexName, formatWrapper);
+		}
+
+		if (LOG.isDebugEnabled()) {
+			LOG.debug("Vertex: {}", vertexName);
+		}
+	}
+
 	/**
 	 * Adds a vertex for the iteration head to the {@link JobGraph}. The
 	 * iterated values will be fed from this vertex back to the graph.
@@ -341,6 +372,18 @@ public class JobGraphBuilder {
 			config.setIterationWaitTime(iterationWaitTime.get(vertexName));
 		}
 
+		if (sources.containsKey(vertexName)) {
+			TaskConfig taskConfig = new TaskConfig(vertex.getConfiguration());
+			// TypeInformation<?> OutTypeInfo =
+			// typeWrapperOut1.get(vertexName).getTypeInfo();
+			InputFormat<String, ?> format = sources.get(vertexName).getUserCodeObject();
+			vertex.setInputSplitSource(sources.get(vertexName).getUserCodeObject());
+			// taskConfig.setOutputSerializer(createSerializer(OutTypeInfo));
+			format.configure(taskConfig.getStubParameters());
+			// TaskConfig(vertex.getConfiguration());
+			// taskConfig.setStubWrapper(sources.get(vertexName));
+		}
+
 		streamVertices.put(vertexName, vertex);
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java
index 5c47592..78d18e7 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java
@@ -24,11 +24,15 @@ import java.util.List;
 
 import org.apache.commons.lang3.SerializationException;
 import org.apache.commons.lang3.SerializationUtils;
+import org.apache.commons.lang3.Validate;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.io.TextInputFormat;
 import org.apache.flink.api.java.typeutils.TypeExtractor;
 import org.apache.flink.client.program.Client;
 import org.apache.flink.client.program.ContextEnvironment;
+import org.apache.flink.core.fs.Path;
 import org.apache.flink.streaming.api.JobGraphBuilder;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
@@ -158,12 +162,93 @@ public abstract class StreamExecutionEnvironment {
 	 * @return The DataStream representing the text file.
 	 */
 	public DataStreamSource<String> readTextFile(String filePath) {
-		checkIfFileExists(filePath);
-		return addSource(new FileSourceFunction(filePath));
+		// checkIfFileExists(filePath);
+		Validate.notNull(filePath, "The file path may not be null.");
+		TextInputFormat format = new TextInputFormat(new Path(filePath));
+		return addSource(new FileSourceFunction(format, BasicTypeInfo.STRING_TYPE_INFO), 1);
 	}
 
 	/**
 	 * Creates a DataStream that represents the Strings produced by reading the
+	 * given file line wise. The file will be read with the system's default
+	 * character set.
+	 * 
+	 * @param filePath
+	 *            The path of the file, as a URI (e.g.,
+	 *            "file:///some/local/file" or "hdfs://host:port/file/path").
+	 * @param parallelism
+	 *            degree of parallelism
+	 * @return The DataStream representing the text file.
+	 */
+	public DataStreamSource<String> readTextFile(String filePath, int parallelism) {
+		Validate.notNull(filePath, "The file path may not be null.");
+		// The text input format reads the file line by line; the source
+		// function hands each line to the stream as a String.
+		Path path = new Path(filePath);
+		FileSourceFunction source = new FileSourceFunction(new TextInputFormat(path),
+				BasicTypeInfo.STRING_TYPE_INFO);
+		return addSource(source, parallelism);
+	}
+
+	/**
+	 * Creates a DataStream that represents the Strings produced by reading the
+	 * given file line wise. The file will be read with the given
+	 * character set.
+	 * 
+	 * @param filePath
+	 *            The path of the file, as a URI (e.g.,
+	 *            "file:///some/local/file" or "hdfs://host:port/file/path").
+	 * @param charsetName
+	 *            The name of the character set that is set on the text input
+	 *            format used to read the file.
+	 * @return The DataStream representing the text file.
+	 */
+	public DataStreamSource<String> readTextFile(String filePath, String charsetName) {
+		Validate.notNull(filePath, "The file path may not be null.");
+		TextInputFormat format = new TextInputFormat(new Path(filePath));
+		format.setCharsetName(charsetName);
+		return addSource(new FileSourceFunction(format, BasicTypeInfo.STRING_TYPE_INFO), 1);
+	}
+
+	// public DataStreamSource<StringValue> readTextFileWithValue(String
+	// filePath) {
+	// Validate.notNull(filePath, "The file path may not be null.");
+	// TextValueInputFormat format = new TextValueInputFormat(new
+	// Path(filePath));
+	// return addSource(new FileSourceFunction<StringValue>(format,
+	// new ValueTypeInfo<StringValue>(StringValue.class)), 1);
+	// }
+	//
+	// public DataStreamSource<StringValue> readTextFileWithValue(String
+	// filePath, String charsetName,
+	// boolean skipInvalidLines) {
+	// Validate.notNull(filePath, "The file path may not be null.");
+	// TextValueInputFormat format = new TextValueInputFormat(new
+	// Path(filePath));
+	// format.setCharsetName(charsetName);
+	// format.setSkipInvalidLines(skipInvalidLines);
+	// return addSource(new FileSourceFunction<StringValue>(format,
+	// new ValueTypeInfo<StringValue>(StringValue.class)), 1);
+	// }
+	//
+	// public <X> DataStreamSource<X> readFile(FileInputFormat<X> format, String
+	// filePath) {
+	// if (format == null) {
+	// throw new IllegalArgumentException("InputFormat must not be null.");
+	// }
+	// if (filePath == null) {
+	// throw new IllegalArgumentException("The file path must not be null.");
+	// }
+	//
+	// format.setFilePath(new Path(filePath));
+	// try {
+	// return addSource(
+	// new FileSourceFunction<X>(format,
+	// TypeExtractor.getInputFormatTypes(format)), 1);
+	// } catch (Exception e) {
+	// throw new InvalidProgramException(
+	// "The type returned by the input format could not be automatically determined. "
+	// + "Please specify the TypeInformation of the produced type explicitly.");
+	// }
+	// }
+
+	/**
+	 * Creates a DataStream that represents the Strings produced by reading the
 	 * given file line wise multiple times(infinite). The file will be read with
 	 * the system's default character set.
 	 * 
@@ -330,9 +415,8 @@ public abstract class StreamExecutionEnvironment {
 		DataStreamSource<OUT> returnStream = new DataStreamSource<OUT>(this, "source", outTypeInfo);
 
 		try {
-			jobGraphBuilder.addStreamVertex(returnStream.getId(),
-					new SourceInvokable<OUT>(function), null, outTypeInfo, "source",
-					SerializationUtils.serialize(function), 1);
+			jobGraphBuilder.addSourceVertex(returnStream.getId(), function, null, outTypeInfo,
+					"source", SerializationUtils.serialize(function), getDegreeOfParallelism());
 		} catch (SerializationException e) {
 			throw new RuntimeException("Cannot serialize SourceFunction");
 		}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileSourceFunction.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileSourceFunction.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileSourceFunction.java
index 0fe7149..879eb3f 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileSourceFunction.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileSourceFunction.java
@@ -17,32 +17,141 @@
 
 package org.apache.flink.streaming.api.function.source;
 
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
 
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.operators.GenericDataSourceBase;
+import org.apache.flink.api.common.operators.OperatorInformation;
+import org.apache.flink.api.common.operators.util.UserCodeWrapper;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
+import org.apache.flink.api.java.typeutils.runtime.RuntimeStatefulSerializerFactory;
+import org.apache.flink.api.java.typeutils.runtime.RuntimeStatelessSerializerFactory;
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
 import org.apache.flink.util.Collector;
 
-public class FileSourceFunction implements SourceFunction<String> {
+public class FileSourceFunction extends SourceFunction<String> {
 	private static final long serialVersionUID = 1L;
 
-	private final String path;
+	private InputSplitProvider provider;
 
-	public FileSourceFunction(String path) {
-		this.path = path;
+	private InputFormat<String, ?> format;
+
+	private TypeSerializerFactory<String> serializerFactory;
+
+	private UserCodeWrapper<? extends InputFormat<String, ?>> formatWrapper;
+
+	// cancel flag
+	private volatile boolean taskCanceled = false;
+
+	/**
+	 * Creates a source that reads its records through the given input format.
+	 * 
+	 * @param format
+	 *            The input format that is opened for every input split
+	 * @param typeInfo
+	 *            Type information from which the record serializer is created
+	 */
+	public FileSourceFunction(InputFormat<String, ?> format, TypeInformation<String> typeInfo) {
+		this.format = format;
+		// The data source operator is built here only to obtain the user code
+		// wrapper of the format; the operator itself is not kept.
+		@SuppressWarnings({ "unchecked", "rawtypes" })
+		GenericDataSourceBase<String, ?> source = new GenericDataSourceBase(format,
+				new OperatorInformation<String>(typeInfo), format.toString());
+		formatWrapper = source.getUserCodeWrapper();
+		this.serializerFactory = createSerializer(typeInfo);
+	}
+
+	/**
+	 * Returns the user code wrapper of the input format that was created in
+	 * the constructor.
+	 */
+	@Override
+	public UserCodeWrapper<? extends InputFormat<String, ?>> getFormatWrapper() {
+		return this.formatWrapper;
+	}
+
+	/**
+	 * Creates the serializer for the given type and wraps it into the
+	 * serializer factory that matches whether the serializer is stateful.
+	 */
+	private static TypeSerializerFactory<String> createSerializer(TypeInformation<String> typeInfo) {
+		final TypeSerializer<String> typeSerializer = typeInfo.createSerializer();
+
+		if (!typeSerializer.isStateful()) {
+			return new RuntimeStatelessSerializerFactory<String>(typeSerializer,
+					typeInfo.getTypeClass());
+		}
+		return new RuntimeStatefulSerializerFactory<String>(typeSerializer,
+				typeInfo.getTypeClass());
 	}
 
 	@Override
-	public void invoke(Collector<String> collector) throws IOException {
-		BufferedReader br = new BufferedReader(new FileReader(path));
-		String line = br.readLine();
-		while (line != null) {
-			if (!line.equals("")) {
-				collector.collect(line);
+	public void invoke(Collector<String> collector) throws Exception {
+		final TypeSerializer<String> serializer = serializerFactory.getSerializer();
+		final Iterator<InputSplit> splitIterator = getInputSplits();
+		@SuppressWarnings("unchecked")
+		final InputFormat<String, InputSplit> format = (InputFormat<String, InputSplit>) this.format;
+		try {
+			while (!this.taskCanceled && splitIterator.hasNext()) {
+
+				final InputSplit split = splitIterator.next();
+				String record = serializer.createInstance();
+
+				format.open(split);
+				try {
+					while (!this.taskCanceled && !format.reachedEnd()) {
+						if ((record = format.nextRecord(record)) != null) {
+							collector.collect(record);
+						}
+					}
+				} finally {
+					format.close();
+				}
 			}
-			line = br.readLine();
+			collector.close();
+		} catch (Exception ex) {
+			ex.printStackTrace();
 		}
-		br.close();
+	}
+
+	/**
+	 * Wraps the input split provider in an iterator. Splits are requested
+	 * lazily, and the provider is no longer asked once it has returned null.
+	 */
+	private Iterator<InputSplit> getInputSplits() {
+		return new Iterator<InputSplit>() {
+
+			// split fetched by hasNext() that next() has not handed out yet
+			private InputSplit pending;
+
+			// set once the provider has returned null
+			private boolean done;
+
+			@Override
+			public boolean hasNext() {
+				if (done) {
+					return false;
+				}
+				if (pending == null) {
+					pending = provider.getNextInputSplit();
+					done = (pending == null);
+				}
+				return !done;
+			}
+
+			@Override
+			public InputSplit next() {
+				if (pending == null && !hasNext()) {
+					throw new NoSuchElementException();
+				}
+				final InputSplit current = pending;
+				pending = null;
+				return current;
+			}
+
+			@Override
+			public void remove() {
+				throw new UnsupportedOperationException();
+			}
+		};
+	}
+
+	/**
+	 * Stores the input split provider of the given environment; the splits
+	 * read by this source are requested from it.
+	 * 
+	 * @param env
+	 *            The environment the input split provider is taken from
+	 */
+	@Override
+	public final void initialize(Environment env) {
+		this.provider = env.getInputSplitProvider();
 	}
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileStreamFunction.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileStreamFunction.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileStreamFunction.java
index 7371ac9..18144b5 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileStreamFunction.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FileStreamFunction.java
@@ -21,9 +21,11 @@ import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
 
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.operators.util.UserCodeWrapper;
 import org.apache.flink.util.Collector;
 
-public class FileStreamFunction implements SourceFunction<String> {
+public class FileStreamFunction extends SourceFunction<String> {
 	private static final long serialVersionUID = 1L;
 
 	private final String path;
@@ -46,4 +48,9 @@ public class FileStreamFunction implements SourceFunction<String> {
 			br.close();
 		}
 	}
+
+	/**
+	 * Returns null: this source function does not provide an input format
+	 * wrapper.
+	 */
+	@Override
+	public UserCodeWrapper<? extends InputFormat<String, ?>> getFormatWrapper() {
+		return null;
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FromElementsFunction.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FromElementsFunction.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FromElementsFunction.java
index cb960dd..89e0823 100755
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FromElementsFunction.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/FromElementsFunction.java
@@ -22,7 +22,7 @@ import java.util.Collection;
 
 import org.apache.flink.util.Collector;
 
-public class FromElementsFunction<T> implements SourceFunction<T> {
+public class FromElementsFunction<T> extends SourceFunction<T> {
 	private static final long serialVersionUID = 1L;
 
 	Iterable<T> iterable;

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/GenSequenceFunction.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/GenSequenceFunction.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/GenSequenceFunction.java
index 69601ff..ece68b2 100755
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/GenSequenceFunction.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/GenSequenceFunction.java
@@ -23,7 +23,7 @@ import org.apache.flink.util.Collector;
  * Source Function used to generate the number sequence
  * 
  */
-public class GenSequenceFunction implements SourceFunction<Long> {
+public class GenSequenceFunction extends SourceFunction<Long> {
 
 	private static final long serialVersionUID = 1L;
 

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/SourceFunction.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/SourceFunction.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/SourceFunction.java
index d30bbba..a3949e5 100755
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/SourceFunction.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/function/source/SourceFunction.java
@@ -18,11 +18,19 @@
 package org.apache.flink.streaming.api.function.source;
 
 import java.io.Serializable;
-
+
 import org.apache.flink.api.common.functions.Function;
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.operators.util.UserCodeWrapper;
+import org.apache.flink.runtime.execution.Environment;
 import org.apache.flink.util.Collector;
 
-public interface SourceFunction<OUT> extends Function, Serializable {
-
-	public void invoke(Collector<OUT> collector) throws Exception;
+/**
+ * Base class of the functions that produce the elements of a stream source.
+ * 
+ * @param <OUT>
+ *            Type of the elements emitted by the source
+ */
+public abstract class SourceFunction<OUT> implements Function, Serializable {
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Emits the elements of this source through the given collector.
+	 * 
+	 * @param collector
+	 *            The collector the elements are emitted to
+	 * @throws Exception
+	 *             If producing the elements fails
+	 */
+	public abstract void invoke(Collector<OUT> collector) throws Exception;
+
+	/**
+	 * Hook that receives the environment of the task running this source.
+	 * The default implementation does nothing.
+	 * 
+	 * @param env
+	 *            The environment of the task
+	 */
+	public void initialize(Environment env) {
+	}
+
+	/**
+	 * Returns the user code wrapper of the input format backing this source.
+	 * The default implementation returns null, so sources that are not
+	 * backed by an input format do not have to override this method.
+	 * 
+	 * @return The wrapper of the input format, or null if there is none
+	 */
+	public UserCodeWrapper<? extends InputFormat<String, ?>> getFormatWrapper() {
+		return null;
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/SourceInvokable.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/SourceInvokable.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/SourceInvokable.java
index 8c9df46..0cfe028 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/SourceInvokable.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/SourceInvokable.java
@@ -49,5 +49,10 @@ public class SourceInvokable<OUT> extends StreamInvokable<OUT,OUT> implements Se
 	@Override
 	protected void callUserFunction() throws Exception {
 	}
+	
+	/**
+	 * Returns the source function held by this invokable.
+	 */
+	@Override
+	public SourceFunction<OUT> getSourceFunction(){
+		return sourceFunction;
+	}
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/StreamInvokable.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/StreamInvokable.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/StreamInvokable.java
index 71739c1..ea518a0 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/StreamInvokable.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/invokable/StreamInvokable.java
@@ -24,6 +24,8 @@ import org.apache.flink.api.common.functions.RichFunction;
 import org.apache.flink.api.common.functions.RuntimeContext;
 import org.apache.flink.api.common.functions.util.FunctionUtils;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
+import org.apache.flink.streaming.api.function.source.SourceFunction;
 import org.apache.flink.streaming.api.streamrecord.StreamRecord;
 import org.apache.flink.streaming.api.streamrecord.StreamRecordSerializer;
 import org.apache.flink.util.Collector;
@@ -161,4 +163,8 @@ public abstract class StreamInvokable<IN, OUT> implements Serializable {
 	public void setRuntimeContext(RuntimeContext t) {
 		FunctionUtils.setFunctionRuntimeContext(userFunction, t);
 	}
+
+	/**
+	 * Returns the source function of this invokable. This default
+	 * implementation returns null; SourceInvokable overrides it to return
+	 * the source function it holds.
+	 */
+	public SourceFunction<OUT> getSourceFunction() {
+		return null;
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/510a8113/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/streamvertex/StreamVertex.java
----------------------------------------------------------------------
diff --git a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/streamvertex/StreamVertex.java b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/streamvertex/StreamVertex.java
index 2db0d8b..1dd78b5 100644
--- a/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/streamvertex/StreamVertex.java
+++ b/flink-addons/flink-streaming/flink-streaming-core/src/main/java/org/apache/flink/streaming/api/streamvertex/StreamVertex.java
@@ -76,6 +76,9 @@ public class StreamVertex<IN, OUT> extends AbstractInvokable {
 	}
 
 	protected <T> void invokeUserFunction(StreamInvokable<?, T> userInvokable) throws Exception {
+		if (userInvokable.getSourceFunction() != null) {
+			userInvokable.getSourceFunction().initialize(getEnvironment());
+		}
 		userInvokable.setRuntimeContext(context);
 		userInvokable.open(getTaskConfiguration());
 		userInvokable.invoke();