You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by zs...@apache.org on 2018/03/15 07:04:31 UTC
spark git commit: [SPARK-23533][SS] Add support for changing
ContinuousDataReader's startOffset
Repository: spark
Updated Branches:
refs/heads/master 4f5bad615 -> 7c3e8995f
[SPARK-23533][SS] Add support for changing ContinuousDataReader's startOffset
## What changes were proposed in this pull request?
As discussed in #20675, we need to add a new interface `ContinuousDataReaderFactory` to support the requirements of setting a start offset in Continuous Processing.
## How was this patch tested?
Existing UT.
Author: Yuanjian Li <xy...@gmail.com>
Closes #20689 from xuanyuanking/SPARK-23533.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c3e8995
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c3e8995
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c3e8995
Branch: refs/heads/master
Commit: 7c3e8995f18a1fb57c1f2c1b98a1d47590e28f38
Parents: 4f5bad6
Author: Yuanjian Li <xy...@gmail.com>
Authored: Thu Mar 15 00:04:28 2018 -0700
Committer: Shixiong Zhu <zs...@gmail.com>
Committed: Thu Mar 15 00:04:28 2018 -0700
----------------------------------------------------------------------
.../sql/kafka010/KafkaContinuousReader.scala | 11 +++++-
.../v2/reader/ContinuousDataReaderFactory.java | 35 ++++++++++++++++++++
.../continuous/ContinuousRateStreamSource.scala | 15 ++++++++-
3 files changed, 59 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/7c3e8995/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReader.scala
----------------------------------------------------------------------
diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReader.scala
index ecd1170..6e56b0a 100644
--- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReader.scala
+++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReader.scala
@@ -164,7 +164,16 @@ case class KafkaContinuousDataReaderFactory(
startOffset: Long,
kafkaParams: ju.Map[String, Object],
pollTimeoutMs: Long,
- failOnDataLoss: Boolean) extends DataReaderFactory[UnsafeRow] {
+ failOnDataLoss: Boolean) extends ContinuousDataReaderFactory[UnsafeRow] {
+
+ override def createDataReaderWithOffset(offset: PartitionOffset): DataReader[UnsafeRow] = {
+ val kafkaOffset = offset.asInstanceOf[KafkaSourcePartitionOffset]
+ require(kafkaOffset.topicPartition == topicPartition,
+ s"Expected topicPartition: $topicPartition, but got: ${kafkaOffset.topicPartition}")
+ new KafkaContinuousDataReader(
+ topicPartition, kafkaOffset.partitionOffset, kafkaParams, pollTimeoutMs, failOnDataLoss)
+ }
+
override def createDataReader(): KafkaContinuousDataReader = {
new KafkaContinuousDataReader(
topicPartition, startOffset, kafkaParams, pollTimeoutMs, failOnDataLoss)
http://git-wip-us.apache.org/repos/asf/spark/blob/7c3e8995/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousDataReaderFactory.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousDataReaderFactory.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousDataReaderFactory.java
new file mode 100644
index 0000000..a616976
--- /dev/null
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousDataReaderFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.sources.v2.reader;
+
+import org.apache.spark.annotation.InterfaceStability;
+import org.apache.spark.sql.sources.v2.reader.streaming.PartitionOffset;
+
+/**
+ * A mix-in interface for {@link DataReaderFactory}. Continuous data reader factories can
+ * implement this interface to provide creating {@link DataReader} with particular offset.
+ */
+@InterfaceStability.Evolving
+public interface ContinuousDataReaderFactory<T> extends DataReaderFactory<T> {
+ /**
+ * Create a DataReader with particular offset as its startOffset.
+ *
+ * @param offset offset want to set as the DataReader's startOffset.
+ */
+ DataReader<T> createDataReaderWithOffset(PartitionOffset offset);
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/7c3e8995/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
index b63d8d3..20d9006 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
@@ -106,7 +106,20 @@ case class RateStreamContinuousDataReaderFactory(
partitionIndex: Int,
increment: Long,
rowsPerSecond: Double)
- extends DataReaderFactory[Row] {
+ extends ContinuousDataReaderFactory[Row] {
+
+ override def createDataReaderWithOffset(offset: PartitionOffset): DataReader[Row] = {
+ val rateStreamOffset = offset.asInstanceOf[RateStreamPartitionOffset]
+ require(rateStreamOffset.partition == partitionIndex,
+ s"Expected partitionIndex: $partitionIndex, but got: ${rateStreamOffset.partition}")
+ new RateStreamContinuousDataReader(
+ rateStreamOffset.currentValue,
+ rateStreamOffset.currentTimeMs,
+ partitionIndex,
+ increment,
+ rowsPerSecond)
+ }
+
override def createDataReader(): DataReader[Row] =
new RateStreamContinuousDataReader(
startValue, startTimeMs, partitionIndex, increment, rowsPerSecond)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org