Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2020/01/06 09:10:46 UTC

[GitHub] [flink] sjwiesman commented on a change in pull request #10502: [FLINK-14825][state-processor-api][docs] Rework state processor api documentation

sjwiesman commented on a change in pull request #10502: [FLINK-14825][state-processor-api][docs] Rework state processor api documentation
URL: https://github.com/apache/flink/pull/10502#discussion_r363209178
 
 

 ##########
 File path: docs/dev/libs/state_processor_api.md
 ##########
 @@ -239,114 +225,302 @@ public class StatefulFunctionWithTime extends KeyedProcessFunction<Integer, Inte
  
    ValueState<Integer> state;
  
+   ListState<Long> updateTimes;
+
    @Override
    public void open(Configuration parameters) {
       ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("state", Types.INT);
       state = getRuntimeContext().getState(stateDescriptor);
+
+      ListStateDescriptor<Long> updateDescriptor = new ListStateDescriptor<>("times", Types.LONG);
+      updateTimes = getRuntimeContext().getListState(updateDescriptor);
    }
  
    @Override
    public void processElement(Integer value, Context ctx, Collector<Void> out) throws Exception {
       state.update(value + 1);
+      updateTimes.add(System.currentTimeMillis());
    }
 }
 {% endhighlight %}
 </div>
 <div data-lang="scala" markdown="1">
 {% highlight scala %}
-public class StatefulFunctionWithTime extends KeyedProcessFunction[Integer, Integer, Void] {
+class StatefulFunctionWithTime extends KeyedProcessFunction[Integer, Integer, Void] {
  
-   var state: ValueState[Integer];
+   var state: ValueState[Integer] = _
  
-   override def open(parameters: Configuration) {
-      val stateDescriptor = new ValueStateDescriptor("state", Types.INT);
-      state = getRuntimeContext().getState(stateDescriptor);
+   var updateTimes: ListState[Long] = _ 
+
+   @throws[Exception]
+   override def open(parameters: Configuration): Unit = {
+      val stateDescriptor = new ValueStateDescriptor("state", Types.INT)
+      state = getRuntimeContext().getState(stateDescriptor)
+
+      val updateDescriptor = new ListStateDescriptor("times", Types.LONG)
+      updateTimes = getRuntimeContext().getListState(updateDescriptor)
    }
  
-   override def processElement(value: Integer, ctx: Context, out: Collector[Void]) {
-      state.update(value + 1);
+   @throws[Exception]
+   override def processElement(value: Integer, ctx: Context, out: Collector[Void]): Unit = {
+      state.update(value + 1)
+      updateTimes.add(System.currentTimeMillis)
    }
 }
 {% endhighlight %}
 </div>
 </div>
 
-Then it can read by defining an output type and corresponding KeyedStateReaderFunction. 
+Then it can be read by defining an output type and a corresponding `KeyedStateReaderFunction`.
 
 <div class="codetabs" markdown="1">
 <div data-lang="java" markdown="1">
 {% highlight java %}
-class KeyedState {
-  Integer key;
-  Integer value;
+DataSet<KeyedState> keyedState = savepoint.readKeyedState("my-uid", new ReaderFunction());
+
+public class KeyedState {
+  public int key;
+
+  public int value;
+
+  public List<Long> times;
 }
  
-class ReaderFunction extends KeyedStateReaderFunction<Integer, KeyedState> {
+public class ReaderFunction extends KeyedStateReaderFunction<Integer, KeyedState> {
+
   ValueState<Integer> state;
  
+  ListState<Long> updateTimes;
+
   @Override
   public void open(Configuration parameters) {
-     ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("state", Types.INT);
-     state = getRuntimeContext().getState(stateDescriptor);
+    ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("state", Types.INT);
+    state = getRuntimeContext().getState(stateDescriptor);
+
+    ListStateDescriptor<Long> updateDescriptor = new ListStateDescriptor<>("times", Types.LONG);
+    updateTimes = getRuntimeContext().getListState(updateDescriptor);
   }
  
   @Override
   public void readKey(
     Integer key,
     Context ctx,
     Collector<KeyedState> out) throws Exception {
- 
-     KeyedState data = new KeyedState();
-     data.key    = key;
-     data.value  = state.value();
-     out.collect(data);
+        
+    KeyedState data = new KeyedState();
+    data.key    = key;
+    data.value  = state.value();
+    data.times  = StreamSupport
+      .stream(updateTimes.get().spliterator(), false)
+      .collect(Collectors.toList());
+
+    out.collect(data);
   }
 }
- 
-DataSet<KeyedState> keyedState = savepoint.readKeyedState("my-uid", new ReaderFunction());
 {% endhighlight %}
 </div>
 <div data-lang="scala" markdown="1">
 {% highlight scala %}
-case class KeyedState(key: Int, value: Int)
+val keyedState = savepoint.readKeyedState("my-uid", new ReaderFunction)
+
+case class KeyedState(key: Int, value: Int, times: List[Long])
  
 class ReaderFunction extends KeyedStateReaderFunction[Integer, KeyedState] {
-  var state: ValueState[Integer];
  
-  override def open(parameters: Configuration) {
-     val stateDescriptor = new ValueStateDescriptor("state", Types.INT);
-     state = getRuntimeContext().getState(stateDescriptor);
-  }
+  var state: ValueState[Integer] = _
+
+  var updateTimes: ListState[Long] = _
  
+  @throws[Exception]
+  override def open(parameters: Configuration): Unit = {
+     val stateDescriptor = new ValueStateDescriptor("state", Types.INT)
+     state = getRuntimeContext().getState(stateDescriptor)
+
+     val updateDescriptor = new ListStateDescriptor("times", Types.LONG)
+     updateTimes = getRuntimeContext().getListState(updateDescriptor)
+  }
+
+  @throws[Exception]
   override def processKey(
     key: Int,
     ctx: Context,
-    out: Collector[Keyedstate]) throws Exception {
+    out: Collector[KeyedState]): Unit = {
  
-     val data = KeyedState(key, state.value())
-     out.collect(data);
+     val data = KeyedState(key, state.value(), updateTimes.get.asScala.toList)
+     out.collect(data)
   }
 }
-
-val keyedState = savepoint.readKeyedState("my-uid", new ReaderFunction());
 {% endhighlight %}
 </div>
 </div>
 
-{% panel **Note:** When using a `KeyedStateReaderFunction` all state descriptors must be registered eagerly inside of open. Any attempt to call `RuntimeContext#getState`, `RuntimeContext#getListState`, or `RuntimeContext#getMapState` will result in a `RuntimeException`. %}
+Along with reading registered state values, each key has access to a `Context` with metadata such as its registered event-time and processing-time timers.
+
+{% panel **Note:** When using a `KeyedStateReaderFunction`, all state descriptors must be registered eagerly inside of `open`. Any attempt to call `RuntimeContext#get*State` outside of `open` will result in a `RuntimeException`. %}
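+
+For example, a reader can also inspect a key's registered timers alongside its state. The following is a minimal, illustrative sketch only: the `KeyedStateWithTimers` POJO and `TimerReaderFunction` are hypothetical names, and it assumes the reader's `Context` exposes `registeredEventTimeTimers()` and `registeredProcessingTimeTimers()` accessors.
+
+{% highlight java %}
+public class KeyedStateWithTimers {
+  public int key;
+  public int value;
+  public Set<Long> eventTimeTimers;
+  public Set<Long> processingTimeTimers;
+}
+
+public class TimerReaderFunction extends KeyedStateReaderFunction<Integer, KeyedStateWithTimers> {
+
+  ValueState<Integer> state;
+
+  @Override
+  public void open(Configuration parameters) {
+    // State descriptors must be registered eagerly here, as noted above.
+    state = getRuntimeContext().getState(new ValueStateDescriptor<>("state", Types.INT));
+  }
+
+  @Override
+  public void readKey(Integer key, Context ctx, Collector<KeyedStateWithTimers> out) throws Exception {
+    KeyedStateWithTimers data = new KeyedStateWithTimers();
+    data.key   = key;
+    data.value = state.value();
+
+    // Timer metadata registered for the current key (accessor names are an assumption).
+    data.eventTimeTimers      = ctx.registeredEventTimeTimers();
+    data.processingTimeTimers = ctx.registeredProcessingTimeTimers();
+
+    out.collect(data);
+  }
+}
+{% endhighlight %}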
 
 ## Writing New Savepoints
 
-State writers are based around the abstraction of `Savepoint`, where one `Savepoint` may have many operators and the state for any particular operator is created using a `BootstrapTransformation`.
+`Savepoint`s may also be written, which enables use cases such as bootstrapping state based on historical data.
+Each savepoint is made up of one or more `BootstrapTransformation`s (explained below), each of which defines the state for an individual operator.
 
-A `BootstrapTransformation` starts with a `DataSet` containing the values that are to be written into state.
-The transformation may be optionally `keyed` depending on whether or not you are writing keyed or operator state.
-Finally a bootstrap function is applied depending to the transformation; Flink supplies `KeyedStateBootstrapFunction` for writing keyed state, `StateBootstrapFunction` for writing non keyed state, and `BroadcastStateBootstrapFunction` for writing broadcast state.
+<div class="codetabs" markdown="1">
+<div data-lang="java" markdown="1">
+{% highlight java %}
+int maxParallelism = 128;
+
+Savepoint
+    .create(new MemoryStateBackend(), maxParallelism)
+    .withOperator("uid1", transformation1)
+    .withOperator("uid2", transformation2)
+    .write(savepointPath);
+{% endhighlight %}
+</div>
+<div data-lang="scala" markdown="1">
+{% highlight scala %}
+val maxParallelism = 128
+
+Savepoint
+    .create(new MemoryStateBackend(), maxParallelism)
+    .withOperator("uid1", transformation1)
+    .withOperator("uid2", transformation2)
+    .write(savepointPath)
+{% endhighlight %}
+</div>
+</div>
+
+The [UIDs]({{ site.baseurl}}/ops/state/savepoints.html#assigning-operator-ids) associated with each operator must match one-to-one with the UIDs assigned to the operators in your `DataStream` application; this is how Flink knows which state maps to which operator.
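+
+To make the correspondence concrete, the following is an illustrative sketch only; `MyIntegerSource` and the surrounding pipeline are hypothetical, while `StatefulFunctionWithTime` and `"uid1"` are taken from the examples above. The uid passed to `withOperator("uid1", ...)` when writing the savepoint is the same string assigned via `uid("uid1")` in the `DataStream` job that restores from it.
+
+{% highlight java %}
+StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+
+env
+    .addSource(new MyIntegerSource())          // hypothetical source of Integer events
+    .keyBy(value -> value)
+    .process(new StatefulFunctionWithTime())   // the stateful operator being bootstrapped
+    .uid("uid1")                               // must match withOperator("uid1", transformation1)
+    .addSink(new DiscardingSink<>());
+{% endhighlight %}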
 
 Review comment:
   Nice catch!
