Posted to commits@samza.apache.org by aj...@apache.org on 2023/01/18 19:33:31 UTC

svn commit: r1906774 [36/49] - in /samza/site: ./ archive/ blog/ case-studies/ community/ contribute/ img/latest/learn/documentation/api/ learn/documentation/latest/ learn/documentation/latest/api/ learn/documentation/latest/api/javadocs/ learn/documen...

Modified: samza/site/learn/documentation/latest/jobs/samza-configurations.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/jobs/samza-configurations.html?rev=1906774&r1=1906773&r2=1906774&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/jobs/samza-configurations.html (original)
+++ samza/site/learn/documentation/latest/jobs/samza-configurations.html Wed Jan 18 19:33:25 2023
@@ -227,6 +227,12 @@
     
       
         
+      <a class="side-navigation__group-item" data-match-active="" href="/releases/1.8.0">1.8.0</a>
+      
+        
+      <a class="side-navigation__group-item" data-match-active="" href="/releases/1.7.0">1.7.0</a>
+      
+        
       <a class="side-navigation__group-item" data-match-active="" href="/releases/1.6.0">1.6.0</a>
       
         
@@ -538,6 +544,14 @@
               
               
 
+              <li class="hide"><a href="/learn/documentation/1.8.0/jobs/samza-configurations">1.8.0</a></li>
+
+              
+
+              <li class="hide"><a href="/learn/documentation/1.7.0/jobs/samza-configurations">1.7.0</a></li>
+
+              
+
               <li class="hide"><a href="/learn/documentation/1.6.0/jobs/samza-configurations">1.6.0</a></li>
 
               
@@ -639,1405 +653,857 @@
    limitations under the License.
 -->
 
-<p>The following table lists the complete set of properties that can be included in a Samza job configuration file.<br></p>
+<p>The following table lists the complete set of properties that can be included in a Samza job configuration file.<br /></p>
 
 <ul>
-<li><a href="#application-configurations">1. Application Configurations</a>
-
-<ul>
-<li><a href="#advanced-application-configurations">1.1 Advanced Application Configurations</a></li>
-</ul></li>
-<li><a href="#checkpointing">2. Checkpointing</a>
-
-<ul>
-<li><a href="#advanced-checkpointing-configuration">2.1 Advanced Checkpointing Configurations</a></li>
-</ul></li>
-<li><a href="#systems-streams">3. Systems &amp; Streams</a>
-
-<ul>
-<li><a href="#advanced-system-stream-configurations">3.1 Advanced System &amp; Stream Configuration</a></li>
-<li><a href="#kafka">3.2 Kafka</a></li>
-<li><a href="#hdfs">3.3 HDFS</a></li>
-<li><a href="#eventhubs">3.4 Event Hubs</a></li>
-<li><a href="#kinesis">3.5 Kinesis</a></li>
-<li><a href="#elasticsearch">3.6 ElasticSearch</a></li>
-<li><a href="#azure-blob-storage">3.7 Azure Blob Storage</a></li>
-</ul></li>
-<li><a href="#state-storage">4. State Storage</a>
-
-<ul>
-<li><a href="#advanced-storage-configurations">4.1 Advanced Storage Configurations</a></li>
-</ul></li>
-<li><a href="#deployment">5. Deployment</a>
-
-<ul>
-<li><a href="#yarn-cluster-deployment">5.1 YARN Cluster Deployment</a>
-
-<ul>
-<li><a href="#advanced-cluster-configurations">5.1.1 Advanced Cluster Configurations</a></li>
-</ul></li>
-<li><a href="#standalone-deployment">5.2 Standalone Deployment</a>
-
-<ul>
-<li><a href="#advanced-standalone-configurations">5.2.1 Advanced Standalone Configurations</a></li>
-</ul></li>
-</ul></li>
-<li><a href="#metrics">6. Metrics</a></li>
+  <li><a href="#application-configurations">1. Application Configurations</a>
+    <ul>
+      <li><a href="#advanced-application-configurations">1.1 Advanced Application Configurations</a></li>
+    </ul>
+  </li>
+  <li><a href="#checkpointing">2. Checkpointing</a>
+    <ul>
+      <li><a href="#advanced-checkpointing-configuration">2.1 Advanced Checkpointing Configurations</a></li>
+    </ul>
+  </li>
+  <li><a href="#systems-streams">3. Systems &amp; Streams</a>
+    <ul>
+      <li><a href="#advanced-system-stream-configurations">3.1 Advanced System &amp; Stream Configuration</a></li>
+      <li><a href="#kafka">3.2 Kafka</a></li>
+      <li><a href="#hdfs">3.3 HDFS</a></li>
+      <li><a href="#eventhubs">3.4 Event Hubs</a></li>
+      <li><a href="#kinesis">3.5 Kinesis</a></li>
+      <li><a href="#elasticsearch">3.6 ElasticSearch</a></li>
+      <li><a href="#azure-blob-storage">3.7 Azure Blob Storage</a></li>
+    </ul>
+  </li>
+  <li><a href="#state-storage">4. State Storage</a>
+    <ul>
+      <li><a href="#advanced-storage-configurations">4.1 Advanced Storage Configurations</a></li>
+    </ul>
+  </li>
+  <li><a href="#deployment">5. Deployment</a>
+    <ul>
+      <li><a href="#yarn-cluster-deployment">5.1 YARN Cluster Deployment</a>
+        <ul>
+          <li><a href="#advanced-cluster-configurations">5.1.1 Advanced Cluster Configurations</a></li>
+        </ul>
+      </li>
+      <li><a href="#standalone-deployment">5.2 Standalone Deployment</a>
+        <ul>
+          <li><a href="#advanced-standalone-configurations">5.2.1 Advanced Standalone Configurations</a></li>
+        </ul>
+      </li>
+    </ul>
+  </li>
+  <li><a href="#metrics">6. Metrics</a></li>
 </ul>
 
-<h3 id="1-application-configurations"><a name="application-configurations"></a> <a href="#application-configurations">1. Application Configurations</a></h3>
-
+<h3 id="-1-application-configurations"><a name="application-configurations"></a> <a href="#application-configurations">1. Application Configurations</a></h3>
 <p>These are the basic properties for setting up a Samza application.</p>
 
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>app.name</td>
-<td></td>
-<td><strong>Required:</strong> The name of your application.</td>
-</tr>
-<tr>
-<td>app.id</td>
-<td>1</td>
-<td>If you run several instances of your application at the same time, you need to give each instance a different app.id. This is important, since otherwise the applications will overwrite each others&rsquo; checkpoints, and perhaps interfere with each other in other ways.</td>
-</tr>
-<tr>
-<td>app.class</td>
-<td></td>
-<td>This is <strong>required if running on YARN</strong>. The application to run. The value is a fully-qualified Java classname, which must implement StreamApplication. A StreamApplication describes as a series of transformations on the streams.</td>
-</tr>
-<tr>
-<td>job.factory.class</td>
-<td></td>
-<td>This is <strong>required if running on YARN</strong>. The job factory to use for running this job. <br> The value is a fully-qualified Java classname, which must implement StreamJobFactory.<br> Samza ships with three implementations:<br><br><code>org.apache.samza.job.yarn.YarnJobFactory</code><br>Runs your job on a YARN grid. See below for YARN-specific configuration.<br><br><code>org.apache.samza.job.local.ThreadJobFactory</code><br><strong>For dev deployments only.</strong> Runs your job on your local machine using threads.<br><br><code>org.apache.samza.job.local.ProcessJobFactory</code><br><strong>For dev deployments only.</strong> Runs your job on your local machine as a subprocess. An optional command builderproperty can also be specified (see task.command.class for details).</td>
-</tr>
-<tr>
-<td>job.name</td>
-<td></td>
-<td><em>(Deprecated in favor of app.name)</em>  The name of your job. This name appears on the Samza dashboard, and it is used to tell apart this job&rsquo;s checkpoints from other jobs&rsquo; checkpoints.</td>
-</tr>
-<tr>
-<td>job.id</td>
-<td>1</td>
-<td><em>(Deprecated in favor of app.id)</em> If you run several instances of your job at the same time, you need to give each execution a different job.id. This is important, since otherwise the jobs will overwrite each others&rsquo; checkpoints, and perhaps interfere with each other in other ways.</td>
-</tr>
-<tr>
-<td>job.default.system</td>
-<td></td>
-<td><strong>Required:</strong> The system-name to use for creating input or output streams for which the system is not explicitly configured. This property will also be used as default for <code>job.coordinator.system</code>, <code>task.checkpoint.system</code> and <code>job.changelog.system</code> if none are defined.</td>
-</tr>
-<tr>
-<td>task.class</td>
-<td></td>
-<td>Used for legacy purposes; replace with <code>app.class</code> in new jobs. The fully-qualified name of the Java class which processes incoming messages from input streams. The class must implement <a href="../api/javadocs/org/apache/samza/task/StreamTask.html">StreamTask</a> or <a href="../api/javadocs/org/apache/samza/task/AsyncStreamTask.html">AsyncStreamTask</a>, and may optionally implement <a href="../api/javadocs/org/apache/samza/task/InitableTask.html">InitableTask</a>, <a href="../api/javadocs/org/apache/samza/task/ClosableTask.html">ClosableTask</a> and/or <a href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>. The class will be instantiated several times, once for every input stream partition.</td>
-</tr>
-<tr>
-<td>job.host-affinity.enabled</td>
-<td>false</td>
-<td>This property indicates whether host-affinity is enabled or not. Host-affinity refers to the ability of Samza to request and allocate a container on the same host every time the job is deployed. When host-affinity is enabled, Samza makes a &ldquo;best-effort&rdquo; to honor the host-affinity constraint. The property <code>cluster-manager.container.request.timeout.ms</code> determines how long to wait before de-prioritizing the host-affinity constraint and assigning the container to any available resource.</td>
-</tr>
-<tr>
-<td>job.jmx.enabled</td>
-<td>true</td>
-<td>Determines whether a JMX server should be started on the job&rsquo;s JobCoordinator and Container. (true or false).</td>
-</tr>
-<tr>
-<td>task.window.ms</td>
-<td>-1</td>
-<td>If task.class implements <a href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>, it can receive a windowing callback in regular intervals. This property specifies the time between window() calls, in milliseconds. If the number is negative (the default), window() is never called. A <code>window()</code> call will never  occur concurrently with the processing of a message. If a message is being processed when a window() call is due, the invocation of window happens after processing the message. This property is set automatically when using join or window operators in a High Level API StreamApplication Note: task.window.ms should be set to be much larger than average process or window call duration to avoid starving regular processing.</td>
-</tr>
-<tr>
-<td>task.log4j.system</td>
-<td></td>
-<td>Specify the system name for the StreamAppender. If this property is not specified in the config, an exception will be thrown. (See <a href="logging.html#stream-log4j-appender">Stream Log4j Appender</a>) Example: task.log4j.system=kafka</td>
-</tr>
-<tr>
-<td>serializers.registry.<br><strong><em>serde-name</em></strong>.class</td>
-<td></td>
-<td>Use this property to register a serializer/deserializer, which defines a way of encoding data as an array of bytes (used for messages in streams, and for data in persistent storage). You can give a serde any serde-name you want, and reference that name in properties like systems.*.samza.key.serde, systems.*.samza.msg.serde, streams.*.samza.key.serde, streams.*.samza.msg.serde, stores.*.key.serde and stores.*.msg.serde. The value of this property is the fully-qualified name of a Java class that implements SerdeFactory. Samza ships with the following serde implementations:<br><br><code>org.apache.samza.serializers.ByteSerdeFactory</code><br>A no-op serde which passes through the undecoded byte array. <br><br><code>org.apache.samza.serializers.ByteBufferSerdeFactory</code><br>Encodes <code>java.nio.ByteBuffer</code> objects. <br><br><code>org.apache.samza.serializers.IntegerSerdeFactory</code><br>Encodes <code>java.lang.Integer</code> objects as binary (4 bytes fixed-length big-endian encoding).<br><br><code>org.apache.samza.serializers.StringSerdeFactory</code><br>Encodes <code>java.lang.String</code> objects as UTF-8. <br><br><code>org.apache.samza.serializers.JsonSerdeFactory</code><br>Encodes nested structures of <code>java.util.Map</code>, <code>java.util.List</code> etc. as JSON. Note: This Serde enforces a dash-separated property naming convention, while JsonSerdeV2 doesn&rsquo;t. This serde is primarily meant for Samza&rsquo;s internal usage, and is publicly available for backwards compatibility.<br><br><code>org.apache.samza.serializers.JsonSerdeV2Factory</code><br>Encodes nested structures of <code>java.util.Map</code>, <code>java.util.List</code> etc. as JSON. Note: This Serde uses Jackson&rsquo;s default (camelCase) property naming convention. This serde should be preferred over JsonSerde, especially in High Level API, unless the dasherized naming convention is required (e.g., for backwards compatibility).<br><br><code>org.apache.samza.serializers.LongSerdeFactory</code><br>Encodes <code>java.lang.Long</code> as binary (8 bytes fixed-length big-endian encoding).<br><br><code>org.apache.samza.serializers.DoubleSerdeFactory</code><br>Encodes <code>java.lang.Double</code> as binary (8 bytes double-precision float point). <br><br><code>org.apache.samza.serializers.UUIDSerdeFactory</code><br>Encodes <code>java.util.UUID</code> objects.<br><br><code>org.apache.samza.serializers.SerializableSerdeFactory</code><br>Encodes <code>java.io.Serializable</code> objects.<br><br><code>org.apache.samza.serializers.MetricsSnapshotSerdeFactory</code><br>Encodes <code>org.apache.samza.metrics.reporter.MetricsSnapshot</code> objects (which are used for reporting metrics) as JSON.<br><br><code>org.apache.samza.serializers.KafkaSerdeFactory</code><br>Adapter which allows existing <code>kafka.serializer.Encoder</code> and <code>kafka.serializer.Decoder</code> implementations to be used as Samza serdes. Set <code>serializers.registry.serde-name.encoder</code> and <code>serializers.registry.serde-name.decoder</code> to the appropriate class names.</td>
-</tr>
-</tbody></table>
-
-<h4 id="1-1-advanced-application-configurations"><a name="advanced-application-configurations"></a> <a href="#advanced-application-configurations">1.1 Advanced Application Configurations</a></h4>
-
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>job.changelog.system</td>
-<td>inherited from job.default.system</td>
-<td>This property is required if you would like to override the system defined in <code>job.default.system</code> for the changelog. The changelog will be used with the stream specified in <code>stores.store-name.changelog</code> config. You can override this system by specifying both the system and the stream in <code>stores.store-name.changelog</code>.</td>
-</tr>
-<tr>
-<td>job.coordinator.system</td>
-<td>inherited from job.default.system</td>
-<td>This property is required if you would like to override the system defined in <code>job.default.system</code> for coordination. The <strong><em>system-name</em></strong> to use for creating and maintaining the Coordinator Stream.</td>
-</tr>
-<tr>
-<td>job.config.rewriter.<br><strong><em>rewriter-name</em></strong>.class</td>
-<td>(none)</td>
-<td>You can optionally define configuration rewriters, which have the opportunity to dynamically modify the job configuration before the job is started. For example, this can be useful for pulling configuration from an external configuration management system, or for determining the set of input streams dynamically at runtime. The value of this property is a fully-qualified Java classname which must implement <a href="../api/javadocs/org/apache/samza/config/ConfigRewriter.html">ConfigRewriter</a>. Samza ships with these rewriters by default:<br><br><code>org.apache.samza.config.RegExTopicGenerator</code><br>When consuming from Kafka, this allows you to consume all Kafka topics that match some regular expression (rather than having to list each topic explicitly). This rewriter has additional configuration.<br><br><code>org.apache.samza.config.EnvironmentConfigRewriter</code><br>This rewriter takes environment variables that are prefixed with <code>SAMZA_</code> and adds them to the configuration, overriding previous values where they exist. The keys are lowercased and underscores are converted to dots.</td>
-</tr>
-<tr>
-<td>job.config.rewriters</td>
-<td>(none)</td>
-<td>If you have defined configuration rewriters, you need to list them here, in the order in which they should be applied. The value of this property is a comma-separated list of <strong><em>rewriter-name</em></strong> tokens.</td>
-</tr>
-<tr>
-<td>job.config.rewriter.<br><strong><em>rewriter-name</em></strong>.system</td>
-<td>(none)</td>
-<td>Set this property to the <code>system-name</code> of the Kafka system from which you want to consume all matching topics.</td>
-</tr>
-<tr>
-<td>job.config.rewriter.<br><strong><em>rewriter-name</em></strong>.regex</td>
-<td>(none)</td>
-<td>A regular expression specifying which topics you want to consume within the Kafka system <code>job.config.rewriter.*.system</code>. Any topics matched by this regular expression will be consumed in addition to any topics you specify in your application.</td>
-</tr>
-<tr>
-<td>job.config.rewriter.<br><strong><em>rewriter-name</em></strong>.config.*</td>
-<td></td>
-<td>Any properties specified within this namespace are applied to the configuration of streams that match the regex in <code>job.config.rewriter.*.regex</code>. For example, you can set <code>job.config.rewriter.*.config.samza.msg.serde</code> to configure the deserializer for messages in the matching streams, which is equivalent to setting <code>systems.*.streams.*.samza.msg.serde</code> for each topic that matches the regex.</td>
-</tr>
-<tr>
-<td>job.container.thread.<br>pool.size</td>
-<td>0</td>
-<td>If configured, the container thread pool will be used to run synchronous operations of each task <a href="#../container/event-loop.html">in parallel</a>. The operations include StreamTask.process(), WindowableTask.window(), and internally Task.commit(). If not configured and the default value of 0 is used, all task operations will run in a single thread.</td>
-</tr>
-<tr>
-<td>job.coordinator.<br>monitor-partition-change.<br>frequency.ms</td>
-<td>300000</td>
-<td>The frequency at which the input streams&rsquo; partition count change should be detected. When the input partition count change is detected, Samza will automatically restart a stateless job or fail a stateful job. A longer time interval is recommended for jobs w/ large number of input system stream partitions, since gathering partition count may incur measurable overhead to the job. You can completely disable partition count monitoring by setting this value to 0 or a negative integer, which will also disable auto-restart/failing behavior of a Samza job on partition count changes.</td>
-</tr>
-<tr>
-<td>job.coordinator.segment.<br>bytes</td>
-<td>26214400</td>
-<td>If you are using a Kafka system for coordinator stream, this is the segment size to be used for the coordinator topic&rsquo;s log segments. Keeping this number small is useful because it increases the frequency that Kafka will garbage collect old messages.</td>
-</tr>
-<tr>
-<td>job.coordinator.replication.<br>factor</td>
-<td>300000</td>
-<td>The frequency at which the input streams&rsquo; partition count change should be detected. When the input partition count change is detected, Samza will automatically restart a stateless job or fail a stateful job. A longer time interval is recommended for jobs w/ large number of input system stream partitions, since gathering partition count may incur measurable overhead to the job. You can completely disable partition count monitoring by setting this value to 0 or a negative integer, which will also disable auto-restart/failing behavior of a Samza job on partition count changes.</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>grouper.factory</td>
-<td><code>org.apache.samza.</code><br><code>container.grouper.stream.</code><br><code>GroupByPartitionFactory</code></td>
-<td>A factory class that is used to determine how input SystemStreamPartitions are grouped together for processing in individual StreamTask instances. The factory must implement the SystemStreamPartitionGrouperFactory interface. Once this configuration is set, it can&rsquo;t be changed, since doing so could violate state semantics, and lead to a loss of data.<br><br><code>org.apache.samza.container.grouper.stream.</code><br><code>GroupByPartitionFactory</code><br>Groups input stream partitions according to their partition number. This grouping leads to a single StreamTask processing all messages for a single partition (e.g. partition 0) across all input streams that have a partition 0. Therefore, the default is that you get one StreamTask for all input partitions with the same partition number. Using this strategy, if two input streams have a partition 0, then messages from both partitions will be routed to a single StreamTask. This partitioning strategy is useful for joining and aggregating streams.<br><br><code>org.apache.samza.container.grouper.stream.</code><br><code>GroupBySystemStreamPartitionFactory</code><br>Assigns each SystemStreamPartition to its own unique StreamTask. The GroupBySystemStreamPartitionFactory is useful in cases where you want increased parallelism (more containers), and don&rsquo;t care about co-locating partitions for grouping or joins, since it allows for a greater number of StreamTasks to be divided up amongst Samza containers.</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>matcher.class</td>
-<td></td>
-<td>If you want to enable static partition assignment, then this is a required configuration. The value of this property is a fully-qualified Java class name that implements the interface org.apache.samza.system.SystemStreamPartitionMatcher. Samza ships with two matcher classes:<br><br><code>org.apache.samza.system.RangeSystemStreamPartitionMatcher</code><br>This classes uses a comma separated list of range(s) to determine which partition matches, and thus statically assigned to the Job. For example &ldquo;2,3,1-2&rdquo;, statically assigns partition 1, 2, and 3 for all the specified system and streams (topics in case of Kafka) to the job. For config validation each element in the comma separated list much conform to one of the following regex:<br><code>(\\d+)</code>&ldquo; or&rdquo;<code>(\\d+-\\d+)</code>&ldquo;<br><code>JobConfig.SSP_MATCHER_CLASS_RANGE</code> constant has the canonical name of this class.<br><br><code>org.apache.samza.system.RegexSystemStreamPartitionMatcher</code><br>This classes uses a standard Java supported regex to determine which partition matches, and thus statically assigned to the Job. For example &rdquo;[1-2]&ldquo;, statically assigns partition 1 and 2 for all the specified system and streams (topics in case of Kafka) to the job. JobConfig.SSP<em>MATCHER</em>CLASS_REGEX constant has the canonical name of this class.</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>matcher.config.<br>range</td>
-<td></td>
-<td>If <code>job.systemstreampartition.matcher.class</code> is specified, and the value of this property is <code>org.apache.samza.system.RangeSystemStreamPartitionMatcher</code>, then this property is a required configuration. Specify a comma separated list of range(s) to determine which partition matches, and thus statically assigned to the Job. For example &quot;2,3,11-20&rdquo;, statically assigns partition 2, 3, and 11 to 20 for all the specified system and streams (topics in case of Kafka) to the job. A single configuration value like &ldquo;19&rdquo; is valid as well. This statically assigns partition 19. For config validation each element in the comma separated list much conform to one of the following regex:<br>&ldquo;<code>(\\d+)</code>&rdquo; or &ldquo;<code>(\\d+-\\d+)</code>&rdquo;</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>matcher.config.<br>regex</td>
-<td></td>
-<td>If <code>job.systemstreampartition.matcher.class</code> is specified, and the value of this property is <code>org.apache.samza.system.RegexSystemStreamPartitionMatcher</code>, then this property is a required configuration. The value should be a valid Java supported regex. For example &ldquo;[1-2]&rdquo;, statically assigns partition 1 and 2 for all the specified system and streams (topics in case of Kakfa) to the job.</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>matcher.config.<br>job.factory.regex</td>
-<td></td>
-<td>This configuration can be used to specify the Java supported regex to match the StreamJobFactory for which the static partition assignment should be enabled. This configuration enables the partition assignment feature to be used for custom StreamJobFactory(ies) as well.<br>This config defaults to the following value: &ldquo;<em>org\.apache\.samza\.job\.local(.<em>ProcessJobFactory &#124; .</em>ThreadJobFactory)</em>&rdquo;, which enables static partition assignment when job.factory.class is set to <code>org.apache.samza.job.local.ProcessJobFactory</code> or <code>org.apache.samza.job.local.ThreadJobFactory</code>.</td>
-</tr>
-<tr>
-<td>job.systemstreampartition.<br>input.expansion.enabled</td>
-<td>true</td>
-<td>When enabled, this allows stateful jobs to expand or contract their partition count by a multiple of the previous count so that events from an input stream partition are processed on the same task as before. This will prevent erroneous results. This feature is disabled if the configuration is set to false or if the job is stateless. See <a href="https://cwiki.apache.org/confluence/display/SAMZA/SEP-5%3A+Enable+partition+expansion+of+input+streams">SEP-5</a> for more details.</td>
-</tr>
-<tr>
-<td>job.security.manager.<br>factory</td>
-<td>(none)</td>
-<td>This is the factory class used to create the proper SecurityManager to handle security for Samza containers when running in a secure environment, such as Yarn with Kerberos eanbled. Samza ships with one security manager by default:<br><br><code>org.apache.samza.job.yarn.SamzaYarnSecurityManagerFactory</code><br>Supports Samza containers to run properly in a Kerberos enabled Yarn cluster. Each Samza container, once started, will create a SamzaContainerSecurityManager. SamzaContainerSecurityManager runs on its separate thread and update user&rsquo;s delegation tokens at the interval specified by yarn.token.renewal.interval.seconds. See Yarn Security for details.</td>
-</tr>
-<tr>
-<td>task.callback.timeout.ms</td>
-<td>-1(no timeout)</td>
-<td>For an AsyncStreamTask, this defines the max allowed time for a processAsync callback to complete. For a StreamTask, this is the max allowed time for a process call to complete. When the timeout happens,the container is shutdown. Default is no timeout.</td>
-</tr>
-<tr>
-<td>task.chooser.class</td>
-<td><code>org.apache.samza.</code><br><code>system.chooser.</code><br><code>RoundRobinChooserFactory</code></td>
-<td>This property can be optionally set to override the default <a href="../container/streams.html#messagechooser">message chooser</a>, which determines the order in which messages from multiple input streams are processed. The value of this property is the fully-qualified name of a Java class that implements <a href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a>.</td>
-</tr>
-<tr>
-<td>task.command.class</td>
-<td><code>org.apache.samza.job.</code><br><code>ShellCommandBuilder</code></td>
-<td>The fully-qualified name of the Java class which determines the command line and environment variables for a <a href="../container/samza-container.html">container</a>. It must be a subclass of <a href="../api/javadocs/org/apache/samza/job/CommandBuilder.html">CommandBuilder</a>. This defaults to task.command.class=<code>org.apache.samza.job.ShellCommandBuilder</code>.</td>
-</tr>
-<tr>
-<td>task.drop.deserialization.errors</td>
-<td>false</td>
-<td>This property is to define how the system deals with deserialization failure situation. If set to true, the system will skip the error messages and keep running. If set to false, the system with throw exceptions and fail the container.</td>
-</tr>
-<tr>
-<td>task.drop.serialization.errors</td>
-<td>false</td>
-<td>This property is to define how the system deals with serialization failure situation. If set to true, the system will drop the error messages and keep running. If set to false, the system with throw exceptions and fail the container.</td>
-</tr>
-<tr>
-<td>task.drop.producer.errors</td>
-<td>false</td>
-<td>If true, producer errors will be logged and ignored. The only exceptions that will be thrown are those which are likely caused by the application itself (e.g. serializaiton errors). If false, the producer will be closed and producer errors will be propagated upward until the container ultimately fails. Failing the container is a safety precaution to ensure the latest checkpoints only reflect the events that have been completely and successfully processed. However, some applications prefer to remain running at all costs, even if that means lost messages. Setting this property to true will enable applications to recover from producer errors at the expense of one or many (in the case of batching producers) dropped messages. If you enable this, it is highly recommended that you also configure alerting on the &lsquo;producer-send-failed&rsquo; metric, since the producer might drop messages indefinitely. The logic for this property is specific to each SystemProducer implementation. It will have no effect for SystemProducers that ignore the property.</td>
-</tr>
-<tr>
-<td>task.ignored.exceptions</td>
-<td></td>
-<td>This property specifies which exceptions should be ignored if thrown in a task&rsquo;s process or window methods. The exceptions to be ignored should be a comma-separated list of fully-qualified class names of the exceptions or * to ignore all exceptions.</td>
-</tr>
-<tr>
-<td>task.log4j.location.info.enabled</td>
-<td>false</td>
-<td>Defines whether or not to include log4j&rsquo;s LocationInfo data in Log4j StreamAppender messages. LocationInfo includes information such as the file, class, and line that wrote a log message. This setting is only active if the Log4j stream appender is being used. (See <a href="../logging.html#stream-log4j-appender">Stream Log4j Appender</a>)</td>
-</tr>
-<tr>
-<td>task.max.idle.ms</td>
-<td>10</td>
-<td>The maximum time to wait for a task worker to complete when there are no new messages to handle before resuming the main loop and potentially polling for more messages. <code>See task.poll.interval.ms</code> This timeout value prevents the main loop from spinning when there is nothing for it to do. Increasing this value will reduce the background load of the thread, but, also potentially increase message latency. It should not be set greater than the <code>task.poll.interval.ms</code>.</td>
-</tr>
-<tr>
-<td>task.max.concurrency</td>
-<td>1</td>
-<td>Max number of outstanding messages being processed per task at a time, and it’s applicable to both StreamTask and AsyncStreamTask. The values can be:<br><br><code>1</code><br>Each task processes one message at a time. Next message will wait until the current message process completes. This ensures strict in-order processing.<br><br><code>&gt;1</code><br>Multiple outstanding messages are allowed to be processed per task at a time. The completion can be out of order. This option increases the parallelism within a task, but may result in out-of-order processing.</td>
-</tr>
-<tr>
-<td>task.name.grouper.factory</td>
-<td><code>org.apache.samza.</code><br><code>container.grouper.task.</code><br><code>GroupByContainerCountFactory</code></td>
-<td>The fully-qualified name of the Java class which determines the factory class which will build the TaskNameGrouper. The default configuration value if the property is not present is task.name.grouper.factory=<code>org.apache.samza.container.grouper.task.</code><br><code>GroupByContainerCountFactory</code>.The user can specify a custom implementation of the TaskNameGrouperFactory where a custom logic is implemented for grouping the tasks.<br>Note: For non-cluster applications (ones using coordination service) one must use <code>org.apache.samza.container.grouper.</code><br><code>task.GroupByContainerIdsFactory</code></td>
-</tr>
-<tr>
-<td>task.opts</td>
-<td></td>
-<td>Any JVM options to include in the command line when executing Samza containers. For example, this can be used to set the JVM heap size, to tune the garbage collector, or to enable remote debugging. This cannot be used when running with ThreadJobFactory. Anything you put in task.opts gets forwarded directly to the commandline as part of the JVM invocation.<br>Example: <code>task.opts=-XX:+HeapDumpOnOutOfMemoryError -XX:+UseConcMarkSweepGC</code></td>
-</tr>
-<tr>
-<td>task.poll.interval.ms</td>
-<td>50</td>
-<td>Samza&rsquo;s container polls for more messages under two conditions. The first condition arises when there are simply no remaining buffered messages to process for any input SystemStreamPartition. The second condition arises when some input SystemStreamPartitions have empty buffers, but some do not. In the latter case, a polling interval is defined to determine how often to refresh the empty SystemStreamPartition buffers. By default, this interval is 50ms, which means that any empty SystemStreamPartition buffer will be refreshed at least every 50ms. A higher value here means that empty SystemStreamPartitions will be refreshed less often, which means more latency is introduced, but less CPU and network will be used. Decreasing this value means that empty SystemStreamPartitions are refreshed more frequently, thereby introducing less latency, but increasing CPU and network utilization.</td>
-</tr>
-<tr>
-<td>task.shutdown.ms</td>
-<td>30000</td>
-<td>This property controls how long the Samza container will wait for an orderly shutdown of task instances.</td>
-</tr>
-</tbody></table>
-
-<h3 id="2-checkpointing"><a name="checkpointing"></a> <a href="#checkpointing">2. Checkpointing</a></h3>
-
-<p><a href="../container/checkpointing.html">Checkpointing</a> is not required, but recommended for most jobs. If you don&rsquo;t configure checkpointing, and a job or container restarts, it does not remember which messages it has already processed. Without checkpointing, consumer behavior on startup is determined by the &hellip;samza.offset.default setting. Checkpointing allows a job to start up where it previously left off.</p>
-
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>task.checkpoint.factory</td>
-<td></td>
-<td>To enable <a href="../container/checkpointing.html">checkpointing</a>, you must set this property to the fully-qualified name of a Java class that implements <a href="../api/javadocs/org/apache/samza/checkpoint/CheckpointManagerFactory.html">CheckpointManagerFactory</a>. Samza ships with two checkpoint managers by default: <br><br><code>org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory</code> <br>Writes checkpoints to a dedicated topic on a Kafka cluster. This is the recommended option if you are already using Kafka for input or output streams. Use the task.checkpoint.system property to configure which Kafka cluster to use for checkpoints.<br><br><code>org.apache.samza.checkpoint.file.FileSystemCheckpointManagerFactory</code> <br><strong>For dev deployments only.</strong> Writes checkpoints to files on the local filesystem. You can configure the file path with the task.checkpoint.path property. This is a simple option if your job always runs on the same machine. On a multi-machine cluster, this would require a network filesystem mount.</td>
-</tr>
-<tr>
-<td>task.commit.ms</td>
-<td>60000</td>
-<td>If task.checkpoint.factory is configured, this property determines how often a checkpoint is written. The value is the time between checkpoints, in milliseconds. The frequency of checkpointing affects failure recovery: if a container fails unexpectedly (e.g. due to crash or machine failure) and is restarted, it resumes processing at the last checkpoint. Any messages processed since the last checkpoint on the failed container are processed again. Checkpointing more frequently reduces the number of messages that may be processed twice, but also uses more resources.</td>
-</tr>
-</tbody></table>
-
-<h5 id="2-1-advanced-checkpointing-configurations"><a name="advanced-checkpointing-configuration"></a><a href="#advanced-checkpointing-configuration">2.1 Advanced Checkpointing Configurations</a></h5>
-
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>task.checkpoint.system</td>
-<td>inherited from job.default.system</td>
-<td>This property is required if you would like to override the system defined in <code>job.default.system</code> for checkpointing. You must set it to the <em><strong>system-name</strong></em> of the desired checkpointing system. The stream name (topic name) within that system is automatically determined from the job name and ID: _<em>samza</em>checkpoint<em>${job.name}</em>${job.id} (with underscores in the job name and ID replaced by hyphens).</td>
-</tr>
-<tr>
-<td>job.checkpoint.validation.enabled</td>
-<td>true</td>
-<td>This setting controls if the job should fail(true) or just warn(false) in case of the checkpoint topic fails.<br><strong>CAUTION:</strong> this configuration needs to be used w/ care. It should only be used as a work-around if the checkpoint topic was created with the wrong number of partitions, it&rsquo;s contents have been corrupted, or the <code>SystemStreamPartitionGrouperFactory</code> for the job needs to be changed.</td>
-</tr>
-<tr>
-<td>task.checkpoint.path</td>
-<td></td>
-<td>Required if you are using the filesystem for checkpoints. Set this to the path on your local filesystem where checkpoint files should be stored.</td>
-</tr>
-<tr>
-<td>task.checkpoint.<br>replication.factor</td>
-<td>2</td>
-<td>If you are using Kafka for checkpoints, this is the number of Kafka nodes to which you want the checkpoint topic replicated for durability.</td>
-</tr>
-<tr>
-<td>task.checkpoint.<br>segment.bytes</td>
-<td>26214400</td>
-<td>If you are using Kafka for checkpoints, this is the segment size to be used for the checkpoint topic&rsquo;s log segments. Keeping this number small is useful because it increases the frequency that Kafka will garbage collect old checkpoints.</td>
-</tr>
-</tbody></table>
-
-<h3 id="3-systems-streams"><a name="systems-streams"></a><a href="#systems-streams">3. Systems &amp; Streams</a></h3>
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>app.name</td>
+      <td> </td>
+      <td><strong>Required:</strong> The name of your application.</td>
+    </tr>
+    <tr>
+      <td>app.id</td>
+      <td>1</td>
+      <td>If you run several instances of your application at the same time, you need to give each instance a different app.id. This is important, since otherwise the applications will overwrite each other’s checkpoints, and perhaps interfere with each other in other ways.</td>
+    </tr>
+    <tr>
+      <td>app.class</td>
+      <td> </td>
+      <td>This is <strong>required if running on YARN</strong>. The application to run. The value is a fully-qualified Java classname, which must implement StreamApplication. A StreamApplication describes a series of transformations on streams.</td>
+    </tr>
+    <tr>
+      <td>job.factory.class</td>
+      <td> </td>
+      <td>This is <strong>required if running on YARN</strong>. The job factory to use for running this job. <br /> The value is a fully-qualified Java classname, which must implement StreamJobFactory.<br /> Samza ships with three implementations:<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.job.yarn.YarnJobFactory</code><br />Runs your job on a YARN grid. See below for YARN-specific configuration.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.job.local.ThreadJobFactory</code><br /><strong>For dev deployments only.</strong> Runs your job on your local machine using threads.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.job.local.ProcessJobFactory</code><br /><strong>For dev deployments only.</strong> Runs your job on your local machine as a subprocess. An optional command builder property can also be specified (see task.command.class for details).</td>
+    </tr>
+    <tr>
+      <td>job.name</td>
+      <td> </td>
+      <td><em>(Deprecated in favor of app.name)</em>  The name of your job. This name appears on the Samza dashboard, and it is used to tell apart this job’s checkpoints from other jobs’ checkpoints.</td>
+    </tr>
+    <tr>
+      <td>job.id</td>
+      <td>1</td>
+      <td><em>(Deprecated in favor of app.id)</em> If you run several instances of your job at the same time, you need to give each execution a different job.id. This is important, since otherwise the jobs will overwrite each other’s checkpoints, and perhaps interfere with each other in other ways.</td>
+    </tr>
+    <tr>
+      <td>job.default.system</td>
+      <td> </td>
+      <td><strong>Required:</strong> The system-name to use for creating input or output streams for which the system is not explicitly configured. This property will also be used as default for <code class="language-plaintext highlighter-rouge">job.coordinator.system</code>, <code class="language-plaintext highlighter-rouge">task.checkpoint.system</code> and <code class="language-plaintext highlighter-rouge">job.changelog.system</code> if none are defined.</td>
+    </tr>
+    <tr>
+      <td>task.class</td>
+      <td> </td>
+      <td>Used for legacy purposes; replace with <code class="language-plaintext highlighter-rouge">app.class</code> in new jobs. The fully-qualified name of the Java class which processes incoming messages from input streams. The class must implement <a href="../api/javadocs/org/apache/samza/task/StreamTask.html">StreamTask</a> or <a href="../api/javadocs/org/apache/samza/task/AsyncStreamTask.html">AsyncStreamTask</a>, and may optionally implement <a href="../api/javadocs/org/apache/samza/task/InitableTask.html">InitableTask</a>, <a href="../api/javadocs/org/apache/samza/task/ClosableTask.html">ClosableTask</a> and/or <a href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>. The class will be instantiated several times, once for every input stream partition.</td>
+    </tr>
+    <tr>
+      <td>job.host-affinity.enabled</td>
+      <td>false</td>
+      <td>This property indicates whether host-affinity is enabled or not. Host-affinity refers to the ability of Samza to request and allocate a container on the same host every time the job is deployed. When host-affinity is enabled, Samza makes a “best-effort” to honor the host-affinity constraint. The property <code class="language-plaintext highlighter-rouge">cluster-manager.container.request.timeout.ms</code> determines how long to wait before de-prioritizing the host-affinity constraint and assigning the container to any available resource.</td>
+    </tr>
+    <tr>
+      <td>job.jmx.enabled</td>
+      <td>true</td>
+      <td>Determines whether a JMX server should be started on the job’s JobCoordinator and Container. (true or false).</td>
+    </tr>
+    <tr>
+      <td>task.window.ms</td>
+      <td>-1</td>
+      <td>If task.class implements <a href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>, it can receive a windowing callback at regular intervals. This property specifies the time between window() calls, in milliseconds. If the number is negative (the default), window() is never called. A <code class="language-plaintext highlighter-rouge">window()</code> call will never occur concurrently with the processing of a message. If a message is being processed when a window() call is due, the invocation of window happens after processing the message. This property is set automatically when using join or window operators in a High Level API StreamApplication. Note: task.window.ms should be set to be much larger than the average process or window call duration to avoid starving regular processing.</td>
+    </tr>
+    <tr>
+      <td>task.log4j.system</td>
+      <td> </td>
+      <td>Specify the system name for the StreamAppender. If this property is not specified in the config, an exception will be thrown. (See <a href="logging.html#stream-log4j-appender">Stream Log4j Appender</a>) Example: task.log4j.system=kafka</td>
+    </tr>
+    <tr>
+      <td>serializers.registry.<br /><strong><em>serde-name</em></strong>.class</td>
+      <td> </td>
+      <td>Use this property to register a serializer/deserializer, which defines a way of encoding data as an array of bytes (used for messages in streams, and for data in persistent storage). You can give a serde any serde-name you want, and reference that name in properties like systems.*.samza.key.serde, systems.*.samza.msg.serde, streams.*.samza.key.serde, streams.*.samza.msg.serde, stores.*.key.serde and stores.*.msg.serde. The value of this property is the fully-qualified name of a Java class that implements SerdeFactory. Samza ships with the following serde implementations:<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.ByteSerdeFactory</code><br />A no-op serde which passes through the undecoded byte array. <br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.ByteBufferSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.nio.ByteBuffer</code> objects. <br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.IntegerSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.lang.Integer</code> objects as binary (4 bytes fixed-length big-endian encoding).<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.StringSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.lang.String</code> objects as UTF-8. <br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.JsonSerdeFactory</code><br />Encodes nested structures of <code class="language-plaintext highlighter-rouge">java.util.Map</code>, <code class="language-plaintext highlighter-rouge">java.util.List</code> etc. as JSON. Note: This Serde enforces a dash-separated property naming convention, while JsonSerdeV2 doesn’t. This serde is primarily meant for Samza’s internal usage, and is publicly available for backwards compatibility.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.JsonSerdeV2Factory</code><br />Encodes nested structures of <code class="language-plaintext highlighter-rouge">java.util.Map</code>, <code class="language-plaintext highlighter-rouge">java.util.List</code> etc. as JSON. Note: This Serde uses Jackson’s default (camelCase) property naming convention. This serde should be preferred over JsonSerde, especially in High Level API, unless the dasherized naming convention is required (e.g., for backwards compatibility).<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.LongSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.lang.Long</code> as binary (8 bytes fixed-length big-endian encoding).<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.DoubleSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.lang.Double</code> as binary (8 bytes double-precision floating point).<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.UUIDSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.util.UUID</code> objects.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.SerializableSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">java.io.Serializable</code> objects.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.MetricsSnapshotSerdeFactory</code><br />Encodes <code class="language-plaintext highlighter-rouge">org.apache.samza.metrics.reporter.MetricsSnapshot</code> objects (which are used for reporting metrics) as JSON.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.serializers.KafkaSerdeFactory</code><br />Adapter which allows existing <code class="language-plaintext highlighter-rouge">kafka.serializer.Encoder</code> and <code class="language-plaintext highlighter-rouge">kafka.serializer.Decoder</code> implementations to be used as Samza serdes. Set <code class="language-plaintext highlighter-rouge">serializers.registry.serde-name.encoder</code> and <code class="language-plaintext highlighter-rouge">serializers.registry.serde-name.decoder</code> to the appropriate class names.</td>
+    </tr>
+  </tbody>
+</table>
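+
+<p>To make the table above concrete, here is a minimal sketch of a job configuration file combining the basic properties; the class name <code>com.example.MyStreamApplication</code> and the system name <code>kafka</code> are illustrative placeholders, not defaults:</p>
+
+<pre><code># hypothetical minimal Samza job configuration (property names from the table above)
+app.name=my-app
+app.id=1
+# required on YARN: a fully-qualified StreamApplication implementation
+app.class=com.example.MyStreamApplication
+job.factory.class=org.apache.samza.job.yarn.YarnJobFactory
+# default system for streams, coordination, checkpointing and changelogs
+job.default.system=kafka
+# register a serde under the name "json" for use in stream and store configs
+serializers.registry.json.class=org.apache.samza.serializers.JsonSerdeV2Factory
+</code></pre>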
+
+<h4 id="-11-advanced-application-configurations"><a name="advanced-application-configurations"></a> <a href="#advanced-application-configurations">1.1 Advanced Application Configurations</a></h4>
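+
+<p>Since several of the rewriter properties in the table below only take effect in combination, the following sketch shows one plausible wiring of the <code>RegExTopicGenerator</code> rewriter; the rewriter name <code>regex-input</code>, the system name <code>kafka</code>, and the topic pattern are illustrative assumptions, and the serde name <code>json</code> is assumed to be registered as in the earlier sketch:</p>
+
+<pre><code># apply one config rewriter, referenced by name
+job.config.rewriters=regex-input
+job.config.rewriter.regex-input.class=org.apache.samza.config.RegExTopicGenerator
+# the Kafka system whose topics should be matched
+job.config.rewriter.regex-input.system=kafka
+# consume every topic whose name starts with "events-"
+job.config.rewriter.regex-input.regex=events-.*
+# config applied to each matched stream, e.g. its message serde
+job.config.rewriter.regex-input.config.samza.msg.serde=json
+</code></pre>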
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>job.changelog.system</td>
+      <td>inherited from job.default.system</td>
+      <td>This property is required if you would like to override the system defined in <code class="language-plaintext highlighter-rouge">job.default.system</code> for the changelog. The changelog will be used with the stream specified in <code class="language-plaintext highlighter-rouge">stores.store-name.changelog</code> config. You can override this system by specifying both the system and the stream in <code class="language-plaintext highlighter-rouge">stores.store-name.changelog</code>.</td>
+    </tr>
+    <tr>
+      <td>job.coordinator.system</td>
+      <td>inherited from job.default.system</td>
+      <td>This property is required if you would like to override the system defined in <code class="language-plaintext highlighter-rouge">job.default.system</code> for coordination. The <strong><em>system-name</em></strong> to use for creating and maintaining the Coordinator Stream.</td>
+    </tr>
+    <tr>
+      <td>job.coordinator.segment.<br />bytes</td>
+      <td>26214400</td>
+      <td>If you are using a Kafka system for the coordinator stream, this is the segment size to be used for the coordinator topic’s log segments. Keeping this number small is useful because it increases the frequency with which Kafka will garbage collect old messages.</td>
+    </tr>
+    <tr>
+      <td>job.coordinator.replication.<br />factor</td>
+      <td>2</td>
+      <td>If you are using a Kafka system for the coordinator stream, this is the replication factor to be used for the coordinator topic.</td>
+    </tr>
+    <tr>
+      <td>job.coordinator.<br />monitor-partition-change.<br />frequency.ms</td>
+      <td>300000</td>
+      <td>The frequency at which the input streams’ partition count change should be detected. When the input partition count change is detected, Samza will automatically restart a stateless job or fail a stateful job. A longer time interval is recommended for jobs with a large number of input system stream partitions, since gathering partition count may incur measurable overhead to the job. You can completely disable partition count monitoring by setting this value to 0 or a negative integer, which will also disable auto-restart/failing behavior of a Samza job on partition count changes.</td>
+    </tr>
+    <tr>
+      <td>job.coordinator.execute</td>
+      <td>bin/run-jc.sh</td>
+      <td>The command that starts a Samza job coordinator. The script must be included in the job package. There is usually no need to customize this.</td>
+    </tr>
+    <tr>
+      <td>job.config.rewriter.<br /><strong><em>rewriter-name</em></strong>.class</td>
+      <td>(none)</td>
+      <td>You can optionally define configuration rewriters, which have the opportunity to dynamically modify the job configuration before the job is started. For example, this can be useful for pulling configuration from an external configuration management system, or for determining the set of input streams dynamically at runtime. The value of this property is a fully-qualified Java classname which must implement <a href="../api/javadocs/org/apache/samza/config/ConfigRewriter.html">ConfigRewriter</a>. Samza ships with these rewriters by default:<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.config.RegExTopicGenerator</code><br />When consuming from Kafka, this allows you to consume all Kafka topics that match some regular expression (rather than having to list each topic explicitly). This rewriter has additional configuration.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.config.EnvironmentConfigRewriter</code><br />This rewriter takes environment variables that are prefixed with <code class="language-plaintext highlighter-rouge">SAMZA_</code> and adds them to the configuration, overriding previous values where they exist. The keys are lowercased and underscores are converted to dots.</td>
+    </tr>
+    <tr>
+      <td>job.config.rewriters</td>
+      <td>(none)</td>
+      <td>If you have defined configuration rewriters, you need to list them here, in the order in which they should be applied. The value of this property is a comma-separated list of <strong><em>rewriter-name</em></strong> tokens.</td>
+    </tr>
+    <tr>
+      <td>job.config.rewriter.<br /><strong><em>rewriter-name</em></strong>.system</td>
+      <td>(none)</td>
+      <td>Set this property to the <code class="language-plaintext highlighter-rouge">system-name</code> of the Kafka system from which you want to consume all matching topics.</td>
+    </tr>
+    <tr>
+      <td>job.config.rewriter.<br /><strong><em>rewriter-name</em></strong>.regex</td>
+      <td>(none)</td>
+      <td>A regular expression specifying which topics you want to consume within the Kafka system <code class="language-plaintext highlighter-rouge">job.config.rewriter.*.system</code>. Any topics matched by this regular expression will be consumed in addition to any topics you specify in your application.</td>
+    </tr>
+    <tr>
+      <td>job.config.rewriter.<br /><strong><em>rewriter-name</em></strong>.config.*</td>
+      <td> </td>
+      <td>Any properties specified within this namespace are applied to the configuration of streams that match the regex in <code class="language-plaintext highlighter-rouge">job.config.rewriter.*.regex</code>. For example, you can set <code class="language-plaintext highlighter-rouge">job.config.rewriter.*.config.samza.msg.serde</code> to configure the deserializer for messages in the matching streams, which is equivalent to setting <code class="language-plaintext highlighter-rouge">systems.*.streams.*.samza.msg.serde</code> for each topic that matches the regex.</td>
+    </tr>
+    <tr>
+      <td>job.container.thread.<br />pool.size</td>
+      <td>0</td>
+      <td>If configured, the container thread pool will be used to run synchronous operations of each task <a href="../container/event-loop.html">in parallel</a>. The operations include StreamTask.process(), WindowableTask.window(), and internally Task.commit(). If not configured and the default value of 0 is used, all task operations will run in a single thread.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />grouper.factory</td>
+      <td><code class="language-plaintext highlighter-rouge">org.apache.samza.</code><br /><code class="language-plaintext highlighter-rouge">container.grouper.stream.</code><br /><code class="language-plaintext highlighter-rouge">GroupByPartitionFactory</code></td>
+      <td>A factory class that is used to determine how input SystemStreamPartitions are grouped together for processing in individual StreamTask instances. The factory must implement the SystemStreamPartitionGrouperFactory interface. Once this configuration is set, it can’t be changed, since doing so could violate state semantics, and lead to a loss of data.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.container.grouper.stream.</code><br /><code class="language-plaintext highlighter-rouge">GroupByPartitionFactory</code><br />Groups input stream partitions according to their partition number. This grouping leads to a single StreamTask processing all messages for a single partition (e.g. partition 0) across all input streams that have a partition 0. Therefore, the default is that you get one StreamTask for all input partitions with the same partition number. Using this strategy, if two input streams have a partition 0, then messages from both partitions will be routed to a single StreamTask. This partitioning strategy is useful for joining and aggregating streams.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.container.grouper.stream.</code><br /><code class="language-plaintext highlighter-rouge">GroupBySystemStreamPartitionFactory</code><br />Assigns each SystemStreamPartition to its own unique StreamTask. The GroupBySystemStreamPartitionFactory is useful in cases where you want increased parallelism (more containers), and don’t care about co-locating partitions for grouping or joins, since it allows for a greater number of StreamTasks to be divided up amongst Samza containers.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />matcher.class</td>
+      <td> </td>
+      <td>If you want to enable static partition assignment, this is a required configuration. The value of this property is the fully-qualified name of a Java class that implements the interface org.apache.samza.system.SystemStreamPartitionMatcher. Samza ships with two matcher classes:<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.system.RangeSystemStreamPartitionMatcher</code><br />This class uses a comma-separated list of ranges to determine which partitions match and are thus statically assigned to the job. For example, “2,3,1-2” statically assigns partitions 1, 2, and 3 for all the specified systems and streams (topics in the case of Kafka) to the job. For config validation, each element in the comma-separated list must conform to one of the following regexes: “<code class="language-plaintext highlighter-rouge">(\\d+)</code>” or “<code class="language-plaintext highlighter-rouge">(\\d+-\\d+)</code>”.<br />The <code class="language-plaintext highlighter-rouge">JobConfig.SSP_MATCHER_CLASS_RANGE</code> constant holds the canonical name of this class.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.system.RegexSystemStreamPartitionMatcher</code><br />This class uses a standard Java regex to determine which partitions match and are thus statically assigned to the job. For example, “[1-2]” statically assigns partitions 1 and 2 for all the specified systems and streams (topics in the case of Kafka) to the job. The JobConfig.SSP_MATCHER_CLASS_REGEX constant holds the canonical name of this class. See the example following this table.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />matcher.config.<br />range</td>
+      <td> </td>
+      <td>If <code class="language-plaintext highlighter-rouge">job.systemstreampartition.matcher.class</code> is specified, and the value of this property is <code class="language-plaintext highlighter-rouge">org.apache.samza.system.RangeSystemStreamPartitionMatcher</code>, then this property is a required configuration. Specify a comma-separated list of ranges to determine which partitions match and are thus statically assigned to the job. For example, “2,3,11-20” statically assigns partitions 2, 3, and 11 through 20 for all the specified systems and streams (topics in the case of Kafka) to the job. A single configuration value like “19” is valid as well, and statically assigns partition 19. For config validation, each element in the comma-separated list must conform to one of the following regexes: “<code class="language-plaintext highlighter-rouge">(\\d+)</code>” or “<code class="language-plaintext highlighter-rouge">(\\d+-\\d+)</code>”.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />matcher.config.<br />regex</td>
+      <td> </td>
+      <td>If <code class="language-plaintext highlighter-rouge">job.systemstreampartition.matcher.class</code> is specified, and the value of this property is <code class="language-plaintext highlighter-rouge">org.apache.samza.system.RegexSystemStreamPartitionMatcher</code>, then this property is a required configuration. The value should be a valid Java regex. For example, “[1-2]” statically assigns partitions 1 and 2 for all the specified systems and streams (topics in the case of Kafka) to the job.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />matcher.config.<br />job.factory.regex</td>
+      <td> </td>
+      <td>This configuration can be used to specify the Java supported regex to match the StreamJobFactory for which the static partition assignment should be enabled. This configuration enables the partition assignment feature to be used for custom StreamJobFactory(ies) as well.<br />This config defaults to the following value: “<em>org\\.apache\\.samza\\.job\\.local(.*ProcessJobFactory | .*ThreadJobFactory)</em>”, which enables static partition assignment when job.factory.class is set to <code class="language-plaintext highlighter-rouge">org.apache.samza.job.local.ProcessJobFactory</code> or <code class="language-plaintext highlighter-rouge">org.apache.samza.job.local.ThreadJobFactory</code>.</td>
+    </tr>
+    <tr>
+      <td>job.systemstreampartition.<br />input.expansion.enabled</td>
+      <td>true</td>
+      <td>When enabled, this allows stateful jobs to expand or contract their partition count by a multiple of the previous count so that events from an input stream partition are processed on the same task as before. This will prevent erroneous results. This feature is disabled if the configuration is set to false or if the job is stateless. See <a href="https://cwiki.apache.org/confluence/display/SAMZA/SEP-5%3A+Enable+partition+expansion+of+input+streams">SEP-5</a> for more details.</td>
+    </tr>
+    <tr>
+      <td>job.security.manager.<br />factory</td>
+      <td>(none)</td>
+      <td>This is the factory class used to create the proper SecurityManager to handle security for Samza containers when running in a secure environment, such as Yarn with Kerberos enabled. Samza ships with one security manager by default:<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.job.yarn.SamzaYarnSecurityManagerFactory</code><br />Allows Samza containers to run properly in a Kerberos-enabled Yarn cluster. Each Samza container, once started, creates a SamzaContainerSecurityManager. SamzaContainerSecurityManager runs on its own thread and updates the user’s delegation tokens at the interval specified by yarn.token.renewal.interval.seconds. See Yarn Security for details.</td>
+    </tr>
+    <tr>
+      <td>task.callback.timeout.ms</td>
+      <td>-1(no timeout)</td>
+      <td>For an AsyncStreamTask, this defines the maximum allowed time for a processAsync callback to complete. For a StreamTask, this is the maximum allowed time for a process call to complete. When the timeout is reached, the container is shut down. The default is no timeout.</td>
+    </tr>
+    <tr>
+      <td>task.chooser.class</td>
+      <td><code class="language-plaintext highlighter-rouge">org.apache.samza.</code><br /><code class="language-plaintext highlighter-rouge">system.chooser.</code><br /><code class="language-plaintext highlighter-rouge">RoundRobinChooserFactory</code></td>
+      <td>This property can be optionally set to override the default <a href="../container/streams.html#messagechooser">message chooser</a>, which determines the order in which messages from multiple input streams are processed. The value of this property is the fully-qualified name of a Java class that implements <a href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a>.</td>
+    </tr>
+    <tr>
+      <td>task.command.class</td>
+      <td><code class="language-plaintext highlighter-rouge">org.apache.samza.job.</code><br /><code class="language-plaintext highlighter-rouge">ShellCommandBuilder</code></td>
+      <td>The fully-qualified name of the Java class which determines the command line and environment variables for a <a href="../container/samza-container.html">container</a>. It must be a subclass of <a href="../api/javadocs/org/apache/samza/job/CommandBuilder.html">CommandBuilder</a>. This defaults to task.command.class=<code class="language-plaintext highlighter-rouge">org.apache.samza.job.ShellCommandBuilder</code>.</td>
+    </tr>
+    <tr>
+      <td>task.drop.deserialization.errors</td>
+      <td>false</td>
+      <td>This property defines how the system deals with deserialization failures. If set to true, the system will skip messages that fail to deserialize and keep running. If set to false, the system will throw an exception and fail the container.</td>
+    </tr>
+    <tr>
+      <td>task.drop.serialization.errors</td>
+      <td>false</td>
+      <td>This property defines how the system deals with serialization failures. If set to true, the system will drop messages that fail to serialize and keep running. If set to false, the system will throw an exception and fail the container.</td>
+    </tr>
+    <tr>
+      <td>task.drop.producer.errors</td>
+      <td>false</td>
+      <td>If true, producer errors will be logged and ignored. The only exceptions that will be thrown are those which are likely caused by the application itself (e.g. serialization errors). If false, the producer will be closed and producer errors will be propagated upward until the container ultimately fails. Failing the container is a safety precaution to ensure the latest checkpoints only reflect the events that have been completely and successfully processed. However, some applications prefer to remain running at all costs, even if that means losing messages. Setting this property to true will enable applications to recover from producer errors at the expense of one or many (in the case of batching producers) dropped messages. If you enable this, it is highly recommended that you also configure alerting on the ‘producer-send-failed’ metric, since the producer might drop messages indefinitely. The logic for this property is specific to each SystemProducer implementation. It will have no effect for SystemProducers that ignore the property.</td>
+    </tr>
+    <tr>
+      <td>task.ignored.exceptions</td>
+      <td> </td>
+      <td>This property specifies which exceptions should be ignored if thrown in a task’s process or window methods. The exceptions to be ignored should be a comma-separated list of fully-qualified class names of the exceptions or * to ignore all exceptions.</td>
+    </tr>
+    <tr>
+      <td>task.log4j.location.info.enabled</td>
+      <td>false</td>
+      <td>Defines whether or not to include log4j’s LocationInfo data in Log4j StreamAppender messages. LocationInfo includes information such as the file, class, and line that wrote a log message. This setting is only active if the Log4j stream appender is being used. (See <a href="../logging.html#stream-log4j-appender">Stream Log4j Appender</a>)</td>
+    </tr>
+    <tr>
+      <td>task.max.idle.ms</td>
+      <td>10</td>
+      <td>The maximum time to wait for a task worker to complete when there are no new messages to handle, before resuming the main loop and potentially polling for more messages (see <code class="language-plaintext highlighter-rouge">task.poll.interval.ms</code>). This timeout value prevents the main loop from spinning when there is nothing for it to do. Increasing this value will reduce the background load of the thread, but also potentially increase message latency. It should not be set greater than <code class="language-plaintext highlighter-rouge">task.poll.interval.ms</code>.</td>
+    </tr>
+    <tr>
+      <td>task.max.concurrency</td>
+      <td>1</td>
+      <td>The maximum number of outstanding messages being processed per task at a time; applicable to both StreamTask and AsyncStreamTask. The values can be:<br /><br /><code class="language-plaintext highlighter-rouge">1</code><br />Each task processes one message at a time. The next message will wait until processing of the current message completes. This ensures strict in-order processing.<br /><br /><code class="language-plaintext highlighter-rouge">&gt;1</code><br />Multiple outstanding messages are allowed to be processed per task at a time, and they may complete out of order. This option increases the parallelism within a task, but may result in out-of-order processing.</td>
+    </tr>
+    <tr>
+      <td>task.name.grouper.factory</td>
+      <td><code class="language-plaintext highlighter-rouge">org.apache.samza.</code><br /><code class="language-plaintext highlighter-rouge">container.grouper.task.</code><br /><code class="language-plaintext highlighter-rouge">GroupByContainerCountFactory</code></td>
+      <td>The fully-qualified name of the Java factory class that builds the TaskNameGrouper. The default configuration value, if the property is not present, is task.name.grouper.factory=<code class="language-plaintext highlighter-rouge">org.apache.samza.container.grouper.task.</code><br /><code class="language-plaintext highlighter-rouge">GroupByContainerCountFactory</code>. The user can specify a custom implementation of TaskNameGrouperFactory that implements custom logic for grouping the tasks.<br />Note: For non-cluster applications (ones using the coordination service) one must use <code class="language-plaintext highlighter-rouge">org.apache.samza.container.grouper.</code><br /><code class="language-plaintext highlighter-rouge">task.GroupByContainerIdsFactory</code></td>
+    </tr>
+    <tr>
+      <td>task.opts</td>
+      <td> </td>
+      <td>Any JVM options to include in the command line when executing Samza containers. For example, this can be used to set the JVM heap size, to tune the garbage collector, or to enable remote debugging. This cannot be used when running with ThreadJobFactory. Anything you put in task.opts gets forwarded directly to the command line as part of the JVM invocation.<br />Example: <code class="language-plaintext highlighter-rouge">task.opts=-XX:+HeapDumpOnOutOfMemoryError -XX:+UseConcMarkSweepGC</code></td>
+    </tr>
+    <tr>
+      <td>task.poll.interval.ms</td>
+      <td>50</td>
+      <td>Samza’s container polls for more messages under two conditions. The first condition arises when there are simply no remaining buffered messages to process for any input SystemStreamPartition. The second condition arises when some input SystemStreamPartitions have empty buffers, but some do not. In the latter case, a polling interval is defined to determine how often to refresh the empty SystemStreamPartition buffers. By default, this interval is 50ms, which means that any empty SystemStreamPartition buffer will be refreshed at least every 50ms. A higher value here means that empty SystemStreamPartitions will be refreshed less often, which means more latency is introduced, but less CPU and network will be used. Decreasing this value means that empty SystemStreamPartitions are refreshed more frequently, thereby introducing less latency, but increasing CPU and network utilization.</td>
+    </tr>
+    <tr>
+      <td>task.shutdown.ms</td>
+      <td>30000</td>
+      <td>This property controls how long the Samza container will wait for an orderly shutdown of task instances.</td>
+    </tr>
+  </tbody>
+</table>
+
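+<p>For illustration, here is a minimal sketch of wiring up the regex-based config rewriter described above. It assumes a Kafka system named <code class="language-plaintext highlighter-rouge">kafka</code> and a serde named <code class="language-plaintext highlighter-rouge">json</code> registered via serializers.registry.*.class; the rewriter name <code class="language-plaintext highlighter-rouge">regex-input</code> is an arbitrary placeholder, and the rewriter class is assumed to be Samza’s built-in <code class="language-plaintext highlighter-rouge">org.apache.samza.config.RegExTopicGenerator</code>:</p>
+
+<pre><code># Hypothetical example; system, serde and rewriter names are placeholders.
+job.config.rewriters=regex-input
+job.config.rewriter.regex-input.class=org.apache.samza.config.RegExTopicGenerator
+# Consume every Kafka topic matching the regex, in addition to task.inputs.
+job.config.rewriter.regex-input.system=kafka
+job.config.rewriter.regex-input.regex=pageview-.*
+# Applied to each matching stream, as if systems.kafka.streams.*.samza.msg.serde were set per topic.
+job.config.rewriter.regex-input.config.samza.msg.serde=json
+</code></pre>
+
+<p>Similarly, a sketch of static partition assignment using the built-in range matcher (partition numbers are illustrative; by default this takes effect only for ProcessJobFactory and ThreadJobFactory, per job.systemstreampartition.matcher.config.job.factory.regex):</p>
+
+<pre><code># Statically assign partitions 0-2 and 5 of every configured input stream to this job.
+job.systemstreampartition.matcher.class=org.apache.samza.system.RangeSystemStreamPartitionMatcher
+job.systemstreampartition.matcher.config.range=0-2,5
+</code></pre>
+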
+<h3 id="-2-checkpointing"><a name="checkpointing"></a> <a href="#checkpointing">2. Checkpointing</a></h3>
+<p><a href="../container/checkpointing.html">Checkpointing</a> is not required, but recommended for most jobs. If you don’t configure checkpointing, and a job or container restarts, it does not remember which messages it has already processed. Without checkpointing, consumer behavior on startup is determined by the …samza.offset.default setting. Checkpointing allows a job to start up where it previously left off.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>task.checkpoint.factory</td>
+      <td> </td>
+      <td>To enable <a href="../container/checkpointing.html">checkpointing</a>, you must set this property to the fully-qualified name of a Java class that implements <a href="../api/javadocs/org/apache/samza/checkpoint/CheckpointManagerFactory.html">CheckpointManagerFactory</a>. Samza ships with two checkpoint managers by default: <br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory</code> <br />Writes checkpoints to a dedicated topic on a Kafka cluster. This is the recommended option if you are already using Kafka for input or output streams. Use the task.checkpoint.system property to configure which Kafka cluster to use for checkpoints.<br /><br /><code class="language-plaintext highlighter-rouge">org.apache.samza.checkpoint.file.FileSystemCheckpointManagerFactory</code> <br /><strong>For dev deployments only.</strong> Writes checkpoints to files on the local filesystem. You can configure the file path with the task.checkpoint.path property. This is a simple option if your job always runs on the same machine. On a multi-machine cluster, this would require a network filesystem mount.</td>
+    </tr>
+    <tr>
+      <td>task.commit.ms</td>
+      <td>60000</td>
+      <td>If task.checkpoint.factory is configured, this property determines how often a checkpoint is written. The value is the time between checkpoints, in milliseconds. The frequency of checkpointing affects failure recovery: if a container fails unexpectedly (e.g. due to crash or machine failure) and is restarted, it resumes processing at the last checkpoint. Any messages processed since the last checkpoint on the failed container are processed again. Checkpointing more frequently reduces the number of messages that may be processed twice, but also uses more resources.</td>
+    </tr>
+  </tbody>
+</table>
+
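+<p>As a concrete starting point, a minimal Kafka-based checkpointing setup might look as follows; it assumes a system named <code class="language-plaintext highlighter-rouge">kafka</code> is already defined under systems.kafka.*:</p>
+
+<pre><code># Write checkpoints to a dedicated topic on the "kafka" system.
+task.checkpoint.factory=org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory
+task.checkpoint.system=kafka
+# Checkpoint once a minute (the default).
+task.commit.ms=60000
+</code></pre>
+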
+<h5 id="21-advanced-checkpointing-configurations"><a name="advanced-checkpointing-configuration"></a><a href="#advanced-checkpointing-configuration">2.1 Advanced Checkpointing Configurations</a></h5>
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>task.checkpoint.system</td>
+      <td>inherited from job.default.system</td>
+      <td>This property is required if you would like to override the system defined in <code class="language-plaintext highlighter-rouge">job.default.system</code> for checkpointing. You must set it to the <strong><em>system-name</em></strong> of the desired checkpointing system. The stream name (topic name) within that system is automatically determined from the job name and ID: <strong>samza_checkpoint_${job.name}_${job.id}</strong> (with underscores in the job name and ID replaced by hyphens).</td>
+    </tr>
+    <tr>
+      <td>job.checkpoint.validation.enabled</td>
+      <td>true</td>
+      <td>This setting controls whether the job should fail (true) or just log a warning (false) if validation of the checkpoint topic fails.<br /><strong>CAUTION:</strong> this configuration needs to be used with care. It should only be used as a work-around if the checkpoint topic was created with the wrong number of partitions, its contents have been corrupted, or the <code class="language-plaintext highlighter-rouge">SystemStreamPartitionGrouperFactory</code> for the job needs to be changed.</td>
+    </tr>
+    <tr>
+      <td>task.checkpoint.path</td>
+      <td> </td>
+      <td>Required if you are using the filesystem for checkpoints. Set this to the path on your local filesystem where checkpoint files should be stored.</td>
+    </tr>
+    <tr>
+      <td>task.checkpoint.<br />replication.factor</td>
+      <td>2</td>
+      <td>If you are using Kafka for checkpoints, this is the number of Kafka nodes to which you want the checkpoint topic replicated for durability.</td>
+    </tr>
+    <tr>
+      <td>task.checkpoint.<br />segment.bytes</td>
+      <td>26214400</td>
+      <td>If you are using Kafka for checkpoints, this is the segment size to be used for the checkpoint topic’s log segments. Keeping this number small is useful because it increases the frequency with which Kafka will garbage-collect old checkpoints.</td>
+    </tr>
+  </tbody>
+</table>
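+
+<p>For example, to make the Kafka checkpoint topic more durable than the default, you might override the replication factor (the value 3 is illustrative):</p>
+
+<pre><code># Replicate the checkpoint topic to 3 Kafka nodes instead of the default 2.
+task.checkpoint.replication.factor=3
+</code></pre>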
 
+<h3 id="3-systems--streams"><a name="systems-streams"></a><a href="#systems-streams">3. Systems &amp; Streams</a></h3>
 <p>Samza consumes from and produces to <a href="../container/streams.html">Streams</a> and has support for a variety of Systems including Kafka, HDFS, Azure Event Hubs, Kinesis and ElasticSearch.</p>
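+
+<p>As a quick orientation, a sketch of defining a Kafka system and binding a logical stream to it, using the properties described in the table below (system, stream and topic names are placeholders):</p>
+
+<pre><code># Define a system named "my-kafka" backed by the Kafka system factory.
+systems.my-kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory
+# Bind the logical stream-id "page-views" to the physical Kafka topic "PageViewEvent".
+streams.page-views.samza.system=my-kafka
+streams.page-views.samza.physical.name=PageViewEvent
+# Reprocess from the oldest available message when no checkpoint exists.
+streams.page-views.samza.offset.default=oldest
+</code></pre>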
 
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>task.inputs</td>
-<td></td>
-<td>This configuration is only required for legacy task applications. A comma-separated list of streams that are consumed by this job. Each stream is given in the format system-name.stream-name. For example, if you have one input system called my-kafka, and want to consume two Kafka topics called PageViewEvent and UserActivityEvent, then you would set task.inputs=my-kafka.PageViewEvent, my-kafka.UserActivityEvent.</td>
-</tr>
-<tr>
-<td>task.broadcast.inputs</td>
-<td></td>
-<td>This property specifies the partitions that all tasks should consume. The systemStreamPartitions you put here will be sent to all the tasks. <br>Format: system-name.stream-name#partitionId or system-name.stream-name#[startingPartitionId-endingPartitionId] <br>Example: task.broadcast.inputs=mySystem.broadcastStream#[0-2], mySystem.broadcastStream#0</td>
-</tr>
-<tr>
-<td>systems.<strong><em>system-name</em></strong>.samza.factory</td>
-<td></td>
-<td>The fully-qualified name of a Java class which provides a system. A system can provide input streams which you can consume in your Samza job, or output streams to which you can write, or both. The requirements on a system are very flexible — it may connect to a message broker, or read and write files, or use a database, or anything else. The class must implement <a href="../api/javadocs/org/apache/samza/system/SystemFactory.html">SystemFactory</a>. Alternatively, the user may define the system factory in code using SystemDescriptors. Samza ships with the following implementations: <br><br><code>org.apache.samza.system.kafka.KafkaSystemFactory</code> <a href="#kafka">(Configs)</a><br><code>org.apache.samza.system.hdfs.HdfsSystemFactory</code> <a href="#hdfs">(Configs)</a> <br><code>org.apache.samza.system.eventhub.EventHubSystemFactory</code> <a href="#eventhubs">(Configs)</a><br><code>org.apache.samza.system.kinesis.KinesisSystemFactory</code> <a href="#kinesis">(Configs)</
 a><br><code>org.apache.samza.system..elasticsearch.ElasticsearchSystemFactory</code> <a href="#elasticsearch">(Configs)</a></td>
-</tr>
-<tr>
-<td>systems.<strong><em>system-name</em></strong>.default.stream.*</td>
-<td></td>
-<td>A set of default properties for any stream associated with the system. For example, if &ldquo;systems.kafka-system.default.stream.replication.factor&rdquo;=2 was configured, then every Kafka stream created on the kafka-system will have a replication factor of 2 unless the property is explicitly overridden at the stream scope using streams properties.</td>
-</tr>
-<tr>
-<td>systems.<strong><em>system-name</em></strong>.default.stream.samza.key.serde</td>
-<td></td>
-<td>The <a href="../container/serialization.html">serde</a> which will be used to deserialize the key of messages on input streams, and to serialize the key of messages on output streams. This property defines the serde for an for all streams in the system. See the stream-scoped property to define the serde for an individual stream. If both are defined, the stream-level definition takes precedence. The value of this property must be a serde-name that is registered with serializers.registry.*.class. If this property is not set, messages are passed unmodified between the input stream consumer, the task and the output stream producer.</td>
-</tr>
-<tr>
-<td>systems.<strong><em>system-name</em></strong>.default.stream.samza.msg.serde</td>
-<td></td>
-<td>The <a href="../container/serialization.html">serde</a> which will be used to deserialize the value of messages on input streams, and to serialize the value of messages on output streams. This property defines the serde for an for all streams in the system. See the stream-scoped property to define the serde for an individual stream. If both are defined, the stream-level definition takes precedence. The value of this property must be a serde-name that is registered with serializers.registry.*.class. If this property is not set, messages are passed unmodified between the input stream consumer, the task and the output stream producer.</td>
-</tr>
-<tr>
-<td>systems.<strong><em>system-name</em></strong>.default.stream.samza.offset.default</td>
-<td><code>upcoming</code></td>
-<td>If a container starts up without a <a href="../container/checkpointing.html">checkpoint</a>,  this property determines where in the input stream we should start consuming. The value must be an <a href="../api/javadocs/org/apache/samza/system/SystemStreamMetadata.OffsetType.html">OffsetType</a>, one of the following: <br><br><code>upcoming</code> <br>Start processing messages that are published after the job starts. Any messages published while the job was not running are not processed. <br><br><code>oldest</code> <br>Start processing at the oldest available message in the system, and <a href="reprocessing.html">reprocess</a> the entire available message history. <br><br>This property is for all streams within a system. To set it for an individual stream, see streams.stream-id.samza.offset.default. If both are defined, the stream-level definition takes precedence.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.samza.system</td>
-<td></td>
-<td>The system-name of the system on which this stream will be accessed. This property binds the stream to one of the systems defined with the property systems.system-name.samza.factory. If this property isn&rsquo;t specified, it is inherited from job.default.system.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.samza.physical.name</td>
-<td></td>
-<td>The physical name of the stream on the system on which this stream will be accessed. This is opposed to the stream-id which is the logical name that Samza uses to identify the stream. A physical name could be a Kafka topic name, an HDFS file URN or any other system-specific identifier.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.samza.key.serde</td>
-<td></td>
-<td>The <a href="../container/serialization.html">serde</a> which will be used to deserialize the key of messages on input streams, and to serialize the key of messages on output streams. This property defines the serde for an individual stream. See the system-scoped property to define the serde for all streams within a system. If both are defined, the stream-level definition takes precedence. The value of this property must be a serde-name that is registered with serializers.registry.*.class. If this property is not set, messages are passed unmodified between the input stream consumer, the task and the output stream producer.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.samza.msg.serde</td>
-<td></td>
-<td>The <a href="../container/serialization.html">serde</a> which will be used to deserialize the value of messages on input streams, and to serialize the value of messages on output streams. This property defines the serde for an individual stream. See the system-scoped property to define the serde for all streams within a system. If both are defined, the stream-level definition takes precedence. The value of this property must be a serde-name that is registered with serializers.registry.*.class. If this property is not set, messages are passed unmodified between the input stream consumer, the task and the output stream producer.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.samza.offset.default</td>
-<td><code>upcoming</code></td>
-<td>If a container starts up without a <a href="../container/checkpointing.html">checkpoint</a>, this property determines where in the input stream we should start consuming. The value must be an [OffsetType (../api/javadocs/org/apache/samza/system/SystemStreamMetadata.OffsetType.html), one of the following: <br><br><code>upcoming</code> <br>Start processing messages that are published after the job starts. Any messages published while the job was not running are not processed. <br><br><code>oldest</code> <br>Start processing at the oldest available message in the system, and <a href="reprocessing.html">reprocess</a> the entire available message history. <br><br>This property is for an individual stream. To set it for all streams within a system, see  systems.system-name.samza.offset.default. If both are defined, the stream-level definition takes precedence.</td>
-</tr>
-</tbody></table>
-
-<h5 id="3-1-advanced-system-stream-configuration"><a name="advanced-system-stream-configurations"></a><a href="#advanced-system-stream-configurations">3.1 Advanced System &amp; Stream Configuration</a></h5>
-
-<table><thead>
-<tr>
-<th>Name</th>
-<th>Default</th>
-<th>Description</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.*</td>
-<td></td>
-<td>Any properties of the stream. These are typically system-specific and can be used by the system for stream creation or validation. Note that the other properties are prefixed with <code>samza</code>. which distinguishes them as Samza properties that are not system-specific.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.<br>samza.delete.committed.messages</td>
-<td>false</td>
-<td>If set to true, committed messages of this stream can be deleted. Committed messages of this stream will be deleted if <code>systems.system-name.samza.delete.committed.messages</code> is also set to true.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.<br>samza.reset.offset</td>
-<td>false</td>
-<td>If set to true, when a Samza container starts up, it ignores any <a href="../container/checkpointing.html">checkpointed offset</a> for this particular input stream. Its behavior is thus determined by the <code>samza.offset.default</code> setting. Note that the reset takes effect every time a container is started, which may be every time you restart your job, or more frequently if a container fails and is restarted by the framework.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.<br>samza.priority</td>
-<td>-1</td>
-<td>If one or more streams have a priority set (any positive integer), they will be processed with <a href="../container/streams.html#prioritizing-input-streams">higher priority</a> than the other streams. You can set several streams to the same priority, or define multiple priority levels by assigning a higher number to the higher-priority streams. If a higher-priority stream has any messages available, they will always be processed first; messages from lower-priority streams are only processed when there are no new messages on higher-priority inputs.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.<br>samza.bootstrap</td>
-<td>false</td>
-<td>If set to true, this stream will be processed as a <a href="../container/streams.html#bootstrapping">bootstrap stream</a>. This means that every time a Samza container starts up, this stream will be fully consumed before messages from any other stream are processed.</td>
-</tr>
-<tr>
-<td>streams.<strong><em>stream-id</em></strong>.<br>samza.broadcast</td>
-<td>false</td>
-<td>If set to true, this stream will be processed as a <a href="../container/samza-container.html#broadcast-streams">broadcast stream</a>. This means that ALL the partitions of this stream will be delivered to all the tasks.</td>
-</tr>
-<tr>
-<td>task.consumer.batch.size</td>
-<td>1</td>
-<td>If set to a positive integer, the task will try to consume batches with the given number of messages from each input stream, rather than consuming round-robin from all the input streams on each individual message. Setting this property can improve performance in some cases.</td>
-</tr>
-</tbody></table>
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>task.inputs</td>
+      <td> </td>
+      <td>This configuration is only required for legacy task applications. A comma-separated list of streams that are consumed by this job. Each stream is given in the format system-name.stream-name. For example, if you have one input system called my-kafka, and want to consume two Kafka topics called PageViewEvent and UserActivityEvent, then you would set task.inputs=my-kafka.PageViewEvent, my-kafka.UserActivityEvent.</td>
+    </tr>

[... 1394 lines stripped ...]