You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ar...@apache.org on 2019/11/20 14:28:17 UTC

[beam] branch master updated (0b415fd -> 18059ee)

This is an automated email from the ASF dual-hosted git repository.

aromanenko pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


    from 0b415fd  Merge pull request #10160 More compartmentalization of bundle-based-runner only utilities.
     add 6f72c93  [BEAM-8470] Add an empty spark-structured-streaming runner project targeting spark 2.4.0
     add 4ca7e55  [BEAM-8470] Fix missing dep
     add 0fc0d0a4 [BEAM-8470] Add SparkPipelineOptions
     add e8ca23e  [BEAM-8470] Start pipeline translation
     add 00964d2  [BEAM-8470] Add global pipeline translation structure
     add a3b278e  [BEAM-8470] Add nodes translators structure
     add ef4941a  [BEAM-8470] Wire node translators with pipeline translator
     add 8a8dc1e  [BEAM-8470] Renames: better differentiate pipeline translator from transform translator
     add cdfd589  [BEAM-8470] Organise methods in PipelineTranslator
     add 38eca95  [BEAM-8470] Initialise BatchTranslationContext
     add 80f2d8c  [BEAM-8470] Refactoring: -move batch/streaming common translation visitor and utility methods to PipelineTranslator -rename batch dedicated classes to Batch* to differentiate with their streaming counterparts -Introduce TranslationContext for common batch/streaming components
     add baf210f  [BEAM-8470] Make transform translation clearer: renaming, comments
     add b65a9da  [BEAM-8470] Improve javadocs
     add 0434749  [BEAM-8470] Move SparkTransformOverrides to correct package
     add 4372c7e  [BEAM-8470] Move common translation context components to superclass
     add 49b666b  [BEAM-8470] apply spotless
     add 0d6906a  [BEAM-8470] Make codestyle and findbugs happy
     add ef97440  [BEAM-8470] Add TODOs
     add 9abf8ac  [BEAM-8470] Post-pone batch qualifier in all classes names for readability
     add 11a6e19  [BEAM-8470] Add precise TODO for multiple TransformTranslator per transform URN
     add 47ed3d1  [BEAM-8470] Added SparkRunnerRegistrar
     add 022a0d0  [BEAM-8470] Add basic pipeline execution. Refactor translatePipeline() to return the translationContext on which we can run startPipeline()
     add 96b3f36  [BEAM-8470] Create PCollections manipulation methods
     add b0c42af  [BEAM-8470] Create Datasets manipulation methods
     add 2c5cb23  [BEAM-8470] Add Flatten transformation translator
     add 9f1bf60  [BEAM-8470] Add primitive GroupByKeyTranslatorBatch implementation
     add 98ea9fb  [BEAM-8470] Use Iterators.transform() to return Iterable
     add 0b55323  [BEAM-8470] Implement read transform
     add 4c91a57  [BEAM-8470] update TODO
     add 4adf3bb  [BEAM-8470] Apply spotless
     add 6b4b916  [BEAM-8470] start source instantiation
     add ff60578  [BEAM-8470] Improve exception flow
     add 2ee98da  [BEAM-8470] Improve type enforcement in ReadSourceTranslator
     add fc3abf5  [BEAM-8470] Experiment over using spark Catalog to pass in Beam Source through spark Table
     add 4746d9b  [BEAM-8470] Add source mocks
     add e45e48d  [BEAM-8470] fix mock, wire mock in translators and create a main test.
     add 7d7fe77  [BEAM-8470] Use raw WindowedValue so that spark Encoders could work (temporary)
     add 9d84a0f  [BEAM-8470] clean deps
     add b4032aa  [BEAM-8470] Move DatasetSourceMock to proper batch mode
     add a7ad1ab  [BEAM-8470] Run pipeline in batch mode or in streaming mode
     add 141e4bc  [BEAM-8470] Split batch and streaming sources and translators
     add 8954c50  [BEAM-8470] Use raw Encoder<WindowedValue> also in regular ReadSourceTranslatorBatch
     add 00ef268  [BEAM-8470] Clean
     add c426c98  [BEAM-8470] Add ReadSourceTranslatorStreaming
     add 1740dc4  [BEAM-8470] Move Source and translator mocks to a mock package.
     add 7819918  [BEAM-8470] Pass Beam Source and PipelineOptions to the spark DataSource as serialized strings
     add b10aa53  [BEAM-8470] Refactor DatasetSource fields
     add c4bb08c  [BEAM-8470] Wire real SourceTransform and not mock and update the test
     add 5bbea63  [BEAM-8470] Add missing 0-arg public constructor
     add 0dbe26f  [BEAM-8470] Use new PipelineOptionsSerializationUtils
     add 43052d3  [BEAM-8470] Apply spotless and fix  checkstyle
     add 17ca18b  [BEAM-8470] Add a dummy schema for reader
     add 2e8393b  [BEAM-8470] Add empty 0-arg constructor for mock source
     add 43ff919  [BEAM-8470] Clean
     add c8ad727  [BEAM-8470] Checkstyle and Findbugs
     add 6e3575d  [BEAM-8470] Refactor SourceTest to a UTest instead of a main
     add c221aaa  [BEAM-8470] Fix pipeline triggering: use a spark action instead of writing the dataset
     add c26c421  [BEAM-8470] improve readability of options passing to the source
     add ff69ded  [BEAM-8470] Clean unneeded fields in DatasetReader
     add 524667e  [BEAM-8470] Fix serialization issues
     add 638bdae  [BEAM-8470] Add SerializationDebugger
     add fd354fa  [BEAM-8470] Add serialization test
     add 163102b  [BEAM-8470] Move SourceTest to same package as tested class
     add 68fc6d5  [BEAM-8470] Fix SourceTest
     add e4c76fc  [BEAM-8470] Simplify beam reader creation as it is created once the source has already been partitioned
     add 90463a0  [BEAM-8470] Put all transform translators Serializable
     add 940b484  [BEAM-8470] Enable test mode
     add 0b156bf  [BEAM-8470] Enable gradle build scan
     add 74080c1  [BEAM-8470] Add flatten test
     add 740131e  [BEAM-8470] First attempt for ParDo primitive implementation
     add 0cedcd7  [BEAM-8470] Serialize windowedValue to byte[] in source to be able to specify a binary dataset schema and deserialize windowedValue from Row to get a dataset<WindowedValue>
     add 79b075a  [BEAM-8470] Comment schema choices
     add 5d1b2b5  [BEAM-8470] Fix errorprone
     add 0529996  [BEAM-8470] Fix testMode output to comply with new binary schema
     add 696597c  [BEAM-8470] Cleaning
     add 91964b9  [BEAM-8470] Remove bundleSize parameter and always use spark default parallelism
     add 35051e0  [BEAM-8470] Fix split bug
     add 6902b4b  [BEAM-8470] Clean
     add e5a36f0  [BEAM-8470] Add ParDoTest
     add 057e0ac  [BEAM-8470] Address minor review notes
     add b706a74  [BEAM-8470] Clean
     add 83779db  [BEAM-8470] Add GroupByKeyTest
     add 0330f32  [BEAM-8470] Add comments and TODO to GroupByKeyTranslatorBatch
     add 1cb53e3  [BEAM-8470] Fix type checking with Encoder of WindowedValue<T>
     add e363586  [BEAM-8470] Port latest changes of ReadSourceTranslatorBatch to ReadSourceTranslatorStreaming
     add 9b9cca6  [BEAM-8470] Remove no more needed putDatasetRaw
     add cb1a085  [BEAM-8470] Add ComplexSourceTest
     add c13e8ae  [BEAM-8470] Fail in case of having SideInputs or State/Timers
     add 83e4067  [BEAM-8470] Fix Encoders: create an Encoder for every manipulated type to avoid Spark fallback to genericRowWithSchema and cast to allow having Encoders with generic types such as WindowedValue<T> and get the type checking back
     add d19c8b9  [BEAM-8470] Apply spotless
     add 1051eba  [BEAM-8470] Fixed Javadoc error
     add ee67f21  [BEAM-8470] Rename SparkSideInputReader class and rename pruneOutput() to pruneOutputFilteredByTag()
     add 8e0f183  [BEAM-8470] Don't use deprecated sideInput.getWindowingStrategyInternal()
     add f0ff5b4  [BEAM-8470] Simplify logic of ParDo translator
     add 9c4e36d  [BEAM-8470] Fix kryo issue in GBK translator with a workaround
     add b3a740c  [BEAM-8470] Rename SparkOutputManager for consistency
     add 438f1e9  [BEAM-8470] Fix for test elements container in GroupByKeyTest
     add be1cd23  [BEAM-8470] Added "testTwoPardoInRow"
     add ed6b12c  [BEAM-8470] Add a test for the most simple possible Combine
     add 35f100c  [BEAM-8470] Rename SparkDoFnFilterFunction to DoFnFilterFunction for consistency
     add 8111241  [BEAM-8470] Generalize the use of SerializablePipelineOptions in place of (not serializable) PipelineOptions
     add 2d0c33e  [BEAM-8470] Fix getSideInputs
     add 8662ef8  [BEAM-8470] Extract binary schema creation in a helper class
     add 9a757d0  [BEAM-8470] First version of combinePerKey
     add 3da0276  [BEAM-8470] Improve type checking of Tuple2 encoder
     add ca39fe1  [BEAM-8470] Introduce WindowingHelpers (and helpers package) and use it in Pardo, GBK and CombinePerKey
     add f002039  [BEAM-8470] Fix combiner using KV as input, use binary encoders in place of accumulatorEncoder and outputEncoder, use helpers, spotless
     add cec975f  [BEAM-8470] Add combinePerKey and CombineGlobally tests
     add 39a978a  [BEAM-8470] Introduce RowHelpers
     add c3cd76f  [BEAM-8470] Add CombineGlobally translation to avoid translating Combine.perKey as a composite transform based on Combine.PerKey (which uses low perf GBK)
     add 9ee65d5  [BEAM-8470] Cleaning
     add 279d2ca  [BEAM-8470] Get back to classes in translators resolution because URNs cannot translate Combine.Globally
     add 55bf503  [BEAM-8470] Fix various type checking issues in Combine.Globally
     add d5dab93  [BEAM-8470] Update test with Long
     add fd1d8c5  [BEAM-8470] Fix combine. For unknown reason GenericRowWithSchema is used as input of combine so extract its content to be able to proceed
     add 5c074eb  [BEAM-8470] Use more generic Row instead of GenericRowWithSchema
     add 341c39f  [BEAM-8470] Add explanation about receiving a Row as input in the combiner
     add 89d38b1  [BEAM-8470] Fix encoder bug in combinePerkey
     add 6b109d3  [BEAM-8470] Cleaning
     add 9494886  [BEAM-8470] Implement WindowAssignTranslatorBatch
     add c83a607  [BEAM-8470] Implement WindowAssignTest
     add 9bf9385  [BEAM-8470] Fix javadoc
     add 8f5e9dc  [BEAM-8470] Added SideInput support
     add 915ca01  [BEAM-8470] Fix CheckStyle violations
     add 92cab8e  [BEAM-8470] Don't use Reshuffle translation
     add 6bbeeff  [BEAM-8470] Added using CachedSideInputReader
     add f8512c7  [BEAM-8470] Added TODO comment for ReshuffleTranslatorBatch
     add 19ee565  [BEAM-8470] And unchecked warning suppression
     add 1bf9a4a  [BEAM-8470] Add streaming source initialisation
     add 6ca706a  [BEAM-8470] Implement first streaming source
     add b5ccec6  [BEAM-8470] Add a TODO on spark output modes
     add 175940d  [BEAM-8470] Add transformators registry in PipelineTranslatorStreaming
     add ba59f10  [BEAM-8470] Add source streaming test
     add 80dde2d  [BEAM-8470] Specify checkpointLocation at the pipeline start
     add e494189  [BEAM-8470] Clean unneeded 0 arg constructor in batch source
     add e92adee  [BEAM-8470] Clean streaming source
     add 31a2be5  [BEAM-8470] Continue impl of offsets for streaming source
     add 8777607  [BEAM-8470] Deal with checkpoint and offset based read
     add 43088fb  [BEAM-8470] Apply spotless and fix spotbugs warnings
     add 75e3f7f  [BEAM-8470] Disable never ending test SimpleSourceTest.testUnboundedSource
     add 9e60f27  [BEAM-8470] Fix access level issues, typos and modernize code to Java 8 style
     add 0492206  [BEAM-8470] Merge Spark Structured Streaming runner into main Spark module. Remove spark-structured-streaming module. Rename Runner to SparkStructuredStreamingRunner. Remove specific PipelineOptions and Registrar for Structured Streaming Runner
     add 0e37bae  [BEAM-8470] Fix non-vendored imports from Spark Streaming Runner classes
     add 01a2fd4  [BEAM-8470] Pass doFnSchemaInformation to ParDo batch translation
     add f950dbf  [BEAM-8470] Fix spotless issues after rebase
     add 82cf713  [BEAM-8470] Fix logging levels in Spark Structured Streaming translation
     add f44c863  [BEAM-8470] Add SparkStructuredStreamingPipelineOptions and SparkCommonPipelineOptions - SparkStructuredStreamingPipelineOptions was added to have the new   runner rely only on its specific options.
     add d0f2ba3  [BEAM-8470] Rename SparkPipelineResult to SparkStructuredStreamingPipelineResult This is done to avoid an eventual collision with the one in SparkRunner. However this cannot happen at this moment because it is package private, so it is also done for consistency.
     add 23e3c53  [BEAM-8470] Use PAssert in Spark Structured Streaming transform tests
     add 3ba3e51  [BEAM-8470] Ignore spark offsets (cf javadoc)
     add 05266b9  [BEAM-8470] implement source.stop
     add 75209f9  [BEAM-8470] Update javadoc
     add f11e382  [BEAM-8470] Apply Spotless
     add 075a79b  [BEAM-8470] Enable batch Validates Runner tests for Structured Streaming Runner
     add de5b17b  [BEAM-8470] Limit the number of partitions to make tests go 300% faster
     add 71edc3e  [BEAM-8470] Fixes ParDo not calling setup and not tearing down if exception on startBundle
     add 5972c51  [BEAM-8470] Pass transform based doFnSchemaInformation in ParDo translation
     add 3dbcaba  [BEAM-8470] Consider null object case on RowHelpers, fixes empty side inputs tests.
     add a687d5d  [BEAM-8470] Put back batch/simpleSourceTest.testBoundedSource
     add b70f605  [BEAM-8470] Update windowAssignTest
     add e070a69  [BEAM-8470] Add comment about checkpoint mark
     add 2c1437c  [BEAM-8470] Re-code GroupByKeyTranslatorBatch to conserve windowing instead of unwindowing/windowing(GlobalWindow): simplify code, use ReduceFnRunner to merge the windows
     add 01184ff  [BEAM-8470] re-enable reduceFnRunner timers for output
     add dfed0a5  [BEAM-8470] Improve visibility of debug messages
     add c7dd248  [BEAM-8470] Add a test that GBK preserves windowing
     add 65e1b79  [BEAM-8470] Add TODO in Combine translations
     add 346f2f4  [BEAM-8470] Update KVHelpers.extractKey() to deal with WindowedValue and update GBK and CPK
     add c9f4525  [BEAM-8470] Fix comment about schemas
     add b99c9ef  [BEAM-8470] Implement reduce part of CombineGlobally translation with windowing
     add 1b6339e  [BEAM-8470] Output data after combine
     add 66f6021  [BEAM-8470] Implement merge accumulators part of CombineGlobally translation with windowing
     add 6168070  [BEAM-8470] Fix encoder in combine call
     add 0c00e2f  [BEAM-8470] Revert extractKey while combinePerKey is not done (so that it compiles)
     add f9ecf1f  [BEAM-8470] Applying a groupByKey avoids, for some reason, the spark structured streaming framework casting data to Row, which makes it impossible to deserialize without the coder shipped into the data. For performance reasons (avoid memory consumption and having to deserialize), we do not ship coder + data. Also add a mapPartitions before GBK to avoid shuffling
     add abaf140  [BEAM-8470] Fix case when a window does not merge into any other window
     add 319e01f  [BEAM-8470] Fix wrong encoder in combineGlobally GBK
     add a759ddc  [BEAM-8470] Fix bug in the window merging logic
     add d609213  [BEAM-8470] Remove the mapPartition that adds a key per partition because otherwise spark will reduce values per key instead of globally
     add 2cbd35d  [BEAM-8470] Remove CombineGlobally translation because it is less performant than the beam sdk one (key + combinePerKey.withHotkeyFanout)
     add 24c137a  [BEAM-8470] Now that there is only Combine.PerKey translation, make only one Aggregator
     add b7e4a8b  [BEAM-8470] Clean no more needed KVHelpers
     add 5215a5a  [BEAM-8470] Clean not more needed RowHelpers
     add 4e06ffe  [BEAM-8470] Clean not more needed WindowingHelpers
     add c83168c  [BEAM-8470] Fix javadoc of AggregatorCombiner
     add 5898cf0  [BEAM-8470] Fixed immutable list bug
     add 37b5fa8  [BEAM-8470] add comment in combine globally test
     add 6eea481  [BEAM-8470] Clean groupByKeyTest
     add 3e33078  [BEAM-8470] Add a test that combine per key preserves windowing
     add 55d865b  [BEAM-8470] Ignore for now not working test testCombineGlobally
     add 65ffdd8  [BEAM-8470] Add metrics support in DoFn
     add e2b57ac  [BEAM-8470] Add missing dependencies to run Spark Structured Streaming Runner on Nexmark
     add 323d10f  [BEAM-8470] Add setEnableSparkMetricSinks() method
     add 2f1d38b  [BEAM-8470] Fix javadoc
     add 06559d2  [BEAM-8470] Fix accumulators initialization in Combine that prevented CombineGlobally to work.
     add 3cce68c  [BEAM-8470] Add a test to check that CombineGlobally preserves windowing
     add 68b6f98  [BEAM-8470] Persist all output Dataset if there are multiple outputs in pipeline Enabled Use*Metrics tests
     add 6084764  [BEAM-8470] Added metrics sinks and tests
     add 69c514f  [BEAM-8470] Make spotless happy
     add 4cab501  [BEAM-8470] Add PipelineResults to Spark structured streaming.
     add a1ec37a  [BEAM-8470] Update log4j configuration
     add 2a53b32  [BEAM-8470] Add spark execution plans extended debug messages.
     add f34de05  [BEAM-8470] Print number of leaf datasets
     add 892b742  [BEAM-8470] fixup! Add PipelineResults to Spark structured streaming.
     add 613bac9  [BEAM-8470] Remove no more needed AggregatorCombinerPerKey (there is only AggregatorCombiner)
     add d024335  [BEAM-8470] After testing performance and correctness, launch pipeline with dataset.foreach(). Make both test mode and production mode use foreach for uniformity. Move dataset print as a utility method
     add 7a30c5a  [BEAM-8470] Improve Pardo translation performance: avoid calling a filter transform when there is only one output tag
     add 0e42c18  [BEAM-8470] Use "sparkMaster" in local mode to obtain number of shuffle partitions + spotless apply
     add 7f08fc8  [BEAM-8470] Wrap Beam Coders into Spark Encoders using ExpressionEncoder: serialization part
     add 2071f1c  [BEAM-8470] Wrap Beam Coders into Spark Encoders using ExpressionEncoder: deserialization part
     add 6f0e9fa  [BEAM-8470] type erasure: spark encoders require a Class<T>, pass Object and cast to Class<T>
     add 965814a  [BEAM-8470] Fix scala Product in Encoders to avoid StackOverflow
     add 165d8b7  [BEAM-8470] Conform to spark ExpressionEncoders: pass classTags, implement scala Product, pass children from within the ExpressionEncoder, fix visibilities
     add 609aace  [BEAM-8470] Add a simple spark native test to test Beam coders wrapping into Spark Encoders
     add 09e882e  [BEAM-8470] Fix code generation in Beam coder wrapper
     add d715e37  [BEAM-8470] Lazy init coder because coder instance cannot be interpolated by catalyst
     add d27962c  [BEAM-8470] Fix warning in coder construction by reflection
     add fe46a63  [BEAM-8470] Fix ExpressionEncoder generated code: typos, try catch, fqcn
     add 64a978d  [BEAM-8470] Fix getting the output value in code generation
     add cfecb40  [BEAM-8470] Fix beam coder lazy init using reflection: use .class + try catch + cast
     add 0d13b77  [BEAM-8470] Remove lazy init of beam coder because there is no generic way of instantiating a beam coder
     add 1a6e662  [BEAM-8470] Remove example code
     add e45e7b5  [BEAM-8470] Fix equal and hashcode
     add dd63c1b  [BEAM-8470] Fix generated code: uniform exceptions catching, fix parenthesis and variable declarations
     add 75a8b24  [BEAM-8470] Add an assert of equality in the encoders test
     add ff50364  [BEAM-8470] Apply spotless and checkstyle and add javadocs
     add 4e215e0  [BEAM-8470] Wrap exceptions in UserCoderExceptions
     add 8ed3db6  [BEAM-8470] Put Encoders expressions serializable
     add 465d570  [BEAM-8470] Catch Exception instead of IOException because some coders do not throw Exceptions at all (e.g. VoidCoder)
     add 4579663  [BEAM-8470] Apply new Encoders to CombinePerKey
     add d246520  [BEAM-8470] Apply new Encoders to Read source
     add a448cfb  [BEAM-8470] Improve performance of source: the mapper already calls windowedValueCoder.decode, no need to call it also in the Spark encoder
     add c875dbc  [BEAM-8470] Ignore long time failing test: SparkMetricsSinkTest
     add be7415d  [BEAM-8470] Apply new Encoders to Window assign translation
     add cdd6e1a  [BEAM-8470] Apply new Encoders to AggregatorCombiner
     add 9ae6a57  [BEAM-8470] Create a Tuple2Coder to encode scala tuple2
     add f827291  [BEAM-8470] Apply new Encoders to GroupByKey
     add 76125e9  [BEAM-8470] Apply new Encoders to Pardo. Replace Tuple2Coder with MultiOutputCoder to deal with multiple output to use in Spark Encoder for DoFnRunner
     add f492ccf  [BEAM-8470] Apply spotless, fix typo and javadoc
     add d81d2f6  [BEAM-8470] Use beam encoders also in the output of the source translation
     add 9599546  [BEAM-8470] Remove unneeded cast
     add 0e072d3  [BEAM-8470] Fix: Remove generic hack of using object. Use actual Coder encodedType in Encoders
     add da8dfbd  [BEAM-8470] Remove Encoders based on kryo now that we call Beam coders in the runner
     add 470d04f  [BEAM-8470] Add a jenkins job for validates runner tests in the new spark runner
     add 17ace84  [BEAM-8470] Apply spotless
     add 2ca30a4  [BEAM-8470] Rebase on master: pass sideInputMapping in SimpleDoFnRunner as needed now in the API
     add 99e081d  Fix SpotBugs
     add 52aae7f  [BEAM-8470] simplify coders in combinePerKey translation
     add 803bb0b  [BEAM-8470] Fix combiner. Do not reuse instance of accumulator
     add a4c4ee7  [BEAM-8470] input windows can arrive exploded (for sliding windows). As a result an input has multiple windows. So we need to consider that the accumulator can have multiple windows
     add ea47c71  [BEAM-8470] Add a combine test with sliding windows
     add 89eb1f0  [BEAM-8470] Add a test to test combine translation on binaryCombineFn with sliding windows
     add 4416a2e  [BEAM-8470] Fix tests: use correct SparkStructuredStreamingPipelineOptions, set testMode to true. Some renaming
     add b980cf9  [BEAM-8470] Fix wrong expected results in CombineTest.testBinaryCombineWithSlidingWindows
     add 22d98df  [BEAM-8470] Add disclaimers about this runner being experimental
     add d680252  [BEAM-8470] Fix: create an empty accumulator in combine.mergeAccumulators, because this method modifies its first input accumulator. Decrease memory usage by storing only accumulator and timestamp in the combine.merge map
     add 9922bcb  [BEAM-8470] Apply spotless
     add a0872bc  [BEAM-8470] Add a countPerElement test with sliding windows
     add c9ce072  [BEAM-8470] Fix the output timestamps of combine: timestamps must be merged to a window before combining
     add f516554  [BEAM-8470] set log level to info to avoid resource consumption in production mode
     add 27dcc48  [BEAM-8470] Fix CombineTest.testCountPerElementWithSlidingWindows expected results
     add f5cfbc1  [BEAM-8470] Remove "validatesStructuredStreamingRunnerBatch" from "validatesRunner" task
     add ac3132c  [BEAM-8470] Fix timestamps in combine output: assign the timestamp to the window and not merge all the timestamps before combine
     add 18059ee  Merge pull request #9866: [BEAM-8470] Create a new Spark runner based on Spark Structured streaming framework

No new revisions were added by this update.

Summary of changes:
 ...ValidatesRunner_SparkStructuredStreaming.groovy |  43 +++
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |   2 +
 runners/spark/build.gradle                         |  47 +++
 .../runners/spark/SparkCommonPipelineOptions.java  |  74 +++++
 .../beam/runners/spark/SparkPipelineOptions.java   |  47 +--
 .../beam/runners/spark/SparkRunnerRegistrar.java   |   8 +-
 .../SparkStructuredStreamingPipelineOptions.java   |  41 +++
 .../SparkStructuredStreamingPipelineResult.java    | 151 ++++++++++
 .../SparkStructuredStreamingRunner.java            | 221 ++++++++++++++
 .../aggregators/AggregatorsAccumulator.java        |  70 +++++
 .../aggregators/NamedAggregators.java              | 110 +++++++
 .../aggregators/NamedAggregatorsAccumulator.java   |  63 ++++
 .../aggregators/package-info.java                  |  20 ++
 .../structuredstreaming/examples/WordCount.java    | 132 +++++++++
 .../metrics/AggregatorMetric.java                  |  39 +++
 .../metrics/AggregatorMetricSource.java            |  49 ++++
 .../metrics/CompositeSource.java                   |  45 +++
 .../metrics/MetricsAccumulator.java                |  71 +++++
 .../MetricsContainerStepMapAccumulator.java        |  65 +++++
 .../metrics/SparkBeamMetric.java                   |  89 ++++++
 .../metrics/SparkBeamMetricSource.java             |  48 +++
 .../metrics/SparkMetricsContainerStepMap.java      |  42 +++
 .../metrics/WithMetricsSupport.java                | 177 +++++++++++
 .../structuredstreaming/metrics/package-info.java  |  20 ++
 .../metrics/sink/CodahaleCsvSink.java              |  36 +++
 .../metrics/sink/CodahaleGraphiteSink.java         |  34 +++
 .../metrics/sink/package-info.java                 |  20 ++
 .../spark/structuredstreaming/package-info.java    |  20 ++
 .../translation/PipelineTranslator.java            | 215 ++++++++++++++
 .../translation/SchemaHelpers.java                 |  37 +++
 .../translation/SparkTransformOverrides.java       |  52 ++++
 .../translation/TransformTranslator.java           |  28 ++
 .../translation/TranslationContext.java            | 257 ++++++++++++++++
 .../translation/batch/AggregatorCombiner.java      | 238 +++++++++++++++
 .../batch/CombinePerKeyTranslatorBatch.java        | 111 +++++++
 .../CreatePCollectionViewTranslatorBatch.java      |  59 ++++
 .../translation/batch/DatasetSourceBatch.java      | 161 ++++++++++
 .../translation/batch/DoFnFunction.java            | 161 ++++++++++
 .../translation/batch/DoFnRunnerWithMetrics.java   |  96 ++++++
 .../translation/batch/FlattenTranslatorBatch.java  |  63 ++++
 .../batch/GroupByKeyTranslatorBatch.java           | 121 ++++++++
 .../translation/batch/ParDoTranslatorBatch.java    | 251 ++++++++++++++++
 .../translation/batch/PipelineTranslatorBatch.java |  94 ++++++
 .../translation/batch/ProcessContext.java          | 138 +++++++++
 .../batch/ReadSourceTranslatorBatch.java           |  85 ++++++
 .../batch/ReshuffleTranslatorBatch.java            |  29 ++
 .../batch/WindowAssignTranslatorBatch.java         |  58 ++++
 .../GroupAlsoByWindowViaOutputBufferFn.java        | 165 +++++++++++
 .../batch/functions/NoOpStepContext.java           |  36 +++
 .../batch/functions/SparkSideInputReader.java      | 146 ++++++++++
 .../translation/batch/functions/package-info.java  |  20 ++
 .../translation/batch/package-info.java            |  20 ++
 .../translation/helpers/CoderHelpers.java          |  63 ++++
 .../translation/helpers/EncoderHelpers.java        | 324 +++++++++++++++++++++
 .../translation/helpers/KVHelpers.java             |  31 ++
 .../translation/helpers/MultiOuputCoder.java       |  80 +++++
 .../translation/helpers/ReduceFnRunnerHelpers.java |  88 ++++++
 .../translation/helpers/RowHelpers.java            |  75 +++++
 .../translation/helpers/SideInputBroadcast.java    |  46 +++
 .../translation/helpers/WindowingHelpers.java      |  82 ++++++
 .../translation/helpers/package-info.java          |  20 ++
 .../translation/package-info.java                  |  20 ++
 .../streaming/DatasetSourceStreaming.java          | 258 ++++++++++++++++
 .../streaming/PipelineTranslatorStreaming.java     |  88 ++++++
 .../streaming/ReadSourceTranslatorStreaming.java   |  85 ++++++
 .../translation/streaming/package-info.java        |  20 ++
 .../translation/utils/CachedSideInputReader.java   |  91 ++++++
 .../translation/utils/SideInputStorage.java        | 103 +++++++
 .../translation/utils/package-info.java            |  20 ++
 runners/spark/src/main/resources/log4j.properties  |  40 +++
 .../runners/spark/SparkRunnerRegistrarTest.java    |   7 +-
 .../StructuredStreamingPipelineStateTest.java      | 225 ++++++++++++++
 .../aggregators/metrics/sink/InMemoryMetrics.java  |  82 ++++++
 .../metrics/sink/InMemoryMetricsSinkRule.java      |  28 ++
 .../metrics/sink/SparkMetricsSinkTest.java         |  84 ++++++
 .../metrics/BeamMetricTest.java                    |  47 +++
 .../metrics/MetricsPusherTest.java                 |  91 ++++++
 .../translation/batch/CombineTest.java             | 186 ++++++++++++
 .../translation/batch/ComplexSourceTest.java       |  86 ++++++
 .../translation/batch/FlattenTest.java             |  59 ++++
 .../translation/batch/GroupByKeyTest.java          | 124 ++++++++
 .../translation/batch/ParDoTest.java               | 153 ++++++++++
 .../translation/batch/SimpleSourceTest.java        | 101 +++++++
 .../translation/batch/WindowAssignTest.java        |  69 +++++
 .../translation/streaming/SimpleSourceTest.java    |  57 ++++
 .../structuredstreaming/utils/EncodersTest.java    |  52 ++++
 .../utils/SerializationDebugger.java               | 115 ++++++++
 .../structuredstreaming/utils/package-info.java    |  20 ++
 .../{log4j.properties => log4j-test.properties}    |   2 +-
 sdks/java/testing/nexmark/build.gradle             |   3 +-
 90 files changed, 7498 insertions(+), 52 deletions(-)
 create mode 100644 .test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.groovy
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/SparkCommonPipelineOptions.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineOptions.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineResult.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/AggregatorsAccumulator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/NamedAggregators.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/NamedAggregatorsAccumulator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/examples/WordCount.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/AggregatorMetric.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/AggregatorMetricSource.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/CompositeSource.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsContainerStepMapAccumulator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetric.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetricSource.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkMetricsContainerStepMap.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/WithMetricsSupport.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/sink/CodahaleCsvSink.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/sink/CodahaleGraphiteSink.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/sink/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SchemaHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkTransformOverrides.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/AggregatorCombiner.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombinePerKeyTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CreatePCollectionViewTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnFunction.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/PipelineTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ProcessContext.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ReadSourceTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ReshuffleTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTranslatorBatch.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/functions/GroupAlsoByWindowViaOutputBufferFn.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/functions/NoOpStepContext.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/functions/SparkSideInputReader.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/functions/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/CoderHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/KVHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/MultiOuputCoder.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/ReduceFnRunnerHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/RowHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/SideInputBroadcast.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/WindowingHelpers.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/DatasetSourceStreaming.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/PipelineTranslatorStreaming.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/ReadSourceTranslatorStreaming.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/package-info.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/utils/CachedSideInputReader.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/utils/SideInputStorage.java
 create mode 100644 runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/utils/package-info.java
 create mode 100644 runners/spark/src/main/resources/log4j.properties
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/StructuredStreamingPipelineStateTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/metrics/sink/InMemoryMetrics.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/metrics/sink/InMemoryMetricsSinkRule.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/aggregators/metrics/sink/SparkMetricsSinkTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/metrics/BeamMetricTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsPusherTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ComplexSourceTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/SimpleSourceTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/SimpleSourceTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/EncodersTest.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/SerializationDebugger.java
 create mode 100644 runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/package-info.java
 rename runners/spark/src/test/resources/{log4j.properties => log4j-test.properties} (96%)