You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by dh...@apache.org on 2017/01/29 16:21:47 UTC
[01/50] beam git commit: This closes #1822: Revert "Simplified API
surface verifications"
Repository: beam
Updated Branches:
refs/heads/python-sdk 1bc685980 -> 27cf68ee7
This closes #1822: Revert "Simplified API surface verifications"
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/cb6e0a80
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/cb6e0a80
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/cb6e0a80
Branch: refs/heads/python-sdk
Commit: cb6e0a80c57b056489d447cde092cffdd041eed5
Parents: 6ecbfb9 9248bef
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jan 23 19:47:47 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jan 23 19:47:47 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/util/ApiSurface.java | 420 +++++--------------
.../org/apache/beam/SdkCoreApiSurfaceTest.java | 61 ---
.../apache/beam/sdk/util/ApiSurfaceTest.java | 152 +++++--
.../apache/beam/sdk/io/gcp/ApiSurfaceTest.java | 134 ++++++
.../beam/sdk/io/gcp/GcpApiSurfaceTest.java | 76 ----
5 files changed, 359 insertions(+), 484 deletions(-)
----------------------------------------------------------------------
[48/50] beam git commit: Merge remote-tracking branch 'origin/master'
into python-sdk.
Posted by dh...@apache.org.
Merge remote-tracking branch 'origin/master' into python-sdk.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c2859a55
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c2859a55
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c2859a55
Branch: refs/heads/python-sdk
Commit: c2859a55f89c9807a037adfde9f7e8f506c108ce
Parents: 1bc6859 34b4a6d
Author: Ahmet Altay <al...@google.com>
Authored: Fri Jan 27 16:57:44 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Jan 27 16:57:44 2017 -0800
----------------------------------------------------------------------
.jenkins/common_job_properties.groovy | 9 +-
...job_beam_PostCommit_Java_MavenInstall.groovy | 2 +-
.../job_beam_PreCommit_Java_MavenInstall.groovy | 2 +-
.../job_beam_Release_NightlySnapshot.groovy | 2 +-
.jenkins/job_seed.groovy | 2 +-
.travis/README.md | 2 +-
DISCLAIMER | 10 -
NOTICE | 4 +-
README.md | 46 +-
examples/java/README.md | 16 +-
examples/java/pom.xml | 21 +-
.../beam/examples/DebuggingWordCount.java | 4 +-
.../org/apache/beam/examples/WordCount.java | 6 +-
.../beam/examples/complete/AutoComplete.java | 2 +-
.../org/apache/beam/examples/complete/README.md | 14 +-
.../apache/beam/examples/complete/TfIdf.java | 2 +-
.../examples/complete/TopWikipediaSessions.java | 2 +-
.../examples/complete/TrafficMaxLaneFlow.java | 2 +-
.../beam/examples/complete/TrafficRoutes.java | 2 +-
.../examples/cookbook/BigQueryTornadoes.java | 2 +-
.../cookbook/CombinePerKeyExamples.java | 2 +-
.../org/apache/beam/examples/cookbook/README.md | 14 +-
.../beam/examples/cookbook/TriggerExample.java | 4 +-
.../beam/examples/WindowedWordCountIT.java | 16 +-
examples/java8/pom.xml | 2 +-
.../beam/examples/complete/game/GameStats.java | 7 +-
.../examples/complete/game/LeaderBoard.java | 5 +-
.../beam/examples/complete/game/UserScore.java | 2 +-
examples/pom.xml | 16 +-
pom.xml | 41 +-
runners/apex/README.md | 4 +-
runners/apex/pom.xml | 3 +-
.../beam/runners/apex/ApexPipelineOptions.java | 7 +-
.../apache/beam/runners/apex/ApexRunner.java | 43 +-
.../beam/runners/apex/ApexYarnLauncher.java | 23 +-
.../translation/CreateValuesTranslator.java | 18 +-
.../FlattenPCollectionTranslator.java | 28 +-
.../apex/translation/GroupByKeyTranslator.java | 2 +-
.../translation/ParDoBoundMultiTranslator.java | 27 +-
.../apex/translation/ParDoBoundTranslator.java | 4 +-
.../apex/translation/TranslationContext.java | 27 +-
.../apex/translation/WindowBoundTranslator.java | 8 +-
.../operators/ApexGroupByKeyOperator.java | 4 +-
.../operators/ApexParDoOperator.java | 6 +-
.../ApexReadUnboundedInputOperator.java | 17 +-
.../beam/runners/apex/ApexRunnerTest.java | 75 ++
.../beam/runners/apex/ApexYarnLauncherTest.java | 9 +-
.../runners/apex/examples/WordCountTest.java | 2 +-
.../translation/ParDoBoundTranslatorTest.java | 6 +-
.../translation/ReadUnboundTranslatorTest.java | 8 +-
.../utils/ApexStateInternalsTest.java | 2 +-
.../test/resources/beam-runners-apex.properties | 20 +
runners/core-java/pom.xml | 2 +-
.../beam/runners/core/AssignWindowsDoFn.java | 3 +-
.../apache/beam/runners/core/DoFnAdapters.java | 343 ++++++
.../apache/beam/runners/core/DoFnRunner.java | 21 -
.../apache/beam/runners/core/DoFnRunners.java | 138 +--
.../core/GroupAlsoByWindowViaWindowSetDoFn.java | 10 +-
.../runners/core/GroupAlsoByWindowsDoFn.java | 5 +-
.../beam/runners/core/KeyedWorkItemCoder.java | 4 +-
.../core/LateDataDroppingDoFnRunner.java | 1 -
.../apache/beam/runners/core/NonEmptyPanes.java | 2 +-
.../org/apache/beam/runners/core/OldDoFn.java | 472 ++++++++
.../runners/core/PerKeyCombineFnRunner.java | 70 --
.../runners/core/PerKeyCombineFnRunners.java | 101 --
.../beam/runners/core/SimpleDoFnRunner.java | 63 -
.../beam/runners/core/SimpleOldDoFnRunner.java | 7 +-
.../beam/runners/core/SplittableParDo.java | 7 -
.../core/UnboundedReadFromBoundedSource.java | 14 +-
.../AfterDelayFromFirstElementStateMachine.java | 2 +-
.../core/triggers/AfterPaneStateMachine.java | 2 +-
.../core/DoFnDelegatingAggregatorTest.java | 144 +++
.../core/GroupAlsoByWindowsProperties.java | 2 +-
.../runners/core/KeyedWorkItemCoderTest.java | 6 +
.../core/LateDataDroppingDoFnRunnerTest.java | 2 +-
.../apache/beam/runners/core/NoOpOldDoFn.java | 72 ++
.../beam/runners/core/OldDoFnContextTest.java | 72 ++
.../apache/beam/runners/core/OldDoFnTest.java | 192 +++
.../beam/runners/core/ReduceFnRunnerTest.java | 12 +-
.../beam/runners/core/ReduceFnTester.java | 2 +-
.../runners/core/SimpleOldDoFnRunnerTest.java | 2 +-
.../UnboundedReadFromBoundedSourceTest.java | 12 +-
runners/direct-java/pom.xml | 3 +-
.../direct/BoundedReadEvaluatorFactory.java | 10 +-
...ecycleManagerRemovingTransformEvaluator.java | 19 +-
.../beam/runners/direct/EvaluationContext.java | 2 +-
.../direct/ExecutorServiceParallelExecutor.java | 4 +-
.../runners/direct/FlattenEvaluatorFactory.java | 4 +-
.../GroupAlsoByWindowEvaluatorFactory.java | 9 +-
.../direct/GroupByKeyOnlyEvaluatorFactory.java | 10 +-
.../beam/runners/direct/ParDoEvaluator.java | 16 +-
.../runners/direct/ParDoEvaluatorFactory.java | 19 +-
.../direct/ParDoMultiOverrideFactory.java | 13 +-
.../direct/StatefulParDoEvaluatorFactory.java | 27 +-
.../direct/TestStreamEvaluatorFactory.java | 5 +-
.../direct/UnboundedReadEvaluatorFactory.java | 22 +-
.../runners/direct/ViewEvaluatorFactory.java | 8 +-
.../beam/runners/direct/WatermarkManager.java | 6 +-
.../runners/direct/WindowEvaluatorFactory.java | 3 +-
.../runners/direct/AggregatorContainerTest.java | 16 +-
.../direct/BoundedReadEvaluatorFactoryTest.java | 5 -
.../CopyOnAccessInMemoryStateInternalsTest.java | 4 +-
.../runners/direct/DirectGraphVisitorTest.java | 16 +-
.../beam/runners/direct/DirectRunnerTest.java | 5 -
...leManagerRemovingTransformEvaluatorTest.java | 103 +-
.../runners/direct/EvaluationContextTest.java | 6 +-
.../beam/runners/direct/ParDoEvaluatorTest.java | 3 +-
.../StatefulParDoEvaluatorFactoryTest.java | 4 +-
.../UnboundedReadEvaluatorFactoryTest.java | 13 +-
runners/flink/README.md | 6 +-
runners/flink/examples/pom.xml | 2 +-
.../beam/runners/flink/examples/WordCount.java | 2 +-
.../flink/examples/streaming/AutoComplete.java | 2 +-
.../examples/streaming/KafkaIOExamples.java | 4 +-
.../KafkaWindowedWordCountExample.java | 2 +-
.../examples/streaming/WindowedWordCount.java | 2 +-
runners/flink/pom.xml | 2 +-
runners/flink/runner/pom.xml | 4 +-
.../runners/flink/FlinkPipelineOptions.java | 6 +-
.../runners/flink/OldPerKeyCombineFnRunner.java | 62 +
.../flink/OldPerKeyCombineFnRunners.java | 155 +++
.../FlinkBatchTransformTranslators.java | 40 +-
.../FlinkBatchTranslationContext.java | 21 +-
.../FlinkStreamingTransformTranslators.java | 46 +-
.../FlinkStreamingTranslationContext.java | 20 +-
.../functions/FlinkDoFnFunction.java | 4 +-
.../FlinkMergingNonShuffleReduceFunction.java | 10 +-
.../FlinkMergingPartialReduceFunction.java | 8 +-
.../functions/FlinkMergingReduceFunction.java | 8 +-
.../functions/FlinkMultiOutputDoFnFunction.java | 4 +-
.../FlinkMultiOutputProcessContext.java | 2 +-
.../functions/FlinkNoElementAssignContext.java | 2 +-
.../functions/FlinkPartialReduceFunction.java | 10 +-
.../functions/FlinkProcessContextBase.java | 4 +-
.../functions/FlinkReduceFunction.java | 10 +-
.../FlinkSingleOutputProcessContext.java | 2 +-
.../wrappers/streaming/DoFnOperator.java | 6 +-
.../streaming/SingletonKeyedWorkItemCoder.java | 10 +-
.../wrappers/streaming/WindowDoFnOperator.java | 2 +-
.../streaming/io/BoundedSourceWrapper.java | 2 +-
.../streaming/io/UnboundedFlinkSink.java | 6 +
.../streaming/io/UnboundedSourceWrapper.java | 2 +-
.../beam/runners/flink/PipelineOptionsTest.java | 13 +
.../streaming/FlinkStateInternalsTest.java | 2 +-
.../streaming/UnboundedSourceWrapperTest.java | 464 +++----
runners/google-cloud-dataflow-java/pom.xml | 15 +-
.../beam/runners/dataflow/AssignWindows.java | 89 ++
.../dataflow/DataflowAggregatorTransforms.java | 79 ++
.../dataflow/DataflowMetricUpdateExtractor.java | 109 ++
.../runners/dataflow/DataflowPipelineJob.java | 2 -
.../dataflow/DataflowPipelineTranslator.java | 510 +++-----
.../beam/runners/dataflow/DataflowRunner.java | 109 +-
.../DataflowUnboundedReadFromBoundedSource.java | 547 +++++++++
.../beam/runners/dataflow/ReadTranslator.java | 102 ++
.../runners/dataflow/TransformTranslator.java | 120 ++
.../dataflow/internal/AssignWindows.java | 89 --
.../dataflow/internal/CustomSources.java | 5 -
.../internal/DataflowAggregatorTransforms.java | 79 --
.../internal/DataflowMetricUpdateExtractor.java | 109 --
.../DataflowUnboundedReadFromBoundedSource.java | 556 ---------
.../runners/dataflow/internal/IsmFormat.java | 20 +-
.../dataflow/internal/ReadTranslator.java | 107 --
.../DataflowPipelineWorkerPoolOptions.java | 16 +-
.../beam/runners/dataflow/util/DoFnInfo.java | 66 +-
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 352 ++++--
.../beam/runners/dataflow/dataflow.properties | 6 +-
.../dataflow/DataflowPipelineJobTest.java | 38 +-
.../DataflowPipelineTranslatorTest.java | 3 +-
.../runners/dataflow/DataflowRunnerTest.java | 8 +-
...aflowUnboundedReadFromBoundedSourceTest.java | 79 ++
...aflowUnboundedReadFromBoundedSourceTest.java | 83 --
.../DataflowPipelineDebugOptionsTest.java | 2 +-
.../options/DataflowPipelineOptionsTest.java | 4 +-
.../options/DataflowProfilingOptionsTest.java | 4 +-
.../runners/dataflow/util/PackageUtilTest.java | 69 +-
runners/pom.xml | 16 +-
runners/spark/README.md | 8 +-
runners/spark/pom.xml | 29 +-
.../spark/aggregators/NamedAggregators.java | 4 +-
.../coders/BeamSparkRunnerRegistrator.java | 48 +-
.../spark/coders/StatelessJavaSerializer.java | 97 ++
.../runners/spark/coders/WritableCoder.java | 4 +-
.../beam/runners/spark/examples/WordCount.java | 2 +-
.../beam/runners/spark/io/MicrobatchSource.java | 9 +-
.../runners/spark/io/SparkUnboundedSource.java | 127 +-
.../spark/stateful/StateSpecFunctions.java | 37 +-
.../runners/spark/translation/DoFnFunction.java | 15 +-
.../spark/translation/EvaluationContext.java | 83 +-
.../translation/GroupCombineFunctions.java | 8 +-
.../spark/translation/MultiDoFnFunction.java | 14 +-
.../translation/SparkAbstractCombineFn.java | 12 +-
.../spark/translation/SparkGlobalCombineFn.java | 13 +-
.../translation/SparkGroupAlsoByWindowFn.java | 2 +-
.../spark/translation/SparkKeyedCombineFn.java | 13 +-
.../spark/translation/SparkPCollectionView.java | 99 ++
.../spark/translation/SparkRuntimeContext.java | 63 +-
.../spark/translation/TransformTranslator.java | 67 +-
.../spark/translation/TranslationUtils.java | 37 +-
.../streaming/StreamingTransformTranslator.java | 114 +-
.../runners/spark/util/BroadcastHelper.java | 127 --
.../runners/spark/util/SideInputBroadcast.java | 77 ++
.../spark/util/SparkSideInputReader.java | 8 +-
.../coders/BeamSparkRunnerRegistratorTest.java | 57 -
.../streaming/KafkaStreamingTest.java | 57 +-
.../ResumeFromCheckpointStreamingTest.java | 20 +-
.../streaming/utils/PAssertStreaming.java | 4 +-
sdks/java/build-tools/pom.xml | 2 +-
.../src/main/resources/beam/findbugs-filter.xml | 26 -
sdks/java/core/pom.xml | 2 +-
.../beam/sdk/annotations/Experimental.java | 5 +-
.../org/apache/beam/sdk/coders/AtomicCoder.java | 2 +-
.../org/apache/beam/sdk/coders/AvroCoder.java | 30 +-
.../apache/beam/sdk/coders/BigDecimalCoder.java | 6 +-
.../beam/sdk/coders/BigEndianIntegerCoder.java | 7 +
.../beam/sdk/coders/BigEndianLongCoder.java | 7 +
.../apache/beam/sdk/coders/ByteArrayCoder.java | 7 +
.../org/apache/beam/sdk/coders/ByteCoder.java | 7 +
.../apache/beam/sdk/coders/ByteStringCoder.java | 8 +
.../java/org/apache/beam/sdk/coders/Coder.java | 7 +
.../apache/beam/sdk/coders/CollectionCoder.java | 12 +-
.../org/apache/beam/sdk/coders/CustomCoder.java | 18 +-
.../apache/beam/sdk/coders/DelegateCoder.java | 29 +-
.../org/apache/beam/sdk/coders/DoubleCoder.java | 7 +
.../apache/beam/sdk/coders/DurationCoder.java | 8 +
.../apache/beam/sdk/coders/InstantCoder.java | 7 +
.../apache/beam/sdk/coders/IterableCoder.java | 12 +-
.../org/apache/beam/sdk/coders/JAXBCoder.java | 48 +-
.../org/apache/beam/sdk/coders/KvCoder.java | 35 +-
.../beam/sdk/coders/LengthPrefixCoder.java | 145 +++
.../org/apache/beam/sdk/coders/ListCoder.java | 7 +
.../org/apache/beam/sdk/coders/MapCoder.java | 62 +-
.../apache/beam/sdk/coders/NullableCoder.java | 6 +
.../beam/sdk/coders/SerializableCoder.java | 17 +-
.../org/apache/beam/sdk/coders/SetCoder.java | 12 +-
.../apache/beam/sdk/coders/StandardCoder.java | 39 +-
.../beam/sdk/coders/StringDelegateCoder.java | 16 +-
.../apache/beam/sdk/coders/StringUtf8Coder.java | 7 +
.../beam/sdk/coders/TableRowJsonCoder.java | 7 +
.../beam/sdk/coders/TextualIntegerCoder.java | 8 +
.../org/apache/beam/sdk/coders/VarIntCoder.java | 10 +-
.../apache/beam/sdk/coders/VarLongCoder.java | 7 +
.../org/apache/beam/sdk/coders/VoidCoder.java | 7 +
.../beam/sdk/coders/protobuf/ProtoCoder.java | 8 +-
.../java/org/apache/beam/sdk/io/AvroSource.java | 5 -
.../sdk/io/BoundedReadFromUnboundedSource.java | 79 +-
.../org/apache/beam/sdk/io/BoundedSource.java | 8 -
.../apache/beam/sdk/io/CompressedSource.java | 8 -
.../org/apache/beam/sdk/io/CountingSource.java | 5 -
.../org/apache/beam/sdk/io/FileSystems.java | 32 +-
.../java/org/apache/beam/sdk/io/PubsubIO.java | 1142 +++++++++---------
.../apache/beam/sdk/io/PubsubUnboundedSink.java | 88 +-
.../beam/sdk/io/PubsubUnboundedSource.java | 104 +-
.../main/java/org/apache/beam/sdk/io/Read.java | 7 +-
.../java/org/apache/beam/sdk/io/TextIO.java | 5 -
.../java/org/apache/beam/sdk/io/XmlSource.java | 5 -
.../org/apache/beam/sdk/options/GcpOptions.java | 36 +-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../beam/sdk/options/PipelineOptions.java | 2 +-
.../sdk/options/PipelineOptionsFactory.java | 10 +-
.../apache/beam/sdk/options/ValueProvider.java | 6 +-
.../beam/sdk/runners/TransformHierarchy.java | 33 +-
.../testing/FlattenWithHeterogeneousCoders.java | 29 +
.../org/apache/beam/sdk/testing/PAssert.java | 12 +-
.../beam/sdk/testing/RunnableOnService.java | 14 +-
.../beam/sdk/testing/SourceTestUtils.java | 5 -
.../org/apache/beam/sdk/testing/TestStream.java | 8 +
.../sdk/testing/UsesUnboundedPCollections.java | 23 +
.../beam/sdk/testing/ValueInSingleWindow.java | 6 +-
.../sdk/transforms/AggregatorRetriever.java | 13 +-
.../beam/sdk/transforms/AppliedPTransform.java | 11 +-
.../org/apache/beam/sdk/transforms/Combine.java | 197 +--
.../apache/beam/sdk/transforms/CombineFns.java | 14 +-
.../org/apache/beam/sdk/transforms/Count.java | 4 +-
.../org/apache/beam/sdk/transforms/Create.java | 5 -
.../sdk/transforms/DelegatingAggregator.java | 2 +-
.../beam/sdk/transforms/DoFnAdapters.java | 504 --------
.../apache/beam/sdk/transforms/DoFnTester.java | 7 -
.../apache/beam/sdk/transforms/GroupByKey.java | 2 +-
.../org/apache/beam/sdk/transforms/Max.java | 124 +-
.../org/apache/beam/sdk/transforms/Mean.java | 27 +-
.../org/apache/beam/sdk/transforms/Min.java | 122 +-
.../org/apache/beam/sdk/transforms/OldDoFn.java | 758 ------------
.../apache/beam/sdk/transforms/PTransform.java | 9 +-
.../org/apache/beam/sdk/transforms/ParDo.java | 46 +-
.../org/apache/beam/sdk/transforms/Regex.java | 589 ++++++++-
.../org/apache/beam/sdk/transforms/Sum.java | 57 +-
.../apache/beam/sdk/transforms/ToString.java | 198 +++
.../org/apache/beam/sdk/transforms/Top.java | 27 +-
.../beam/sdk/transforms/join/CoGbkResult.java | 35 +-
.../sdk/transforms/reflect/DoFnInvoker.java | 20 -
.../sdk/transforms/reflect/DoFnInvokers.java | 142 +--
.../sdk/transforms/reflect/DoFnSignature.java | 15 +-
.../windowing/AfterDelayFromFirstElement.java | 2 +-
.../sdk/transforms/windowing/AfterPane.java | 2 +-
.../sdk/transforms/windowing/GlobalWindow.java | 6 +
.../transforms/windowing/IntervalWindow.java | 4 +-
.../beam/sdk/transforms/windowing/Window.java | 3 +-
.../org/apache/beam/sdk/util/CoderUtils.java | 28 +-
.../beam/sdk/util/CombineContextFactory.java | 18 -
.../org/apache/beam/sdk/util/DefaultBucket.java | 105 ++
.../util/EmptyOnDeserializationThreadLocal.java | 39 +
.../apache/beam/sdk/util/GcpProjectUtil.java | 2 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 36 +-
.../org/apache/beam/sdk/util/NameUtils.java | 162 +++
.../org/apache/beam/sdk/util/PropertyNames.java | 1 +
.../org/apache/beam/sdk/util/PubsubClient.java | 28 +-
.../apache/beam/sdk/util/PubsubGrpcClient.java | 6 +-
.../apache/beam/sdk/util/PubsubJsonClient.java | 4 +-
.../apache/beam/sdk/util/PubsubTestClient.java | 6 +-
.../org/apache/beam/sdk/util/StringUtils.java | 100 --
.../apache/beam/sdk/util/TimerInternals.java | 4 +-
.../org/apache/beam/sdk/util/WindowedValue.java | 23 +-
.../beam/sdk/util/state/StateContexts.java | 4 +-
.../org/apache/beam/sdk/values/PValueBase.java | 4 +-
.../beam/sdk/values/TimestampedValue.java | 10 +-
.../sdk/AggregatorPipelineExtractorTest.java | 16 +-
.../apache/beam/sdk/coders/AvroCoderTest.java | 7 +
.../beam/sdk/coders/BigDecimalCoderTest.java | 46 +-
.../sdk/coders/BigEndianIntegerCoderTest.java | 9 +
.../beam/sdk/coders/BigEndianLongCoderTest.java | 9 +
.../beam/sdk/coders/ByteArrayCoderTest.java | 6 +
.../apache/beam/sdk/coders/ByteCoderTest.java | 9 +
.../beam/sdk/coders/ByteStringCoderTest.java | 8 +
.../beam/sdk/coders/CoderRegistryTest.java | 6 +
.../org/apache/beam/sdk/coders/CoderTest.java | 8 +
.../beam/sdk/coders/CollectionCoderTest.java | 16 +
.../beam/sdk/coders/DefaultCoderTest.java | 4 +-
.../beam/sdk/coders/DelegateCoderTest.java | 35 +-
.../apache/beam/sdk/coders/DoubleCoderTest.java | 9 +
.../beam/sdk/coders/DurationCoderTest.java | 10 +
.../beam/sdk/coders/InstantCoderTest.java | 9 +
.../beam/sdk/coders/IterableCoderTest.java | 27 +-
.../apache/beam/sdk/coders/JAXBCoderTest.java | 26 +-
.../org/apache/beam/sdk/coders/KvCoderTest.java | 29 +
.../beam/sdk/coders/LengthPrefixCoderTest.java | 129 ++
.../apache/beam/sdk/coders/ListCoderTest.java | 16 +-
.../apache/beam/sdk/coders/MapCoderTest.java | 21 +-
.../beam/sdk/coders/NullableCoderTest.java | 12 +
.../beam/sdk/coders/SerializableCoderTest.java | 9 +
.../apache/beam/sdk/coders/SetCoderTest.java | 16 +
.../beam/sdk/coders/StandardCoderTest.java | 40 +
.../sdk/coders/StringDelegateCoderTest.java | 11 +
.../beam/sdk/coders/StringUtf8CoderTest.java | 9 +
.../beam/sdk/coders/TableRowJsonCoderTest.java | 9 +
.../sdk/coders/TextualIntegerCoderTest.java | 9 +
.../apache/beam/sdk/coders/VarIntCoderTest.java | 9 +
.../beam/sdk/coders/VarLongCoderTest.java | 9 +
.../apache/beam/sdk/coders/VoidCoderTest.java | 40 +
.../beam/sdk/io/AvroIOGeneratedClassTest.java | 285 -----
.../apache/beam/sdk/io/AvroIOTransformTest.java | 324 +++++
.../beam/sdk/io/CompressedSourceTest.java | 5 -
.../apache/beam/sdk/io/FileBasedSourceTest.java | 5 -
.../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +-
.../beam/sdk/io/OffsetBasedSourceTest.java | 5 -
.../org/apache/beam/sdk/io/PubsubIOTest.java | 86 +-
.../beam/sdk/io/PubsubUnboundedSinkTest.java | 41 +-
.../beam/sdk/io/PubsubUnboundedSourceTest.java | 10 +-
.../java/org/apache/beam/sdk/io/ReadTest.java | 5 -
.../java/org/apache/beam/sdk/io/WriteTest.java | 10 +-
.../apache/beam/sdk/options/GcpOptionsTest.java | 4 +-
.../sdk/options/PipelineOptionsFactoryTest.java | 6 +-
.../beam/sdk/options/PipelineOptionsTest.java | 3 +-
.../beam/sdk/options/ValueProviderTest.java | 36 +-
.../sdk/options/ValueProviderUtilsTest.java | 2 +-
.../sdk/runners/TransformHierarchyTest.java | 30 +-
.../apache/beam/sdk/testing/TestStreamTest.java | 5 +
.../testing/ValueInSingleWindowCoderTest.java | 7 +
.../sdk/transforms/ApproximateUniqueTest.java | 483 ++++----
.../beam/sdk/transforms/CombineFnsTest.java | 20 +-
.../apache/beam/sdk/transforms/CombineTest.java | 99 +-
.../apache/beam/sdk/transforms/CountTest.java | 2 +-
.../apache/beam/sdk/transforms/CreateTest.java | 8 -
.../DoFnDelegatingAggregatorTest.java | 142 ---
.../apache/beam/sdk/transforms/DoFnTest.java | 15 +-
.../beam/sdk/transforms/DoFnTesterTest.java | 6 +-
.../apache/beam/sdk/transforms/FlattenTest.java | 27 +
.../apache/beam/sdk/transforms/KvSwapTest.java | 13 +-
.../org/apache/beam/sdk/transforms/MaxTest.java | 20 +-
.../apache/beam/sdk/transforms/MeanTest.java | 7 +-
.../org/apache/beam/sdk/transforms/MinTest.java | 21 +-
.../apache/beam/sdk/transforms/NoOpOldDoFn.java | 71 --
.../beam/sdk/transforms/OldDoFnContextTest.java | 69 --
.../apache/beam/sdk/transforms/OldDoFnTest.java | 188 ---
.../apache/beam/sdk/transforms/ParDoTest.java | 74 +-
.../apache/beam/sdk/transforms/RegexTest.java | 127 +-
.../apache/beam/sdk/transforms/SampleTest.java | 405 ++++---
.../beam/sdk/transforms/SimpleStatsFnsTest.java | 36 +-
.../org/apache/beam/sdk/transforms/SumTest.java | 24 +-
.../beam/sdk/transforms/ToStringTest.java | 125 ++
.../org/apache/beam/sdk/transforms/TopTest.java | 13 +-
.../apache/beam/sdk/transforms/ViewTest.java | 2 +-
.../transforms/join/CoGbkResultCoderTest.java | 10 +-
.../sdk/transforms/join/UnionCoderTest.java | 24 +-
.../transforms/reflect/DoFnInvokersTest.java | 42 -
.../transforms/windowing/GlobalWindowTest.java | 64 +
.../apache/beam/sdk/util/CombineFnUtilTest.java | 8 +-
.../apache/beam/sdk/util/DefaultBucketTest.java | 112 ++
.../org/apache/beam/sdk/util/GcsUtilTest.java | 56 +
.../org/apache/beam/sdk/util/NameUtilsTest.java | 177 +++
.../beam/sdk/util/PubsubGrpcClientTest.java | 8 +-
.../beam/sdk/util/PubsubJsonClientTest.java | 3 +-
.../beam/sdk/util/PubsubTestClientTest.java | 4 +-
.../beam/sdk/util/SerializableUtilsTest.java | 4 +-
.../apache/beam/sdk/util/StringUtilsTest.java | 100 --
.../beam/sdk/util/TimerInternalsTest.java | 5 +
.../beam/sdk/util/ValueWithRecordIdTest.java | 34 +
.../apache/beam/sdk/util/WindowedValueTest.java | 23 +
.../util/state/InMemoryStateInternalsTest.java | 2 +-
.../beam/sdk/util/state/StateTagTest.java | 11 +-
.../beam/sdk/values/TimestampedValueTest.java | 19 +-
sdks/java/extensions/join-library/README.md | 10 -
sdks/java/extensions/join-library/pom.xml | 2 +-
sdks/java/extensions/pom.xml | 2 +-
sdks/java/extensions/sorter/pom.xml | 2 +-
sdks/java/io/elasticsearch/pom.xml | 175 +++
.../sdk/io/elasticsearch/ElasticsearchIO.java | 819 +++++++++++++
.../beam/sdk/io/elasticsearch/package-info.java | 20 +
.../elasticsearch/ElasticSearchIOTestUtils.java | 129 ++
.../io/elasticsearch/ElasticsearchIOTest.java | 358 ++++++
sdks/java/io/google-cloud-platform/pom.xml | 2 +-
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 308 +++--
.../sdk/io/gcp/bigquery/BigQueryServices.java | 16 +-
.../io/gcp/bigquery/BigQueryServicesImpl.java | 76 +-
.../beam/sdk/io/gcp/bigtable/BigtableIO.java | 8 -
.../io/gcp/bigtable/BigtableTestOptions.java | 37 -
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 205 +++-
.../gcp/bigquery/BigQueryServicesImplTest.java | 141 +++
.../sdk/io/gcp/bigquery/BigQueryUtilTest.java | 3 +-
.../sdk/io/gcp/bigtable/BigtableIOTest.java | 5 +-
.../io/gcp/bigtable/BigtableTestOptions.java | 37 +
sdks/java/io/hdfs/pom.xml | 2 +-
.../beam/sdk/io/hdfs/AvroWrapperCoder.java | 4 +-
.../apache/beam/sdk/io/hdfs/HDFSFileSource.java | 5 -
.../apache/beam/sdk/io/hdfs/WritableCoder.java | 4 +-
.../beam/sdk/io/hdfs/AvroWrapperCoderTest.java | 1 -
sdks/java/io/jdbc/pom.xml | 2 +-
sdks/java/io/jms/pom.xml | 2 +-
sdks/java/io/kafka/pom.xml | 2 +-
.../org/apache/beam/sdk/io/kafka/KafkaIO.java | 7 +-
.../beam/sdk/io/kafka/KafkaRecordCoder.java | 4 +-
.../apache/beam/sdk/io/kafka/KafkaIOTest.java | 9 +-
.../beam/sdk/io/kafka/KafkaRecordCoderTest.java | 34 +
sdks/java/io/kinesis/pom.xml | 2 +-
.../beam/sdk/io/kinesis/KinesisRecordCoder.java | 4 +-
.../beam/sdk/io/kinesis/package-info.java | 2 +-
sdks/java/io/mongodb/pom.xml | 2 +-
.../beam/sdk/io/mongodb/MongoDbGridFSIO.java | 5 -
.../apache/beam/sdk/io/mongodb/MongoDbIO.java | 5 -
sdks/java/io/mqtt/pom.xml | 152 +++
.../org/apache/beam/sdk/io/mqtt/MqttIO.java | 588 +++++++++
.../apache/beam/sdk/io/mqtt/package-info.java | 22 +
.../org/apache/beam/sdk/io/mqtt/MqttIOTest.java | 197 +++
sdks/java/io/pom.xml | 4 +-
sdks/java/java8tests/pom.xml | 2 +-
.../maven-archetypes/examples-java8/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 19 +-
sdks/java/maven-archetypes/examples/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 19 +-
sdks/java/maven-archetypes/pom.xml | 2 +-
sdks/java/maven-archetypes/starter/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 4 +-
.../resources/projects/basic/reference/pom.xml | 4 +-
sdks/java/pom.xml | 2 +-
sdks/pom.xml | 15 +-
465 files changed, 13882 insertions(+), 8046 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/c2859a55/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/c2859a55/sdks/pom.xml
----------------------------------------------------------------------
diff --cc sdks/pom.xml
index e2dff16,06dbb9b..45d8df0
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@@ -73,9 -66,15 +67,16 @@@
</execution>
</executions>
</plugin>
+
</plugins>
</pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ </plugins>
</build>
</project>
[15/50] beam git commit: This closes #1838
Posted by dh...@apache.org.
This closes #1838
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7402d760
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7402d760
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7402d760
Branch: refs/heads/python-sdk
Commit: 7402d760004f8e7f79ca122c5fd26ec4f35dbdbe
Parents: e77de7c f9d1d68
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jan 24 18:00:43 2017 -0800
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jan 24 18:00:43 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 40 +++++--------------
.../sdk/io/gcp/bigquery/BigQueryServices.java | 9 ++---
.../io/gcp/bigquery/BigQueryServicesImpl.java | 23 ++++-------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 41 ++++++++------------
.../sdk/io/gcp/bigquery/BigQueryUtilTest.java | 3 +-
5 files changed, 40 insertions(+), 76 deletions(-)
----------------------------------------------------------------------
[33/50] beam git commit: This closes #1853
Posted by dh...@apache.org.
This closes #1853
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/717b415f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/717b415f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/717b415f
Branch: refs/heads/python-sdk
Commit: 717b415f1a6024f1630d922cbd357c894452af40
Parents: b4726d0 e591d8b
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 09:38:05 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 09:38:05 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
[30/50] beam git commit: PackageUtil: preserve classpath ordering
when uploading
Posted by dh...@apache.org.
PackageUtil: preserve classpath ordering when uploading
Also add a test
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b0b91c84
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b0b91c84
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b0b91c84
Branch: refs/heads/python-sdk
Commit: b0b91c842e09aa7fdb5c1dc216574daa43b437ea
Parents: 23e2b91
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 22:15:59 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 22:16:22 2017 -0800
----------------------------------------------------------------------
.../beam/runners/dataflow/util/PackageUtil.java | 11 +++++---
.../runners/dataflow/util/PackageUtilTest.java | 27 ++++++++++++++++++++
2 files changed, 35 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/b0b91c84/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index fa8c94d..685d48c 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -297,16 +297,21 @@ class PackageUtil {
// Inline a copy here because the inner code returns an immutable list and we want to mutate it.
List<PackageAttributes> packageAttributes =
new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
- // Order package attributes in descending size order so that we upload the largest files first.
- Collections.sort(packageAttributes, new PackageUploadOrder());
+ // Compute the returned list of DataflowPackage objects here so that they are returned in the
+ // same order as on the classpath.
List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+ for (final PackageAttributes attributes : packageAttributes) {
+ packages.add(attributes.getDataflowPackage());
+ }
+
+ // Order package attributes in descending size order so that we upload the largest files first.
+ Collections.sort(packageAttributes, new PackageUploadOrder());
final AtomicInteger numUploaded = new AtomicInteger(0);
final AtomicInteger numCached = new AtomicInteger(0);
List<ListenableFuture<?>> futures = new LinkedList<>();
for (final PackageAttributes attributes : packageAttributes) {
- packages.add(attributes.getDataflowPackage());
futures.add(executorService.submit(new Runnable() {
@Override
public void run() {
http://git-wip-us.apache.org/repos/asf/beam/blob/b0b91c84/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 3828415..800c5a9 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -19,6 +19,7 @@ package org.apache.beam.runners.dataflow.util;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.startsWith;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
@@ -59,6 +60,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.Channels;
import java.nio.channels.Pipe;
+import java.nio.channels.Pipe.SinkChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
@@ -86,6 +88,8 @@ import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
/** Tests for PackageUtil. */
@RunWith(JUnit4.class)
@@ -265,6 +269,29 @@ public class PackageUtilTest {
}
@Test
+ public void testStagingPreservesClasspath() throws Exception {
+ File smallFile = makeFileWithContents("small.txt", "small");
+ File largeFile = makeFileWithContents("large.txt", "large contents");
+ when(mockGcsUtil.fileSize(any(GcsPath.class)))
+ .thenThrow(new FileNotFoundException("some/path"));
+ when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+ .thenAnswer(new Answer<SinkChannel>() {
+ @Override
+ public SinkChannel answer(InvocationOnMock invocation) throws Throwable {
+ return Pipe.open().sink();
+ }
+ });
+
+ List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
+ ImmutableList.of(smallFile.getAbsolutePath(), largeFile.getAbsolutePath()),
+ STAGING_PATH, mockGcsUtil);
+ // Verify that the packages are returned small, then large, matching input order even though
+ // the large file would be uploaded first.
+ assertThat(targets.get(0).getName(), startsWith("small"));
+ assertThat(targets.get(1).getName(), startsWith("large"));
+ }
+
+ @Test
public void testPackageUploadWithDirectorySucceeds() throws Exception {
Pipe pipe = Pipe.open();
File tmpDirectory = tmpFolder.newFolder("folder");
[49/50] beam git commit: Update pom.xml for sdks/python.
Posted by dh...@apache.org.
Update pom.xml for sdks/python.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f1b8679c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f1b8679c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f1b8679c
Branch: refs/heads/python-sdk
Commit: f1b8679c4af283d1e751043e2e765b7f295af0b2
Parents: c2859a5
Author: Ahmet Altay <al...@google.com>
Authored: Fri Jan 27 17:04:21 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Jan 27 17:04:21 2017 -0800
----------------------------------------------------------------------
sdks/python/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b8679c/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index cc90969..615ddc5 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-parent</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
[11/50] beam git commit: This closes #1834
Posted by dh...@apache.org.
This closes #1834
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1148be6b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1148be6b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1148be6b
Branch: refs/heads/python-sdk
Commit: 1148be6bb17eae70c2753d33aebbac9f7943dd03
Parents: f2389ab bffe80d
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 15:51:19 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:51:19 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 3 ++-
.../sdk/testing/UsesUnboundedPCollections.java | 23 ++++++++++++++++++++
.../org/apache/beam/sdk/io/PubsubIOTest.java | 4 ++--
3 files changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
[34/50] beam git commit: Update the NOTICE year range
Posted by dh...@apache.org.
Update the NOTICE year range
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b97b3935
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b97b3935
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b97b3935
Branch: refs/heads/python-sdk
Commit: b97b3935c30cba2626333898ced32f82a6c54351
Parents: 717b415
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Thu Jan 26 14:46:37 2017 +0100
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Jan 26 10:16:49 2017 -0800
----------------------------------------------------------------------
NOTICE | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/b97b3935/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 9b35cd4..0412683 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
Apache Beam
-Copyright 2016 The Apache Software Foundation
+Copyright 2016-2017 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
[22/50] beam git commit: This closes #1184
Posted by dh...@apache.org.
This closes #1184
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c5257837
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c5257837
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c5257837
Branch: refs/heads/python-sdk
Commit: c525783704e0cc47845df8cdec1715e1f1c74008
Parents: 979c937 3ecf7e7
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 11:03:05 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 11:03:05 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 +
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
.../runners/dataflow/util/PackageUtilTest.java | 42 ++-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 +
6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
[04/50] beam git commit: [BEAM-1258] Improve logging in
BigQueryIO.verifyTableEmpty().
Posted by dh...@apache.org.
[BEAM-1258] Improve logging in BigQueryIO.verifyTableEmpty().
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/5b6dd91d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/5b6dd91d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/5b6dd91d
Branch: refs/heads/python-sdk
Commit: 5b6dd91d27ce73fa66db4d445b0ceb88f09971d8
Parents: cb6e0a8
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 14:52:30 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 32 +++++++++++---------
.../sdk/io/gcp/bigquery/BigQueryServices.java | 2 ++
2 files changed, 19 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/5b6dd91d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 701374d..aff199a 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1863,25 +1863,27 @@ public class BigQueryIO {
writeDisposition, validate, testServices);
}
- private static void verifyTableEmpty(
+ private static void verifyTableNotExistOrEmpty(
DatasetService datasetService,
- TableReference table) {
+ TableReference tableRef) {
try {
- boolean isEmpty = datasetService.isTableEmpty(
- table.getProjectId(), table.getDatasetId(), table.getTableId());
- if (!isEmpty) {
- throw new IllegalArgumentException(
- "BigQuery table is not empty: " + BigQueryIO.toTableSpec(table));
+ if (datasetService.getTable(
+ tableRef.getProjectId(),
+ tableRef.getDatasetId(),
+ tableRef.getTableId()) != null) {
+ checkState(
+ datasetService.isTableEmpty(
+ tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
+ "BigQuery table is not empty: %s.",
+ BigQueryIO.toTableSpec(tableRef));
}
} catch (IOException | InterruptedException e) {
- ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
- if (e instanceof IOException && errorExtractor.itemNotFound((IOException) e)) {
- // Nothing to do. If the table does not exist, it is considered empty.
- } else {
- throw new RuntimeException(
- "unable to confirm BigQuery table emptiness for table "
- + BigQueryIO.toTableSpec(table), e);
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
}
+ throw new RuntimeException(
+ "unable to confirm BigQuery table emptiness for table "
+ + BigQueryIO.toTableSpec(tableRef), e);
}
}
@@ -1917,7 +1919,7 @@ public class BigQueryIO {
verifyTablePresence(datasetService, table);
}
if (getWriteDisposition() == BigQueryIO.Write.WriteDisposition.WRITE_EMPTY) {
- verifyTableEmpty(datasetService, table);
+ verifyTableNotExistOrEmpty(datasetService, table);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/5b6dd91d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
index 7173996..32cf46d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
@@ -136,6 +136,8 @@ interface BigQueryServices extends Serializable {
/**
* Returns true if the table is empty.
+ *
+ * @throws IOException if the table is not found.
*/
boolean isTableEmpty(String projectId, String datasetId, String tableId)
throws IOException, InterruptedException;
[45/50] beam git commit: Update Beam version in the Maven archetypes
Posted by dh...@apache.org.
Update Beam version in the Maven archetypes
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/9c118156
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/9c118156
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/9c118156
Branch: refs/heads/python-sdk
Commit: 9c1181563d89e604b899e5e945d5975359f42543
Parents: 4a29131
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:34:24 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:34:24 2017 +0100
----------------------------------------------------------------------
.../examples-java8/src/main/resources/archetype-resources/pom.xml | 2 +-
.../examples/src/main/resources/archetype-resources/pom.xml | 2 +-
.../starter/src/main/resources/archetype-resources/pom.xml | 2 +-
.../starter/src/test/resources/projects/basic/reference/pom.xml | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
index 05cb797..55211ed 100644
--- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
@@ -27,7 +27,7 @@
<packaging>jar</packaging>
<properties>
- <beam.version>0.5.0-SNAPSHOT</beam.version>
+ <beam.version>0.6.0-SNAPSHOT</beam.version>
</properties>
<build>
http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
index 74f08bf..654973c 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -27,7 +27,7 @@
<packaging>jar</packaging>
<properties>
- <beam.version>0.5.0-SNAPSHOT</beam.version>
+ <beam.version>0.6.0-SNAPSHOT</beam.version>
</properties>
<repositories>
http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
index e7f1185..5d2a408 100644
--- a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
@@ -25,7 +25,7 @@
<version>${version}</version>
<properties>
- <beam.version>0.5.0-SNAPSHOT</beam.version>
+ <beam.version>0.6.0-SNAPSHOT</beam.version>
</properties>
<repositories>
http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
index 871d194..1c666eb 100644
--- a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
@@ -25,7 +25,7 @@
<version>0.1</version>
<properties>
- <beam.version>0.5.0-SNAPSHOT</beam.version>
+ <beam.version>0.6.0-SNAPSHOT</beam.version>
</properties>
<repositories>
[24/50] beam git commit: This closes #1846
Posted by dh...@apache.org.
This closes #1846
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/95beda69
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/95beda69
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/95beda69
Branch: refs/heads/python-sdk
Commit: 95beda69bff7dfe519422fd19916c7a851dadf55
Parents: c525783 f05c5d3
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 12:13:37 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 12:13:37 2017 -0800
----------------------------------------------------------------------
examples/pom.xml | 14 +++++++++-----
runners/pom.xml | 14 +++++++++-----
sdks/pom.xml | 13 +++++++------
3 files changed, 25 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
[35/50] beam git commit: This closes #1852
Posted by dh...@apache.org.
This closes #1852
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/96377241
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/96377241
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/96377241
Branch: refs/heads/python-sdk
Commit: 9637724145d2defabc8cff0a3a825eaf9a32be6e
Parents: 717b415 b97b393
Author: Davor Bonaci <da...@google.com>
Authored: Thu Jan 26 10:16:57 2017 -0800
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Jan 26 10:16:57 2017 -0800
----------------------------------------------------------------------
NOTICE | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
[07/50] beam git commit: This closes #1833: Removes ReduceFnExecutor
interface
Posted by dh...@apache.org.
This closes #1833: Removes ReduceFnExecutor interface
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/11c3cd70
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/11c3cd70
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/11c3cd70
Branch: refs/heads/python-sdk
Commit: 11c3cd70b784650e8b60a5660449cfafdba84bbf
Parents: b333487 8989473
Author: Kenneth Knowles <kl...@google.com>
Authored: Tue Jan 24 13:48:23 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Tue Jan 24 13:48:23 2017 -0800
----------------------------------------------------------------------
.../apache/beam/runners/core/DoFnRunner.java | 20 --------------------
.../core/GroupAlsoByWindowViaWindowSetDoFn.java | 5 +----
.../beam/runners/direct/ParDoEvaluator.java | 2 --
.../runners/spark/translation/DoFnFunction.java | 2 --
.../spark/translation/MultiDoFnFunction.java | 2 --
5 files changed, 1 insertion(+), 30 deletions(-)
----------------------------------------------------------------------
[19/50] beam git commit: DataflowRunner: move source for properties
into pom
Posted by dh...@apache.org.
DataflowRunner: move source for properties into pom
Also drop unused properties that are now in the root
pom.xml.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e95335f0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e95335f0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e95335f0
Branch: refs/heads/python-sdk
Commit: e95335f0f6a94ebe257d8d5ce82bb82205dca95c
Parents: bf9d454
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 07:50:57 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 10:40:13 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 4 ++--
.../org/apache/beam/runners/dataflow/dataflow.properties | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/e95335f0/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 1d05193..eea5502 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,8 +33,8 @@
<packaging>jar</packaging>
<properties>
- <timestamp>${maven.build.timestamp}</timestamp>
- <maven.build.timestamp.format>yyyy-MM-dd HH:mm</maven.build.timestamp.format>
+ <dataflow.container_version>beam-master-20170120</dataflow.container_version>
+ <dataflow.environment_major_version>6</dataflow.environment_major_version>
</properties>
<build>
http://git-wip-us.apache.org/repos/asf/beam/blob/e95335f0/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties b/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
index 9976ed9..47e316c 100644
--- a/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
+++ b/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
@@ -16,8 +16,8 @@
#
# Dataflow runtime properties
-environment.major.version=6
+environment.major.version=${dataflow.environment_major_version}
-worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:beam-master-20170120
+worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:${dataflow.container_version}
-worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:beam-master-20170120
+worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:${dataflow.container_version}
[40/50] beam git commit: Refactored existing code. Added iterable and
KV. Changed from element to of.
Posted by dh...@apache.org.
Refactored existing code. Added iterable and KV. Changed from element to of.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e01ce864
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e01ce864
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e01ce864
Branch: refs/heads/python-sdk
Commit: e01ce864edf551afefe861041541bb2a05340a08
Parents: 83f8c46
Author: Jesse Anderson <je...@smokinghand.com>
Authored: Tue Jan 24 08:37:33 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 22:52:09 2017 -0800
----------------------------------------------------------------------
.../apache/beam/sdk/transforms/ToString.java | 168 ++++++++++++++++---
.../java/org/apache/beam/sdk/io/WriteTest.java | 2 +-
.../beam/sdk/transforms/ToStringTest.java | 86 ++++++++--
3 files changed, 226 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
index ef49267..d5c9784 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
@@ -18,51 +18,181 @@
package org.apache.beam.sdk.transforms;
+import java.util.Iterator;
+
+import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
/**
- * {@link PTransform PTransforms} for converting a {@link PCollection PCollection<T>} to a
- * {@link PCollection PCollection<String>}.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<Long> longs = ...;
- * PCollection<String> strings = longs.apply(ToString.<Long>element());
- * } </pre>
- *
+ * {@link PTransform PTransforms} for converting a {@link PCollection PCollection<?>},
+ * {@link PCollection PCollection<KV<?,?>>}, or
+ * {@link PCollection PCollection<Iterable<?>>}
+ * to a {@link PCollection PCollection<String>}.
*
* <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your own
* {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
*/
public final class ToString {
+ private ToString() {
+ // do not instantiate
+ }
/**
* Returns a {@code PTransform<PCollection, PCollection<String>>} which transforms each
* element of the input {@link PCollection} to a {@link String} using the
* {@link Object#toString} method.
*/
- public static PTransform<PCollection<?>, PCollection<String>> element() {
- return new Default();
+ public static PTransform<PCollection<?>, PCollection<String>> of() {
+ return new SimpleToString();
}
- private ToString() {
+ /**
+ * Returns a {@code PTransform<PCollection<KV<?,?>, PCollection<String>>} which transforms each
+ * element of the input {@link PCollection} to a {@link String} by using the
+ * {@link Object#toString} on the key followed by a "," followed by the {@link Object#toString}
+ * of the value.
+ */
+ public static PTransform<PCollection<? extends KV<?, ?>>, PCollection<String>> kv() {
+ return kv(",");
+ }
+
+ /**
+ * Returns a {@code PTransform<PCollection<KV<?,?>, PCollection<String>>} which transforms each
+ * element of the input {@link PCollection} to a {@link String} by using the
+ * {@link Object#toString} on the key followed by the specified delimiter followed by the
+ * {@link Object#toString} of the value.
+ * @param delimiter The delimiter to put between the key and value
+ */
+ public static PTransform<PCollection<? extends KV<?, ?>>,
+ PCollection<String>> kv(String delimiter) {
+ return new KVToString(delimiter);
+ }
+
+ /**
+ * Returns a {@code PTransform<PCollection<Iterable<?>, PCollection<String>>} which
+ * transforms each item in the iterable of the input {@link PCollection} to a {@link String}
+ * using the {@link Object#toString} method followed by a "," until
+ * the last element in the iterable. There is no trailing delimiter.
+ */
+ public static PTransform<PCollection<? extends Iterable<?>>, PCollection<String>> iterable() {
+ return iterable(",");
+ }
+
+ /**
+ * Returns a {@code PTransform<PCollection<Iterable<?>, PCollection<String>>} which
+ * transforms each item in the iterable of the input {@link PCollection} to a {@link String}
+ * using the {@link Object#toString} method followed by the specified delimiter until
+ * the last element in the iterable. There is no trailing delimiter.
+ * @param delimiter The delimiter to put between the items in the iterable.
+ */
+ public static PTransform<PCollection<? extends Iterable<?>>,
+ PCollection<String>> iterable(String delimiter) {
+ return new IterablesToString(delimiter);
}
/**
* A {@link PTransform} that converts a {@code PCollection} to a {@code PCollection<String>}
* using the {@link Object#toString} method.
+ *
+ * <p>Example of use:
+ * <pre>{@code
+ * PCollection<Long> longs = ...;
+ * PCollection<String> strings = longs.apply(ToString.of());
+ * }</pre>
+ *
+ *
+ * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your own
+ * {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
*/
- private static final class Default extends PTransform<PCollection<?>, PCollection<String>> {
+ private static final class SimpleToString extends
+ PTransform<PCollection<?>, PCollection<String>> {
@Override
public PCollection<String> expand(PCollection<?> input) {
- return input.apply(MapElements.via(new ToStringFunction<>()));
+ return input.apply(MapElements.via(new SimpleFunction<Object, String>() {
+ @Override
+ public String apply(Object input) {
+ return input.toString();
+ }
+ }));
}
+ }
+
+ /**
+ * A {@link PTransform} that converts a {@code PCollection} of {@code KV} to a
+ * {@code PCollection<String>} using the {@link Object#toString} method for
+ * the key and value and an optional delimiter.
+ *
+ * <p>Example of use:
+ * <pre>{@code
+ * PCollection<KV<String, Long>> nameToLong = ...;
+ * PCollection<String> strings = nameToLong.apply(ToString.kv());
+ * }</pre>
+ *
+ *
+ * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your
+ * own {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
+ */
+ private static final class KVToString extends
+ PTransform<PCollection<? extends KV<?, ?>>, PCollection<String>> {
+ private final String delimiter;
+
+ public KVToString(String delimiter) {
+ this.delimiter = delimiter;
+ }
+
+ @Override
+ public PCollection<String> expand(PCollection<? extends KV<?, ?>> input) {
+ return input.apply(MapElements.via(new SimpleFunction<KV<?, ?>, String>() {
+ @Override
+ public String apply(KV<?, ?> input) {
+ return input.getKey().toString() + delimiter + input.getValue().toString();
+ }
+ }));
+ }
+ }
+
+ /**
+ * A {@link PTransform} that converts a {@code PCollection} of {@link Iterable} to a
+ * {@code PCollection<String>} using the {@link Object#toString} method and
+ * an optional delimiter.
+ *
+ * <p>Example of use:
+ * <pre>{@code
+ * PCollection<Iterable<Long>> longs = ...;
+ * PCollection<String> strings = longs.apply(ToString.iterable());
+ * }</pre>
+ *
+ *
+ * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your
+ * own {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
+ */
+ private static final class IterablesToString extends
+ PTransform<PCollection<? extends Iterable<?>>, PCollection<String>> {
+ private final String delimiter;
+
+ public IterablesToString(String delimiter) {
+ this.delimiter = delimiter;
+ }
+
+ @Override
+ public PCollection<String> expand(PCollection<? extends Iterable<?>> input) {
+ return input.apply(MapElements.via(new SimpleFunction<Iterable<?>, String>() {
+ @Override
+ public String apply(Iterable<?> input) {
+ StringBuilder builder = new StringBuilder();
+ Iterator iterator = input.iterator();
+
+ while (iterator.hasNext()) {
+ builder.append(iterator.next().toString());
+
+ if (iterator.hasNext()) {
+ builder.append(delimiter);
+ }
+ }
- private static class ToStringFunction<T> extends SimpleFunction<T, String> {
- @Override
- public String apply(T input) {
- return input.toString();
- }
+ return builder.toString();
+ }
+ }));
}
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
index 9772b9b..f81cc0c 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
@@ -297,7 +297,7 @@ public class WriteTest {
@Test
public void testWriteUnbounded() {
PCollection<String> unbounded = p.apply(CountingInput.unbounded())
- .apply(ToString.element());
+ .apply(ToString.of());
TestSink sink = new TestSink();
thrown.expect(IllegalArgumentException.class);
http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
index e5c9f05..ab984f1 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
@@ -20,10 +20,13 @@ package org.apache.beam.sdk.transforms;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
+
+import org.apache.beam.sdk.coders.IterableCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.RunnableOnService;
import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;
@@ -41,19 +44,82 @@ public class ToStringTest {
@Test
@Category(RunnableOnService.class)
- public void testToStringElement() {
+ public void testToStringOf() {
Integer[] ints = {1, 2, 3, 4, 5};
+ String[] strings = {"1", "2", "3", "4", "5"};
PCollection<Integer> input = p.apply(Create.of(Arrays.asList(ints)));
- PCollection<String> output = input.apply(ToString.<Integer>element());
- PAssert.that(output).containsInAnyOrder(toStringList(ints));
+ PCollection<String> output = input.apply(ToString.of());
+ PAssert.that(output).containsInAnyOrder(strings);
+ p.run();
+ }
+
+ @Test
+ @Category(RunnableOnService.class)
+ public void testToStringKV() {
+ ArrayList<KV<String, Integer>> kvs = new ArrayList<>();
+ kvs.add(KV.of("one", 1));
+ kvs.add(KV.of("two", 2));
+
+ ArrayList<String> expected = new ArrayList<>();
+ expected.add("one,1");
+ expected.add("two,2");
+
+ PCollection<KV<String, Integer>> input = p.apply(Create.of(kvs));
+ PCollection<String> output = input.apply(ToString.kv());
+ PAssert.that(output).containsInAnyOrder(expected);
p.run();
}
- private List<String> toStringList(Object[] ints) {
- List<String> ll = new ArrayList<>(ints.length);
- for (Object i : ints) {
- ll.add(i.toString());
- }
- return ll;
+ @Test
+ @Category(RunnableOnService.class)
+ public void testToStringKVWithDelimiter() {
+ ArrayList<KV<String, Integer>> kvs = new ArrayList<>();
+ kvs.add(KV.of("one", 1));
+ kvs.add(KV.of("two", 2));
+
+ ArrayList<String> expected = new ArrayList<>();
+ expected.add("one\t1");
+ expected.add("two\t2");
+
+ PCollection<KV<String, Integer>> input = p.apply(Create.of(kvs));
+ PCollection<String> output = input.apply(ToString.kv("\t"));
+ PAssert.that(output).containsInAnyOrder(expected);
+ p.run();
+ }
+
+ @Test
+ @Category(RunnableOnService.class)
+ public void testToStringIterable() {
+ ArrayList<Iterable<String>> iterables = new ArrayList<>();
+ iterables.add(Arrays.asList(new String[]{"one", "two", "three"}));
+ iterables.add(Arrays.asList(new String[]{"four", "five", "six"}));
+
+ ArrayList<String> expected = new ArrayList<>();
+ expected.add("one,two,three");
+ expected.add("four,five,six");
+
+ PCollection<Iterable<String>> input = p.apply(Create.of(iterables)
+ .withCoder(IterableCoder.of(StringUtf8Coder.of())));
+ PCollection<String> output = input.apply(ToString.iterable());
+ PAssert.that(output).containsInAnyOrder(expected);
+ p.run();
+ }
+
+ @Test
+ @Category(RunnableOnService.class)
+ public void testToStringIterableWithDelimiter() {
+ ArrayList<Iterable<String>> iterables = new ArrayList<>();
+ iterables.add(Arrays.asList(new String[]{"one", "two", "three"}));
+ iterables.add(Arrays.asList(new String[]{"four", "five", "six"}));
+
+ ArrayList<String> expected = new ArrayList<>();
+ expected.add("one\ttwo\tthree");
+ expected.add("four\tfive\tsix");
+
+ PCollection<Iterable<String>> input = p.apply(Create.of(iterables)
+ .withCoder(IterableCoder.of(StringUtf8Coder.of())));
+ PCollection<String> output = input.apply(ToString.iterable("\t"));
+ PAssert.that(output).containsInAnyOrder(expected);
+ p.run();
}
}
[43/50] beam git commit: [maven-release-plugin] prepare branch
release-0.5.0
Posted by dh...@apache.org.
[maven-release-plugin] prepare branch release-0.5.0
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/da2dff90
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/da2dff90
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/da2dff90
Branch: refs/heads/python-sdk
Commit: da2dff90cb10e5881496ffd4efb368ba84544174
Parents: 47304d1
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:27:06 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:27:06 2017 +0100
----------------------------------------------------------------------
pom.xml | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/da2dff90/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a96275c..2281f67 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
<connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
<url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
+ <tag>release-0.5.0</tag>
</scm>
<issueManagement>
[42/50] beam git commit: BEAM-980 Support configuration of Apex DAG
through properties file.
Posted by dh...@apache.org.
BEAM-980 Support configuration of Apex DAG through properties file.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/31c63cb8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/31c63cb8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/31c63cb8
Branch: refs/heads/python-sdk
Commit: 31c63cb8c14ea71ed45376d19b4fd9f285d80763
Parents: 1c6e667
Author: Thomas Weise <th...@apache.org>
Authored: Wed Jan 25 22:22:36 2017 -0800
Committer: Thomas Weise <th...@apache.org>
Committed: Thu Jan 26 22:54:00 2017 -0800
----------------------------------------------------------------------
.../beam/runners/apex/ApexPipelineOptions.java | 7 +-
.../apache/beam/runners/apex/ApexRunner.java | 43 ++++++++---
.../beam/runners/apex/ApexYarnLauncher.java | 23 +++++-
.../beam/runners/apex/ApexRunnerTest.java | 75 ++++++++++++++++++++
.../beam/runners/apex/ApexYarnLauncherTest.java | 9 ++-
.../test/resources/beam-runners-apex.properties | 20 ++++++
6 files changed, 161 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
index 54fdf76..f37e874 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
@@ -56,5 +56,10 @@ public interface ApexPipelineOptions extends PipelineOptions, java.io.Serializab
@Default.Long(0)
long getRunMillis();
-}
+ @Description("configuration properties file for the Apex engine")
+ void setConfigFile(String name);
+
+ @Default.String("classpath:/beam-runners-apex.properties")
+ String getConfigFile();
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
index f12ebef..e220e6c 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
@@ -22,10 +22,16 @@ import com.datatorrent.api.Context.DAGContext;
import com.datatorrent.api.DAG;
import com.datatorrent.api.StreamingApplication;
import com.google.common.base.Throwables;
+
+import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Properties;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.apex.api.EmbeddedAppLauncher;
import org.apache.apex.api.Launcher;
@@ -64,6 +70,7 @@ import org.apache.hadoop.conf.Configuration;
public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
private final ApexPipelineOptions options;
+ public static final String CLASSPATH_SCHEME = "classpath";
/**
* TODO: this isn't thread safe and may cause issues when tests run in parallel
@@ -126,6 +133,31 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
}
};
+ Properties configProperties = new Properties();
+ try {
+ if (options.getConfigFile() != null) {
+ URI configURL = new URI(options.getConfigFile());
+ if (CLASSPATH_SCHEME.equals(configURL.getScheme())) {
+ InputStream is = this.getClass().getResourceAsStream(configURL.getPath());
+ if (is != null) {
+ configProperties.load(is);
+ is.close();
+ }
+ } else {
+ if (!configURL.isAbsolute()) {
+ // resolve as local file name
+ File f = new File(options.getConfigFile());
+ configURL = f.toURI();
+ }
+ try (InputStream is = configURL.toURL().openStream()) {
+ configProperties.load(is);
+ }
+ }
+ }
+ } catch (IOException | URISyntaxException ex) {
+ throw new RuntimeException("Error loading properties", ex);
+ }
+
if (options.isEmbeddedExecution()) {
Launcher<AppHandle> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
@@ -135,6 +167,7 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
launchAttributes.put(EmbeddedAppLauncher.HEARTBEAT_MONITORING, false);
}
Configuration conf = new Configuration(false);
+ ApexYarnLauncher.addProperties(conf, configProperties);
try {
ApexRunner.ASSERTION_ERROR.set(null);
AppHandle apexAppResult = launcher.launchApp(apexApp, conf, launchAttributes);
@@ -146,7 +179,7 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
} else {
try {
ApexYarnLauncher yarnLauncher = new ApexYarnLauncher();
- AppHandle apexAppResult = yarnLauncher.launchApp(apexApp);
+ AppHandle apexAppResult = yarnLauncher.launchApp(apexApp, configProperties);
return new ApexRunnerResult(apexDAG.get(), apexAppResult);
} catch (IOException e) {
throw new RuntimeException("Failed to launch the application on YARN.", e);
@@ -155,14 +188,6 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
}
- private static class IdentityFn<T> extends DoFn<T, T> {
- private static final long serialVersionUID = 1L;
- @ProcessElement
- public void processElement(ProcessContext c) {
- c.output(c.element());
- }
- }
-
////////////////////////////////////////////
// Adapted from FlinkRunner for View support
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
index a2d88f4..6bc42f0 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
@@ -52,6 +52,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import java.util.Set;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
@@ -80,7 +81,8 @@ import org.slf4j.LoggerFactory;
public class ApexYarnLauncher {
private static final Logger LOG = LoggerFactory.getLogger(ApexYarnLauncher.class);
- public AppHandle launchApp(StreamingApplication app) throws IOException {
+ public AppHandle launchApp(StreamingApplication app, Properties configProperties)
+ throws IOException {
List<File> jarsToShip = getYarnDeployDependencies();
StringBuilder classpath = new StringBuilder();
@@ -103,7 +105,7 @@ public class ApexYarnLauncher {
Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
launchAttributes.put(YarnAppLauncher.LIB_JARS, classpath.toString().replace(':', ','));
- LaunchParams lp = new LaunchParams(dag, launchAttributes);
+ LaunchParams lp = new LaunchParams(dag, launchAttributes, configProperties);
lp.cmd = "hadoop " + ApexYarnLauncher.class.getName();
HashMap<String, String> env = new HashMap<>();
env.put("HADOOP_USER_CLASSPATH_FIRST", "1");
@@ -292,6 +294,18 @@ public class ApexYarnLauncher {
}
/**
+ * Transfer the properties to the configuration object.
+ * @param conf
+ * @param props
+ */
+ public static void addProperties(Configuration conf, Properties props) {
+ for (final String propertyName : props.stringPropertyNames()) {
+ String propertyValue = props.getProperty(propertyName);
+ conf.set(propertyName, propertyValue);
+ }
+ }
+
+ /**
* The main method expects the serialized DAG and will launch the YARN application.
* @param args location of launch parameters
* @throws IOException when parameters cannot be read
@@ -309,6 +323,7 @@ public class ApexYarnLauncher {
}
};
Configuration conf = new Configuration(); // configuration from Hadoop client
+ addProperties(conf, params.configProperties);
AppHandle appHandle = params.getApexLauncher().launchApp(apexApp, conf,
params.launchAttributes);
if (appHandle == null) {
@@ -327,12 +342,14 @@ public class ApexYarnLauncher {
private static final long serialVersionUID = 1L;
private final DAG dag;
private final Attribute.AttributeMap launchAttributes;
+ private final Properties configProperties;
private HashMap<String, String> env;
private String cmd;
- protected LaunchParams(DAG dag, AttributeMap launchAttributes) {
+ protected LaunchParams(DAG dag, AttributeMap launchAttributes, Properties configProperties) {
this.dag = dag;
this.launchAttributes = launchAttributes;
+ this.configProperties = configProperties;
}
protected Launcher<?> getApexLauncher() {
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
new file mode 100644
index 0000000..436c959
--- /dev/null
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.apex;
+
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.OperatorMeta;
+import com.datatorrent.stram.engine.OperatorContext;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.Collections;
+import java.util.Properties;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Create;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for the Apex runner.
+ */
+public class ApexRunnerTest {
+
+ @Test
+ public void testConfigProperties() throws Exception {
+
+ String operName = "testProperties";
+ ApexPipelineOptions options = PipelineOptionsFactory.create()
+ .as(ApexPipelineOptions.class);
+ options.setRunner(ApexRunner.class);
+
+ // default configuration from class path
+ Pipeline p = Pipeline.create(options);
+ p.apply(operName, Create.of(Collections.emptyList()));
+ ApexRunnerResult result = (ApexRunnerResult) p.run();
+ result.cancel();
+
+ DAG dag = result.getApexDAG();
+ OperatorMeta t1Meta = dag.getOperatorMeta(operName);
+ Assert.assertNotNull(t1Meta);
+ Assert.assertEquals(new Integer(32), t1Meta.getValue(OperatorContext.MEMORY_MB));
+
+ File tmp = File.createTempFile("beam-runners-apex-", ".properties");
+ tmp.deleteOnExit();
+ Properties props = new Properties();
+ props.setProperty("dt.operator." + operName + ".attr.MEMORY_MB", "64");
+ try (FileOutputStream fos = new FileOutputStream(tmp)) {
+ props.store(fos, "");
+ }
+ options.setConfigFile(tmp.getAbsolutePath());
+ result = (ApexRunnerResult) p.run();
+ result.cancel();
+ tmp.delete();
+ dag = result.getApexDAG();
+ t1Meta = dag.getOperatorMeta(operName);
+ Assert.assertNotNull(t1Meta);
+ Assert.assertEquals(new Integer(64), t1Meta.getValue(OperatorContext.MEMORY_MB));
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
index 986818b..6ffb091 100644
--- a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
@@ -35,6 +35,7 @@ import java.nio.file.Files;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import java.util.jar.JarFile;
import org.apache.apex.api.EmbeddedAppLauncher;
@@ -78,15 +79,17 @@ public class ApexYarnLauncherTest {
Configuration conf = new Configuration(false);
DAG dag = embeddedLauncher.prepareDAG(app, conf);
Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
+ Properties configProperties = new Properties();
ApexYarnLauncher launcher = new ApexYarnLauncher();
- launcher.launchApp(new MockApexYarnLauncherParams(dag, launchAttributes));
+ launcher.launchApp(new MockApexYarnLauncherParams(dag, launchAttributes, configProperties));
}
private static class MockApexYarnLauncherParams extends ApexYarnLauncher.LaunchParams {
private static final long serialVersionUID = 1L;
- public MockApexYarnLauncherParams(DAG dag, AttributeMap launchAttributes) {
- super(dag, launchAttributes);
+ public MockApexYarnLauncherParams(DAG dag, AttributeMap launchAttributes,
+ Properties properties) {
+ super(dag, launchAttributes, properties);
}
@Override
http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/resources/beam-runners-apex.properties
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/resources/beam-runners-apex.properties b/runners/apex/src/test/resources/beam-runners-apex.properties
new file mode 100644
index 0000000..48f8b05
--- /dev/null
+++ b/runners/apex/src/test/resources/beam-runners-apex.properties
@@ -0,0 +1,20 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# properties for unit test
+dt.operator.testProperties.attr.MEMORY_MB=32
[31/50] beam git commit: This closes #1849
Posted by dh...@apache.org.
This closes #1849
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b4726d08
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b4726d08
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b4726d08
Branch: refs/heads/python-sdk
Commit: b4726d088faa2ea74ba3a7e29a7559f737ccf4f2
Parents: 1c6e667 b0b91c8
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 07:15:54 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 07:15:54 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 +
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 352 ++++++++++++-------
.../runners/dataflow/util/PackageUtilTest.java | 69 +++-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 +
6 files changed, 312 insertions(+), 148 deletions(-)
----------------------------------------------------------------------
[08/50] beam git commit: [BEAM-1071] Allow for BigQueryIO to write
tables with CREATE_NEVER disposition
Posted by dh...@apache.org.
[BEAM-1071] Allow for BigQueryIO to write tables with CREATE_NEVER disposition
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/dc369522
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/dc369522
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/dc369522
Branch: refs/heads/python-sdk
Commit: dc369522d1cfa46ae9058919d93229de05db2b6a
Parents: 11c3cd7
Author: Sam McVeety <sg...@google.com>
Authored: Mon Dec 12 18:47:20 2016 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 14:41:39 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 51 ++++++++++++++------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 36 ++++++++++++++
2 files changed, 71 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/dc369522/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index aff199a..fa49f55 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1925,10 +1925,17 @@ public class BigQueryIO {
if (input.isBounded() == PCollection.IsBounded.UNBOUNDED || tableRefFunction != null) {
// We will use BigQuery's streaming write API -- validate supported dispositions.
- checkArgument(
- createDisposition != CreateDisposition.CREATE_NEVER,
- "CreateDisposition.CREATE_NEVER is not supported for an unbounded PCollection or when"
- + " using a tablespec function.");
+ if (tableRefFunction != null) {
+ checkArgument(
+ createDisposition != CreateDisposition.CREATE_NEVER,
+ "CreateDisposition.CREATE_NEVER is not supported when using a tablespec"
+ + " function.");
+ }
+ if (jsonSchema == null) {
+ checkArgument(
+ createDisposition == CreateDisposition.CREATE_NEVER,
+ "CreateDisposition.CREATE_NEVER must be used if jsonSchema is null.");
+ }
checkArgument(
writeDisposition != WriteDisposition.WRITE_TRUNCATE,
@@ -1965,7 +1972,9 @@ public class BigQueryIO {
if (input.isBounded() == IsBounded.UNBOUNDED || tableRefFunction != null) {
return input.apply(
new StreamWithDeDup(getTable(), tableRefFunction,
- NestedValueProvider.of(jsonSchema, new JsonSchemaToTableSchema()), bqServices));
+ jsonSchema == null ? null : NestedValueProvider.of(
+ jsonSchema, new JsonSchemaToTableSchema()),
+ createDisposition, bqServices));
}
ValueProvider<TableReference> table = getTableWithDefaultProject(options);
@@ -2608,16 +2617,19 @@ public class BigQueryIO {
* Implementation of DoFn to perform streaming BigQuery write.
*/
@SystemDoFnInternal
- private static class StreamingWriteFn
+ @VisibleForTesting
+ static class StreamingWriteFn
extends DoFn<KV<ShardedKey<String>, TableRowInfo>, Void> {
/** TableSchema in JSON. Use String to make the class Serializable. */
- private final ValueProvider<String> jsonTableSchema;
+ @Nullable private final ValueProvider<String> jsonTableSchema;
private final BigQueryServices bqServices;
/** JsonTableRows to accumulate BigQuery rows in order to batch writes. */
private transient Map<String, List<TableRow>> tableRows;
+ private final Write.CreateDisposition createDisposition;
+
/** The list of unique ids for each BigQuery table row. */
private transient Map<String, List<String>> uniqueIdsForTableRows;
@@ -2631,9 +2643,12 @@ public class BigQueryIO {
createAggregator("ByteCount", Sum.ofLongs());
/** Constructor. */
- StreamingWriteFn(ValueProvider<TableSchema> schema, BigQueryServices bqServices) {
- this.jsonTableSchema =
+ StreamingWriteFn(@Nullable ValueProvider<TableSchema> schema,
+ Write.CreateDisposition createDisposition,
+ BigQueryServices bqServices) {
+ this.jsonTableSchema = schema == null ? null :
NestedValueProvider.of(schema, new TableSchemaToJsonSchema());
+ this.createDisposition = createDisposition;
this.bqServices = checkNotNull(bqServices, "bqServices");
}
@@ -2689,7 +2704,8 @@ public class BigQueryIO {
public TableReference getOrCreateTable(BigQueryOptions options, String tableSpec)
throws InterruptedException, IOException {
TableReference tableReference = parseTableSpec(tableSpec);
- if (!createdTables.contains(tableSpec)) {
+ if (createDisposition != createDisposition.CREATE_NEVER
+ && !createdTables.contains(tableSpec)) {
synchronized (createdTables) {
// Another thread may have succeeded in creating the table in the meanwhile, so
// check again. This check isn't needed for correctness, but we add it to prevent
@@ -2945,19 +2961,22 @@ public class BigQueryIO {
* it leverages BigQuery best effort de-dup mechanism.
*/
private static class StreamWithDeDup extends PTransform<PCollection<TableRow>, PDone> {
- private final transient ValueProvider<TableReference> tableReference;
- private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
- private final transient ValueProvider<TableSchema> tableSchema;
+ @Nullable private final transient ValueProvider<TableReference> tableReference;
+ @Nullable private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
+ @Nullable private final transient ValueProvider<TableSchema> tableSchema;
+ private final Write.CreateDisposition createDisposition;
private final BigQueryServices bqServices;
/** Constructor. */
StreamWithDeDup(ValueProvider<TableReference> tableReference,
- SerializableFunction<BoundedWindow, TableReference> tableRefFunction,
- ValueProvider<TableSchema> tableSchema,
+ @Nullable SerializableFunction<BoundedWindow, TableReference> tableRefFunction,
+ @Nullable ValueProvider<TableSchema> tableSchema,
+ Write.CreateDisposition createDisposition,
BigQueryServices bqServices) {
this.tableReference = tableReference;
this.tableRefFunction = tableRefFunction;
this.tableSchema = tableSchema;
+ this.createDisposition = createDisposition;
this.bqServices = checkNotNull(bqServices, "bqServices");
}
@@ -2989,7 +3008,7 @@ public class BigQueryIO {
tagged
.setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
.apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
- .apply(ParDo.of(new StreamingWriteFn(tableSchema, bqServices)));
+ .apply(ParDo.of(new StreamingWriteFn(tableSchema, createDisposition, bqServices)));
// Note that the implementation to return PDone here breaks the
// implicit assumption about the job execution order. If a user
http://git-wip-us.apache.org/repos/asf/beam/blob/dc369522/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 3e8c2c9..ba7f44e 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -1523,6 +1523,42 @@ public class BigQueryIOTest implements Serializable {
}
@Test
+ public void testStreamingWriteFnCreateNever() throws Exception {
+ BigQueryIO.StreamingWriteFn fn = new BigQueryIO.StreamingWriteFn(
+ null, CreateDisposition.CREATE_NEVER, new FakeBigQueryServices());
+ assertEquals(BigQueryIO.parseTableSpec("dataset.table"),
+ fn.getOrCreateTable(null, "dataset.table"));
+ }
+
+ @Test
+ public void testCreateNeverWithStreaming() throws Exception {
+ BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
+ options.setProject("project");
+ options.setStreaming(true);
+ Pipeline p = TestPipeline.create(options);
+
+ TableReference tableRef = new TableReference();
+ tableRef.setDatasetId("dataset");
+ tableRef.setTableId("sometable");
+
+ PCollection<TableRow> tableRows =
+ p.apply(CountingInput.unbounded())
+ .apply(
+ MapElements.via(
+ new SimpleFunction<Long, TableRow>() {
+ @Override
+ public TableRow apply(Long input) {
+ return null;
+ }
+ }))
+ .setCoder(TableRowJsonCoder.of());
+ tableRows
+ .apply(BigQueryIO.Write.to(tableRef)
+ .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+ .withoutValidation());
+ }
+
+ @Test
public void testTableParsing() {
TableReference ref = BigQueryIO
.parseTableSpec("my-project:data_set.table_name");
[18/50] beam git commit: This closes #1839
Posted by dh...@apache.org.
This closes #1839
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bf9d4542
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bf9d4542
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bf9d4542
Branch: refs/heads/python-sdk
Commit: bf9d454290bba7fac8829b2edeb416b7d9606062
Parents: 7402d76 a361b65
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 09:03:51 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 09:03:51 2017 -0800
----------------------------------------------------------------------
.../runners/flink/translation/FlinkBatchTransformTranslators.java | 2 +-
.../flink/translation/FlinkStreamingTranslationContext.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
[26/50] beam git commit: [BEAM-708] Using AutoValue in
BoundedReadFromUnboundedSource
Posted by dh...@apache.org.
[BEAM-708] Using AutoValue in BoundedReadFromUnboundedSource
This closes #1794
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6413299a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6413299a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6413299a
Branch: refs/heads/python-sdk
Commit: 6413299a20be57de849684479134479fa1acee2d
Parents: 95beda6 a67ff91
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jan 25 14:22:56 2017 -0800
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jan 25 14:22:56 2017 -0800
----------------------------------------------------------------------
.../sdk/io/BoundedReadFromUnboundedSource.java | 69 +++++++++++++-------
1 file changed, 44 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
[37/50] beam git commit: This closes #1856
Posted by dh...@apache.org.
This closes #1856
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2cbc08b5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2cbc08b5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2cbc08b5
Branch: refs/heads/python-sdk
Commit: 2cbc08b5870036c52a94bb1f1f1d081d387e4ae0
Parents: 9637724 4d0225e
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 14:42:04 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 14:42:04 2017 -0800
----------------------------------------------------------------------
.../apache/beam/examples/WindowedWordCountIT.java | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
[21/50] beam git commit: PackageUtil: parallelize staging of files
Posted by dh...@apache.org.
PackageUtil: parallelize staging of files
Proceeds in stages:
1. In parallel, hash and size all files.
2. Sort files by descending size.
3. In parallel, upload files.
Also a little cleanup for Dataflow 2.0:
* proper visibility
* removing some deprecated code
* refactoring into smaller methods.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3ecf7e70
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3ecf7e70
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3ecf7e70
Branch: refs/heads/python-sdk
Commit: 3ecf7e70bcc4775d804f096de647d13c407a8d52
Parents: 979c937
Author: Dan Halperin <dh...@google.com>
Authored: Mon Oct 24 17:27:23 2016 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 11:03:03 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 +
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
.../runners/dataflow/util/PackageUtilTest.java | 42 ++-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 +
6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index eea5502..9858b3d 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,6 +203,11 @@
</dependency>
<dependency>
+ <groupId>com.google.apis</groupId>
+ <artifactId>google-api-services-storage</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-credentials</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 6ca4c3f..53822e3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,13 +17,19 @@
*/
package org.apache.beam.runners.dataflow.util;
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.storage.Storage;
import java.util.List;
import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
+import org.apache.beam.sdk.util.Transport;
/**
* Utility class for staging files to GCS.
@@ -35,6 +41,7 @@ public class GcsStager implements Stager {
this.options = options;
}
+ @SuppressWarnings("unused") // used via reflection
public static GcsStager fromOptions(PipelineOptions options) {
return new GcsStager(options.as(DataflowPipelineOptions.class));
}
@@ -48,7 +55,16 @@ public class GcsStager implements Stager {
if (windmillBinary != null) {
filesToStage.add("windmill_main=" + windmillBinary);
}
+ int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
+ checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
+ uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
+ Storage.Builder storageBuilder = Transport.newStorageClient(options);
+ GcsUtil util = GcsUtilFactory.create(
+ storageBuilder.build(),
+ storageBuilder.getHttpRequestInitializer(),
+ options.getExecutorService(),
+ uploadSizeBytes);
return PackageUtil.stageClasspathElements(
- options.getFilesToStage(), options.getStagingLocation());
+ options.getFilesToStage(), options.getStagingLocation(), util);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index 6d910ba..fa8c94d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,53 +17,62 @@
*/
package org.apache.beam.runners.dataflow.util;
+import static com.google.common.base.Preconditions.checkArgument;
+
import com.fasterxml.jackson.core.Base64Variants;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.Sleeper;
import com.google.api.services.dataflow.model.DataflowPackage;
import com.google.cloud.hadoop.util.ApiErrorExtractor;
+import com.google.common.collect.Lists;
import com.google.common.hash.Funnels;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.Files;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import javax.annotation.Nullable;
import org.apache.beam.sdk.util.FluentBackoff;
+import org.apache.beam.sdk.util.GcsIOChannelFactory;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.ZipFiles;
+import org.apache.beam.sdk.util.gcsfs.GcsPath;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Helper routines for packages. */
-public class PackageUtil {
+class PackageUtil {
private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
/**
* A reasonable upper bound on the number of jars required to launch a Dataflow job.
*/
- public static final int SANE_CLASSPATH_SIZE = 1000;
- /**
- * The initial interval to use between package staging attempts.
- */
- private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
- /**
- * The maximum number of retries when staging a file.
- */
- private static final int MAX_RETRIES = 4;
+ private static final int SANE_CLASSPATH_SIZE = 1000;
private static final FluentBackoff BACKOFF_FACTORY =
- FluentBackoff.DEFAULT
- .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
+ FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
/**
* Translates exceptions from API calls.
@@ -71,35 +80,18 @@ public class PackageUtil {
private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
/**
- * Creates a DataflowPackage containing information about how a classpath element should be
- * staged, including the staging destination as well as its size and hash.
- *
- * @param classpathElement The local path for the classpath element.
- * @param stagingPath The base location for staged classpath elements.
- * @param overridePackageName If non-null, use the given value as the package name
- * instead of generating one automatically.
- * @return The package.
- */
- @Deprecated
- public static DataflowPackage createPackage(File classpathElement,
- String stagingPath, String overridePackageName) {
- return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
- .getDataflowPackage();
- }
-
- /**
* Compute and cache the attributes of a classpath element that we will need to stage it.
*
- * @param classpathElement the file or directory to be staged.
+ * @param source the file or directory to be staged.
* @param stagingPath The base location for staged classpath elements.
* @param overridePackageName If non-null, use the given value as the package name
* instead of generating one automatically.
* @return a {@link PackageAttributes} that containing metadata about the object to be staged.
*/
- static PackageAttributes createPackageAttributes(File classpathElement,
- String stagingPath, String overridePackageName) {
+ static PackageAttributes createPackageAttributes(File source,
+ String stagingPath, @Nullable String overridePackageName) {
try {
- boolean directory = classpathElement.isDirectory();
+ boolean directory = source.isDirectory();
// Compute size and hash in one pass over file or directory.
Hasher hasher = Hashing.md5().newHasher();
@@ -108,142 +100,232 @@ public class PackageUtil {
if (!directory) {
// Files are staged as-is.
- Files.asByteSource(classpathElement).copyTo(countingOutputStream);
+ Files.asByteSource(source).copyTo(countingOutputStream);
} else {
// Directories are recursively zipped.
- ZipFiles.zipDirectory(classpathElement, countingOutputStream);
+ ZipFiles.zipDirectory(source, countingOutputStream);
}
long size = countingOutputStream.getCount();
String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
// Create the DataflowPackage with staging name and location.
- String uniqueName = getUniqueContentName(classpathElement, hash);
+ String uniqueName = getUniqueContentName(source, hash);
String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
DataflowPackage target = new DataflowPackage();
target.setName(overridePackageName != null ? overridePackageName : uniqueName);
target.setLocation(resourcePath);
- return new PackageAttributes(size, hash, directory, target);
+ return new PackageAttributes(size, hash, directory, target, source.getPath());
} catch (IOException e) {
- throw new RuntimeException("Package setup failure for " + classpathElement, e);
+ throw new RuntimeException("Package setup failure for " + source, e);
}
}
- /**
- * Transfers the classpath elements to the staging location.
- *
- * @param classpathElements The elements to stage.
- * @param stagingPath The base location to stage the elements to.
- * @return A list of cloud workflow packages, each representing a classpath element.
- */
- public static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, String stagingPath) {
- return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
- }
-
- // Visible for testing.
- static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, String stagingPath,
- Sleeper retrySleeper) {
- LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
- + "prepare for execution.", classpathElements.size());
-
- if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
- LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
- + "copies to all workers. Having this many entries on your classpath may be indicative "
- + "of an issue in your pipeline. You may want to consider trimming the classpath to "
- + "necessary dependencies only, using --filesToStage pipeline option to override "
- + "what files are being staged, or bundling several dependencies into one.",
- classpathElements.size());
- }
-
- ArrayList<DataflowPackage> packages = new ArrayList<>();
+ /** Utility comparator used in uploading packages efficiently. */
+ private static class PackageUploadOrder implements Comparator<PackageAttributes> {
+ @Override
+ public int compare(PackageAttributes o1, PackageAttributes o2) {
+ // Smaller size compares high so that bigger packages are uploaded first.
+ long sizeDiff = o2.getSize() - o1.getSize();
+ if (sizeDiff != 0) {
+ // returns sign of long
+ return Long.signum(sizeDiff);
+ }
- if (stagingPath == null) {
- throw new IllegalArgumentException(
- "Can't stage classpath elements on because no staging location has been provided");
+ // Otherwise, choose arbitrarily based on hash.
+ return o1.getHash().compareTo(o2.getHash());
}
+ }
- int numUploaded = 0;
- int numCached = 0;
+ /**
+ * Utility function that computes sizes and hashes of packages so that we can validate whether
+ * they have already been correctly staged.
+ */
+ private static List<PackageAttributes> computePackageAttributes(
+ Collection<String> classpathElements, final String stagingPath,
+ ListeningExecutorService executorService) {
+ List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
for (String classpathElement : classpathElements) {
- String packageName = null;
+ @Nullable String userPackageName = null;
if (classpathElement.contains("=")) {
String[] components = classpathElement.split("=", 2);
- packageName = components[0];
+ userPackageName = components[0];
classpathElement = components[1];
}
+ @Nullable final String packageName = userPackageName;
- File file = new File(classpathElement);
+ final File file = new File(classpathElement);
if (!file.exists()) {
LOG.warn("Skipping non-existent classpath element {} that was specified.",
classpathElement);
continue;
}
- PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+ ListenableFuture<PackageAttributes> future =
+ executorService.submit(new Callable<PackageAttributes>() {
+ @Override
+ public PackageAttributes call() throws Exception {
+ return createPackageAttributes(file, stagingPath, packageName);
+ }
+ });
+ futures.add(future);
+ }
+
+ try {
+ return Futures.allAsList(futures).get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("Interrupted while staging packages", e);
+ } catch (ExecutionException e) {
+ throw new RuntimeException("Error while staging packages", e.getCause());
+ }
+ }
+
+ private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
+ throws IOException {
+ IOChannelFactory factory = IOChannelUtils.getFactory(target);
+ if (factory instanceof GcsIOChannelFactory) {
+ return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
+ } else {
+ return factory.create(target, MimeTypes.BINARY);
+ }
+ }
- DataflowPackage workflowPackage = attributes.getDataflowPackage();
- packages.add(workflowPackage);
- String target = workflowPackage.getLocation();
+ /**
+ * Utility to verify whether a package has already been staged and, if not, copy it to the
+ * staging location.
+ */
+ private static void stageOnePackage(
+ PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
+ Sleeper retrySleeper, GcsUtil gcsUtil) {
+ String source = attributes.getSourcePath();
+ String target = attributes.getDataflowPackage().getLocation();
- // TODO: Should we attempt to detect the Mime type rather than
- // always using MimeTypes.BINARY?
+ // TODO: Should we attempt to detect the Mime type rather than
+ // always using MimeTypes.BINARY?
+ try {
try {
- try {
- long remoteLength = IOChannelUtils.getSizeBytes(target);
- if (remoteLength == attributes.getSize()) {
- LOG.debug("Skipping classpath element already staged: {} at {}",
- classpathElement, target);
- numCached++;
- continue;
- }
- } catch (FileNotFoundException expected) {
- // If the file doesn't exist, it means we need to upload it.
+ long remoteLength = IOChannelUtils.getSizeBytes(target);
+ if (remoteLength == attributes.getSize()) {
+ LOG.debug("Skipping classpath element already staged: {} at {}",
+ attributes.getSourcePath(), target);
+ numCached.incrementAndGet();
+ return;
}
+ } catch (FileNotFoundException expected) {
+ // If the file doesn't exist, it means we need to upload it.
+ }
- // Upload file, retrying on failure.
- BackOff backoff = BACKOFF_FACTORY.backoff();
- while (true) {
- try {
- LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
- try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
- copyContent(classpathElement, writer);
- }
- numUploaded++;
- break;
- } catch (IOException e) {
- if (ERROR_EXTRACTOR.accessDenied(e)) {
- String errorMessage = String.format(
- "Uploaded failed due to permissions error, will NOT retry staging "
- + "of classpath %s. Please verify credentials are valid and that you have "
- + "write access to %s. Stale credentials can be resolved by executing "
- + "'gcloud auth login'.", classpathElement, target);
- LOG.error(errorMessage);
- throw new IOException(errorMessage, e);
- }
- long sleep = backoff.nextBackOffMillis();
- if (sleep == BackOff.STOP) {
- // Rethrow last error, to be included as a cause in the catch below.
- LOG.error("Upload failed, will NOT retry staging of classpath: {}",
- classpathElement, e);
- throw e;
- } else {
- LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
- classpathElement, e);
- retrySleeper.sleep(sleep);
- }
+ // Upload file, retrying on failure.
+ BackOff backoff = BACKOFF_FACTORY.backoff();
+ while (true) {
+ try {
+ LOG.debug("Uploading classpath element {} to {}", source, target);
+ try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
+ copyContent(source, writer);
+ }
+ numUploaded.incrementAndGet();
+ break;
+ } catch (IOException e) {
+ if (ERROR_EXTRACTOR.accessDenied(e)) {
+ String errorMessage = String.format(
+ "Uploaded failed due to permissions error, will NOT retry staging "
+ + "of classpath %s. Please verify credentials are valid and that you have "
+ + "write access to %s. Stale credentials can be resolved by executing "
+ + "'gcloud auth application-default login'.", source, target);
+ LOG.error(errorMessage);
+ throw new IOException(errorMessage, e);
+ }
+ long sleep = backoff.nextBackOffMillis();
+ if (sleep == BackOff.STOP) {
+ // Rethrow last error, to be included as a cause in the catch below.
+ LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+ source, e);
+ throw e;
+ } else {
+ LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+ source, e);
+ retrySleeper.sleep(sleep);
}
}
- } catch (Exception e) {
- throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
}
+ } catch (Exception e) {
+ throw new RuntimeException("Could not stage classpath element: " + source, e);
}
+ }
- LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
- + "{} files cached",
- numUploaded, numCached);
+ /**
+ * Transfers the classpath elements to the staging location.
+ *
+ * @param classpathElements The elements to stage.
+ * @param stagingPath The base location to stage the elements to.
+ * @return A list of cloud workflow packages, each representing a classpath element.
+ */
+ static List<DataflowPackage> stageClasspathElements(
+ Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
+ ListeningExecutorService executorService =
+ MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
+ try {
+ return stageClasspathElements(
+ classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
+ } finally {
+ executorService.shutdown();
+ }
+ }
+
+ // Visible for testing.
+ static List<DataflowPackage> stageClasspathElements(
+ Collection<String> classpathElements, final String stagingPath,
+ final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+ LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
+ + "prepare for execution.", classpathElements.size());
+
+ if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
+ LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
+ + "copies to all workers. Having this many entries on your classpath may be indicative "
+ + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+ + "necessary dependencies only, using --filesToStage pipeline option to override "
+ + "what files are being staged, or bundling several dependencies into one.",
+ classpathElements.size());
+ }
+
+ checkArgument(
+ stagingPath != null,
+ "Can't stage classpath elements because no staging location has been provided");
+
+ // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
+ List<PackageAttributes> packageAttributes =
+ new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
+ // Order package attributes in descending size order so that we upload the largest files first.
+ Collections.sort(packageAttributes, new PackageUploadOrder());
+
+ List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+ final AtomicInteger numUploaded = new AtomicInteger(0);
+ final AtomicInteger numCached = new AtomicInteger(0);
+
+ List<ListenableFuture<?>> futures = new LinkedList<>();
+ for (final PackageAttributes attributes : packageAttributes) {
+ packages.add(attributes.getDataflowPackage());
+ futures.add(executorService.submit(new Runnable() {
+ @Override
+ public void run() {
+ stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+ }
+ }));
+ }
+ try {
+ Futures.allAsList(futures).get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("Interrupted while staging packages", e);
+ } catch (ExecutionException e) {
+ throw new RuntimeException("Error while staging packages", e.getCause());
+ }
+
+ LOG.info(
+ "Staging files complete: {} files cached, {} files newly uploaded",
+ numUploaded.get(), numCached.get());
return packages;
}
@@ -293,13 +375,15 @@ public class PackageUtil {
private final boolean directory;
private final long size;
private final String hash;
+ private final String sourcePath;
private DataflowPackage dataflowPackage;
public PackageAttributes(long size, String hash, boolean directory,
- DataflowPackage dataflowPackage) {
+ DataflowPackage dataflowPackage, String sourcePath) {
this.size = size;
this.hash = Objects.requireNonNull(hash, "hash");
this.directory = directory;
+ this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
}
@@ -330,5 +414,12 @@ public class PackageUtil {
public String getHash() {
return hash;
}
+
+ /**
+ * @return the file to be uploaded
+ */
+ public String getSourcePath() {
+ return sourcePath;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 05a87dd..3828415 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
package org.apache.beam.runners.dataflow.util;
import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
@@ -53,6 +53,7 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.common.io.LineReader;
+import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -235,7 +236,7 @@ public class PackageUtilTest {
classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
}
- PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
+ PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
@@ -250,7 +251,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -277,7 +278,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -304,7 +305,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -327,7 +328,8 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper);
+ STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+ mockGcsUtil);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -348,16 +350,20 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper);
+ STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+ mockGcsUtil);
fail("Expected RuntimeException");
} catch (RuntimeException e) {
- assertTrue("Expected IOException containing detailed message.",
- e.getCause() instanceof IOException);
- assertThat(e.getCause().getMessage(),
+ assertThat("Expected RuntimeException wrapping IOException.",
+ e.getCause(), instanceOf(RuntimeException.class));
+ assertThat("Expected IOException containing detailed message.",
+ e.getCause().getCause(), instanceOf(IOException.class));
+ assertThat(e.getCause().getCause().getMessage(),
Matchers.allOf(
Matchers.containsString("Uploaded failed due to permissions error"),
Matchers.containsString(
- "Stale credentials can be resolved by executing 'gcloud auth login'")));
+ "Stale credentials can be resolved by executing 'gcloud auth application-default "
+ + "login'")));
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -377,9 +383,8 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH,
- fastNanoClockAndSleeper);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
+ MoreExecutors.newDirectExecutorService(), mockGcsUtil);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -393,7 +398,7 @@ public class PackageUtilTest {
when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verifyNoMoreInteractions(mockGcsUtil);
@@ -411,7 +416,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -429,7 +434,8 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
+ mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -446,7 +452,7 @@ public class PackageUtilTest {
String nonExistentFile =
IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
- ImmutableList.of(nonExistentFile), STAGING_PATH));
+ ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
}
/**
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 0553efc..72e106d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,6 +25,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
import org.apache.beam.sdk.util.AppEngineEnvironment;
import org.apache.beam.sdk.util.GcsPathValidator;
import org.apache.beam.sdk.util.GcsUtil;
@@ -81,8 +82,9 @@ public interface GcsOptions extends
+ "information on the restrictions and performance implications of this value.\n\n"
+ "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
+ "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
+ @Nullable
Integer getGcsUploadBufferSizeBytes();
- void setGcsUploadBufferSizeBytes(Integer bytes);
+ void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
/**
* The class of the validator that should be created and used to validate paths.
http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index a10ea28..5e83584 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,6 +101,18 @@ public class GcsUtil {
gcsOptions.getExecutorService(),
gcsOptions.getGcsUploadBufferSizeBytes());
}
+
+ /**
+ * Returns an instance of {@link GcsUtil} based on the given parameters.
+ */
+ public static GcsUtil create(
+ Storage storageClient,
+ HttpRequestInitializer httpRequestInitializer,
+ ExecutorService executorService,
+ @Nullable Integer uploadBufferSizeBytes) {
+ return new GcsUtil(
+ storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
+ }
}
private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
[10/50] beam git commit: Add a UsesUnboundedPCollections category and
exclude it from DataflowRunner batch tests
Posted by dh...@apache.org.
Add a UsesUnboundedPCollections category and exclude it from DataflowRunner batch tests
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bffe80d5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bffe80d5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bffe80d5
Branch: refs/heads/python-sdk
Commit: bffe80d55d54592b8146e1b185c72eac38751f33
Parents: f2389ab
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 13:57:02 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:51:13 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 3 ++-
.../sdk/testing/UsesUnboundedPCollections.java | 23 ++++++++++++++++++++
.../org/apache/beam/sdk/io/PubsubIOTest.java | 4 ++--
3 files changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 7bf2089..1d05193 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -78,9 +78,10 @@
<id>runnable-on-service-tests</id>
<configuration>
<excludedGroups>
+ org.apache.beam.sdk.testing.UsesMetrics,
org.apache.beam.sdk.testing.UsesTimersInParDo,
org.apache.beam.sdk.testing.UsesSplittableParDo,
- org.apache.beam.sdk.testing.UsesMetrics
+ org.apache.beam.sdk.testing.UsesUnboundedPCollections,
</excludedGroups>
<excludes>
<exclude>org.apache.beam.sdk.transforms.FlattenTest</exclude>
http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
new file mode 100644
index 0000000..d2caf4a
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.testing;
+
+/**
+ * Category tag for validation tests which utilize at least one unbounded {@code PCollection}.
+ */
+public interface UsesUnboundedPCollections {}
http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
index a0d58ea..5ec08b4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
@@ -25,10 +25,10 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import java.util.Set;
-
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.testing.RunnableOnService;
+import org.apache.beam.sdk.testing.UsesUnboundedPCollections;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
import org.joda.time.Duration;
@@ -150,7 +150,7 @@ public class PubsubIOTest {
}
@Test
- @Category(RunnableOnService.class)
+ @Category({RunnableOnService.class, UsesUnboundedPCollections.class})
public void testPrimitiveReadDisplayData() {
DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
Set<DisplayData> displayData;
[46/50] beam git commit: This closes #1859
Posted by dh...@apache.org.
This closes #1859
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b21bdf47
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b21bdf47
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b21bdf47
Branch: refs/heads/python-sdk
Commit: b21bdf4755363191209e05f96ca8044731a346ed
Parents: 4a29131 9c11815
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 21:18:45 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 21:18:45 2017 +0100
----------------------------------------------------------------------
.../examples-java8/src/main/resources/archetype-resources/pom.xml | 2 +-
.../examples/src/main/resources/archetype-resources/pom.xml | 2 +-
.../starter/src/main/resources/archetype-resources/pom.xml | 2 +-
.../starter/src/test/resources/projects/basic/reference/pom.xml | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
[13/50] beam git commit: This closes #1826
Posted by dh...@apache.org.
This closes #1826
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e77de7c6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e77de7c6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e77de7c6
Branch: refs/heads/python-sdk
Commit: e77de7c61daf6aaa5d0562440cfd2f34cd456424
Parents: 1148be6 4cdd877
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 15:55:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:55:09 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/FileSystems.java | 32 +++++++------------
.../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +++-----------------
2 files changed, 15 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
[12/50] beam git commit: [BEAM-59] Beam FileSystem.setDefaultConfig:
remove scheme from the signature.
Posted by dh...@apache.org.
[BEAM-59] Beam FileSystem.setDefaultConfig: remove scheme from the signature.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4cdd8771
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4cdd8771
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4cdd8771
Branch: refs/heads/python-sdk
Commit: 4cdd87718c3d0719b7c0e421b9cbaf4eb902672e
Parents: 1148be6
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 18:08:44 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:54:53 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/FileSystems.java | 32 +++++++------------
.../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +++-----------------
2 files changed, 15 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/4cdd8771/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
index d086ec6..e19c1e4 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
@@ -17,8 +17,8 @@
*/
package org.apache.beam.sdk.io;
-import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
@@ -53,6 +53,8 @@ public class FileSystems {
private static final Map<String, FileSystemRegistrar> SCHEME_TO_REGISTRAR =
new ConcurrentHashMap<>();
+ private static PipelineOptions defaultConfig;
+
private static final Map<String, PipelineOptions> SCHEME_TO_DEFAULT_CONFIG =
new ConcurrentHashMap<>();
@@ -78,27 +80,12 @@ public class FileSystems {
}
/**
- * Sets the default configuration to be used with a {@link FileSystemRegistrar} for the provided
- * {@code scheme}.
+ * Sets the default configuration in workers.
*
- * <p>Syntax: <pre>scheme = alpha *( alpha | digit | "+" | "-" | "." )</pre>
- * Upper case letters are treated as the same as lower case letters.
+ * <p>It will be used in {@link FileSystemRegistrar FileSystemRegistrars} for all schemes.
*/
- public static void setDefaultConfig(String scheme, PipelineOptions options) {
- String lowerCaseScheme = checkNotNull(scheme, "scheme").toLowerCase();
- checkArgument(
- URI_SCHEME_PATTERN.matcher(lowerCaseScheme).matches(),
- String.format("Scheme: [%s] doesn't match URI syntax: %s",
- lowerCaseScheme, URI_SCHEME_PATTERN.pattern()));
- checkArgument(
- SCHEME_TO_REGISTRAR.containsKey(lowerCaseScheme),
- String.format("No FileSystemRegistrar found for scheme: [%s].", lowerCaseScheme));
- SCHEME_TO_DEFAULT_CONFIG.put(lowerCaseScheme, checkNotNull(options, "options"));
- }
-
- @VisibleForTesting
- static PipelineOptions getDefaultConfig(String scheme) {
- return SCHEME_TO_DEFAULT_CONFIG.get(scheme.toLowerCase());
+ public static void setDefaultConfigInWorkers(PipelineOptions options) {
+ defaultConfig = checkNotNull(options, "options");
}
/**
@@ -106,9 +93,12 @@ public class FileSystems {
*/
@VisibleForTesting
static FileSystem getFileSystemInternal(URI uri) {
+ checkState(
+ defaultConfig != null,
+ "Expect the runner have called setDefaultConfigInWorkers().");
String lowerCaseScheme = (uri.getScheme() != null
? uri.getScheme().toLowerCase() : LocalFileSystemRegistrar.LOCAL_FILE_SCHEME);
- return getRegistrarInternal(lowerCaseScheme).fromOptions(getDefaultConfig(lowerCaseScheme));
+ return getRegistrarInternal(lowerCaseScheme).fromOptions(defaultConfig);
}
/**
http://git-wip-us.apache.org/repos/asf/beam/blob/4cdd8771/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
index 9b41b98..113a562 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
@@ -17,8 +17,6 @@
*/
package org.apache.beam.sdk.io;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import com.google.common.collect.Sets;
@@ -26,6 +24,7 @@ import java.net.URI;
import javax.annotation.Nullable;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
@@ -41,33 +40,9 @@ public class FileSystemsTest {
@Rule
public ExpectedException thrown = ExpectedException.none();
- @Test
- public void testSetDefaultConfig() throws Exception {
- PipelineOptions first = PipelineOptionsFactory.create();
- PipelineOptions second = PipelineOptionsFactory.create();
- FileSystems.setDefaultConfig("file", first);
- assertEquals(first, FileSystems.getDefaultConfig("file"));
- assertEquals(first, FileSystems.getDefaultConfig("FILE"));
-
- FileSystems.setDefaultConfig("FILE", second);
- assertNotEquals(first, FileSystems.getDefaultConfig("file"));
- assertNotEquals(first, FileSystems.getDefaultConfig("FILE"));
- assertEquals(second, FileSystems.getDefaultConfig("file"));
- assertEquals(second, FileSystems.getDefaultConfig("FILE"));
- }
-
- @Test
- public void testSetDefaultConfigNotFound() throws Exception {
- thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("No FileSystemRegistrar found for scheme: [gs-s3].");
- FileSystems.setDefaultConfig("gs-s3", PipelineOptionsFactory.create());
- }
-
- @Test
- public void testSetDefaultConfigInvalidScheme() throws Exception {
- thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Scheme: [gs:] doesn't match URI syntax");
- FileSystems.setDefaultConfig("gs:", PipelineOptionsFactory.create());
+ @Before
+ public void setup() {
+ FileSystems.setDefaultConfigInWorkers(PipelineOptionsFactory.create());
}
@Test
[36/50] beam git commit: Add prefix and suffix to WindowedWordCountIT
output location
Posted by dh...@apache.org.
Add prefix and suffix to WindowedWordCountIT output location
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4d0225e8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4d0225e8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4d0225e8
Branch: refs/heads/python-sdk
Commit: 4d0225e8e29047dc7b4e0f5cea2414eaef4b038c
Parents: 9637724
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jan 26 13:42:58 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Thu Jan 26 13:47:08 2017 -0800
----------------------------------------------------------------------
.../apache/beam/examples/WindowedWordCountIT.java | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/4d0225e8/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
index e4570ac..703f836 100644
--- a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
+++ b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
@@ -28,6 +28,7 @@ import java.util.Date;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.concurrent.ThreadLocalRandom;
import org.apache.beam.examples.common.WriteWindowedFilesDoFn;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
@@ -47,8 +48,10 @@ import org.hamcrest.TypeSafeMatcher;
import org.joda.time.Duration;
import org.joda.time.Instant;
import org.junit.BeforeClass;
+import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.slf4j.Logger;
@@ -58,6 +61,8 @@ import org.slf4j.LoggerFactory;
@RunWith(JUnit4.class)
public class WindowedWordCountIT {
+ @Rule public TestName testName = new TestName();
+
private static final String DEFAULT_INPUT =
"gs://apache-beam-samples/shakespeare/winterstale-personae";
static final int MAX_READ_RETRIES = 4;
@@ -100,7 +105,9 @@ public class WindowedWordCountIT {
options.setOutput(
IOChannelUtils.resolve(
options.getTempRoot(),
- String.format("WindowedWordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date()),
+ String.format(
+ "WindowedWordCountIT.%s-%tFT%<tH:%<tM:%<tS.%<tL+%s",
+ testName.getMethodName(), new Date(), ThreadLocalRandom.current().nextInt()),
"output",
"results"));
return options;
@@ -133,8 +140,7 @@ public class WindowedWordCountIT {
new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(10)))));
}
- ShardedFile inputFile =
- new ExplicitShardedFile(Collections.singleton(options.getInputFile()));
+ ShardedFile inputFile = new ExplicitShardedFile(Collections.singleton(options.getInputFile()));
// For this integration test, input is tiny and we can build the expected counts
SortedMap<String, Long> expectedWordCounts = new TreeMap<>();
@@ -144,8 +150,8 @@ public class WindowedWordCountIT {
for (String word : words) {
if (!word.isEmpty()) {
- expectedWordCounts.put(word,
- MoreObjects.firstNonNull(expectedWordCounts.get(word), 0L) + 1L);
+ expectedWordCounts.put(
+ word, MoreObjects.firstNonNull(expectedWordCounts.get(word), 0L) + 1L);
}
}
}
[27/50] beam git commit: Revert "This closes #1184"
Posted by dh...@apache.org.
Revert "This closes #1184"
This reverts commit c525783704e0cc47845df8cdec1715e1f1c74008, reversing
changes made to 979c9376f820577bad43c18cc1a7ee86fab9d942.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fee029f7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fee029f7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fee029f7
Branch: refs/heads/python-sdk
Commit: fee029f7f9963c9de821ff5792d7f45fabe6cb5d
Parents: 6413299
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 15:54:26 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 15:54:26 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 -
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 349 +++++++------------
.../runners/dataflow/util/PackageUtilTest.java | 42 +--
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 -
6 files changed, 149 insertions(+), 281 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 9858b3d..eea5502 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,11 +203,6 @@
</dependency>
<dependency>
- <groupId>com.google.apis</groupId>
- <artifactId>google-api-services-storage</artifactId>
- </dependency>
-
- <dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-credentials</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 53822e3..6ca4c3f 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,19 +17,13 @@
*/
package org.apache.beam.runners.dataflow.util;
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.api.services.storage.Storage;
import java.util.List;
import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.GcsUtil;
-import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
-import org.apache.beam.sdk.util.Transport;
/**
* Utility class for staging files to GCS.
@@ -41,7 +35,6 @@ public class GcsStager implements Stager {
this.options = options;
}
- @SuppressWarnings("unused") // used via reflection
public static GcsStager fromOptions(PipelineOptions options) {
return new GcsStager(options.as(DataflowPipelineOptions.class));
}
@@ -55,16 +48,7 @@ public class GcsStager implements Stager {
if (windmillBinary != null) {
filesToStage.add("windmill_main=" + windmillBinary);
}
- int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
- checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
- uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
- Storage.Builder storageBuilder = Transport.newStorageClient(options);
- GcsUtil util = GcsUtilFactory.create(
- storageBuilder.build(),
- storageBuilder.getHttpRequestInitializer(),
- options.getExecutorService(),
- uploadSizeBytes);
return PackageUtil.stageClasspathElements(
- options.getFilesToStage(), options.getStagingLocation(), util);
+ options.getFilesToStage(), options.getStagingLocation());
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index fa8c94d..6d910ba 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,62 +17,53 @@
*/
package org.apache.beam.runners.dataflow.util;
-import static com.google.common.base.Preconditions.checkArgument;
-
import com.fasterxml.jackson.core.Base64Variants;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.Sleeper;
import com.google.api.services.dataflow.model.DataflowPackage;
import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.collect.Lists;
import com.google.common.hash.Funnels;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.Files;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListeningExecutorService;
-import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executors;
-import java.util.concurrent.atomic.AtomicInteger;
-import javax.annotation.Nullable;
import org.apache.beam.sdk.util.FluentBackoff;
-import org.apache.beam.sdk.util.GcsIOChannelFactory;
-import org.apache.beam.sdk.util.GcsUtil;
-import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.ZipFiles;
-import org.apache.beam.sdk.util.gcsfs.GcsPath;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Helper routines for packages. */
-class PackageUtil {
+public class PackageUtil {
private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
/**
* A reasonable upper bound on the number of jars required to launch a Dataflow job.
*/
- private static final int SANE_CLASSPATH_SIZE = 1000;
+ public static final int SANE_CLASSPATH_SIZE = 1000;
+ /**
+ * The initial interval to use between package staging attempts.
+ */
+ private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
+ /**
+ * The maximum number of retries when staging a file.
+ */
+ private static final int MAX_RETRIES = 4;
private static final FluentBackoff BACKOFF_FACTORY =
- FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
+ FluentBackoff.DEFAULT
+ .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
/**
* Translates exceptions from API calls.
@@ -80,18 +71,35 @@ class PackageUtil {
private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
/**
+ * Creates a DataflowPackage containing information about how a classpath element should be
+ * staged, including the staging destination as well as its size and hash.
+ *
+ * @param classpathElement The local path for the classpath element.
+ * @param stagingPath The base location for staged classpath elements.
+ * @param overridePackageName If non-null, use the given value as the package name
+ * instead of generating one automatically.
+ * @return The package.
+ */
+ @Deprecated
+ public static DataflowPackage createPackage(File classpathElement,
+ String stagingPath, String overridePackageName) {
+ return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
+ .getDataflowPackage();
+ }
+
+ /**
* Compute and cache the attributes of a classpath element that we will need to stage it.
*
- * @param source the file or directory to be staged.
+ * @param classpathElement the file or directory to be staged.
* @param stagingPath The base location for staged classpath elements.
* @param overridePackageName If non-null, use the given value as the package name
* instead of generating one automatically.
* @return a {@link PackageAttributes} that containing metadata about the object to be staged.
*/
- static PackageAttributes createPackageAttributes(File source,
- String stagingPath, @Nullable String overridePackageName) {
+ static PackageAttributes createPackageAttributes(File classpathElement,
+ String stagingPath, String overridePackageName) {
try {
- boolean directory = source.isDirectory();
+ boolean directory = classpathElement.isDirectory();
// Compute size and hash in one pass over file or directory.
Hasher hasher = Hashing.md5().newHasher();
@@ -100,158 +108,25 @@ class PackageUtil {
if (!directory) {
// Files are staged as-is.
- Files.asByteSource(source).copyTo(countingOutputStream);
+ Files.asByteSource(classpathElement).copyTo(countingOutputStream);
} else {
// Directories are recursively zipped.
- ZipFiles.zipDirectory(source, countingOutputStream);
+ ZipFiles.zipDirectory(classpathElement, countingOutputStream);
}
long size = countingOutputStream.getCount();
String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
// Create the DataflowPackage with staging name and location.
- String uniqueName = getUniqueContentName(source, hash);
+ String uniqueName = getUniqueContentName(classpathElement, hash);
String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
DataflowPackage target = new DataflowPackage();
target.setName(overridePackageName != null ? overridePackageName : uniqueName);
target.setLocation(resourcePath);
- return new PackageAttributes(size, hash, directory, target, source.getPath());
+ return new PackageAttributes(size, hash, directory, target);
} catch (IOException e) {
- throw new RuntimeException("Package setup failure for " + source, e);
- }
- }
-
- /** Utility comparator used in uploading packages efficiently. */
- private static class PackageUploadOrder implements Comparator<PackageAttributes> {
- @Override
- public int compare(PackageAttributes o1, PackageAttributes o2) {
- // Smaller size compares high so that bigger packages are uploaded first.
- long sizeDiff = o2.getSize() - o1.getSize();
- if (sizeDiff != 0) {
- // returns sign of long
- return Long.signum(sizeDiff);
- }
-
- // Otherwise, choose arbitrarily based on hash.
- return o1.getHash().compareTo(o2.getHash());
- }
- }
-
- /**
- * Utility function that computes sizes and hashes of packages so that we can validate whether
- * they have already been correctly staged.
- */
- private static List<PackageAttributes> computePackageAttributes(
- Collection<String> classpathElements, final String stagingPath,
- ListeningExecutorService executorService) {
- List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
- for (String classpathElement : classpathElements) {
- @Nullable String userPackageName = null;
- if (classpathElement.contains("=")) {
- String[] components = classpathElement.split("=", 2);
- userPackageName = components[0];
- classpathElement = components[1];
- }
- @Nullable final String packageName = userPackageName;
-
- final File file = new File(classpathElement);
- if (!file.exists()) {
- LOG.warn("Skipping non-existent classpath element {} that was specified.",
- classpathElement);
- continue;
- }
-
- ListenableFuture<PackageAttributes> future =
- executorService.submit(new Callable<PackageAttributes>() {
- @Override
- public PackageAttributes call() throws Exception {
- return createPackageAttributes(file, stagingPath, packageName);
- }
- });
- futures.add(future);
- }
-
- try {
- return Futures.allAsList(futures).get();
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new RuntimeException("Interrupted while staging packages", e);
- } catch (ExecutionException e) {
- throw new RuntimeException("Error while staging packages", e.getCause());
- }
- }
-
- private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
- throws IOException {
- IOChannelFactory factory = IOChannelUtils.getFactory(target);
- if (factory instanceof GcsIOChannelFactory) {
- return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
- } else {
- return factory.create(target, MimeTypes.BINARY);
- }
- }
-
- /**
- * Utility to verify whether a package has already been staged and, if not, copy it to the
- * staging location.
- */
- private static void stageOnePackage(
- PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
- Sleeper retrySleeper, GcsUtil gcsUtil) {
- String source = attributes.getSourcePath();
- String target = attributes.getDataflowPackage().getLocation();
-
- // TODO: Should we attempt to detect the Mime type rather than
- // always using MimeTypes.BINARY?
- try {
- try {
- long remoteLength = IOChannelUtils.getSizeBytes(target);
- if (remoteLength == attributes.getSize()) {
- LOG.debug("Skipping classpath element already staged: {} at {}",
- attributes.getSourcePath(), target);
- numCached.incrementAndGet();
- return;
- }
- } catch (FileNotFoundException expected) {
- // If the file doesn't exist, it means we need to upload it.
- }
-
- // Upload file, retrying on failure.
- BackOff backoff = BACKOFF_FACTORY.backoff();
- while (true) {
- try {
- LOG.debug("Uploading classpath element {} to {}", source, target);
- try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
- copyContent(source, writer);
- }
- numUploaded.incrementAndGet();
- break;
- } catch (IOException e) {
- if (ERROR_EXTRACTOR.accessDenied(e)) {
- String errorMessage = String.format(
- "Uploaded failed due to permissions error, will NOT retry staging "
- + "of classpath %s. Please verify credentials are valid and that you have "
- + "write access to %s. Stale credentials can be resolved by executing "
- + "'gcloud auth application-default login'.", source, target);
- LOG.error(errorMessage);
- throw new IOException(errorMessage, e);
- }
- long sleep = backoff.nextBackOffMillis();
- if (sleep == BackOff.STOP) {
- // Rethrow last error, to be included as a cause in the catch below.
- LOG.error("Upload failed, will NOT retry staging of classpath: {}",
- source, e);
- throw e;
- } else {
- LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
- source, e);
- retrySleeper.sleep(sleep);
- }
- }
- }
- } catch (Exception e) {
- throw new RuntimeException("Could not stage classpath element: " + source, e);
+ throw new RuntimeException("Package setup failure for " + classpathElement, e);
}
}
@@ -262,70 +137,113 @@ class PackageUtil {
* @param stagingPath The base location to stage the elements to.
* @return A list of cloud workflow packages, each representing a classpath element.
*/
- static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
- ListeningExecutorService executorService =
- MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
- try {
- return stageClasspathElements(
- classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
- } finally {
- executorService.shutdown();
- }
+ public static List<DataflowPackage> stageClasspathElements(
+ Collection<String> classpathElements, String stagingPath) {
+ return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
}
// Visible for testing.
static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, final String stagingPath,
- final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+ Collection<String> classpathElements, String stagingPath,
+ Sleeper retrySleeper) {
LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
+ "prepare for execution.", classpathElements.size());
if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
- + "copies to all workers. Having this many entries on your classpath may be indicative "
- + "of an issue in your pipeline. You may want to consider trimming the classpath to "
- + "necessary dependencies only, using --filesToStage pipeline option to override "
- + "what files are being staged, or bundling several dependencies into one.",
+ + "copies to all workers. Having this many entries on your classpath may be indicative "
+ + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+ + "necessary dependencies only, using --filesToStage pipeline option to override "
+ + "what files are being staged, or bundling several dependencies into one.",
classpathElements.size());
}
- checkArgument(
- stagingPath != null,
- "Can't stage classpath elements because no staging location has been provided");
+ ArrayList<DataflowPackage> packages = new ArrayList<>();
- // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
- List<PackageAttributes> packageAttributes =
- new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
- // Order package attributes in descending size order so that we upload the largest files first.
- Collections.sort(packageAttributes, new PackageUploadOrder());
+ if (stagingPath == null) {
+ throw new IllegalArgumentException(
+ "Can't stage classpath elements on because no staging location has been provided");
+ }
- List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
- final AtomicInteger numUploaded = new AtomicInteger(0);
- final AtomicInteger numCached = new AtomicInteger(0);
+ int numUploaded = 0;
+ int numCached = 0;
+ for (String classpathElement : classpathElements) {
+ String packageName = null;
+ if (classpathElement.contains("=")) {
+ String[] components = classpathElement.split("=", 2);
+ packageName = components[0];
+ classpathElement = components[1];
+ }
- List<ListenableFuture<?>> futures = new LinkedList<>();
- for (final PackageAttributes attributes : packageAttributes) {
- packages.add(attributes.getDataflowPackage());
- futures.add(executorService.submit(new Runnable() {
- @Override
- public void run() {
- stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+ File file = new File(classpathElement);
+ if (!file.exists()) {
+ LOG.warn("Skipping non-existent classpath element {} that was specified.",
+ classpathElement);
+ continue;
+ }
+
+ PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+
+ DataflowPackage workflowPackage = attributes.getDataflowPackage();
+ packages.add(workflowPackage);
+ String target = workflowPackage.getLocation();
+
+ // TODO: Should we attempt to detect the Mime type rather than
+ // always using MimeTypes.BINARY?
+ try {
+ try {
+ long remoteLength = IOChannelUtils.getSizeBytes(target);
+ if (remoteLength == attributes.getSize()) {
+ LOG.debug("Skipping classpath element already staged: {} at {}",
+ classpathElement, target);
+ numCached++;
+ continue;
+ }
+ } catch (FileNotFoundException expected) {
+ // If the file doesn't exist, it means we need to upload it.
}
- }));
- }
- try {
- Futures.allAsList(futures).get();
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new RuntimeException("Interrupted while staging packages", e);
- } catch (ExecutionException e) {
- throw new RuntimeException("Error while staging packages", e.getCause());
+
+ // Upload file, retrying on failure.
+ BackOff backoff = BACKOFF_FACTORY.backoff();
+ while (true) {
+ try {
+ LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
+ try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
+ copyContent(classpathElement, writer);
+ }
+ numUploaded++;
+ break;
+ } catch (IOException e) {
+ if (ERROR_EXTRACTOR.accessDenied(e)) {
+ String errorMessage = String.format(
+ "Uploaded failed due to permissions error, will NOT retry staging "
+ + "of classpath %s. Please verify credentials are valid and that you have "
+ + "write access to %s. Stale credentials can be resolved by executing "
+ + "'gcloud auth login'.", classpathElement, target);
+ LOG.error(errorMessage);
+ throw new IOException(errorMessage, e);
+ }
+ long sleep = backoff.nextBackOffMillis();
+ if (sleep == BackOff.STOP) {
+ // Rethrow last error, to be included as a cause in the catch below.
+ LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+ classpathElement, e);
+ throw e;
+ } else {
+ LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+ classpathElement, e);
+ retrySleeper.sleep(sleep);
+ }
+ }
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
+ }
}
- LOG.info(
- "Staging files complete: {} files cached, {} files newly uploaded",
- numUploaded.get(), numCached.get());
+ LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
+ + "{} files cached",
+ numUploaded, numCached);
return packages;
}
@@ -375,15 +293,13 @@ class PackageUtil {
private final boolean directory;
private final long size;
private final String hash;
- private final String sourcePath;
private DataflowPackage dataflowPackage;
public PackageAttributes(long size, String hash, boolean directory,
- DataflowPackage dataflowPackage, String sourcePath) {
+ DataflowPackage dataflowPackage) {
this.size = size;
this.hash = Objects.requireNonNull(hash, "hash");
this.directory = directory;
- this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
}
@@ -414,12 +330,5 @@ class PackageUtil {
public String getHash() {
return hash;
}
-
- /**
- * @return the file to be uploaded
- */
- public String getSourcePath() {
- return sourcePath;
- }
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 3828415..05a87dd 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
package org.apache.beam.runners.dataflow.util;
import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
@@ -53,7 +53,6 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.common.io.LineReader;
-import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -236,7 +235,7 @@ public class PackageUtilTest {
classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
}
- PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
+ PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
@@ -251,7 +250,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -278,7 +277,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -305,7 +304,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -328,8 +327,7 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
- mockGcsUtil);
+ STAGING_PATH, fastNanoClockAndSleeper);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -350,20 +348,16 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
- mockGcsUtil);
+ STAGING_PATH, fastNanoClockAndSleeper);
fail("Expected RuntimeException");
} catch (RuntimeException e) {
- assertThat("Expected RuntimeException wrapping IOException.",
- e.getCause(), instanceOf(RuntimeException.class));
- assertThat("Expected IOException containing detailed message.",
- e.getCause().getCause(), instanceOf(IOException.class));
- assertThat(e.getCause().getCause().getMessage(),
+ assertTrue("Expected IOException containing detailed message.",
+ e.getCause() instanceof IOException);
+ assertThat(e.getCause().getMessage(),
Matchers.allOf(
Matchers.containsString("Uploaded failed due to permissions error"),
Matchers.containsString(
- "Stale credentials can be resolved by executing 'gcloud auth application-default "
- + "login'")));
+ "Stale credentials can be resolved by executing 'gcloud auth login'")));
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -383,8 +377,9 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
- MoreExecutors.newDirectExecutorService(), mockGcsUtil);
+ ImmutableList.of(tmpFile.getAbsolutePath()),
+ STAGING_PATH,
+ fastNanoClockAndSleeper);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -398,7 +393,7 @@ public class PackageUtilTest {
when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verifyNoMoreInteractions(mockGcsUtil);
@@ -416,7 +411,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -434,8 +429,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
- mockGcsUtil);
+ ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -452,7 +446,7 @@ public class PackageUtilTest {
String nonExistentFile =
IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
- ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
+ ImmutableList.of(nonExistentFile), STAGING_PATH));
}
/**
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 72e106d..0553efc 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,7 +25,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
-import javax.annotation.Nullable;
import org.apache.beam.sdk.util.AppEngineEnvironment;
import org.apache.beam.sdk.util.GcsPathValidator;
import org.apache.beam.sdk.util.GcsUtil;
@@ -82,9 +81,8 @@ public interface GcsOptions extends
+ "information on the restrictions and performance implications of this value.\n\n"
+ "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
+ "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
- @Nullable
Integer getGcsUploadBufferSizeBytes();
- void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
+ void setGcsUploadBufferSizeBytes(Integer bytes);
/**
* The class of the validator that should be created and used to validate paths.
http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index 5e83584..a10ea28 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,18 +101,6 @@ public class GcsUtil {
gcsOptions.getExecutorService(),
gcsOptions.getGcsUploadBufferSizeBytes());
}
-
- /**
- * Returns an instance of {@link GcsUtil} based on the given parameters.
- */
- public static GcsUtil create(
- Storage storageClient,
- HttpRequestInitializer httpRequestInitializer,
- ExecutorService executorService,
- @Nullable Integer uploadBufferSizeBytes) {
- return new GcsUtil(
- storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
- }
}
private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
[29/50] beam git commit: Recommit "DataflowRunner: parallelize
staging of files"
Posted by dh...@apache.org.
Recommit "DataflowRunner: parallelize staging of files"
Revert "This closes #1847"
This reverts commit 1c6e667414788fe99f583fac39d458a4984ae162, reversing
changes made to 6413299a20be57de849684479134479fa1acee2d.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/23e2b913
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/23e2b913
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/23e2b913
Branch: refs/heads/python-sdk
Commit: 23e2b913946acb2690fbac2d751a5672d80121aa
Parents: 1c6e667
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 21:04:20 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 21:04:27 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 +
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
.../runners/dataflow/util/PackageUtilTest.java | 42 ++-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 +
6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index eea5502..9858b3d 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,6 +203,11 @@
</dependency>
<dependency>
+ <groupId>com.google.apis</groupId>
+ <artifactId>google-api-services-storage</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-credentials</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 6ca4c3f..53822e3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,13 +17,19 @@
*/
package org.apache.beam.runners.dataflow.util;
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.storage.Storage;
import java.util.List;
import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
+import org.apache.beam.sdk.util.Transport;
/**
* Utility class for staging files to GCS.
@@ -35,6 +41,7 @@ public class GcsStager implements Stager {
this.options = options;
}
+ @SuppressWarnings("unused") // used via reflection
public static GcsStager fromOptions(PipelineOptions options) {
return new GcsStager(options.as(DataflowPipelineOptions.class));
}
@@ -48,7 +55,16 @@ public class GcsStager implements Stager {
if (windmillBinary != null) {
filesToStage.add("windmill_main=" + windmillBinary);
}
+ int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
+ checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
+ uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
+ Storage.Builder storageBuilder = Transport.newStorageClient(options);
+ GcsUtil util = GcsUtilFactory.create(
+ storageBuilder.build(),
+ storageBuilder.getHttpRequestInitializer(),
+ options.getExecutorService(),
+ uploadSizeBytes);
return PackageUtil.stageClasspathElements(
- options.getFilesToStage(), options.getStagingLocation());
+ options.getFilesToStage(), options.getStagingLocation(), util);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index 6d910ba..fa8c94d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,53 +17,62 @@
*/
package org.apache.beam.runners.dataflow.util;
+import static com.google.common.base.Preconditions.checkArgument;
+
import com.fasterxml.jackson.core.Base64Variants;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.Sleeper;
import com.google.api.services.dataflow.model.DataflowPackage;
import com.google.cloud.hadoop.util.ApiErrorExtractor;
+import com.google.common.collect.Lists;
import com.google.common.hash.Funnels;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.Files;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import javax.annotation.Nullable;
import org.apache.beam.sdk.util.FluentBackoff;
+import org.apache.beam.sdk.util.GcsIOChannelFactory;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.ZipFiles;
+import org.apache.beam.sdk.util.gcsfs.GcsPath;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Helper routines for packages. */
-public class PackageUtil {
+class PackageUtil {
private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
/**
* A reasonable upper bound on the number of jars required to launch a Dataflow job.
*/
- public static final int SANE_CLASSPATH_SIZE = 1000;
- /**
- * The initial interval to use between package staging attempts.
- */
- private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
- /**
- * The maximum number of retries when staging a file.
- */
- private static final int MAX_RETRIES = 4;
+ private static final int SANE_CLASSPATH_SIZE = 1000;
private static final FluentBackoff BACKOFF_FACTORY =
- FluentBackoff.DEFAULT
- .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
+ FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
/**
* Translates exceptions from API calls.
@@ -71,35 +80,18 @@ public class PackageUtil {
private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
/**
- * Creates a DataflowPackage containing information about how a classpath element should be
- * staged, including the staging destination as well as its size and hash.
- *
- * @param classpathElement The local path for the classpath element.
- * @param stagingPath The base location for staged classpath elements.
- * @param overridePackageName If non-null, use the given value as the package name
- * instead of generating one automatically.
- * @return The package.
- */
- @Deprecated
- public static DataflowPackage createPackage(File classpathElement,
- String stagingPath, String overridePackageName) {
- return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
- .getDataflowPackage();
- }
-
- /**
* Compute and cache the attributes of a classpath element that we will need to stage it.
*
- * @param classpathElement the file or directory to be staged.
+ * @param source the file or directory to be staged.
* @param stagingPath The base location for staged classpath elements.
* @param overridePackageName If non-null, use the given value as the package name
* instead of generating one automatically.
* @return a {@link PackageAttributes} that containing metadata about the object to be staged.
*/
- static PackageAttributes createPackageAttributes(File classpathElement,
- String stagingPath, String overridePackageName) {
+ static PackageAttributes createPackageAttributes(File source,
+ String stagingPath, @Nullable String overridePackageName) {
try {
- boolean directory = classpathElement.isDirectory();
+ boolean directory = source.isDirectory();
// Compute size and hash in one pass over file or directory.
Hasher hasher = Hashing.md5().newHasher();
@@ -108,142 +100,232 @@ public class PackageUtil {
if (!directory) {
// Files are staged as-is.
- Files.asByteSource(classpathElement).copyTo(countingOutputStream);
+ Files.asByteSource(source).copyTo(countingOutputStream);
} else {
// Directories are recursively zipped.
- ZipFiles.zipDirectory(classpathElement, countingOutputStream);
+ ZipFiles.zipDirectory(source, countingOutputStream);
}
long size = countingOutputStream.getCount();
String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
// Create the DataflowPackage with staging name and location.
- String uniqueName = getUniqueContentName(classpathElement, hash);
+ String uniqueName = getUniqueContentName(source, hash);
String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
DataflowPackage target = new DataflowPackage();
target.setName(overridePackageName != null ? overridePackageName : uniqueName);
target.setLocation(resourcePath);
- return new PackageAttributes(size, hash, directory, target);
+ return new PackageAttributes(size, hash, directory, target, source.getPath());
} catch (IOException e) {
- throw new RuntimeException("Package setup failure for " + classpathElement, e);
+ throw new RuntimeException("Package setup failure for " + source, e);
}
}
- /**
- * Transfers the classpath elements to the staging location.
- *
- * @param classpathElements The elements to stage.
- * @param stagingPath The base location to stage the elements to.
- * @return A list of cloud workflow packages, each representing a classpath element.
- */
- public static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, String stagingPath) {
- return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
- }
-
- // Visible for testing.
- static List<DataflowPackage> stageClasspathElements(
- Collection<String> classpathElements, String stagingPath,
- Sleeper retrySleeper) {
- LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
- + "prepare for execution.", classpathElements.size());
-
- if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
- LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
- + "copies to all workers. Having this many entries on your classpath may be indicative "
- + "of an issue in your pipeline. You may want to consider trimming the classpath to "
- + "necessary dependencies only, using --filesToStage pipeline option to override "
- + "what files are being staged, or bundling several dependencies into one.",
- classpathElements.size());
- }
-
- ArrayList<DataflowPackage> packages = new ArrayList<>();
+ /** Utility comparator used in uploading packages efficiently. */
+ private static class PackageUploadOrder implements Comparator<PackageAttributes> {
+ @Override
+ public int compare(PackageAttributes o1, PackageAttributes o2) {
+ // Smaller size compares high so that bigger packages are uploaded first.
+ long sizeDiff = o2.getSize() - o1.getSize();
+ if (sizeDiff != 0) {
+ // returns sign of long
+ return Long.signum(sizeDiff);
+ }
- if (stagingPath == null) {
- throw new IllegalArgumentException(
- "Can't stage classpath elements on because no staging location has been provided");
+ // Otherwise, choose arbitrarily based on hash.
+ return o1.getHash().compareTo(o2.getHash());
}
+ }
- int numUploaded = 0;
- int numCached = 0;
+ /**
+ * Utility function that computes sizes and hashes of packages so that we can validate whether
+ * they have already been correctly staged.
+ */
+ private static List<PackageAttributes> computePackageAttributes(
+ Collection<String> classpathElements, final String stagingPath,
+ ListeningExecutorService executorService) {
+ List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
for (String classpathElement : classpathElements) {
- String packageName = null;
+ @Nullable String userPackageName = null;
if (classpathElement.contains("=")) {
String[] components = classpathElement.split("=", 2);
- packageName = components[0];
+ userPackageName = components[0];
classpathElement = components[1];
}
+ @Nullable final String packageName = userPackageName;
- File file = new File(classpathElement);
+ final File file = new File(classpathElement);
if (!file.exists()) {
LOG.warn("Skipping non-existent classpath element {} that was specified.",
classpathElement);
continue;
}
- PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+ ListenableFuture<PackageAttributes> future =
+ executorService.submit(new Callable<PackageAttributes>() {
+ @Override
+ public PackageAttributes call() throws Exception {
+ return createPackageAttributes(file, stagingPath, packageName);
+ }
+ });
+ futures.add(future);
+ }
+
+ try {
+ return Futures.allAsList(futures).get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("Interrupted while staging packages", e);
+ } catch (ExecutionException e) {
+ throw new RuntimeException("Error while staging packages", e.getCause());
+ }
+ }
+
+ private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
+ throws IOException {
+ IOChannelFactory factory = IOChannelUtils.getFactory(target);
+ if (factory instanceof GcsIOChannelFactory) {
+ return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
+ } else {
+ return factory.create(target, MimeTypes.BINARY);
+ }
+ }
- DataflowPackage workflowPackage = attributes.getDataflowPackage();
- packages.add(workflowPackage);
- String target = workflowPackage.getLocation();
+ /**
+ * Utility to verify whether a package has already been staged and, if not, copy it to the
+ * staging location.
+ */
+ private static void stageOnePackage(
+ PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
+ Sleeper retrySleeper, GcsUtil gcsUtil) {
+ String source = attributes.getSourcePath();
+ String target = attributes.getDataflowPackage().getLocation();
- // TODO: Should we attempt to detect the Mime type rather than
- // always using MimeTypes.BINARY?
+ // TODO: Should we attempt to detect the Mime type rather than
+ // always using MimeTypes.BINARY?
+ try {
try {
- try {
- long remoteLength = IOChannelUtils.getSizeBytes(target);
- if (remoteLength == attributes.getSize()) {
- LOG.debug("Skipping classpath element already staged: {} at {}",
- classpathElement, target);
- numCached++;
- continue;
- }
- } catch (FileNotFoundException expected) {
- // If the file doesn't exist, it means we need to upload it.
+ long remoteLength = IOChannelUtils.getSizeBytes(target);
+ if (remoteLength == attributes.getSize()) {
+ LOG.debug("Skipping classpath element already staged: {} at {}",
+ attributes.getSourcePath(), target);
+ numCached.incrementAndGet();
+ return;
}
+ } catch (FileNotFoundException expected) {
+ // If the file doesn't exist, it means we need to upload it.
+ }
- // Upload file, retrying on failure.
- BackOff backoff = BACKOFF_FACTORY.backoff();
- while (true) {
- try {
- LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
- try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
- copyContent(classpathElement, writer);
- }
- numUploaded++;
- break;
- } catch (IOException e) {
- if (ERROR_EXTRACTOR.accessDenied(e)) {
- String errorMessage = String.format(
- "Uploaded failed due to permissions error, will NOT retry staging "
- + "of classpath %s. Please verify credentials are valid and that you have "
- + "write access to %s. Stale credentials can be resolved by executing "
- + "'gcloud auth login'.", classpathElement, target);
- LOG.error(errorMessage);
- throw new IOException(errorMessage, e);
- }
- long sleep = backoff.nextBackOffMillis();
- if (sleep == BackOff.STOP) {
- // Rethrow last error, to be included as a cause in the catch below.
- LOG.error("Upload failed, will NOT retry staging of classpath: {}",
- classpathElement, e);
- throw e;
- } else {
- LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
- classpathElement, e);
- retrySleeper.sleep(sleep);
- }
+ // Upload file, retrying on failure.
+ BackOff backoff = BACKOFF_FACTORY.backoff();
+ while (true) {
+ try {
+ LOG.debug("Uploading classpath element {} to {}", source, target);
+ try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
+ copyContent(source, writer);
+ }
+ numUploaded.incrementAndGet();
+ break;
+ } catch (IOException e) {
+ if (ERROR_EXTRACTOR.accessDenied(e)) {
+ String errorMessage = String.format(
+ "Uploaded failed due to permissions error, will NOT retry staging "
+ + "of classpath %s. Please verify credentials are valid and that you have "
+ + "write access to %s. Stale credentials can be resolved by executing "
+ + "'gcloud auth application-default login'.", source, target);
+ LOG.error(errorMessage);
+ throw new IOException(errorMessage, e);
+ }
+ long sleep = backoff.nextBackOffMillis();
+ if (sleep == BackOff.STOP) {
+ // Rethrow last error, to be included as a cause in the catch below.
+ LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+ source, e);
+ throw e;
+ } else {
+ LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+ source, e);
+ retrySleeper.sleep(sleep);
}
}
- } catch (Exception e) {
- throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
}
+ } catch (Exception e) {
+ throw new RuntimeException("Could not stage classpath element: " + source, e);
}
+ }
- LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
- + "{} files cached",
- numUploaded, numCached);
+ /**
+ * Transfers the classpath elements to the staging location.
+ *
+ * @param classpathElements The elements to stage.
+ * @param stagingPath The base location to stage the elements to.
+ * @return A list of cloud workflow packages, each representing a classpath element.
+ */
+ static List<DataflowPackage> stageClasspathElements(
+ Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
+ ListeningExecutorService executorService =
+ MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
+ try {
+ return stageClasspathElements(
+ classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
+ } finally {
+ executorService.shutdown();
+ }
+ }
+
+ // Visible for testing.
+ static List<DataflowPackage> stageClasspathElements(
+ Collection<String> classpathElements, final String stagingPath,
+ final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+ LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
+ + "prepare for execution.", classpathElements.size());
+
+ if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
+ LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
+ + "copies to all workers. Having this many entries on your classpath may be indicative "
+ + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+ + "necessary dependencies only, using --filesToStage pipeline option to override "
+ + "what files are being staged, or bundling several dependencies into one.",
+ classpathElements.size());
+ }
+
+ checkArgument(
+ stagingPath != null,
+ "Can't stage classpath elements because no staging location has been provided");
+
+ // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
+ List<PackageAttributes> packageAttributes =
+ new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
+ // Order package attributes in descending size order so that we upload the largest files first.
+ Collections.sort(packageAttributes, new PackageUploadOrder());
+
+ List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+ final AtomicInteger numUploaded = new AtomicInteger(0);
+ final AtomicInteger numCached = new AtomicInteger(0);
+
+ List<ListenableFuture<?>> futures = new LinkedList<>();
+ for (final PackageAttributes attributes : packageAttributes) {
+ packages.add(attributes.getDataflowPackage());
+ futures.add(executorService.submit(new Runnable() {
+ @Override
+ public void run() {
+ stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+ }
+ }));
+ }
+ try {
+ Futures.allAsList(futures).get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("Interrupted while staging packages", e);
+ } catch (ExecutionException e) {
+ throw new RuntimeException("Error while staging packages", e.getCause());
+ }
+
+ LOG.info(
+ "Staging files complete: {} files cached, {} files newly uploaded",
+ numCached.get(), numUploaded.get());
return packages;
}
@@ -293,13 +375,15 @@ public class PackageUtil {
private final boolean directory;
private final long size;
private final String hash;
+ private final String sourcePath;
private DataflowPackage dataflowPackage;
public PackageAttributes(long size, String hash, boolean directory,
- DataflowPackage dataflowPackage) {
+ DataflowPackage dataflowPackage, String sourcePath) {
this.size = size;
this.hash = Objects.requireNonNull(hash, "hash");
this.directory = directory;
+ this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
}
@@ -330,5 +414,12 @@ public class PackageUtil {
public String getHash() {
return hash;
}
+
+ /**
+ * @return the file to be uploaded
+ */
+ public String getSourcePath() {
+ return sourcePath;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 05a87dd..3828415 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
package org.apache.beam.runners.dataflow.util;
import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
@@ -53,6 +53,7 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.common.io.LineReader;
+import com.google.common.util.concurrent.MoreExecutors;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -235,7 +236,7 @@ public class PackageUtilTest {
classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
}
- PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
+ PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
@@ -250,7 +251,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -277,7 +278,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -304,7 +305,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -327,7 +328,8 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper);
+ STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+ mockGcsUtil);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -348,16 +350,20 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH, fastNanoClockAndSleeper);
+ STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+ mockGcsUtil);
fail("Expected RuntimeException");
} catch (RuntimeException e) {
- assertTrue("Expected IOException containing detailed message.",
- e.getCause() instanceof IOException);
- assertThat(e.getCause().getMessage(),
+ assertThat("Expected RuntimeException wrapping IOException.",
+ e.getCause(), instanceOf(RuntimeException.class));
+ assertThat("Expected IOException containing detailed message.",
+ e.getCause().getCause(), instanceOf(IOException.class));
+ assertThat(e.getCause().getCause().getMessage(),
Matchers.allOf(
Matchers.containsString("Uploaded failed due to permissions error"),
Matchers.containsString(
- "Stale credentials can be resolved by executing 'gcloud auth login'")));
+ "Stale credentials can be resolved by executing 'gcloud auth application-default "
+ + "login'")));
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -377,9 +383,8 @@ public class PackageUtilTest {
try {
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()),
- STAGING_PATH,
- fastNanoClockAndSleeper);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
+ MoreExecutors.newDirectExecutorService(), mockGcsUtil);
} finally {
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -393,7 +398,7 @@ public class PackageUtilTest {
when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verifyNoMoreInteractions(mockGcsUtil);
@@ -411,7 +416,7 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
PackageUtil.stageClasspathElements(
- ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -429,7 +434,8 @@ public class PackageUtilTest {
when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
- ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
+ ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
+ mockGcsUtil);
DataflowPackage target = Iterables.getOnlyElement(targets);
verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -446,7 +452,7 @@ public class PackageUtilTest {
String nonExistentFile =
IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
- ImmutableList.of(nonExistentFile), STAGING_PATH));
+ ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
}
/**
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 0553efc..72e106d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,6 +25,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
import org.apache.beam.sdk.util.AppEngineEnvironment;
import org.apache.beam.sdk.util.GcsPathValidator;
import org.apache.beam.sdk.util.GcsUtil;
@@ -81,8 +82,9 @@ public interface GcsOptions extends
+ "information on the restrictions and performance implications of this value.\n\n"
+ "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
+ "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
+ @Nullable
Integer getGcsUploadBufferSizeBytes();
- void setGcsUploadBufferSizeBytes(Integer bytes);
+ void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
/**
* The class of the validator that should be created and used to validate paths.
http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index a10ea28..5e83584 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,6 +101,18 @@ public class GcsUtil {
gcsOptions.getExecutorService(),
gcsOptions.getGcsUploadBufferSizeBytes());
}
+
+ /**
+ * Returns an instance of {@link GcsUtil} based on the given parameters.
+ */
+ public static GcsUtil create(
+ Storage storageClient,
+ HttpRequestInitializer httpRequestInitializer,
+ ExecutorService executorService,
+ @Nullable Integer uploadBufferSizeBytes) {
+ return new GcsUtil(
+ storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
+ }
}
private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
[06/50] beam git commit: Removes ReduceFnExecutor interface
Posted by dh...@apache.org.
Removes ReduceFnExecutor interface
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8989473b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8989473b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8989473b
Branch: refs/heads/python-sdk
Commit: 8989473b8e379a40b888565aadead001379c9398
Parents: b333487
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jan 24 13:32:24 2017 -0800
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jan 24 13:32:24 2017 -0800
----------------------------------------------------------------------
.../apache/beam/runners/core/DoFnRunner.java | 20 --------------------
.../core/GroupAlsoByWindowViaWindowSetDoFn.java | 5 +----
.../beam/runners/direct/ParDoEvaluator.java | 2 --
.../runners/spark/translation/DoFnFunction.java | 2 --
.../spark/translation/MultiDoFnFunction.java | 2 --
5 files changed, 1 insertion(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
index 66f95db..b29adcc 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
@@ -17,12 +17,10 @@
*/
package org.apache.beam.runners.core;
-import org.apache.beam.sdk.transforms.Aggregator;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.TimeDomain;
import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
import org.joda.time.Instant;
/**
@@ -51,22 +49,4 @@ public interface DoFnRunner<InputT, OutputT> {
* additional tasks, such as flushing in-memory states.
*/
void finishBundle();
-
- /**
- * An internal interface for signaling that a {@link OldDoFn} requires late data dropping.
- */
- public interface ReduceFnExecutor<K, InputT, OutputT, W> {
- /**
- * Gets this object as a {@link OldDoFn}.
- *
- * <p>Most implementors of this interface are expected to be {@link OldDoFn} instances, and will
- * return themselves.
- */
- OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn();
-
- /**
- * Returns an aggregator that tracks elements that are dropped due to being late.
- */
- Aggregator<Long, Long> getDroppedDueToLatenessAggregator();
- }
}
http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
index ecce4fc..d0387cf 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
@@ -17,7 +17,6 @@
*/
package org.apache.beam.runners.core;
-import org.apache.beam.runners.core.DoFnRunner.ReduceFnExecutor;
import org.apache.beam.runners.core.triggers.ExecutableTriggerStateMachine;
import org.apache.beam.runners.core.triggers.TriggerStateMachines;
import org.apache.beam.sdk.transforms.Aggregator;
@@ -37,7 +36,7 @@ import org.apache.beam.sdk.values.KV;
@SystemDoFnInternal
public class GroupAlsoByWindowViaWindowSetDoFn<
K, InputT, OutputT, W extends BoundedWindow, RinT extends KeyedWorkItem<K, InputT>>
- extends OldDoFn<RinT, KV<K, OutputT>> implements ReduceFnExecutor<K, InputT, OutputT, W> {
+ extends OldDoFn<RinT, KV<K, OutputT>> {
public static <K, InputT, OutputT, W extends BoundedWindow>
OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
@@ -95,7 +94,6 @@ public class GroupAlsoByWindowViaWindowSetDoFn<
reduceFnRunner.persist();
}
- @Override
public OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn() {
// Safe contravariant cast
@SuppressWarnings("unchecked")
@@ -104,7 +102,6 @@ public class GroupAlsoByWindowViaWindowSetDoFn<
return asFn;
}
- @Override
public Aggregator<Long, Long> getDroppedDueToLatenessAggregator() {
return droppedDueToLateness;
}
http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index 97d5360..48f0f8d 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -73,8 +73,6 @@ class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
ReadyCheckingSideInputReader sideInputReader =
evaluationContext.createSideInputReader(sideInputs);
- // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
- // and window-exploded processing is achieved within the simple runner
DoFnRunner<InputT, OutputT> underlying =
DoFnRunners.simpleRunner(
evaluationContext.getPipelineOptions(),
http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
index bd6cfbe..4fd5e51 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
@@ -81,8 +81,6 @@ public class DoFnFunction<InputT, OutputT>
DoFnOutputManager outputManager = new DoFnOutputManager();
- // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
- // and window-exploded processing is achieved within the simple runner
DoFnRunner<InputT, OutputT> doFnRunner =
DoFnRunners.simpleRunner(
runtimeContext.getPipelineOptions(),
http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
index cceffc8..911e6c5 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
@@ -88,8 +88,6 @@ public class MultiDoFnFunction<InputT, OutputT>
DoFnOutputManager outputManager = new DoFnOutputManager();
- // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
- // and window-exploded processing is achieved within the simple runner
DoFnRunner<InputT, OutputT> doFnRunner =
DoFnRunners.simpleRunner(
runtimeContext.getPipelineOptions(),
[25/50] beam git commit: fixup! Hide visibility of internal
implementation class
Posted by dh...@apache.org.
fixup! Hide visibility of internal implementation class
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a67ff91e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a67ff91e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a67ff91e
Branch: refs/heads/python-sdk
Commit: a67ff91e546cb77ad050e6b7573a884f190840cb
Parents: 968c311
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jan 25 14:13:55 2017 -0800
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jan 25 14:13:55 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/a67ff91e/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
index f52b822..04e1755 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
@@ -145,7 +145,7 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
* number of records and read time into a {@link BoundedSource}.
*/
@AutoValue
- public abstract static class UnboundedToBoundedSourceAdapter<T>
+ abstract static class UnboundedToBoundedSourceAdapter<T>
extends BoundedSource<ValueWithRecordId<T>> {
@Nullable abstract UnboundedSource<T, ?> getSource();
@Nullable abstract long getMaxNumRecords();
[09/50] beam git commit: This closes #1590
Posted by dh...@apache.org.
This closes #1590
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f2389ab7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f2389ab7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f2389ab7
Branch: refs/heads/python-sdk
Commit: f2389ab7ba1d562d23420d7e2ecd638524439dc6
Parents: 11c3cd7 dc36952
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 14:41:55 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 14:41:55 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 51 ++++++++++++++------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 36 ++++++++++++++
2 files changed, 71 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
[44/50] beam git commit: [maven-release-plugin] prepare for next
development iteration
Posted by dh...@apache.org.
[maven-release-plugin] prepare for next development iteration
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4a29131d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4a29131d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4a29131d
Branch: refs/heads/python-sdk
Commit: 4a29131d3c0f490d01820e92e028ec07eaffe927
Parents: da2dff9
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:27:16 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:27:16 2017 +0100
----------------------------------------------------------------------
examples/java/pom.xml | 2 +-
examples/java8/pom.xml | 2 +-
examples/pom.xml | 2 +-
pom.xml | 4 ++--
runners/apex/pom.xml | 2 +-
runners/core-java/pom.xml | 2 +-
runners/direct-java/pom.xml | 2 +-
runners/flink/examples/pom.xml | 2 +-
runners/flink/pom.xml | 2 +-
runners/flink/runner/pom.xml | 2 +-
runners/google-cloud-dataflow-java/pom.xml | 2 +-
runners/pom.xml | 2 +-
runners/spark/pom.xml | 2 +-
sdks/java/build-tools/pom.xml | 2 +-
sdks/java/core/pom.xml | 2 +-
sdks/java/extensions/join-library/pom.xml | 2 +-
sdks/java/extensions/pom.xml | 2 +-
sdks/java/extensions/sorter/pom.xml | 2 +-
sdks/java/io/elasticsearch/pom.xml | 2 +-
sdks/java/io/google-cloud-platform/pom.xml | 2 +-
sdks/java/io/hdfs/pom.xml | 2 +-
sdks/java/io/jdbc/pom.xml | 2 +-
sdks/java/io/jms/pom.xml | 2 +-
sdks/java/io/kafka/pom.xml | 2 +-
sdks/java/io/kinesis/pom.xml | 2 +-
sdks/java/io/mongodb/pom.xml | 2 +-
sdks/java/io/mqtt/pom.xml | 2 +-
sdks/java/io/pom.xml | 2 +-
sdks/java/java8tests/pom.xml | 2 +-
sdks/java/maven-archetypes/examples-java8/pom.xml | 2 +-
sdks/java/maven-archetypes/examples/pom.xml | 2 +-
sdks/java/maven-archetypes/pom.xml | 2 +-
sdks/java/maven-archetypes/starter/pom.xml | 2 +-
sdks/java/pom.xml | 2 +-
sdks/pom.xml | 2 +-
35 files changed, 36 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
index e001d1c..9da814b 100644
--- a/examples/java/pom.xml
+++ b/examples/java/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-examples-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
index 370d79f..d0042e3 100644
--- a/examples/java8/pom.xml
+++ b/examples/java8/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-examples-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index 4294c2d..550578b 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2281f67..d09bf59 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,7 +34,7 @@
<url>http://beam.apache.org/</url>
<inceptionYear>2016</inceptionYear>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<licenses>
<license>
@@ -48,7 +48,7 @@
<connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
<url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
- <tag>release-0.5.0</tag>
+ <tag>HEAD</tag>
</scm>
<issueManagement>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/apex/pom.xml
----------------------------------------------------------------------
diff --git a/runners/apex/pom.xml b/runners/apex/pom.xml
index 47139a6..7ae07e2 100644
--- a/runners/apex/pom.xml
+++ b/runners/apex/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/core-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/core-java/pom.xml b/runners/core-java/pom.xml
index 9e8393d..d8706b1 100644
--- a/runners/core-java/pom.xml
+++ b/runners/core-java/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 0d44136..53281be 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/examples/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/examples/pom.xml b/runners/flink/examples/pom.xml
index e424042..1d426bd 100644
--- a/runners/flink/examples/pom.xml
+++ b/runners/flink/examples/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-flink-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/pom.xml b/runners/flink/pom.xml
index 7eab021..6f4236e 100644
--- a/runners/flink/pom.xml
+++ b/runners/flink/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index 8c73385..6a7cbff 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-flink-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index b446b7b..f17eb78 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index ceaedfe..6513a33 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index 196b5bb..5d46f8d 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/build-tools/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/build-tools/pom.xml b/sdks/java/build-tools/pom.xml
index 4c0a749..545f394 100644
--- a/sdks/java/build-tools/pom.xml
+++ b/sdks/java/build-tools/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../../../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index b02cd1d..bb019c1 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/join-library/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/join-library/pom.xml b/sdks/java/extensions/join-library/pom.xml
index def0340..562f921 100644
--- a/sdks/java/extensions/join-library/pom.xml
+++ b/sdks/java/extensions/join-library/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-extensions-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/pom.xml b/sdks/java/extensions/pom.xml
index 01645d2..99e0cb6 100644
--- a/sdks/java/extensions/pom.xml
+++ b/sdks/java/extensions/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/sorter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/sorter/pom.xml b/sdks/java/extensions/sorter/pom.xml
index 1961452..9d03ba2 100644
--- a/sdks/java/extensions/sorter/pom.xml
+++ b/sdks/java/extensions/sorter/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-extensions-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/elasticsearch/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/elasticsearch/pom.xml b/sdks/java/io/elasticsearch/pom.xml
index 94e8c6c..bc2cdb7 100644
--- a/sdks/java/io/elasticsearch/pom.xml
+++ b/sdks/java/io/elasticsearch/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/google-cloud-platform/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/pom.xml b/sdks/java/io/google-cloud-platform/pom.xml
index d800d34..8f9067f 100644
--- a/sdks/java/io/google-cloud-platform/pom.xml
+++ b/sdks/java/io/google-cloud-platform/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/pom.xml b/sdks/java/io/hdfs/pom.xml
index a8be68a..48c269f 100644
--- a/sdks/java/io/hdfs/pom.xml
+++ b/sdks/java/io/hdfs/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index 44c7abd..afe236a 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/jms/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jms/pom.xml b/sdks/java/io/jms/pom.xml
index 4e03a19..80d1f6c 100644
--- a/sdks/java/io/jms/pom.xml
+++ b/sdks/java/io/jms/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/kafka/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/pom.xml b/sdks/java/io/kafka/pom.xml
index 2637449..2dd775e 100644
--- a/sdks/java/io/kafka/pom.xml
+++ b/sdks/java/io/kafka/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/kinesis/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/pom.xml b/sdks/java/io/kinesis/pom.xml
index 29e9e5f..187d0c0 100644
--- a/sdks/java/io/kinesis/pom.xml
+++ b/sdks/java/io/kinesis/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/mongodb/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mongodb/pom.xml b/sdks/java/io/mongodb/pom.xml
index 56de6b3..19d9d18 100644
--- a/sdks/java/io/mongodb/pom.xml
+++ b/sdks/java/io/mongodb/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mqtt/pom.xml b/sdks/java/io/mqtt/pom.xml
index da19028..2547c78 100644
--- a/sdks/java/io/mqtt/pom.xml
+++ b/sdks/java/io/mqtt/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/pom.xml b/sdks/java/io/pom.xml
index ffe3c02..70ccf9d 100644
--- a/sdks/java/io/pom.xml
+++ b/sdks/java/io/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/java8tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/pom.xml b/sdks/java/java8tests/pom.xml
index 8545de4..c503d37 100644
--- a/sdks/java/java8tests/pom.xml
+++ b/sdks/java/java8tests/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/examples-java8/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/pom.xml b/sdks/java/maven-archetypes/examples-java8/pom.xml
index 5f27207..2632d6d 100644
--- a/sdks/java/maven-archetypes/examples-java8/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/examples/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/pom.xml b/sdks/java/maven-archetypes/examples/pom.xml
index e819c8c..09e5428 100644
--- a/sdks/java/maven-archetypes/examples/pom.xml
+++ b/sdks/java/maven-archetypes/examples/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/pom.xml b/sdks/java/maven-archetypes/pom.xml
index 28e9fe9..194e5bd 100644
--- a/sdks/java/maven-archetypes/pom.xml
+++ b/sdks/java/maven-archetypes/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 5b21407..092995a 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/pom.xml b/sdks/java/pom.xml
index 1ab3452..555fdd4 100644
--- a/sdks/java/pom.xml
+++ b/sdks/java/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/pom.xml b/sdks/pom.xml
index 2682728..06dbb9b 100644
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>org.apache.beam</groupId>
<artifactId>beam-parent</artifactId>
- <version>0.5.0-SNAPSHOT</version>
+ <version>0.6.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
[39/50] beam git commit: This closes #1823
Posted by dh...@apache.org.
This closes #1823
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/83f8c460
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/83f8c460
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/83f8c460
Branch: refs/heads/python-sdk
Commit: 83f8c460c93501903864c8e09b4dbcff6903a5ae
Parents: 2cbc08b 6531545
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 17:22:55 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 17:22:55 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 63 ++++++++++++-------
.../io/gcp/bigquery/BigQueryServicesImpl.java | 1 +
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 64 +++++++++++++-------
.../gcp/bigquery/BigQueryServicesImplTest.java | 2 +
4 files changed, 87 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
[23/50] beam git commit: [BEAM-246] re-enable Checkstyle by default
Posted by dh...@apache.org.
[BEAM-246] re-enable Checkstyle by default
This adds 50%+ overhead to a clean build (with testing disabled), but
per dev@ discussion is a huge usability win for contributors and
committers alike.
https://lists.apache.org/thread.html/CAA8k_FKafuon8GEA3CXwR2MZh2kAXEFZQK=BgX5tk2fZJebrag@mail.gmail.com
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f05c5d32
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f05c5d32
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f05c5d32
Branch: refs/heads/python-sdk
Commit: f05c5d32cb5dbee6de4247a803d7b7c7fbe52173
Parents: c525783
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 13:52:06 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 12:13:33 2017 -0800
----------------------------------------------------------------------
examples/pom.xml | 14 +++++++++-----
runners/pom.xml | 14 +++++++++-----
sdks/pom.xml | 13 +++++++------
3 files changed, 25 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index adfbaa9..4294c2d 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -51,11 +51,6 @@
<build>
<plugins>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
-
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
</plugin>
@@ -64,4 +59,13 @@
</profile>
</profiles>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
</project>
http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index fb84164..ceaedfe 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -47,11 +47,6 @@
<build>
<plugins>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
-
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
</plugin>
@@ -99,4 +94,13 @@
</build>
</profile>
</profiles>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/sdks/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/pom.xml b/sdks/pom.xml
index bfdfcd9..2682728 100644
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@ -42,11 +42,6 @@
<build>
<plugins>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
-
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
</plugin>
@@ -58,7 +53,6 @@
<build>
<pluginManagement>
<plugins>
-
<!-- SDKs will generally offer test suites for runners, as sdks/java does. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -74,6 +68,13 @@
</plugin>
</plugins>
</pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ </plugins>
</build>
</project>
[02/50] beam git commit: [BEAM-1258] demote retrying loggings to info
level.
Posted by dh...@apache.org.
[BEAM-1258] demote retrying loggings to info level.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3afdc5c0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3afdc5c0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3afdc5c0
Branch: refs/heads/python-sdk
Commit: 3afdc5c0ef37e48b1750f70e54cd64f5063da83b
Parents: 2a23e8b
Author: Pei He <pe...@google.com>
Authored: Tue Jan 24 11:09:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800
----------------------------------------------------------------------
.../sdk/io/gcp/bigquery/BigQueryServicesImpl.java | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/3afdc5c0/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index c9edf7c..c524ce4 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -220,11 +220,11 @@ class BigQueryServicesImpl implements BigQueryServices {
return; // SUCCEEDED
}
// ignore and retry
- LOG.warn("Ignore the error and retry inserting the job.", e);
+ LOG.info("Ignore the error and retry inserting the job.", e);
lastException = e;
} catch (IOException e) {
// ignore and retry
- LOG.warn("Ignore the error and retry inserting the job.", e);
+ LOG.info("Ignore the error and retry inserting the job.", e);
lastException = e;
}
} while (nextBackOff(sleeper, backoff));
@@ -261,7 +261,7 @@ class BigQueryServicesImpl implements BigQueryServices {
// The job is not DONE, wait longer and retry.
} catch (IOException e) {
// ignore and retry
- LOG.warn("Ignore the error and retry polling job status.", e);
+ LOG.info("Ignore the error and retry polling job status.", e);
}
} while (nextBackOff(sleeper, backoff));
LOG.warn("Unable to poll job status: {}, aborting after reached max .", jobRef.getJobId());
@@ -316,12 +316,12 @@ class BigQueryServicesImpl implements BigQueryServices {
LOG.info("No BigQuery job with job id {} found.", jobId);
return null;
}
- LOG.warn(
+ LOG.info(
"Ignoring the error encountered while trying to query the BigQuery job {}",
jobId, e);
lastException = e;
} catch (IOException e) {
- LOG.warn(
+ LOG.info(
"Ignoring the error encountered while trying to query the BigQuery job {}",
jobId, e);
lastException = e;
@@ -618,10 +618,10 @@ class BigQueryServicesImpl implements BigQueryServices {
return; // SUCCEEDED
}
// ignore and retry
- LOG.warn("Ignore the error and retry creating the dataset.", e);
+ LOG.info("Ignore the error and retry creating the dataset.", e);
lastException = e;
} catch (IOException e) {
- LOG.warn("Ignore the error and retry creating the dataset.", e);
+ LOG.info("Ignore the error and retry creating the dataset.", e);
lastException = e;
}
} while (nextBackOff(sleeper, backoff));
@@ -891,7 +891,7 @@ class BigQueryServicesImpl implements BigQueryServices {
if (!shouldRetry.apply(e)) {
break;
}
- LOG.warn("Ignore the error and retry the request.", e);
+ LOG.info("Ignore the error and retry the request.", e);
}
} while (nextBackOff(sleeper, backoff));
throw new IOException(
[14/50] beam git commit: Refactor BigQueryServices to have
TableReference in method signatures
Posted by dh...@apache.org.
Refactor BigQueryServices to have TableReference in method signatures
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f9d1d682
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f9d1d682
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f9d1d682
Branch: refs/heads/python-sdk
Commit: f9d1d682340fa3083bc18723605bf3d0aa6d76cd
Parents: e77de7c
Author: Pei He <pe...@google.com>
Authored: Tue Jan 24 16:45:16 2017 -0800
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jan 24 18:00:40 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 40 +++++--------------
.../sdk/io/gcp/bigquery/BigQueryServices.java | 9 ++---
.../io/gcp/bigquery/BigQueryServicesImpl.java | 23 ++++-------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 41 ++++++++------------
.../sdk/io/gcp/bigquery/BigQueryUtilTest.java | 3 +-
5 files changed, 40 insertions(+), 76 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index fa49f55..b6f9fb0 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -997,8 +997,7 @@ public class BigQueryIO {
TableReference table = JSON_FACTORY.fromString(jsonTable.get(), TableReference.class);
Long numBytes = bqServices.getDatasetService(options.as(BigQueryOptions.class))
- .getTable(table.getProjectId(), table.getDatasetId(), table.getTableId())
- .getNumBytes();
+ .getTable(table).getNumBytes();
tableSizeBytes.compareAndSet(null, numBytes);
}
return tableSizeBytes.get();
@@ -1088,10 +1087,7 @@ public class BigQueryIO {
DatasetService tableService = bqServices.getDatasetService(bqOptions);
if (referencedTables != null && !referencedTables.isEmpty()) {
TableReference queryTable = referencedTables.get(0);
- location = tableService.getTable(
- queryTable.getProjectId(),
- queryTable.getDatasetId(),
- queryTable.getTableId()).getLocation();
+ location = tableService.getTable(queryTable).getLocation();
}
// 2. Create the temporary dataset in the query location.
@@ -1120,10 +1116,7 @@ public class BigQueryIO {
JSON_FACTORY.fromString(jsonQueryTempTable.get(), TableReference.class);
DatasetService tableService = bqServices.getDatasetService(bqOptions);
- tableService.deleteTable(
- tableToRemove.getProjectId(),
- tableToRemove.getDatasetId(),
- tableToRemove.getTableId());
+ tableService.deleteTable(tableToRemove);
tableService.deleteDataset(tableToRemove.getProjectId(), tableToRemove.getDatasetId());
}
@@ -1227,10 +1220,8 @@ public class BigQueryIO {
String extractJobId = getExtractJobId(jobIdToken);
List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
- TableSchema tableSchema = bqServices.getDatasetService(bqOptions).getTable(
- tableToExtract.getProjectId(),
- tableToExtract.getDatasetId(),
- tableToExtract.getTableId()).getSchema();
+ TableSchema tableSchema = bqServices.getDatasetService(bqOptions)
+ .getTable(tableToExtract).getSchema();
cleanupTempResource(bqOptions);
return createSources(tempFiles, tableSchema);
@@ -1867,13 +1858,9 @@ public class BigQueryIO {
DatasetService datasetService,
TableReference tableRef) {
try {
- if (datasetService.getTable(
- tableRef.getProjectId(),
- tableRef.getDatasetId(),
- tableRef.getTableId()) != null) {
+ if (datasetService.getTable(tableRef) != null) {
checkState(
- datasetService.isTableEmpty(
- tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
+ datasetService.isTableEmpty(tableRef),
"BigQuery table is not empty: %s.",
BigQueryIO.toTableSpec(tableRef));
}
@@ -2535,10 +2522,7 @@ public class BigQueryIO {
for (TableReference tableRef : tempTables) {
try {
LOG.debug("Deleting table {}", toJsonString(tableRef));
- tableService.deleteTable(
- tableRef.getProjectId(),
- tableRef.getDatasetId(),
- tableRef.getTableId());
+ tableService.deleteTable(tableRef);
} catch (Exception e) {
LOG.warn("Failed to delete the table {}", toJsonString(tableRef), e);
}
@@ -2587,7 +2571,7 @@ public class BigQueryIO {
private static void verifyTablePresence(DatasetService datasetService, TableReference table) {
try {
- datasetService.getTable(table.getProjectId(), table.getDatasetId(), table.getTableId());
+ datasetService.getTable(table);
} catch (Exception e) {
ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
if ((e instanceof IOException) && errorExtractor.itemNotFound((IOException) e)) {
@@ -2712,11 +2696,7 @@ public class BigQueryIO {
// every thread from attempting a create and overwhelming our BigQuery quota.
DatasetService datasetService = bqServices.getDatasetService(options);
if (!createdTables.contains(tableSpec)) {
- Table table = datasetService.getTable(
- tableReference.getProjectId(),
- tableReference.getDatasetId(),
- tableReference.getTableId());
- if (table == null) {
+ if (datasetService.getTable(tableReference) == null) {
TableSchema tableSchema = JSON_FACTORY.fromString(
jsonTableSchema.get(), TableSchema.class);
datasetService.createTable(
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
index 32cf46d..03e4391 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
@@ -119,8 +119,7 @@ interface BigQueryServices extends Serializable {
* <p>Returns null if the table is not found.
*/
@Nullable
- Table getTable(String projectId, String datasetId, String tableId)
- throws InterruptedException, IOException;
+ Table getTable(TableReference tableRef) throws InterruptedException, IOException;
/**
* Creates the specified table if it does not exist.
@@ -131,16 +130,14 @@ interface BigQueryServices extends Serializable {
* Deletes the table specified by tableId from the dataset.
* If the table contains data, all the data will be deleted.
*/
- void deleteTable(String projectId, String datasetId, String tableId)
- throws IOException, InterruptedException;
+ void deleteTable(TableReference tableRef) throws IOException, InterruptedException;
/**
* Returns true if the table is empty.
*
* @throws IOException if the table is not found.
*/
- boolean isTableEmpty(String projectId, String datasetId, String tableId)
- throws IOException, InterruptedException;
+ boolean isTableEmpty(TableReference tableRef) throws IOException, InterruptedException;
/**
* Gets the specified {@link Dataset} resource by dataset ID.
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index c524ce4..75796ab 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -394,15 +394,12 @@ class BigQueryServicesImpl implements BigQueryServices {
*/
@Override
@Nullable
- public Table getTable(String projectId, String datasetId, String tableId)
+ public Table getTable(TableReference tableRef)
throws IOException, InterruptedException {
BackOff backoff =
FluentBackoff.DEFAULT
.withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
- return getTable(
- new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId),
- backoff,
- Sleeper.DEFAULT);
+ return getTable(tableRef, backoff, Sleeper.DEFAULT);
}
@VisibleForTesting
@@ -506,31 +503,27 @@ class BigQueryServicesImpl implements BigQueryServices {
* @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
*/
@Override
- public void deleteTable(String projectId, String datasetId, String tableId)
- throws IOException, InterruptedException {
+ public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
BackOff backoff =
FluentBackoff.DEFAULT
.withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
executeWithRetries(
- client.tables().delete(projectId, datasetId, tableId),
+ client.tables().delete(
+ tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
String.format(
"Unable to delete table: %s, aborting after %d retries.",
- tableId, MAX_RPC_RETRIES),
+ tableRef.getTableId(), MAX_RPC_RETRIES),
Sleeper.DEFAULT,
backoff,
ALWAYS_RETRY);
}
@Override
- public boolean isTableEmpty(String projectId, String datasetId, String tableId)
- throws IOException, InterruptedException {
+ public boolean isTableEmpty(TableReference tableRef) throws IOException, InterruptedException {
BackOff backoff =
FluentBackoff.DEFAULT
.withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
- return isTableEmpty(
- new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId),
- backoff,
- Sleeper.DEFAULT);
+ return isTableEmpty(tableRef, backoff, Sleeper.DEFAULT);
}
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index ba7f44e..0b8d60d 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -32,6 +32,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doNothing;
@@ -526,18 +527,18 @@ public class BigQueryIOTest implements Serializable {
private static class FakeDatasetService implements DatasetService, Serializable {
@Override
- public Table getTable(String projectId, String datasetId, String tableId)
+ public Table getTable(TableReference tableRef)
throws InterruptedException, IOException {
synchronized (tables) {
Map<String, TableContainer> dataset =
checkNotNull(
- tables.get(projectId, datasetId),
+ tables.get(tableRef.getProjectId(), tableRef.getDatasetId()),
"Tried to get a dataset %s:%s from %s, but no such dataset was set",
- projectId,
- datasetId,
- tableId,
+ tableRef.getProjectId(),
+ tableRef.getDatasetId(),
+ tableRef.getTableId(),
FakeDatasetService.class.getSimpleName());
- TableContainer tableContainer = dataset.get(tableId);
+ TableContainer tableContainer = dataset.get(tableRef.getTableId());
return tableContainer == null ? null : tableContainer.getTable();
}
}
@@ -569,8 +570,7 @@ public class BigQueryIOTest implements Serializable {
}
@Override
- public void deleteTable(String projectId, String datasetId, String tableId)
- throws IOException, InterruptedException {
+ public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
throw new UnsupportedOperationException("Unsupported");
}
@@ -595,9 +595,9 @@ public class BigQueryIOTest implements Serializable {
}
@Override
- public boolean isTableEmpty(String projectId, String datasetId, String tableId)
+ public boolean isTableEmpty(TableReference tableRef)
throws IOException, InterruptedException {
- Long numBytes = getTable(projectId, datasetId, tableId).getNumBytes();
+ Long numBytes = getTable(tableRef).getNumBytes();
return numBytes == null || numBytes == 0L;
}
@@ -1738,7 +1738,7 @@ public class BigQueryIOTest implements Serializable {
IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
when(mockIOChannelFactory.resolve(anyString(), anyString()))
.thenReturn("mock://tempLocation/output");
- when(mockDatasetService.getTable(anyString(), anyString(), anyString()))
+ when(mockDatasetService.getTable(any(TableReference.class)))
.thenReturn(new Table().setSchema(new TableSchema()));
Assert.assertThat(
@@ -1810,13 +1810,9 @@ public class BigQueryIOTest implements Serializable {
new JobStatistics2()
.setTotalBytesProcessed(100L)
.setReferencedTables(ImmutableList.of(queryTable))));
- when(mockDatasetService.getTable(
- eq(queryTable.getProjectId()), eq(queryTable.getDatasetId()), eq(queryTable.getTableId())))
+ when(mockDatasetService.getTable(eq(queryTable)))
.thenReturn(new Table().setSchema(new TableSchema()));
- when(mockDatasetService.getTable(
- eq(destinationTable.getProjectId()),
- eq(destinationTable.getDatasetId()),
- eq(destinationTable.getTableId())))
+ when(mockDatasetService.getTable(eq(destinationTable)))
.thenReturn(new Table().setSchema(new TableSchema()));
IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
when(mockIOChannelFactory.resolve(anyString(), anyString()))
@@ -1898,10 +1894,7 @@ public class BigQueryIOTest implements Serializable {
.thenReturn(new JobStatistics().setQuery(
new JobStatistics2()
.setTotalBytesProcessed(100L)));
- when(mockDatasetService.getTable(
- eq(destinationTable.getProjectId()),
- eq(destinationTable.getDatasetId()),
- eq(destinationTable.getTableId())))
+ when(mockDatasetService.getTable(eq(destinationTable)))
.thenReturn(new Table().setSchema(new TableSchema()));
IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
when(mockIOChannelFactory.resolve(anyString(), anyString()))
@@ -2263,9 +2256,9 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, tables.get(2))));
doThrow(new IOException("Unable to delete table"))
- .when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(0));
- doNothing().when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(1));
- doNothing().when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(2));
+ .when(mockDatasetService).deleteTable(tableRefs.get(0));
+ doNothing().when(mockDatasetService).deleteTable(tableRefs.get(1));
+ doNothing().when(mockDatasetService).deleteTable(tableRefs.get(2));
WriteRename.removeTemporaryTables(mockDatasetService, tableRefs);
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
index 8130238..7b5b226 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
@@ -370,7 +370,8 @@ public class BigQueryUtilTest {
BigQueryServicesImpl.DatasetServiceImpl services =
new BigQueryServicesImpl.DatasetServiceImpl(mockClient, options);
- services.getTable("project", "dataset", "table");
+ services.getTable(
+ new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table"));
verifyTableGet();
}
[47/50] beam git commit: This closes #1850
Posted by dh...@apache.org.
This closes #1850
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/34b4a6d9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/34b4a6d9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/34b4a6d9
Branch: refs/heads/python-sdk
Commit: 34b4a6d9dc2cf5e8da43346077a36b460501afe2
Parents: b21bdf4 31c63cb
Author: Thomas Weise <th...@apache.org>
Authored: Fri Jan 27 14:01:09 2017 -0800
Committer: Thomas Weise <th...@apache.org>
Committed: Fri Jan 27 14:01:09 2017 -0800
----------------------------------------------------------------------
.../beam/runners/apex/ApexPipelineOptions.java | 7 +-
.../apache/beam/runners/apex/ApexRunner.java | 43 ++++++++---
.../beam/runners/apex/ApexYarnLauncher.java | 23 +++++-
.../beam/runners/apex/ApexRunnerTest.java | 75 ++++++++++++++++++++
.../beam/runners/apex/ApexYarnLauncherTest.java | 9 ++-
.../test/resources/beam-runners-apex.properties | 20 ++++++
6 files changed, 161 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
[32/50] beam git commit: DataflowRunner: upgrade worker with Pubsub
attribute changes
Posted by dh...@apache.org.
DataflowRunner: upgrade worker with Pubsub attribute changes
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e591d8b9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e591d8b9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e591d8b9
Branch: refs/heads/python-sdk
Commit: e591d8b91ac81c86c0e41af58422a4ea27c9727e
Parents: b4726d0
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 06:56:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 09:37:59 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/e591d8b9/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 9858b3d..b446b7b 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,7 +33,7 @@
<packaging>jar</packaging>
<properties>
- <dataflow.container_version>beam-master-20170120</dataflow.container_version>
+ <dataflow.container_version>beam-master-20170126</dataflow.container_version>
<dataflow.environment_major_version>6</dataflow.environment_major_version>
</properties>
[38/50] beam git commit: [BEAM-1235] BigQueryIO.Write: log failed
load/copy jobs.
Posted by dh...@apache.org.
[BEAM-1235] BigQueryIO.Write: log failed load/copy jobs.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6531545e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6531545e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6531545e
Branch: refs/heads/python-sdk
Commit: 6531545e647f98870a69bd46fabbbadb727969e5
Parents: 2cbc08b
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 16:25:43 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 17:22:52 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 63 ++++++++++++-------
.../io/gcp/bigquery/BigQueryServicesImpl.java | 1 +
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 64 +++++++++++++-------
.../gcp/bigquery/BigQueryServicesImplTest.java | 2 +
4 files changed, 87 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index b6f9fb0..4ace985 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1155,7 +1155,8 @@ public class BigQueryIO {
jobService.startQueryJob(jobRef, queryConfig);
Job job = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
if (parseStatus(job) != Status.SUCCEEDED) {
- throw new IOException("Query job failed: " + jobId);
+ throw new IOException(String.format(
+ "Query job %s failed, status: %s.", jobId, statusToPrettyString(job.getStatus())));
}
}
@@ -1260,8 +1261,8 @@ public class BigQueryIO {
jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
if (parseStatus(extractJob) != Status.SUCCEEDED) {
throw new IOException(String.format(
- "Extract job %s failed, status: %s",
- extractJob.getJobReference().getJobId(), extractJob.getStatus()));
+ "Extract job %s failed, status: %s.",
+ extractJob.getJobReference().getJobId(), statusToPrettyString(extractJob.getStatus())));
}
List<String> tempFiles = getExtractFilePaths(extractDestinationDir, extractJob);
@@ -2361,30 +2362,36 @@ public class BigQueryIO {
.setSourceFormat("NEWLINE_DELIMITED_JSON");
String projectId = ref.getProjectId();
+ Job lastFailedLoadJob = null;
for (int i = 0; i < Bound.MAX_RETRY_JOBS; ++i) {
String jobId = jobIdPrefix + "-" + i;
- LOG.info("Starting BigQuery load job {}: try {}/{}", jobId, i, Bound.MAX_RETRY_JOBS);
JobReference jobRef = new JobReference()
.setProjectId(projectId)
.setJobId(jobId);
jobService.startLoadJob(jobRef, loadConfig);
- Status jobStatus =
- parseStatus(jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES));
+ Job loadJob = jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES);
+ Status jobStatus = parseStatus(loadJob);
switch (jobStatus) {
case SUCCEEDED:
return;
case UNKNOWN:
- throw new RuntimeException("Failed to poll the load job status of job " + jobId);
+ throw new RuntimeException(String.format(
+ "UNKNOWN status of load job [%s]: %s.", jobId, jobToPrettyString(loadJob)));
case FAILED:
- LOG.info("BigQuery load job failed: {}", jobId);
+ lastFailedLoadJob = loadJob;
continue;
default:
- throw new IllegalStateException(String.format("Unexpected job status: %s of job %s",
- jobStatus, jobId));
+ throw new IllegalStateException(String.format(
+ "Unexpected status [%s] of load job: %s.",
+ jobStatus, jobToPrettyString(loadJob)));
}
}
- throw new RuntimeException(String.format("Failed to create the load job %s, reached max "
- + "retries: %d", jobIdPrefix, Bound.MAX_RETRY_JOBS));
+ throw new RuntimeException(String.format(
+ "Failed to create load job with id prefix %s, "
+ + "reached max retries: %d, last failed load job: %s.",
+ jobIdPrefix,
+ Bound.MAX_RETRY_JOBS,
+ jobToPrettyString(lastFailedLoadJob)));
}
static void removeTemporaryFiles(
@@ -2491,30 +2498,36 @@ public class BigQueryIO {
.setCreateDisposition(createDisposition.name());
String projectId = ref.getProjectId();
+ Job lastFailedCopyJob = null;
for (int i = 0; i < Bound.MAX_RETRY_JOBS; ++i) {
String jobId = jobIdPrefix + "-" + i;
- LOG.info("Starting BigQuery copy job {}: try {}/{}", jobId, i, Bound.MAX_RETRY_JOBS);
JobReference jobRef = new JobReference()
.setProjectId(projectId)
.setJobId(jobId);
jobService.startCopyJob(jobRef, copyConfig);
- Status jobStatus =
- parseStatus(jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES));
+ Job copyJob = jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES);
+ Status jobStatus = parseStatus(copyJob);
switch (jobStatus) {
case SUCCEEDED:
return;
case UNKNOWN:
- throw new RuntimeException("Failed to poll the copy job status of job " + jobId);
+ throw new RuntimeException(String.format(
+ "UNKNOWN status of copy job [%s]: %s.", jobId, jobToPrettyString(copyJob)));
case FAILED:
- LOG.info("BigQuery copy job failed: {}", jobId);
+ lastFailedCopyJob = copyJob;
continue;
default:
- throw new IllegalStateException(String.format("Unexpected job status: %s of job %s",
- jobStatus, jobId));
+ throw new IllegalStateException(String.format(
+ "Unexpected status [%s] of load job: %s.",
+ jobStatus, jobToPrettyString(copyJob)));
}
}
- throw new RuntimeException(String.format("Failed to create the copy job %s, reached max "
- + "retries: %d", jobIdPrefix, Bound.MAX_RETRY_JOBS));
+ throw new RuntimeException(String.format(
+ "Failed to create copy job with id prefix %s, "
+ + "reached max retries: %d, last failed copy job: %s.",
+ jobIdPrefix,
+ Bound.MAX_RETRY_JOBS,
+ jobToPrettyString(lastFailedCopyJob)));
}
static void removeTemporaryTables(DatasetService tableService,
@@ -2549,6 +2562,14 @@ public class BigQueryIO {
private Write() {}
}
+ private static String jobToPrettyString(@Nullable Job job) throws IOException {
+ return job == null ? "null" : job.toPrettyString();
+ }
+
+ private static String statusToPrettyString(@Nullable JobStatus status) throws IOException {
+ return status == null ? "Unknown status: null." : status.toPrettyString();
+ }
+
private static void verifyDatasetPresence(DatasetService datasetService, TableReference table) {
try {
datasetService.getDataset(table.getProjectId(), table.getDatasetId());
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 75796ab..7c3edbe 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -214,6 +214,7 @@ class BigQueryServicesImpl implements BigQueryServices {
do {
try {
client.jobs().insert(jobRef.getProjectId(), job).execute();
+ LOG.info("Started BigQuery job: {}.", jobRef);
return; // SUCCEEDED
} catch (GoogleJsonResponseException e) {
if (errorExtractor.itemAlreadyExists(e)) {
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 0b8d60d..bbfc2ce 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -988,12 +988,6 @@ public class BigQueryIOTest implements Serializable {
.withoutValidation());
p.run();
- logged.verifyInfo("Starting BigQuery load job");
- logged.verifyInfo("BigQuery load job failed");
- logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyInfo("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyNotLogged("try 3/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
File tempDir = new File(bqOptions.getTempLocation());
testNumFiles(tempDir, 0);
}
@@ -1232,11 +1226,49 @@ public class BigQueryIOTest implements Serializable {
.withoutValidation());
thrown.expect(RuntimeException.class);
- thrown.expectMessage("Failed to poll the load job status");
- p.run();
+ thrown.expectMessage("UNKNOWN status of load job");
+ try {
+ p.run();
+ } finally {
+ File tempDir = new File(bqOptions.getTempLocation());
+ testNumFiles(tempDir, 0);
+ }
+ }
- File tempDir = new File(bqOptions.getTempLocation());
- testNumFiles(tempDir, 0);
+ @Test
+ @Category(NeedsRunner.class)
+ public void testWriteFailedJobs() throws Exception {
+ BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
+ bqOptions.setProject("defaultProject");
+ bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
+
+ FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
+ .withJobService(new FakeJobService()
+ .startJobReturns("done", "done", "done")
+ .pollJobReturns(Status.FAILED, Status.FAILED, Status.FAILED));
+
+ Pipeline p = TestPipeline.create(bqOptions);
+ p.apply(Create.of(
+ new TableRow().set("name", "a").set("number", 1),
+ new TableRow().set("name", "b").set("number", 2),
+ new TableRow().set("name", "c").set("number", 3))
+ .withCoder(TableRowJsonCoder.of()))
+ .apply(BigQueryIO.Write.to("dataset-id.table-id")
+ .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+ .withTestServices(fakeBqServices)
+ .withoutValidation());
+
+ thrown.expect(RuntimeException.class);
+ thrown.expectMessage("Failed to create load job with id prefix");
+ thrown.expectMessage("reached max retries");
+ thrown.expectMessage("last failed load job");
+
+ try {
+ p.run();
+ } finally {
+ File tempDir = new File(bqOptions.getTempLocation());
+ testNumFiles(tempDir, 0);
+ }
}
@Test
@@ -2164,12 +2196,6 @@ public class BigQueryIOTest implements Serializable {
List<String> tempTables = tester.takeOutputElements();
- logged.verifyInfo("Starting BigQuery load job");
- logged.verifyInfo("BigQuery load job failed");
- logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyNotLogged("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-
assertEquals(expectedTempTables, tempTables);
}
@@ -2237,12 +2263,6 @@ public class BigQueryIOTest implements Serializable {
DoFnTester<String, Void> tester = DoFnTester.of(writeRename);
tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
tester.processElement(null);
-
- logged.verifyInfo("Starting BigQuery copy job");
- logged.verifyInfo("BigQuery copy job failed");
- logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
- logged.verifyNotLogged("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
}
@Test
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
index 1ce10f1..ef51650 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
@@ -138,6 +138,7 @@ public class BigQueryServicesImplTest {
verify(response, times(1)).getStatusCode();
verify(response, times(1)).getContent();
verify(response, times(1)).getContentType();
+ expectedLogs.verifyInfo(String.format("Started BigQuery job: %s", jobRef));
}
/**
@@ -161,6 +162,7 @@ public class BigQueryServicesImplTest {
verify(response, times(1)).getStatusCode();
verify(response, times(1)).getContent();
verify(response, times(1)).getContentType();
+ expectedLogs.verifyNotLogged("Started BigQuery job");
}
/**
[05/50] beam git commit: This closes #1825
Posted by dh...@apache.org.
This closes #1825
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b3334879
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b3334879
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b3334879
Branch: refs/heads/python-sdk
Commit: b3334879fb75150b6f07c24a138fb1d92e1d7def
Parents: cb6e0a8 3afdc5c
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 12:25:28 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:28 2017 -0800
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 32 +++++++++++---------
.../sdk/io/gcp/bigquery/BigQueryServices.java | 2 ++
.../io/gcp/bigquery/BigQueryServicesImpl.java | 16 +++++-----
3 files changed, 27 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
[50/50] beam git commit: Closes #1861
Posted by dh...@apache.org.
Closes #1861
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/27cf68ee
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/27cf68ee
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/27cf68ee
Branch: refs/heads/python-sdk
Commit: 27cf68ee72bd58475c170712f7afe20102601606
Parents: 1bc6859 f1b8679
Author: Dan Halperin <dh...@google.com>
Authored: Sun Jan 29 08:21:18 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Sun Jan 29 08:21:18 2017 -0800
----------------------------------------------------------------------
.jenkins/common_job_properties.groovy | 9 +-
...job_beam_PostCommit_Java_MavenInstall.groovy | 2 +-
.../job_beam_PreCommit_Java_MavenInstall.groovy | 2 +-
.../job_beam_Release_NightlySnapshot.groovy | 2 +-
.jenkins/job_seed.groovy | 2 +-
.travis/README.md | 2 +-
DISCLAIMER | 10 -
NOTICE | 4 +-
README.md | 46 +-
examples/java/README.md | 16 +-
examples/java/pom.xml | 21 +-
.../beam/examples/DebuggingWordCount.java | 4 +-
.../org/apache/beam/examples/WordCount.java | 6 +-
.../beam/examples/complete/AutoComplete.java | 2 +-
.../org/apache/beam/examples/complete/README.md | 14 +-
.../apache/beam/examples/complete/TfIdf.java | 2 +-
.../examples/complete/TopWikipediaSessions.java | 2 +-
.../examples/complete/TrafficMaxLaneFlow.java | 2 +-
.../beam/examples/complete/TrafficRoutes.java | 2 +-
.../examples/cookbook/BigQueryTornadoes.java | 2 +-
.../cookbook/CombinePerKeyExamples.java | 2 +-
.../org/apache/beam/examples/cookbook/README.md | 14 +-
.../beam/examples/cookbook/TriggerExample.java | 4 +-
.../beam/examples/WindowedWordCountIT.java | 16 +-
examples/java8/pom.xml | 2 +-
.../beam/examples/complete/game/GameStats.java | 7 +-
.../examples/complete/game/LeaderBoard.java | 5 +-
.../beam/examples/complete/game/UserScore.java | 2 +-
examples/pom.xml | 16 +-
pom.xml | 41 +-
runners/apex/README.md | 4 +-
runners/apex/pom.xml | 3 +-
.../beam/runners/apex/ApexPipelineOptions.java | 7 +-
.../apache/beam/runners/apex/ApexRunner.java | 43 +-
.../beam/runners/apex/ApexYarnLauncher.java | 23 +-
.../translation/CreateValuesTranslator.java | 18 +-
.../FlattenPCollectionTranslator.java | 28 +-
.../apex/translation/GroupByKeyTranslator.java | 2 +-
.../translation/ParDoBoundMultiTranslator.java | 27 +-
.../apex/translation/ParDoBoundTranslator.java | 4 +-
.../apex/translation/TranslationContext.java | 27 +-
.../apex/translation/WindowBoundTranslator.java | 8 +-
.../operators/ApexGroupByKeyOperator.java | 4 +-
.../operators/ApexParDoOperator.java | 6 +-
.../ApexReadUnboundedInputOperator.java | 17 +-
.../beam/runners/apex/ApexRunnerTest.java | 75 ++
.../beam/runners/apex/ApexYarnLauncherTest.java | 9 +-
.../runners/apex/examples/WordCountTest.java | 2 +-
.../translation/ParDoBoundTranslatorTest.java | 6 +-
.../translation/ReadUnboundTranslatorTest.java | 8 +-
.../utils/ApexStateInternalsTest.java | 2 +-
.../test/resources/beam-runners-apex.properties | 20 +
runners/core-java/pom.xml | 2 +-
.../beam/runners/core/AssignWindowsDoFn.java | 3 +-
.../apache/beam/runners/core/DoFnAdapters.java | 343 ++++++
.../apache/beam/runners/core/DoFnRunner.java | 21 -
.../apache/beam/runners/core/DoFnRunners.java | 138 +--
.../core/GroupAlsoByWindowViaWindowSetDoFn.java | 10 +-
.../runners/core/GroupAlsoByWindowsDoFn.java | 5 +-
.../beam/runners/core/KeyedWorkItemCoder.java | 4 +-
.../core/LateDataDroppingDoFnRunner.java | 1 -
.../apache/beam/runners/core/NonEmptyPanes.java | 2 +-
.../org/apache/beam/runners/core/OldDoFn.java | 472 ++++++++
.../runners/core/PerKeyCombineFnRunner.java | 70 --
.../runners/core/PerKeyCombineFnRunners.java | 101 --
.../beam/runners/core/SimpleDoFnRunner.java | 63 -
.../beam/runners/core/SimpleOldDoFnRunner.java | 7 +-
.../beam/runners/core/SplittableParDo.java | 7 -
.../core/UnboundedReadFromBoundedSource.java | 14 +-
.../AfterDelayFromFirstElementStateMachine.java | 2 +-
.../core/triggers/AfterPaneStateMachine.java | 2 +-
.../core/DoFnDelegatingAggregatorTest.java | 144 +++
.../core/GroupAlsoByWindowsProperties.java | 2 +-
.../runners/core/KeyedWorkItemCoderTest.java | 6 +
.../core/LateDataDroppingDoFnRunnerTest.java | 2 +-
.../apache/beam/runners/core/NoOpOldDoFn.java | 72 ++
.../beam/runners/core/OldDoFnContextTest.java | 72 ++
.../apache/beam/runners/core/OldDoFnTest.java | 192 +++
.../beam/runners/core/ReduceFnRunnerTest.java | 12 +-
.../beam/runners/core/ReduceFnTester.java | 2 +-
.../runners/core/SimpleOldDoFnRunnerTest.java | 2 +-
.../UnboundedReadFromBoundedSourceTest.java | 12 +-
runners/direct-java/pom.xml | 3 +-
.../direct/BoundedReadEvaluatorFactory.java | 10 +-
...ecycleManagerRemovingTransformEvaluator.java | 19 +-
.../beam/runners/direct/EvaluationContext.java | 2 +-
.../direct/ExecutorServiceParallelExecutor.java | 4 +-
.../runners/direct/FlattenEvaluatorFactory.java | 4 +-
.../GroupAlsoByWindowEvaluatorFactory.java | 9 +-
.../direct/GroupByKeyOnlyEvaluatorFactory.java | 10 +-
.../beam/runners/direct/ParDoEvaluator.java | 16 +-
.../runners/direct/ParDoEvaluatorFactory.java | 19 +-
.../direct/ParDoMultiOverrideFactory.java | 13 +-
.../direct/StatefulParDoEvaluatorFactory.java | 27 +-
.../direct/TestStreamEvaluatorFactory.java | 5 +-
.../direct/UnboundedReadEvaluatorFactory.java | 22 +-
.../runners/direct/ViewEvaluatorFactory.java | 8 +-
.../beam/runners/direct/WatermarkManager.java | 6 +-
.../runners/direct/WindowEvaluatorFactory.java | 3 +-
.../runners/direct/AggregatorContainerTest.java | 16 +-
.../direct/BoundedReadEvaluatorFactoryTest.java | 5 -
.../CopyOnAccessInMemoryStateInternalsTest.java | 4 +-
.../runners/direct/DirectGraphVisitorTest.java | 16 +-
.../beam/runners/direct/DirectRunnerTest.java | 5 -
...leManagerRemovingTransformEvaluatorTest.java | 103 +-
.../runners/direct/EvaluationContextTest.java | 6 +-
.../beam/runners/direct/ParDoEvaluatorTest.java | 3 +-
.../StatefulParDoEvaluatorFactoryTest.java | 4 +-
.../UnboundedReadEvaluatorFactoryTest.java | 13 +-
runners/flink/README.md | 6 +-
runners/flink/examples/pom.xml | 2 +-
.../beam/runners/flink/examples/WordCount.java | 2 +-
.../flink/examples/streaming/AutoComplete.java | 2 +-
.../examples/streaming/KafkaIOExamples.java | 4 +-
.../KafkaWindowedWordCountExample.java | 2 +-
.../examples/streaming/WindowedWordCount.java | 2 +-
runners/flink/pom.xml | 2 +-
runners/flink/runner/pom.xml | 4 +-
.../runners/flink/FlinkPipelineOptions.java | 6 +-
.../runners/flink/OldPerKeyCombineFnRunner.java | 62 +
.../flink/OldPerKeyCombineFnRunners.java | 155 +++
.../FlinkBatchTransformTranslators.java | 40 +-
.../FlinkBatchTranslationContext.java | 21 +-
.../FlinkStreamingTransformTranslators.java | 46 +-
.../FlinkStreamingTranslationContext.java | 20 +-
.../functions/FlinkDoFnFunction.java | 4 +-
.../FlinkMergingNonShuffleReduceFunction.java | 10 +-
.../FlinkMergingPartialReduceFunction.java | 8 +-
.../functions/FlinkMergingReduceFunction.java | 8 +-
.../functions/FlinkMultiOutputDoFnFunction.java | 4 +-
.../FlinkMultiOutputProcessContext.java | 2 +-
.../functions/FlinkNoElementAssignContext.java | 2 +-
.../functions/FlinkPartialReduceFunction.java | 10 +-
.../functions/FlinkProcessContextBase.java | 4 +-
.../functions/FlinkReduceFunction.java | 10 +-
.../FlinkSingleOutputProcessContext.java | 2 +-
.../wrappers/streaming/DoFnOperator.java | 6 +-
.../streaming/SingletonKeyedWorkItemCoder.java | 10 +-
.../wrappers/streaming/WindowDoFnOperator.java | 2 +-
.../streaming/io/BoundedSourceWrapper.java | 2 +-
.../streaming/io/UnboundedFlinkSink.java | 6 +
.../streaming/io/UnboundedSourceWrapper.java | 2 +-
.../beam/runners/flink/PipelineOptionsTest.java | 13 +
.../streaming/FlinkStateInternalsTest.java | 2 +-
.../streaming/UnboundedSourceWrapperTest.java | 464 +++----
runners/google-cloud-dataflow-java/pom.xml | 15 +-
.../beam/runners/dataflow/AssignWindows.java | 89 ++
.../dataflow/DataflowAggregatorTransforms.java | 79 ++
.../dataflow/DataflowMetricUpdateExtractor.java | 109 ++
.../runners/dataflow/DataflowPipelineJob.java | 2 -
.../dataflow/DataflowPipelineTranslator.java | 510 +++-----
.../beam/runners/dataflow/DataflowRunner.java | 109 +-
.../DataflowUnboundedReadFromBoundedSource.java | 547 +++++++++
.../beam/runners/dataflow/ReadTranslator.java | 102 ++
.../runners/dataflow/TransformTranslator.java | 120 ++
.../dataflow/internal/AssignWindows.java | 89 --
.../dataflow/internal/CustomSources.java | 5 -
.../internal/DataflowAggregatorTransforms.java | 79 --
.../internal/DataflowMetricUpdateExtractor.java | 109 --
.../DataflowUnboundedReadFromBoundedSource.java | 556 ---------
.../runners/dataflow/internal/IsmFormat.java | 20 +-
.../dataflow/internal/ReadTranslator.java | 107 --
.../DataflowPipelineWorkerPoolOptions.java | 16 +-
.../beam/runners/dataflow/util/DoFnInfo.java | 66 +-
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 352 ++++--
.../beam/runners/dataflow/dataflow.properties | 6 +-
.../dataflow/DataflowPipelineJobTest.java | 38 +-
.../DataflowPipelineTranslatorTest.java | 3 +-
.../runners/dataflow/DataflowRunnerTest.java | 8 +-
...aflowUnboundedReadFromBoundedSourceTest.java | 79 ++
...aflowUnboundedReadFromBoundedSourceTest.java | 83 --
.../DataflowPipelineDebugOptionsTest.java | 2 +-
.../options/DataflowPipelineOptionsTest.java | 4 +-
.../options/DataflowProfilingOptionsTest.java | 4 +-
.../runners/dataflow/util/PackageUtilTest.java | 69 +-
runners/pom.xml | 16 +-
runners/spark/README.md | 8 +-
runners/spark/pom.xml | 29 +-
.../spark/aggregators/NamedAggregators.java | 4 +-
.../coders/BeamSparkRunnerRegistrator.java | 48 +-
.../spark/coders/StatelessJavaSerializer.java | 97 ++
.../runners/spark/coders/WritableCoder.java | 4 +-
.../beam/runners/spark/examples/WordCount.java | 2 +-
.../beam/runners/spark/io/MicrobatchSource.java | 9 +-
.../runners/spark/io/SparkUnboundedSource.java | 127 +-
.../spark/stateful/StateSpecFunctions.java | 37 +-
.../runners/spark/translation/DoFnFunction.java | 15 +-
.../spark/translation/EvaluationContext.java | 83 +-
.../translation/GroupCombineFunctions.java | 8 +-
.../spark/translation/MultiDoFnFunction.java | 14 +-
.../translation/SparkAbstractCombineFn.java | 12 +-
.../spark/translation/SparkGlobalCombineFn.java | 13 +-
.../translation/SparkGroupAlsoByWindowFn.java | 2 +-
.../spark/translation/SparkKeyedCombineFn.java | 13 +-
.../spark/translation/SparkPCollectionView.java | 99 ++
.../spark/translation/SparkRuntimeContext.java | 63 +-
.../spark/translation/TransformTranslator.java | 67 +-
.../spark/translation/TranslationUtils.java | 37 +-
.../streaming/StreamingTransformTranslator.java | 114 +-
.../runners/spark/util/BroadcastHelper.java | 127 --
.../runners/spark/util/SideInputBroadcast.java | 77 ++
.../spark/util/SparkSideInputReader.java | 8 +-
.../coders/BeamSparkRunnerRegistratorTest.java | 57 -
.../streaming/KafkaStreamingTest.java | 57 +-
.../ResumeFromCheckpointStreamingTest.java | 20 +-
.../streaming/utils/PAssertStreaming.java | 4 +-
sdks/java/build-tools/pom.xml | 2 +-
.../src/main/resources/beam/findbugs-filter.xml | 26 -
sdks/java/core/pom.xml | 2 +-
.../beam/sdk/annotations/Experimental.java | 5 +-
.../org/apache/beam/sdk/coders/AtomicCoder.java | 2 +-
.../org/apache/beam/sdk/coders/AvroCoder.java | 30 +-
.../apache/beam/sdk/coders/BigDecimalCoder.java | 6 +-
.../beam/sdk/coders/BigEndianIntegerCoder.java | 7 +
.../beam/sdk/coders/BigEndianLongCoder.java | 7 +
.../apache/beam/sdk/coders/ByteArrayCoder.java | 7 +
.../org/apache/beam/sdk/coders/ByteCoder.java | 7 +
.../apache/beam/sdk/coders/ByteStringCoder.java | 8 +
.../java/org/apache/beam/sdk/coders/Coder.java | 7 +
.../apache/beam/sdk/coders/CollectionCoder.java | 12 +-
.../org/apache/beam/sdk/coders/CustomCoder.java | 18 +-
.../apache/beam/sdk/coders/DelegateCoder.java | 29 +-
.../org/apache/beam/sdk/coders/DoubleCoder.java | 7 +
.../apache/beam/sdk/coders/DurationCoder.java | 8 +
.../apache/beam/sdk/coders/InstantCoder.java | 7 +
.../apache/beam/sdk/coders/IterableCoder.java | 12 +-
.../org/apache/beam/sdk/coders/JAXBCoder.java | 48 +-
.../org/apache/beam/sdk/coders/KvCoder.java | 35 +-
.../beam/sdk/coders/LengthPrefixCoder.java | 145 +++
.../org/apache/beam/sdk/coders/ListCoder.java | 7 +
.../org/apache/beam/sdk/coders/MapCoder.java | 62 +-
.../apache/beam/sdk/coders/NullableCoder.java | 6 +
.../beam/sdk/coders/SerializableCoder.java | 17 +-
.../org/apache/beam/sdk/coders/SetCoder.java | 12 +-
.../apache/beam/sdk/coders/StandardCoder.java | 39 +-
.../beam/sdk/coders/StringDelegateCoder.java | 16 +-
.../apache/beam/sdk/coders/StringUtf8Coder.java | 7 +
.../beam/sdk/coders/TableRowJsonCoder.java | 7 +
.../beam/sdk/coders/TextualIntegerCoder.java | 8 +
.../org/apache/beam/sdk/coders/VarIntCoder.java | 10 +-
.../apache/beam/sdk/coders/VarLongCoder.java | 7 +
.../org/apache/beam/sdk/coders/VoidCoder.java | 7 +
.../beam/sdk/coders/protobuf/ProtoCoder.java | 8 +-
.../java/org/apache/beam/sdk/io/AvroSource.java | 5 -
.../sdk/io/BoundedReadFromUnboundedSource.java | 79 +-
.../org/apache/beam/sdk/io/BoundedSource.java | 8 -
.../apache/beam/sdk/io/CompressedSource.java | 8 -
.../org/apache/beam/sdk/io/CountingSource.java | 5 -
.../org/apache/beam/sdk/io/FileSystems.java | 32 +-
.../java/org/apache/beam/sdk/io/PubsubIO.java | 1142 +++++++++---------
.../apache/beam/sdk/io/PubsubUnboundedSink.java | 88 +-
.../beam/sdk/io/PubsubUnboundedSource.java | 104 +-
.../main/java/org/apache/beam/sdk/io/Read.java | 7 +-
.../java/org/apache/beam/sdk/io/TextIO.java | 5 -
.../java/org/apache/beam/sdk/io/XmlSource.java | 5 -
.../org/apache/beam/sdk/options/GcpOptions.java | 36 +-
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../beam/sdk/options/PipelineOptions.java | 2 +-
.../sdk/options/PipelineOptionsFactory.java | 10 +-
.../apache/beam/sdk/options/ValueProvider.java | 6 +-
.../beam/sdk/runners/TransformHierarchy.java | 33 +-
.../testing/FlattenWithHeterogeneousCoders.java | 29 +
.../org/apache/beam/sdk/testing/PAssert.java | 12 +-
.../beam/sdk/testing/RunnableOnService.java | 14 +-
.../beam/sdk/testing/SourceTestUtils.java | 5 -
.../org/apache/beam/sdk/testing/TestStream.java | 8 +
.../sdk/testing/UsesUnboundedPCollections.java | 23 +
.../beam/sdk/testing/ValueInSingleWindow.java | 6 +-
.../sdk/transforms/AggregatorRetriever.java | 13 +-
.../beam/sdk/transforms/AppliedPTransform.java | 11 +-
.../org/apache/beam/sdk/transforms/Combine.java | 197 +--
.../apache/beam/sdk/transforms/CombineFns.java | 14 +-
.../org/apache/beam/sdk/transforms/Count.java | 4 +-
.../org/apache/beam/sdk/transforms/Create.java | 5 -
.../sdk/transforms/DelegatingAggregator.java | 2 +-
.../beam/sdk/transforms/DoFnAdapters.java | 504 --------
.../apache/beam/sdk/transforms/DoFnTester.java | 7 -
.../apache/beam/sdk/transforms/GroupByKey.java | 2 +-
.../org/apache/beam/sdk/transforms/Max.java | 124 +-
.../org/apache/beam/sdk/transforms/Mean.java | 27 +-
.../org/apache/beam/sdk/transforms/Min.java | 122 +-
.../org/apache/beam/sdk/transforms/OldDoFn.java | 758 ------------
.../apache/beam/sdk/transforms/PTransform.java | 9 +-
.../org/apache/beam/sdk/transforms/ParDo.java | 46 +-
.../org/apache/beam/sdk/transforms/Regex.java | 589 ++++++++-
.../org/apache/beam/sdk/transforms/Sum.java | 57 +-
.../apache/beam/sdk/transforms/ToString.java | 198 +++
.../org/apache/beam/sdk/transforms/Top.java | 27 +-
.../beam/sdk/transforms/join/CoGbkResult.java | 35 +-
.../sdk/transforms/reflect/DoFnInvoker.java | 20 -
.../sdk/transforms/reflect/DoFnInvokers.java | 142 +--
.../sdk/transforms/reflect/DoFnSignature.java | 15 +-
.../windowing/AfterDelayFromFirstElement.java | 2 +-
.../sdk/transforms/windowing/AfterPane.java | 2 +-
.../sdk/transforms/windowing/GlobalWindow.java | 6 +
.../transforms/windowing/IntervalWindow.java | 4 +-
.../beam/sdk/transforms/windowing/Window.java | 3 +-
.../org/apache/beam/sdk/util/CoderUtils.java | 28 +-
.../beam/sdk/util/CombineContextFactory.java | 18 -
.../org/apache/beam/sdk/util/DefaultBucket.java | 105 ++
.../util/EmptyOnDeserializationThreadLocal.java | 39 +
.../apache/beam/sdk/util/GcpProjectUtil.java | 2 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 36 +-
.../org/apache/beam/sdk/util/NameUtils.java | 162 +++
.../org/apache/beam/sdk/util/PropertyNames.java | 1 +
.../org/apache/beam/sdk/util/PubsubClient.java | 28 +-
.../apache/beam/sdk/util/PubsubGrpcClient.java | 6 +-
.../apache/beam/sdk/util/PubsubJsonClient.java | 4 +-
.../apache/beam/sdk/util/PubsubTestClient.java | 6 +-
.../org/apache/beam/sdk/util/StringUtils.java | 100 --
.../apache/beam/sdk/util/TimerInternals.java | 4 +-
.../org/apache/beam/sdk/util/WindowedValue.java | 23 +-
.../beam/sdk/util/state/StateContexts.java | 4 +-
.../org/apache/beam/sdk/values/PValueBase.java | 4 +-
.../beam/sdk/values/TimestampedValue.java | 10 +-
.../sdk/AggregatorPipelineExtractorTest.java | 16 +-
.../apache/beam/sdk/coders/AvroCoderTest.java | 7 +
.../beam/sdk/coders/BigDecimalCoderTest.java | 46 +-
.../sdk/coders/BigEndianIntegerCoderTest.java | 9 +
.../beam/sdk/coders/BigEndianLongCoderTest.java | 9 +
.../beam/sdk/coders/ByteArrayCoderTest.java | 6 +
.../apache/beam/sdk/coders/ByteCoderTest.java | 9 +
.../beam/sdk/coders/ByteStringCoderTest.java | 8 +
.../beam/sdk/coders/CoderRegistryTest.java | 6 +
.../org/apache/beam/sdk/coders/CoderTest.java | 8 +
.../beam/sdk/coders/CollectionCoderTest.java | 16 +
.../beam/sdk/coders/DefaultCoderTest.java | 4 +-
.../beam/sdk/coders/DelegateCoderTest.java | 35 +-
.../apache/beam/sdk/coders/DoubleCoderTest.java | 9 +
.../beam/sdk/coders/DurationCoderTest.java | 10 +
.../beam/sdk/coders/InstantCoderTest.java | 9 +
.../beam/sdk/coders/IterableCoderTest.java | 27 +-
.../apache/beam/sdk/coders/JAXBCoderTest.java | 26 +-
.../org/apache/beam/sdk/coders/KvCoderTest.java | 29 +
.../beam/sdk/coders/LengthPrefixCoderTest.java | 129 ++
.../apache/beam/sdk/coders/ListCoderTest.java | 16 +-
.../apache/beam/sdk/coders/MapCoderTest.java | 21 +-
.../beam/sdk/coders/NullableCoderTest.java | 12 +
.../beam/sdk/coders/SerializableCoderTest.java | 9 +
.../apache/beam/sdk/coders/SetCoderTest.java | 16 +
.../beam/sdk/coders/StandardCoderTest.java | 40 +
.../sdk/coders/StringDelegateCoderTest.java | 11 +
.../beam/sdk/coders/StringUtf8CoderTest.java | 9 +
.../beam/sdk/coders/TableRowJsonCoderTest.java | 9 +
.../sdk/coders/TextualIntegerCoderTest.java | 9 +
.../apache/beam/sdk/coders/VarIntCoderTest.java | 9 +
.../beam/sdk/coders/VarLongCoderTest.java | 9 +
.../apache/beam/sdk/coders/VoidCoderTest.java | 40 +
.../beam/sdk/io/AvroIOGeneratedClassTest.java | 285 -----
.../apache/beam/sdk/io/AvroIOTransformTest.java | 324 +++++
.../beam/sdk/io/CompressedSourceTest.java | 5 -
.../apache/beam/sdk/io/FileBasedSourceTest.java | 5 -
.../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +-
.../beam/sdk/io/OffsetBasedSourceTest.java | 5 -
.../org/apache/beam/sdk/io/PubsubIOTest.java | 86 +-
.../beam/sdk/io/PubsubUnboundedSinkTest.java | 41 +-
.../beam/sdk/io/PubsubUnboundedSourceTest.java | 10 +-
.../java/org/apache/beam/sdk/io/ReadTest.java | 5 -
.../java/org/apache/beam/sdk/io/WriteTest.java | 10 +-
.../apache/beam/sdk/options/GcpOptionsTest.java | 4 +-
.../sdk/options/PipelineOptionsFactoryTest.java | 6 +-
.../beam/sdk/options/PipelineOptionsTest.java | 3 +-
.../beam/sdk/options/ValueProviderTest.java | 36 +-
.../sdk/options/ValueProviderUtilsTest.java | 2 +-
.../sdk/runners/TransformHierarchyTest.java | 30 +-
.../apache/beam/sdk/testing/TestStreamTest.java | 5 +
.../testing/ValueInSingleWindowCoderTest.java | 7 +
.../sdk/transforms/ApproximateUniqueTest.java | 483 ++++----
.../beam/sdk/transforms/CombineFnsTest.java | 20 +-
.../apache/beam/sdk/transforms/CombineTest.java | 99 +-
.../apache/beam/sdk/transforms/CountTest.java | 2 +-
.../apache/beam/sdk/transforms/CreateTest.java | 8 -
.../DoFnDelegatingAggregatorTest.java | 142 ---
.../apache/beam/sdk/transforms/DoFnTest.java | 15 +-
.../beam/sdk/transforms/DoFnTesterTest.java | 6 +-
.../apache/beam/sdk/transforms/FlattenTest.java | 27 +
.../apache/beam/sdk/transforms/KvSwapTest.java | 13 +-
.../org/apache/beam/sdk/transforms/MaxTest.java | 20 +-
.../apache/beam/sdk/transforms/MeanTest.java | 7 +-
.../org/apache/beam/sdk/transforms/MinTest.java | 21 +-
.../apache/beam/sdk/transforms/NoOpOldDoFn.java | 71 --
.../beam/sdk/transforms/OldDoFnContextTest.java | 69 --
.../apache/beam/sdk/transforms/OldDoFnTest.java | 188 ---
.../apache/beam/sdk/transforms/ParDoTest.java | 74 +-
.../apache/beam/sdk/transforms/RegexTest.java | 127 +-
.../apache/beam/sdk/transforms/SampleTest.java | 405 ++++---
.../beam/sdk/transforms/SimpleStatsFnsTest.java | 36 +-
.../org/apache/beam/sdk/transforms/SumTest.java | 24 +-
.../beam/sdk/transforms/ToStringTest.java | 125 ++
.../org/apache/beam/sdk/transforms/TopTest.java | 13 +-
.../apache/beam/sdk/transforms/ViewTest.java | 2 +-
.../transforms/join/CoGbkResultCoderTest.java | 10 +-
.../sdk/transforms/join/UnionCoderTest.java | 24 +-
.../transforms/reflect/DoFnInvokersTest.java | 42 -
.../transforms/windowing/GlobalWindowTest.java | 64 +
.../apache/beam/sdk/util/CombineFnUtilTest.java | 8 +-
.../apache/beam/sdk/util/DefaultBucketTest.java | 112 ++
.../org/apache/beam/sdk/util/GcsUtilTest.java | 56 +
.../org/apache/beam/sdk/util/NameUtilsTest.java | 177 +++
.../beam/sdk/util/PubsubGrpcClientTest.java | 8 +-
.../beam/sdk/util/PubsubJsonClientTest.java | 3 +-
.../beam/sdk/util/PubsubTestClientTest.java | 4 +-
.../beam/sdk/util/SerializableUtilsTest.java | 4 +-
.../apache/beam/sdk/util/StringUtilsTest.java | 100 --
.../beam/sdk/util/TimerInternalsTest.java | 5 +
.../beam/sdk/util/ValueWithRecordIdTest.java | 34 +
.../apache/beam/sdk/util/WindowedValueTest.java | 23 +
.../util/state/InMemoryStateInternalsTest.java | 2 +-
.../beam/sdk/util/state/StateTagTest.java | 11 +-
.../beam/sdk/values/TimestampedValueTest.java | 19 +-
sdks/java/extensions/join-library/README.md | 10 -
sdks/java/extensions/join-library/pom.xml | 2 +-
sdks/java/extensions/pom.xml | 2 +-
sdks/java/extensions/sorter/pom.xml | 2 +-
sdks/java/io/elasticsearch/pom.xml | 175 +++
.../sdk/io/elasticsearch/ElasticsearchIO.java | 819 +++++++++++++
.../beam/sdk/io/elasticsearch/package-info.java | 20 +
.../elasticsearch/ElasticSearchIOTestUtils.java | 129 ++
.../io/elasticsearch/ElasticsearchIOTest.java | 358 ++++++
sdks/java/io/google-cloud-platform/pom.xml | 2 +-
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 308 +++--
.../sdk/io/gcp/bigquery/BigQueryServices.java | 16 +-
.../io/gcp/bigquery/BigQueryServicesImpl.java | 76 +-
.../beam/sdk/io/gcp/bigtable/BigtableIO.java | 8 -
.../io/gcp/bigtable/BigtableTestOptions.java | 37 -
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 205 +++-
.../gcp/bigquery/BigQueryServicesImplTest.java | 141 +++
.../sdk/io/gcp/bigquery/BigQueryUtilTest.java | 3 +-
.../sdk/io/gcp/bigtable/BigtableIOTest.java | 5 +-
.../io/gcp/bigtable/BigtableTestOptions.java | 37 +
sdks/java/io/hdfs/pom.xml | 2 +-
.../beam/sdk/io/hdfs/AvroWrapperCoder.java | 4 +-
.../apache/beam/sdk/io/hdfs/HDFSFileSource.java | 5 -
.../apache/beam/sdk/io/hdfs/WritableCoder.java | 4 +-
.../beam/sdk/io/hdfs/AvroWrapperCoderTest.java | 1 -
sdks/java/io/jdbc/pom.xml | 2 +-
sdks/java/io/jms/pom.xml | 2 +-
sdks/java/io/kafka/pom.xml | 2 +-
.../org/apache/beam/sdk/io/kafka/KafkaIO.java | 7 +-
.../beam/sdk/io/kafka/KafkaRecordCoder.java | 4 +-
.../apache/beam/sdk/io/kafka/KafkaIOTest.java | 9 +-
.../beam/sdk/io/kafka/KafkaRecordCoderTest.java | 34 +
sdks/java/io/kinesis/pom.xml | 2 +-
.../beam/sdk/io/kinesis/KinesisRecordCoder.java | 4 +-
.../beam/sdk/io/kinesis/package-info.java | 2 +-
sdks/java/io/mongodb/pom.xml | 2 +-
.../beam/sdk/io/mongodb/MongoDbGridFSIO.java | 5 -
.../apache/beam/sdk/io/mongodb/MongoDbIO.java | 5 -
sdks/java/io/mqtt/pom.xml | 152 +++
.../org/apache/beam/sdk/io/mqtt/MqttIO.java | 588 +++++++++
.../apache/beam/sdk/io/mqtt/package-info.java | 22 +
.../org/apache/beam/sdk/io/mqtt/MqttIOTest.java | 197 +++
sdks/java/io/pom.xml | 4 +-
sdks/java/java8tests/pom.xml | 2 +-
.../maven-archetypes/examples-java8/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 19 +-
sdks/java/maven-archetypes/examples/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 19 +-
sdks/java/maven-archetypes/pom.xml | 2 +-
sdks/java/maven-archetypes/starter/pom.xml | 2 +-
.../main/resources/archetype-resources/pom.xml | 4 +-
.../resources/projects/basic/reference/pom.xml | 4 +-
sdks/java/pom.xml | 2 +-
sdks/pom.xml | 15 +-
sdks/python/pom.xml | 2 +-
466 files changed, 13883 insertions(+), 8047 deletions(-)
----------------------------------------------------------------------
[03/50] beam git commit: [BEAM-1302] BigQueryServicesImpl: skip
logging warning if exceptions do not need retry.
Posted by dh...@apache.org.
[BEAM-1302] BigQueryServicesImpl: skip logging warning if exceptions do not need retry.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2a23e8b5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2a23e8b5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2a23e8b5
Branch: refs/heads/python-sdk
Commit: 2a23e8b5dd11c825dfe13f79d69c2099069be724
Parents: 5b6dd91
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 17:39:20 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/2a23e8b5/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 2098148..c9edf7c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -887,11 +887,11 @@ class BigQueryServicesImpl implements BigQueryServices {
try {
return request.execute();
} catch (IOException e) {
- LOG.warn("Ignore the error and retry the request.", e);
lastException = e;
if (!shouldRetry.apply(e)) {
break;
}
+ LOG.warn("Ignore the error and retry the request.", e);
}
} while (nextBackOff(sleeper, backoff));
throw new IOException(
[16/50] beam git commit: address comments
Posted by dh...@apache.org.
address comments
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/968c3112
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/968c3112
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/968c3112
Branch: refs/heads/python-sdk
Commit: 968c31122395d120117ed725aad83d5e3a47e3b1
Parents: eeec9f1
Author: Kai Jiang <ji...@gmail.com>
Authored: Wed Jan 25 04:49:35 2017 -0800
Committer: Kai Jiang <ji...@gmail.com>
Committed: Wed Jan 25 05:09:33 2017 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/968c3112/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
index 7e25a01..f52b822 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
@@ -141,8 +141,8 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
}
/**
- * An Adapter wraps the underlying {@link UnboundedSource} with the specified bounds on
- * number of records and read time into {@link BoundedSource}.
+ * Adapter that wraps the underlying {@link UnboundedSource} with the specified bounds on
+ * number of records and read time into a {@link BoundedSource}.
*/
@AutoValue
public abstract static class UnboundedToBoundedSourceAdapter<T>
@@ -151,8 +151,6 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
@Nullable abstract long getMaxNumRecords();
@Nullable abstract Duration getMaxReadTime();
- public abstract String toString();
-
abstract Builder<T> toBuilder();
@AutoValue.Builder
[20/50] beam git commit: This closes #1843
Posted by dh...@apache.org.
This closes #1843
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/979c9376
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/979c9376
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/979c9376
Branch: refs/heads/python-sdk
Commit: 979c9376f820577bad43c18cc1a7ee86fab9d942
Parents: bf9d454 e95335f
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 10:40:16 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 10:40:16 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 4 ++--
.../org/apache/beam/runners/dataflow/dataflow.properties | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
[41/50] beam git commit: This closes #1830
Posted by dh...@apache.org.
This closes #1830
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/47304d1f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/47304d1f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/47304d1f
Branch: refs/heads/python-sdk
Commit: 47304d1fc75d3a7751883638efdaf9f9d8b40a25
Parents: 83f8c46 e01ce86
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 22:52:12 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 22:52:12 2017 -0800
----------------------------------------------------------------------
.../apache/beam/sdk/transforms/ToString.java | 168 ++++++++++++++++---
.../java/org/apache/beam/sdk/io/WriteTest.java | 2 +-
.../beam/sdk/transforms/ToStringTest.java | 86 ++++++++--
3 files changed, 226 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
[28/50] beam git commit: This closes #1847
Posted by dh...@apache.org.
This closes #1847
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1c6e6674
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1c6e6674
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1c6e6674
Branch: refs/heads/python-sdk
Commit: 1c6e667414788fe99f583fac39d458a4984ae162
Parents: 6413299 fee029f
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 17:47:08 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 17:47:08 2017 -0800
----------------------------------------------------------------------
runners/google-cloud-dataflow-java/pom.xml | 5 -
.../beam/runners/dataflow/util/GcsStager.java | 18 +-
.../beam/runners/dataflow/util/PackageUtil.java | 349 +++++++------------
.../runners/dataflow/util/PackageUtilTest.java | 42 +--
.../org/apache/beam/sdk/options/GcsOptions.java | 4 +-
.../java/org/apache/beam/sdk/util/GcsUtil.java | 12 -
6 files changed, 149 insertions(+), 281 deletions(-)
----------------------------------------------------------------------
[17/50] beam git commit: Fix Flink RunnableOnService tests
Posted by dh...@apache.org.
Fix Flink RunnableOnService tests
* Check that a Multi-Output map contains the Tag, not the TaggedValue
* Return Inputs from getInputs
Don't return outputs.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a361b65d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a361b65d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a361b65d
Branch: refs/heads/python-sdk
Commit: a361b65d6aa56d70769403d884abf48d1e1141a4
Parents: 7402d76
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jan 24 17:41:07 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 09:03:23 2017 -0800
----------------------------------------------------------------------
.../runners/flink/translation/FlinkBatchTransformTranslators.java | 2 +-
.../flink/translation/FlinkStreamingTranslationContext.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/a361b65d/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
index 654b464..f7f1878 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
@@ -580,7 +580,7 @@ class FlinkBatchTransformTranslators {
outputMap.put(transform.getMainOutputTag(), 0);
int count = 1;
for (TaggedPValue taggedValue : outputs) {
- if (!outputMap.containsKey(taggedValue)) {
+ if (!outputMap.containsKey(taggedValue.getTag())) {
outputMap.put(taggedValue.getTag(), count++);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/a361b65d/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
index 6db252e..7932f68 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
@@ -107,7 +107,7 @@ public class FlinkStreamingTranslationContext {
}
public <T extends PInput> List<TaggedPValue> getInputs(PTransform<T, ?> transform) {
- return currentTransform.getOutputs();
+ return currentTransform.getInputs();
}
@SuppressWarnings("unchecked")