Posted to commits@beam.apache.org by dh...@apache.org on 2017/01/29 16:21:47 UTC

[01/50] beam git commit: This closes #1822: Revert "Simplified API surface verifications"

Repository: beam
Updated Branches:
  refs/heads/python-sdk 1bc685980 -> 27cf68ee7


This closes #1822: Revert "Simplified API surface verifications"


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/cb6e0a80
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/cb6e0a80
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/cb6e0a80

Branch: refs/heads/python-sdk
Commit: cb6e0a80c57b056489d447cde092cffdd041eed5
Parents: 6ecbfb9 9248bef
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jan 23 19:47:47 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jan 23 19:47:47 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/util/ApiSurface.java    | 420 +++++--------------
 .../org/apache/beam/SdkCoreApiSurfaceTest.java  |  61 ---
 .../apache/beam/sdk/util/ApiSurfaceTest.java    | 152 +++++--
 .../apache/beam/sdk/io/gcp/ApiSurfaceTest.java  | 134 ++++++
 .../beam/sdk/io/gcp/GcpApiSurfaceTest.java      |  76 ----
 5 files changed, 359 insertions(+), 484 deletions(-)
----------------------------------------------------------------------



[48/50] beam git commit: Merge remote-tracking branch 'origin/master' into python-sdk.

Posted by dh...@apache.org.
Merge remote-tracking branch 'origin/master' into python-sdk.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c2859a55
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c2859a55
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c2859a55

Branch: refs/heads/python-sdk
Commit: c2859a55f89c9807a037adfde9f7e8f506c108ce
Parents: 1bc6859 34b4a6d
Author: Ahmet Altay <al...@google.com>
Authored: Fri Jan 27 16:57:44 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Jan 27 16:57:44 2017 -0800

----------------------------------------------------------------------
 .jenkins/common_job_properties.groovy           |    9 +-
 ...job_beam_PostCommit_Java_MavenInstall.groovy |    2 +-
 .../job_beam_PreCommit_Java_MavenInstall.groovy |    2 +-
 .../job_beam_Release_NightlySnapshot.groovy     |    2 +-
 .jenkins/job_seed.groovy                        |    2 +-
 .travis/README.md                               |    2 +-
 DISCLAIMER                                      |   10 -
 NOTICE                                          |    4 +-
 README.md                                       |   46 +-
 examples/java/README.md                         |   16 +-
 examples/java/pom.xml                           |   21 +-
 .../beam/examples/DebuggingWordCount.java       |    4 +-
 .../org/apache/beam/examples/WordCount.java     |    6 +-
 .../beam/examples/complete/AutoComplete.java    |    2 +-
 .../org/apache/beam/examples/complete/README.md |   14 +-
 .../apache/beam/examples/complete/TfIdf.java    |    2 +-
 .../examples/complete/TopWikipediaSessions.java |    2 +-
 .../examples/complete/TrafficMaxLaneFlow.java   |    2 +-
 .../beam/examples/complete/TrafficRoutes.java   |    2 +-
 .../examples/cookbook/BigQueryTornadoes.java    |    2 +-
 .../cookbook/CombinePerKeyExamples.java         |    2 +-
 .../org/apache/beam/examples/cookbook/README.md |   14 +-
 .../beam/examples/cookbook/TriggerExample.java  |    4 +-
 .../beam/examples/WindowedWordCountIT.java      |   16 +-
 examples/java8/pom.xml                          |    2 +-
 .../beam/examples/complete/game/GameStats.java  |    7 +-
 .../examples/complete/game/LeaderBoard.java     |    5 +-
 .../beam/examples/complete/game/UserScore.java  |    2 +-
 examples/pom.xml                                |   16 +-
 pom.xml                                         |   41 +-
 runners/apex/README.md                          |    4 +-
 runners/apex/pom.xml                            |    3 +-
 .../beam/runners/apex/ApexPipelineOptions.java  |    7 +-
 .../apache/beam/runners/apex/ApexRunner.java    |   43 +-
 .../beam/runners/apex/ApexYarnLauncher.java     |   23 +-
 .../translation/CreateValuesTranslator.java     |   18 +-
 .../FlattenPCollectionTranslator.java           |   28 +-
 .../apex/translation/GroupByKeyTranslator.java  |    2 +-
 .../translation/ParDoBoundMultiTranslator.java  |   27 +-
 .../apex/translation/ParDoBoundTranslator.java  |    4 +-
 .../apex/translation/TranslationContext.java    |   27 +-
 .../apex/translation/WindowBoundTranslator.java |    8 +-
 .../operators/ApexGroupByKeyOperator.java       |    4 +-
 .../operators/ApexParDoOperator.java            |    6 +-
 .../ApexReadUnboundedInputOperator.java         |   17 +-
 .../beam/runners/apex/ApexRunnerTest.java       |   75 ++
 .../beam/runners/apex/ApexYarnLauncherTest.java |    9 +-
 .../runners/apex/examples/WordCountTest.java    |    2 +-
 .../translation/ParDoBoundTranslatorTest.java   |    6 +-
 .../translation/ReadUnboundTranslatorTest.java  |    8 +-
 .../utils/ApexStateInternalsTest.java           |    2 +-
 .../test/resources/beam-runners-apex.properties |   20 +
 runners/core-java/pom.xml                       |    2 +-
 .../beam/runners/core/AssignWindowsDoFn.java    |    3 +-
 .../apache/beam/runners/core/DoFnAdapters.java  |  343 ++++++
 .../apache/beam/runners/core/DoFnRunner.java    |   21 -
 .../apache/beam/runners/core/DoFnRunners.java   |  138 +--
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |   10 +-
 .../runners/core/GroupAlsoByWindowsDoFn.java    |    5 +-
 .../beam/runners/core/KeyedWorkItemCoder.java   |    4 +-
 .../core/LateDataDroppingDoFnRunner.java        |    1 -
 .../apache/beam/runners/core/NonEmptyPanes.java |    2 +-
 .../org/apache/beam/runners/core/OldDoFn.java   |  472 ++++++++
 .../runners/core/PerKeyCombineFnRunner.java     |   70 --
 .../runners/core/PerKeyCombineFnRunners.java    |  101 --
 .../beam/runners/core/SimpleDoFnRunner.java     |   63 -
 .../beam/runners/core/SimpleOldDoFnRunner.java  |    7 +-
 .../beam/runners/core/SplittableParDo.java      |    7 -
 .../core/UnboundedReadFromBoundedSource.java    |   14 +-
 .../AfterDelayFromFirstElementStateMachine.java |    2 +-
 .../core/triggers/AfterPaneStateMachine.java    |    2 +-
 .../core/DoFnDelegatingAggregatorTest.java      |  144 +++
 .../core/GroupAlsoByWindowsProperties.java      |    2 +-
 .../runners/core/KeyedWorkItemCoderTest.java    |    6 +
 .../core/LateDataDroppingDoFnRunnerTest.java    |    2 +-
 .../apache/beam/runners/core/NoOpOldDoFn.java   |   72 ++
 .../beam/runners/core/OldDoFnContextTest.java   |   72 ++
 .../apache/beam/runners/core/OldDoFnTest.java   |  192 +++
 .../beam/runners/core/ReduceFnRunnerTest.java   |   12 +-
 .../beam/runners/core/ReduceFnTester.java       |    2 +-
 .../runners/core/SimpleOldDoFnRunnerTest.java   |    2 +-
 .../UnboundedReadFromBoundedSourceTest.java     |   12 +-
 runners/direct-java/pom.xml                     |    3 +-
 .../direct/BoundedReadEvaluatorFactory.java     |   10 +-
 ...ecycleManagerRemovingTransformEvaluator.java |   19 +-
 .../beam/runners/direct/EvaluationContext.java  |    2 +-
 .../direct/ExecutorServiceParallelExecutor.java |    4 +-
 .../runners/direct/FlattenEvaluatorFactory.java |    4 +-
 .../GroupAlsoByWindowEvaluatorFactory.java      |    9 +-
 .../direct/GroupByKeyOnlyEvaluatorFactory.java  |   10 +-
 .../beam/runners/direct/ParDoEvaluator.java     |   16 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |   19 +-
 .../direct/ParDoMultiOverrideFactory.java       |   13 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |   27 +-
 .../direct/TestStreamEvaluatorFactory.java      |    5 +-
 .../direct/UnboundedReadEvaluatorFactory.java   |   22 +-
 .../runners/direct/ViewEvaluatorFactory.java    |    8 +-
 .../beam/runners/direct/WatermarkManager.java   |    6 +-
 .../runners/direct/WindowEvaluatorFactory.java  |    3 +-
 .../runners/direct/AggregatorContainerTest.java |   16 +-
 .../direct/BoundedReadEvaluatorFactoryTest.java |    5 -
 .../CopyOnAccessInMemoryStateInternalsTest.java |    4 +-
 .../runners/direct/DirectGraphVisitorTest.java  |   16 +-
 .../beam/runners/direct/DirectRunnerTest.java   |    5 -
 ...leManagerRemovingTransformEvaluatorTest.java |  103 +-
 .../runners/direct/EvaluationContextTest.java   |    6 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |    3 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |    4 +-
 .../UnboundedReadEvaluatorFactoryTest.java      |   13 +-
 runners/flink/README.md                         |    6 +-
 runners/flink/examples/pom.xml                  |    2 +-
 .../beam/runners/flink/examples/WordCount.java  |    2 +-
 .../flink/examples/streaming/AutoComplete.java  |    2 +-
 .../examples/streaming/KafkaIOExamples.java     |    4 +-
 .../KafkaWindowedWordCountExample.java          |    2 +-
 .../examples/streaming/WindowedWordCount.java   |    2 +-
 runners/flink/pom.xml                           |    2 +-
 runners/flink/runner/pom.xml                    |    4 +-
 .../runners/flink/FlinkPipelineOptions.java     |    6 +-
 .../runners/flink/OldPerKeyCombineFnRunner.java |   62 +
 .../flink/OldPerKeyCombineFnRunners.java        |  155 +++
 .../FlinkBatchTransformTranslators.java         |   40 +-
 .../FlinkBatchTranslationContext.java           |   21 +-
 .../FlinkStreamingTransformTranslators.java     |   46 +-
 .../FlinkStreamingTranslationContext.java       |   20 +-
 .../functions/FlinkDoFnFunction.java            |    4 +-
 .../FlinkMergingNonShuffleReduceFunction.java   |   10 +-
 .../FlinkMergingPartialReduceFunction.java      |    8 +-
 .../functions/FlinkMergingReduceFunction.java   |    8 +-
 .../functions/FlinkMultiOutputDoFnFunction.java |    4 +-
 .../FlinkMultiOutputProcessContext.java         |    2 +-
 .../functions/FlinkNoElementAssignContext.java  |    2 +-
 .../functions/FlinkPartialReduceFunction.java   |   10 +-
 .../functions/FlinkProcessContextBase.java      |    4 +-
 .../functions/FlinkReduceFunction.java          |   10 +-
 .../FlinkSingleOutputProcessContext.java        |    2 +-
 .../wrappers/streaming/DoFnOperator.java        |    6 +-
 .../streaming/SingletonKeyedWorkItemCoder.java  |   10 +-
 .../wrappers/streaming/WindowDoFnOperator.java  |    2 +-
 .../streaming/io/BoundedSourceWrapper.java      |    2 +-
 .../streaming/io/UnboundedFlinkSink.java        |    6 +
 .../streaming/io/UnboundedSourceWrapper.java    |    2 +-
 .../beam/runners/flink/PipelineOptionsTest.java |   13 +
 .../streaming/FlinkStateInternalsTest.java      |    2 +-
 .../streaming/UnboundedSourceWrapperTest.java   |  464 +++----
 runners/google-cloud-dataflow-java/pom.xml      |   15 +-
 .../beam/runners/dataflow/AssignWindows.java    |   89 ++
 .../dataflow/DataflowAggregatorTransforms.java  |   79 ++
 .../dataflow/DataflowMetricUpdateExtractor.java |  109 ++
 .../runners/dataflow/DataflowPipelineJob.java   |    2 -
 .../dataflow/DataflowPipelineTranslator.java    |  510 +++-----
 .../beam/runners/dataflow/DataflowRunner.java   |  109 +-
 .../DataflowUnboundedReadFromBoundedSource.java |  547 +++++++++
 .../beam/runners/dataflow/ReadTranslator.java   |  102 ++
 .../runners/dataflow/TransformTranslator.java   |  120 ++
 .../dataflow/internal/AssignWindows.java        |   89 --
 .../dataflow/internal/CustomSources.java        |    5 -
 .../internal/DataflowAggregatorTransforms.java  |   79 --
 .../internal/DataflowMetricUpdateExtractor.java |  109 --
 .../DataflowUnboundedReadFromBoundedSource.java |  556 ---------
 .../runners/dataflow/internal/IsmFormat.java    |   20 +-
 .../dataflow/internal/ReadTranslator.java       |  107 --
 .../DataflowPipelineWorkerPoolOptions.java      |   16 +-
 .../beam/runners/dataflow/util/DoFnInfo.java    |   66 +-
 .../beam/runners/dataflow/util/GcsStager.java   |   18 +-
 .../beam/runners/dataflow/util/PackageUtil.java |  352 ++++--
 .../beam/runners/dataflow/dataflow.properties   |    6 +-
 .../dataflow/DataflowPipelineJobTest.java       |   38 +-
 .../DataflowPipelineTranslatorTest.java         |    3 +-
 .../runners/dataflow/DataflowRunnerTest.java    |    8 +-
 ...aflowUnboundedReadFromBoundedSourceTest.java |   79 ++
 ...aflowUnboundedReadFromBoundedSourceTest.java |   83 --
 .../DataflowPipelineDebugOptionsTest.java       |    2 +-
 .../options/DataflowPipelineOptionsTest.java    |    4 +-
 .../options/DataflowProfilingOptionsTest.java   |    4 +-
 .../runners/dataflow/util/PackageUtilTest.java  |   69 +-
 runners/pom.xml                                 |   16 +-
 runners/spark/README.md                         |    8 +-
 runners/spark/pom.xml                           |   29 +-
 .../spark/aggregators/NamedAggregators.java     |    4 +-
 .../coders/BeamSparkRunnerRegistrator.java      |   48 +-
 .../spark/coders/StatelessJavaSerializer.java   |   97 ++
 .../runners/spark/coders/WritableCoder.java     |    4 +-
 .../beam/runners/spark/examples/WordCount.java  |    2 +-
 .../beam/runners/spark/io/MicrobatchSource.java |    9 +-
 .../runners/spark/io/SparkUnboundedSource.java  |  127 +-
 .../spark/stateful/StateSpecFunctions.java      |   37 +-
 .../runners/spark/translation/DoFnFunction.java |   15 +-
 .../spark/translation/EvaluationContext.java    |   83 +-
 .../translation/GroupCombineFunctions.java      |    8 +-
 .../spark/translation/MultiDoFnFunction.java    |   14 +-
 .../translation/SparkAbstractCombineFn.java     |   12 +-
 .../spark/translation/SparkGlobalCombineFn.java |   13 +-
 .../translation/SparkGroupAlsoByWindowFn.java   |    2 +-
 .../spark/translation/SparkKeyedCombineFn.java  |   13 +-
 .../spark/translation/SparkPCollectionView.java |   99 ++
 .../spark/translation/SparkRuntimeContext.java  |   63 +-
 .../spark/translation/TransformTranslator.java  |   67 +-
 .../spark/translation/TranslationUtils.java     |   37 +-
 .../streaming/StreamingTransformTranslator.java |  114 +-
 .../runners/spark/util/BroadcastHelper.java     |  127 --
 .../runners/spark/util/SideInputBroadcast.java  |   77 ++
 .../spark/util/SparkSideInputReader.java        |    8 +-
 .../coders/BeamSparkRunnerRegistratorTest.java  |   57 -
 .../streaming/KafkaStreamingTest.java           |   57 +-
 .../ResumeFromCheckpointStreamingTest.java      |   20 +-
 .../streaming/utils/PAssertStreaming.java       |    4 +-
 sdks/java/build-tools/pom.xml                   |    2 +-
 .../src/main/resources/beam/findbugs-filter.xml |   26 -
 sdks/java/core/pom.xml                          |    2 +-
 .../beam/sdk/annotations/Experimental.java      |    5 +-
 .../org/apache/beam/sdk/coders/AtomicCoder.java |    2 +-
 .../org/apache/beam/sdk/coders/AvroCoder.java   |   30 +-
 .../apache/beam/sdk/coders/BigDecimalCoder.java |    6 +-
 .../beam/sdk/coders/BigEndianIntegerCoder.java  |    7 +
 .../beam/sdk/coders/BigEndianLongCoder.java     |    7 +
 .../apache/beam/sdk/coders/ByteArrayCoder.java  |    7 +
 .../org/apache/beam/sdk/coders/ByteCoder.java   |    7 +
 .../apache/beam/sdk/coders/ByteStringCoder.java |    8 +
 .../java/org/apache/beam/sdk/coders/Coder.java  |    7 +
 .../apache/beam/sdk/coders/CollectionCoder.java |   12 +-
 .../org/apache/beam/sdk/coders/CustomCoder.java |   18 +-
 .../apache/beam/sdk/coders/DelegateCoder.java   |   29 +-
 .../org/apache/beam/sdk/coders/DoubleCoder.java |    7 +
 .../apache/beam/sdk/coders/DurationCoder.java   |    8 +
 .../apache/beam/sdk/coders/InstantCoder.java    |    7 +
 .../apache/beam/sdk/coders/IterableCoder.java   |   12 +-
 .../org/apache/beam/sdk/coders/JAXBCoder.java   |   48 +-
 .../org/apache/beam/sdk/coders/KvCoder.java     |   35 +-
 .../beam/sdk/coders/LengthPrefixCoder.java      |  145 +++
 .../org/apache/beam/sdk/coders/ListCoder.java   |    7 +
 .../org/apache/beam/sdk/coders/MapCoder.java    |   62 +-
 .../apache/beam/sdk/coders/NullableCoder.java   |    6 +
 .../beam/sdk/coders/SerializableCoder.java      |   17 +-
 .../org/apache/beam/sdk/coders/SetCoder.java    |   12 +-
 .../apache/beam/sdk/coders/StandardCoder.java   |   39 +-
 .../beam/sdk/coders/StringDelegateCoder.java    |   16 +-
 .../apache/beam/sdk/coders/StringUtf8Coder.java |    7 +
 .../beam/sdk/coders/TableRowJsonCoder.java      |    7 +
 .../beam/sdk/coders/TextualIntegerCoder.java    |    8 +
 .../org/apache/beam/sdk/coders/VarIntCoder.java |   10 +-
 .../apache/beam/sdk/coders/VarLongCoder.java    |    7 +
 .../org/apache/beam/sdk/coders/VoidCoder.java   |    7 +
 .../beam/sdk/coders/protobuf/ProtoCoder.java    |    8 +-
 .../java/org/apache/beam/sdk/io/AvroSource.java |    5 -
 .../sdk/io/BoundedReadFromUnboundedSource.java  |   79 +-
 .../org/apache/beam/sdk/io/BoundedSource.java   |    8 -
 .../apache/beam/sdk/io/CompressedSource.java    |    8 -
 .../org/apache/beam/sdk/io/CountingSource.java  |    5 -
 .../org/apache/beam/sdk/io/FileSystems.java     |   32 +-
 .../java/org/apache/beam/sdk/io/PubsubIO.java   | 1142 +++++++++---------
 .../apache/beam/sdk/io/PubsubUnboundedSink.java |   88 +-
 .../beam/sdk/io/PubsubUnboundedSource.java      |  104 +-
 .../main/java/org/apache/beam/sdk/io/Read.java  |    7 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     |    5 -
 .../java/org/apache/beam/sdk/io/XmlSource.java  |    5 -
 .../org/apache/beam/sdk/options/GcpOptions.java |   36 +-
 .../org/apache/beam/sdk/options/GcsOptions.java |    4 +-
 .../beam/sdk/options/PipelineOptions.java       |    2 +-
 .../sdk/options/PipelineOptionsFactory.java     |   10 +-
 .../apache/beam/sdk/options/ValueProvider.java  |    6 +-
 .../beam/sdk/runners/TransformHierarchy.java    |   33 +-
 .../testing/FlattenWithHeterogeneousCoders.java |   29 +
 .../org/apache/beam/sdk/testing/PAssert.java    |   12 +-
 .../beam/sdk/testing/RunnableOnService.java     |   14 +-
 .../beam/sdk/testing/SourceTestUtils.java       |    5 -
 .../org/apache/beam/sdk/testing/TestStream.java |    8 +
 .../sdk/testing/UsesUnboundedPCollections.java  |   23 +
 .../beam/sdk/testing/ValueInSingleWindow.java   |    6 +-
 .../sdk/transforms/AggregatorRetriever.java     |   13 +-
 .../beam/sdk/transforms/AppliedPTransform.java  |   11 +-
 .../org/apache/beam/sdk/transforms/Combine.java |  197 +--
 .../apache/beam/sdk/transforms/CombineFns.java  |   14 +-
 .../org/apache/beam/sdk/transforms/Count.java   |    4 +-
 .../org/apache/beam/sdk/transforms/Create.java  |    5 -
 .../sdk/transforms/DelegatingAggregator.java    |    2 +-
 .../beam/sdk/transforms/DoFnAdapters.java       |  504 --------
 .../apache/beam/sdk/transforms/DoFnTester.java  |    7 -
 .../apache/beam/sdk/transforms/GroupByKey.java  |    2 +-
 .../org/apache/beam/sdk/transforms/Max.java     |  124 +-
 .../org/apache/beam/sdk/transforms/Mean.java    |   27 +-
 .../org/apache/beam/sdk/transforms/Min.java     |  122 +-
 .../org/apache/beam/sdk/transforms/OldDoFn.java |  758 ------------
 .../apache/beam/sdk/transforms/PTransform.java  |    9 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |   46 +-
 .../org/apache/beam/sdk/transforms/Regex.java   |  589 ++++++++-
 .../org/apache/beam/sdk/transforms/Sum.java     |   57 +-
 .../apache/beam/sdk/transforms/ToString.java    |  198 +++
 .../org/apache/beam/sdk/transforms/Top.java     |   27 +-
 .../beam/sdk/transforms/join/CoGbkResult.java   |   35 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   20 -
 .../sdk/transforms/reflect/DoFnInvokers.java    |  142 +--
 .../sdk/transforms/reflect/DoFnSignature.java   |   15 +-
 .../windowing/AfterDelayFromFirstElement.java   |    2 +-
 .../sdk/transforms/windowing/AfterPane.java     |    2 +-
 .../sdk/transforms/windowing/GlobalWindow.java  |    6 +
 .../transforms/windowing/IntervalWindow.java    |    4 +-
 .../beam/sdk/transforms/windowing/Window.java   |    3 +-
 .../org/apache/beam/sdk/util/CoderUtils.java    |   28 +-
 .../beam/sdk/util/CombineContextFactory.java    |   18 -
 .../org/apache/beam/sdk/util/DefaultBucket.java |  105 ++
 .../util/EmptyOnDeserializationThreadLocal.java |   39 +
 .../apache/beam/sdk/util/GcpProjectUtil.java    |    2 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |   36 +-
 .../org/apache/beam/sdk/util/NameUtils.java     |  162 +++
 .../org/apache/beam/sdk/util/PropertyNames.java |    1 +
 .../org/apache/beam/sdk/util/PubsubClient.java  |   28 +-
 .../apache/beam/sdk/util/PubsubGrpcClient.java  |    6 +-
 .../apache/beam/sdk/util/PubsubJsonClient.java  |    4 +-
 .../apache/beam/sdk/util/PubsubTestClient.java  |    6 +-
 .../org/apache/beam/sdk/util/StringUtils.java   |  100 --
 .../apache/beam/sdk/util/TimerInternals.java    |    4 +-
 .../org/apache/beam/sdk/util/WindowedValue.java |   23 +-
 .../beam/sdk/util/state/StateContexts.java      |    4 +-
 .../org/apache/beam/sdk/values/PValueBase.java  |    4 +-
 .../beam/sdk/values/TimestampedValue.java       |   10 +-
 .../sdk/AggregatorPipelineExtractorTest.java    |   16 +-
 .../apache/beam/sdk/coders/AvroCoderTest.java   |    7 +
 .../beam/sdk/coders/BigDecimalCoderTest.java    |   46 +-
 .../sdk/coders/BigEndianIntegerCoderTest.java   |    9 +
 .../beam/sdk/coders/BigEndianLongCoderTest.java |    9 +
 .../beam/sdk/coders/ByteArrayCoderTest.java     |    6 +
 .../apache/beam/sdk/coders/ByteCoderTest.java   |    9 +
 .../beam/sdk/coders/ByteStringCoderTest.java    |    8 +
 .../beam/sdk/coders/CoderRegistryTest.java      |    6 +
 .../org/apache/beam/sdk/coders/CoderTest.java   |    8 +
 .../beam/sdk/coders/CollectionCoderTest.java    |   16 +
 .../beam/sdk/coders/DefaultCoderTest.java       |    4 +-
 .../beam/sdk/coders/DelegateCoderTest.java      |   35 +-
 .../apache/beam/sdk/coders/DoubleCoderTest.java |    9 +
 .../beam/sdk/coders/DurationCoderTest.java      |   10 +
 .../beam/sdk/coders/InstantCoderTest.java       |    9 +
 .../beam/sdk/coders/IterableCoderTest.java      |   27 +-
 .../apache/beam/sdk/coders/JAXBCoderTest.java   |   26 +-
 .../org/apache/beam/sdk/coders/KvCoderTest.java |   29 +
 .../beam/sdk/coders/LengthPrefixCoderTest.java  |  129 ++
 .../apache/beam/sdk/coders/ListCoderTest.java   |   16 +-
 .../apache/beam/sdk/coders/MapCoderTest.java    |   21 +-
 .../beam/sdk/coders/NullableCoderTest.java      |   12 +
 .../beam/sdk/coders/SerializableCoderTest.java  |    9 +
 .../apache/beam/sdk/coders/SetCoderTest.java    |   16 +
 .../beam/sdk/coders/StandardCoderTest.java      |   40 +
 .../sdk/coders/StringDelegateCoderTest.java     |   11 +
 .../beam/sdk/coders/StringUtf8CoderTest.java    |    9 +
 .../beam/sdk/coders/TableRowJsonCoderTest.java  |    9 +
 .../sdk/coders/TextualIntegerCoderTest.java     |    9 +
 .../apache/beam/sdk/coders/VarIntCoderTest.java |    9 +
 .../beam/sdk/coders/VarLongCoderTest.java       |    9 +
 .../apache/beam/sdk/coders/VoidCoderTest.java   |   40 +
 .../beam/sdk/io/AvroIOGeneratedClassTest.java   |  285 -----
 .../apache/beam/sdk/io/AvroIOTransformTest.java |  324 +++++
 .../beam/sdk/io/CompressedSourceTest.java       |    5 -
 .../apache/beam/sdk/io/FileBasedSourceTest.java |    5 -
 .../org/apache/beam/sdk/io/FileSystemsTest.java |   33 +-
 .../beam/sdk/io/OffsetBasedSourceTest.java      |    5 -
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |   86 +-
 .../beam/sdk/io/PubsubUnboundedSinkTest.java    |   41 +-
 .../beam/sdk/io/PubsubUnboundedSourceTest.java  |   10 +-
 .../java/org/apache/beam/sdk/io/ReadTest.java   |    5 -
 .../java/org/apache/beam/sdk/io/WriteTest.java  |   10 +-
 .../apache/beam/sdk/options/GcpOptionsTest.java |    4 +-
 .../sdk/options/PipelineOptionsFactoryTest.java |    6 +-
 .../beam/sdk/options/PipelineOptionsTest.java   |    3 +-
 .../beam/sdk/options/ValueProviderTest.java     |   36 +-
 .../sdk/options/ValueProviderUtilsTest.java     |    2 +-
 .../sdk/runners/TransformHierarchyTest.java     |   30 +-
 .../apache/beam/sdk/testing/TestStreamTest.java |    5 +
 .../testing/ValueInSingleWindowCoderTest.java   |    7 +
 .../sdk/transforms/ApproximateUniqueTest.java   |  483 ++++----
 .../beam/sdk/transforms/CombineFnsTest.java     |   20 +-
 .../apache/beam/sdk/transforms/CombineTest.java |   99 +-
 .../apache/beam/sdk/transforms/CountTest.java   |    2 +-
 .../apache/beam/sdk/transforms/CreateTest.java  |    8 -
 .../DoFnDelegatingAggregatorTest.java           |  142 ---
 .../apache/beam/sdk/transforms/DoFnTest.java    |   15 +-
 .../beam/sdk/transforms/DoFnTesterTest.java     |    6 +-
 .../apache/beam/sdk/transforms/FlattenTest.java |   27 +
 .../apache/beam/sdk/transforms/KvSwapTest.java  |   13 +-
 .../org/apache/beam/sdk/transforms/MaxTest.java |   20 +-
 .../apache/beam/sdk/transforms/MeanTest.java    |    7 +-
 .../org/apache/beam/sdk/transforms/MinTest.java |   21 +-
 .../apache/beam/sdk/transforms/NoOpOldDoFn.java |   71 --
 .../beam/sdk/transforms/OldDoFnContextTest.java |   69 --
 .../apache/beam/sdk/transforms/OldDoFnTest.java |  188 ---
 .../apache/beam/sdk/transforms/ParDoTest.java   |   74 +-
 .../apache/beam/sdk/transforms/RegexTest.java   |  127 +-
 .../apache/beam/sdk/transforms/SampleTest.java  |  405 ++++---
 .../beam/sdk/transforms/SimpleStatsFnsTest.java |   36 +-
 .../org/apache/beam/sdk/transforms/SumTest.java |   24 +-
 .../beam/sdk/transforms/ToStringTest.java       |  125 ++
 .../org/apache/beam/sdk/transforms/TopTest.java |   13 +-
 .../apache/beam/sdk/transforms/ViewTest.java    |    2 +-
 .../transforms/join/CoGbkResultCoderTest.java   |   10 +-
 .../sdk/transforms/join/UnionCoderTest.java     |   24 +-
 .../transforms/reflect/DoFnInvokersTest.java    |   42 -
 .../transforms/windowing/GlobalWindowTest.java  |   64 +
 .../apache/beam/sdk/util/CombineFnUtilTest.java |    8 +-
 .../apache/beam/sdk/util/DefaultBucketTest.java |  112 ++
 .../org/apache/beam/sdk/util/GcsUtilTest.java   |   56 +
 .../org/apache/beam/sdk/util/NameUtilsTest.java |  177 +++
 .../beam/sdk/util/PubsubGrpcClientTest.java     |    8 +-
 .../beam/sdk/util/PubsubJsonClientTest.java     |    3 +-
 .../beam/sdk/util/PubsubTestClientTest.java     |    4 +-
 .../beam/sdk/util/SerializableUtilsTest.java    |    4 +-
 .../apache/beam/sdk/util/StringUtilsTest.java   |  100 --
 .../beam/sdk/util/TimerInternalsTest.java       |    5 +
 .../beam/sdk/util/ValueWithRecordIdTest.java    |   34 +
 .../apache/beam/sdk/util/WindowedValueTest.java |   23 +
 .../util/state/InMemoryStateInternalsTest.java  |    2 +-
 .../beam/sdk/util/state/StateTagTest.java       |   11 +-
 .../beam/sdk/values/TimestampedValueTest.java   |   19 +-
 sdks/java/extensions/join-library/README.md     |   10 -
 sdks/java/extensions/join-library/pom.xml       |    2 +-
 sdks/java/extensions/pom.xml                    |    2 +-
 sdks/java/extensions/sorter/pom.xml             |    2 +-
 sdks/java/io/elasticsearch/pom.xml              |  175 +++
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |  819 +++++++++++++
 .../beam/sdk/io/elasticsearch/package-info.java |   20 +
 .../elasticsearch/ElasticSearchIOTestUtils.java |  129 ++
 .../io/elasticsearch/ElasticsearchIOTest.java   |  358 ++++++
 sdks/java/io/google-cloud-platform/pom.xml      |    2 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    |  308 +++--
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |   16 +-
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |   76 +-
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |    8 -
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 -
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |  205 +++-
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  141 +++
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |    3 +-
 .../sdk/io/gcp/bigtable/BigtableIOTest.java     |    5 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 +
 sdks/java/io/hdfs/pom.xml                       |    2 +-
 .../beam/sdk/io/hdfs/AvroWrapperCoder.java      |    4 +-
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java |    5 -
 .../apache/beam/sdk/io/hdfs/WritableCoder.java  |    4 +-
 .../beam/sdk/io/hdfs/AvroWrapperCoderTest.java  |    1 -
 sdks/java/io/jdbc/pom.xml                       |    2 +-
 sdks/java/io/jms/pom.xml                        |    2 +-
 sdks/java/io/kafka/pom.xml                      |    2 +-
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   |    7 +-
 .../beam/sdk/io/kafka/KafkaRecordCoder.java     |    4 +-
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |    9 +-
 .../beam/sdk/io/kafka/KafkaRecordCoderTest.java |   34 +
 sdks/java/io/kinesis/pom.xml                    |    2 +-
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |    4 +-
 .../beam/sdk/io/kinesis/package-info.java       |    2 +-
 sdks/java/io/mongodb/pom.xml                    |    2 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |    5 -
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   |    5 -
 sdks/java/io/mqtt/pom.xml                       |  152 +++
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  588 +++++++++
 .../apache/beam/sdk/io/mqtt/package-info.java   |   22 +
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java |  197 +++
 sdks/java/io/pom.xml                            |    4 +-
 sdks/java/java8tests/pom.xml                    |    2 +-
 .../maven-archetypes/examples-java8/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   19 +-
 sdks/java/maven-archetypes/examples/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   19 +-
 sdks/java/maven-archetypes/pom.xml              |    2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    4 +-
 .../resources/projects/basic/reference/pom.xml  |    4 +-
 sdks/java/pom.xml                               |    2 +-
 sdks/pom.xml                                    |   15 +-
 465 files changed, 13882 insertions(+), 8046 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/c2859a55/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/beam/blob/c2859a55/sdks/pom.xml
----------------------------------------------------------------------
diff --cc sdks/pom.xml
index e2dff16,06dbb9b..45d8df0
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@@ -73,9 -66,15 +67,16 @@@
              </execution>
            </executions>
          </plugin>
 +
        </plugins>
      </pluginManagement>
+ 
+     <plugins>
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-checkstyle-plugin</artifactId>
+       </plugin>
+     </plugins>
    </build>
  
  </project>


[15/50] beam git commit: This closes #1838

Posted by dh...@apache.org.
This closes #1838


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7402d760
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7402d760
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7402d760

Branch: refs/heads/python-sdk
Commit: 7402d760004f8e7f79ca122c5fd26ec4f35dbdbe
Parents: e77de7c f9d1d68
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jan 24 18:00:43 2017 -0800
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jan 24 18:00:43 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 40 +++++--------------
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |  9 ++---
 .../io/gcp/bigquery/BigQueryServicesImpl.java   | 23 ++++-------
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 41 ++++++++------------
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |  3 +-
 5 files changed, 40 insertions(+), 76 deletions(-)
----------------------------------------------------------------------



[33/50] beam git commit: This closes #1853

Posted by dh...@apache.org.
This closes #1853


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/717b415f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/717b415f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/717b415f

Branch: refs/heads/python-sdk
Commit: 717b415f1a6024f1630d922cbd357c894452af40
Parents: b4726d0 e591d8b
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 09:38:05 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 09:38:05 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[30/50] beam git commit: PackageUtil: preserve classpath ordering when uploading

Posted by dh...@apache.org.
PackageUtil: preserve classpath ordering when uploading

Also add a test


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b0b91c84
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b0b91c84
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b0b91c84

Branch: refs/heads/python-sdk
Commit: b0b91c842e09aa7fdb5c1dc216574daa43b437ea
Parents: 23e2b91
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 22:15:59 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 22:16:22 2017 -0800

----------------------------------------------------------------------
 .../beam/runners/dataflow/util/PackageUtil.java | 11 +++++---
 .../runners/dataflow/util/PackageUtilTest.java  | 27 ++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b0b91c84/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index fa8c94d..685d48c 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -297,16 +297,21 @@ class PackageUtil {
     // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
     List<PackageAttributes> packageAttributes =
         new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
-    // Order package attributes in descending size order so that we upload the largest files first.
-    Collections.sort(packageAttributes, new PackageUploadOrder());
 
+    // Compute the returned list of DataflowPackage objects here so that they are returned in the
+    // same order as on the classpath.
     List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+    for (final PackageAttributes attributes : packageAttributes) {
+      packages.add(attributes.getDataflowPackage());
+    }
+
+    // Order package attributes in descending size order so that we upload the largest files first.
+    Collections.sort(packageAttributes, new PackageUploadOrder());
     final AtomicInteger numUploaded = new AtomicInteger(0);
     final AtomicInteger numCached = new AtomicInteger(0);
 
     List<ListenableFuture<?>> futures = new LinkedList<>();
     for (final PackageAttributes attributes : packageAttributes) {
-      packages.add(attributes.getDataflowPackage());
       futures.add(executorService.submit(new Runnable() {
         @Override
         public void run() {
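
The hunk above realizes a simple pattern: snapshot the caller-visible result list in classpath order first, then sort a private working copy by descending size so the largest uploads start earliest. A minimal self-contained sketch of the same pattern (the Item type and its fields are illustrative, not Beam APIs):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

public class OrderPreservingScheduler {

  static final class Item {
    final String name;
    final long sizeBytes;

    Item(String name, long sizeBytes) {
      this.name = name;
      this.sizeBytes = sizeBytes;
    }
  }

  /** Returns names in input (classpath) order while scheduling uploads largest-first. */
  static List<String> schedule(List<Item> classpathItems) {
    // 1. Fix the returned list in the original classpath order, before any sorting.
    List<String> results = new ArrayList<>(classpathItems.size());
    for (Item item : classpathItems) {
      results.add(item.name);
    }

    // 2. Sort a private working copy by descending size; this now affects only
    //    the upload schedule, not what the caller sees.
    List<Item> work = new LinkedList<>(classpathItems);
    work.sort(Comparator.comparingLong((Item i) -> i.sizeBytes).reversed());
    for (Item item : work) {
      // submit item for upload here, largest first
    }
    return results;
  }
}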

http://git-wip-us.apache.org/repos/asf/beam/blob/b0b91c84/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 3828415..800c5a9 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -19,6 +19,7 @@ package org.apache.beam.runners.dataflow.util;
 
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.startsWith;
 import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
@@ -59,6 +60,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.channels.Channels;
 import java.nio.channels.Pipe;
+import java.nio.channels.Pipe.SinkChannel;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -86,6 +88,8 @@ import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 import org.mockito.Mock;
 import org.mockito.MockitoAnnotations;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
 
 /** Tests for PackageUtil. */
 @RunWith(JUnit4.class)
@@ -265,6 +269,29 @@ public class PackageUtilTest {
   }
 
   @Test
+  public void testStagingPreservesClasspath() throws Exception {
+    File smallFile = makeFileWithContents("small.txt", "small");
+    File largeFile = makeFileWithContents("large.txt", "large contents");
+    when(mockGcsUtil.fileSize(any(GcsPath.class)))
+        .thenThrow(new FileNotFoundException("some/path"));
+    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+        .thenAnswer(new Answer<SinkChannel>() {
+          @Override
+          public SinkChannel answer(InvocationOnMock invocation) throws Throwable {
+            return Pipe.open().sink();
+          }
+        });
+
+    List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
+        ImmutableList.of(smallFile.getAbsolutePath(), largeFile.getAbsolutePath()),
+        STAGING_PATH, mockGcsUtil);
+    // Verify that the packages are returned small, then large, matching input order even though
+    // the large file would be uploaded first.
+    assertThat(targets.get(0).getName(), startsWith("small"));
+    assertThat(targets.get(1).getName(), startsWith("large"));
+  }
+
+  @Test
   public void testPackageUploadWithDirectorySucceeds() throws Exception {
     Pipe pipe = Pipe.open();
     File tmpDirectory = tmpFolder.newFolder("folder");


[49/50] beam git commit: Update pom.xml for sdks/python.

Posted by dh...@apache.org.
Update pom.xml for sdks/python.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f1b8679c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f1b8679c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f1b8679c

Branch: refs/heads/python-sdk
Commit: f1b8679c4af283d1e751043e2e765b7f295af0b2
Parents: c2859a5
Author: Ahmet Altay <al...@google.com>
Authored: Fri Jan 27 17:04:21 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Jan 27 17:04:21 2017 -0800

----------------------------------------------------------------------
 sdks/python/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f1b8679c/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index cc90969..615ddc5 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-parent</artifactId>
-    <version>0.5.0-incubating-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[11/50] beam git commit: This closes #1834

Posted by dh...@apache.org.
This closes #1834


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1148be6b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1148be6b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1148be6b

Branch: refs/heads/python-sdk
Commit: 1148be6bb17eae70c2753d33aebbac9f7943dd03
Parents: f2389ab bffe80d
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 15:51:19 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:51:19 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |  3 ++-
 .../sdk/testing/UsesUnboundedPCollections.java  | 23 ++++++++++++++++++++
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |  4 ++--
 3 files changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------



[34/50] beam git commit: Update the NOTICE year range

Posted by dh...@apache.org.
Update the NOTICE year range


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b97b3935
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b97b3935
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b97b3935

Branch: refs/heads/python-sdk
Commit: b97b3935c30cba2626333898ced32f82a6c54351
Parents: 717b415
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Thu Jan 26 14:46:37 2017 +0100
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Jan 26 10:16:49 2017 -0800

----------------------------------------------------------------------
 NOTICE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b97b3935/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 9b35cd4..0412683 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
 Apache Beam
-Copyright 2016 The Apache Software Foundation
+Copyright 2016-2017 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).


[22/50] beam git commit: This closes #1184

Posted by dh...@apache.org.
This closes #1184


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c5257837
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c5257837
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c5257837

Branch: refs/heads/python-sdk
Commit: c525783704e0cc47845df8cdec1715e1f1c74008
Parents: 979c937 3ecf7e7
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 11:03:05 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 11:03:05 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 +
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
 .../runners/dataflow/util/PackageUtilTest.java  |  42 ++-
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 +
 6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------



[04/50] beam git commit: [BEAM-1258] Improve logging in BigQueryIO.verifyTableEmpty().

Posted by dh...@apache.org.
[BEAM-1258] Improve logging in BigQueryIO.verifyTableEmpty().


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/5b6dd91d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/5b6dd91d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/5b6dd91d

Branch: refs/heads/python-sdk
Commit: 5b6dd91d27ce73fa66db4d445b0ceb88f09971d8
Parents: cb6e0a8
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 14:52:30 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 32 +++++++++++---------
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |  2 ++
 2 files changed, 19 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/5b6dd91d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 701374d..aff199a 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1863,25 +1863,27 @@ public class BigQueryIO {
             writeDisposition, validate, testServices);
       }
 
-      private static void verifyTableEmpty(
+      private static void verifyTableNotExistOrEmpty(
           DatasetService datasetService,
-          TableReference table) {
+          TableReference tableRef) {
         try {
-          boolean isEmpty = datasetService.isTableEmpty(
-              table.getProjectId(), table.getDatasetId(), table.getTableId());
-          if (!isEmpty) {
-            throw new IllegalArgumentException(
-                "BigQuery table is not empty: " + BigQueryIO.toTableSpec(table));
+          if (datasetService.getTable(
+              tableRef.getProjectId(),
+              tableRef.getDatasetId(),
+              tableRef.getTableId()) != null) {
+            checkState(
+                datasetService.isTableEmpty(
+                    tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
+                "BigQuery table is not empty: %s.",
+                BigQueryIO.toTableSpec(tableRef));
           }
         } catch (IOException | InterruptedException e) {
-          ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-          if (e instanceof IOException && errorExtractor.itemNotFound((IOException) e)) {
-            // Nothing to do. If the table does not exist, it is considered empty.
-          } else {
-            throw new RuntimeException(
-                "unable to confirm BigQuery table emptiness for table "
-                    + BigQueryIO.toTableSpec(table), e);
+          if (e instanceof InterruptedException) {
+            Thread.currentThread().interrupt();
           }
+          throw new RuntimeException(
+              "unable to confirm BigQuery table emptiness for table "
+                  + BigQueryIO.toTableSpec(tableRef), e);
         }
       }
 
@@ -1917,7 +1919,7 @@ public class BigQueryIO {
             verifyTablePresence(datasetService, table);
           }
           if (getWriteDisposition() == BigQueryIO.Write.WriteDisposition.WRITE_EMPTY) {
-            verifyTableEmpty(datasetService, table);
+            verifyTableNotExistOrEmpty(datasetService, table);
           }
         }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/5b6dd91d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
index 7173996..32cf46d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
@@ -136,6 +136,8 @@ interface BigQueryServices extends Serializable {
 
     /**
      * Returns true if the table is empty.
+     *
+     * @throws IOException if the table is not found.
      */
     boolean isTableEmpty(String projectId, String datasetId, String tableId)
         throws IOException, InterruptedException;
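
The replacement above also demonstrates a general error-handling rule: when a catch block wraps an InterruptedException in an unchecked exception, it should first restore the thread's interrupt status. A generic sketch of the pattern (the TableService interface here is a placeholder, not Beam's DatasetService):

import java.io.IOException;

public class EmptinessCheck {

  interface TableService {
    Object getTable(String tableId) throws IOException, InterruptedException;
    boolean isTableEmpty(String tableId) throws IOException, InterruptedException;
  }

  static void verifyNotExistOrEmpty(TableService service, String tableId) {
    try {
      // A missing table is acceptable; only an existing, non-empty table fails.
      if (service.getTable(tableId) != null && !service.isTableEmpty(tableId)) {
        throw new IllegalArgumentException("Table is not empty: " + tableId);
      }
    } catch (IOException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        // Re-set the interrupt flag so callers further up the stack still observe it.
        Thread.currentThread().interrupt();
      }
      throw new RuntimeException(
          "unable to confirm table emptiness for " + tableId, e);
    }
  }
}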


[45/50] beam git commit: Update Beam version in the Maven archetypes

Posted by dh...@apache.org.
Update Beam version in the Maven archetypes


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/9c118156
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/9c118156
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/9c118156

Branch: refs/heads/python-sdk
Commit: 9c1181563d89e604b899e5e945d5975359f42543
Parents: 4a29131
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:34:24 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:34:24 2017 +0100

----------------------------------------------------------------------
 .../examples-java8/src/main/resources/archetype-resources/pom.xml  | 2 +-
 .../examples/src/main/resources/archetype-resources/pom.xml        | 2 +-
 .../starter/src/main/resources/archetype-resources/pom.xml         | 2 +-
 .../starter/src/test/resources/projects/basic/reference/pom.xml    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
index 05cb797..55211ed 100644
--- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
@@ -27,7 +27,7 @@
   <packaging>jar</packaging>
 
   <properties>
-    <beam.version>0.5.0-SNAPSHOT</beam.version>
+    <beam.version>0.6.0-SNAPSHOT</beam.version>
   </properties>
 
   <build>

http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
index 74f08bf..654973c 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -27,7 +27,7 @@
   <packaging>jar</packaging>
 
   <properties>
-    <beam.version>0.5.0-SNAPSHOT</beam.version>
+    <beam.version>0.6.0-SNAPSHOT</beam.version>
   </properties>
 
   <repositories>

http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
index e7f1185..5d2a408 100644
--- a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
@@ -25,7 +25,7 @@
   <version>${version}</version>
 
   <properties>
-    <beam.version>0.5.0-SNAPSHOT</beam.version>
+    <beam.version>0.6.0-SNAPSHOT</beam.version>
   </properties>
 
   <repositories>

http://git-wip-us.apache.org/repos/asf/beam/blob/9c118156/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
index 871d194..1c666eb 100644
--- a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
@@ -25,7 +25,7 @@
   <version>0.1</version>
 
   <properties>
-    <beam.version>0.5.0-SNAPSHOT</beam.version>
+    <beam.version>0.6.0-SNAPSHOT</beam.version>
   </properties>
 
   <repositories>


[24/50] beam git commit: This closes #1846

Posted by dh...@apache.org.
This closes #1846


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/95beda69
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/95beda69
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/95beda69

Branch: refs/heads/python-sdk
Commit: 95beda69bff7dfe519422fd19916c7a851dadf55
Parents: c525783 f05c5d3
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 12:13:37 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 12:13:37 2017 -0800

----------------------------------------------------------------------
 examples/pom.xml | 14 +++++++++-----
 runners/pom.xml  | 14 +++++++++-----
 sdks/pom.xml     | 13 +++++++------
 3 files changed, 25 insertions(+), 16 deletions(-)
----------------------------------------------------------------------



[35/50] beam git commit: This closes #1852

Posted by dh...@apache.org.
This closes #1852


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/96377241
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/96377241
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/96377241

Branch: refs/heads/python-sdk
Commit: 9637724145d2defabc8cff0a3a825eaf9a32be6e
Parents: 717b415 b97b393
Author: Davor Bonaci <da...@google.com>
Authored: Thu Jan 26 10:16:57 2017 -0800
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Jan 26 10:16:57 2017 -0800

----------------------------------------------------------------------
 NOTICE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[07/50] beam git commit: This closes #1833: Removes ReduceFnExecutor interface

Posted by dh...@apache.org.
This closes #1833: Removes ReduceFnExecutor interface


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/11c3cd70
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/11c3cd70
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/11c3cd70

Branch: refs/heads/python-sdk
Commit: 11c3cd70b784650e8b60a5660449cfafdba84bbf
Parents: b333487 8989473
Author: Kenneth Knowles <kl...@google.com>
Authored: Tue Jan 24 13:48:23 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Tue Jan 24 13:48:23 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/runners/core/DoFnRunner.java    | 20 --------------------
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |  5 +----
 .../beam/runners/direct/ParDoEvaluator.java     |  2 --
 .../runners/spark/translation/DoFnFunction.java |  2 --
 .../spark/translation/MultiDoFnFunction.java    |  2 --
 5 files changed, 1 insertion(+), 30 deletions(-)
----------------------------------------------------------------------



[19/50] beam git commit: DataflowRunner: move source for properties into pom

Posted by dh...@apache.org.
DataflowRunner: move source for properties into pom

Also drop unused properties that are now in the root
pom.xml.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e95335f0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e95335f0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e95335f0

Branch: refs/heads/python-sdk
Commit: e95335f0f6a94ebe257d8d5ce82bb82205dca95c
Parents: bf9d454
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 07:50:57 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 10:40:13 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml                     | 4 ++--
 .../org/apache/beam/runners/dataflow/dataflow.properties       | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e95335f0/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 1d05193..eea5502 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,8 +33,8 @@
   <packaging>jar</packaging>
 
   <properties>
-    <timestamp>${maven.build.timestamp}</timestamp>
-    <maven.build.timestamp.format>yyyy-MM-dd HH:mm</maven.build.timestamp.format>
+    <dataflow.container_version>beam-master-20170120</dataflow.container_version>
+    <dataflow.environment_major_version>6</dataflow.environment_major_version>
   </properties>
 
   <build>

http://git-wip-us.apache.org/repos/asf/beam/blob/e95335f0/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties b/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
index 9976ed9..47e316c 100644
--- a/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
+++ b/runners/google-cloud-dataflow-java/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
@@ -16,8 +16,8 @@
 #
 # Dataflow runtime properties
 
-environment.major.version=6
+environment.major.version=${dataflow.environment_major_version}
 
-worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:beam-master-20170120
+worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:${dataflow.container_version}
 
-worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:beam-master-20170120
+worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:${dataflow.container_version}


[40/50] beam git commit: Refactored existing code. Added iterable and KV. Changed from element to of.

Posted by dh...@apache.org.
Refactored existing code. Added iterable and KV. Changed from element to of.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e01ce864
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e01ce864
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e01ce864

Branch: refs/heads/python-sdk
Commit: e01ce864edf551afefe861041541bb2a05340a08
Parents: 83f8c46
Author: Jesse Anderson <je...@smokinghand.com>
Authored: Tue Jan 24 08:37:33 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 22:52:09 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/sdk/transforms/ToString.java    | 168 ++++++++++++++++---
 .../java/org/apache/beam/sdk/io/WriteTest.java  |   2 +-
 .../beam/sdk/transforms/ToStringTest.java       |  86 ++++++++--
 3 files changed, 226 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
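
A minimal usage sketch of the reworked API introduced by this commit (the pipeline setup and element values are illustrative; the transform and delimiter signatures match the diff below):

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.transforms.ToString;
    import org.apache.beam.sdk.values.KV;
    import org.apache.beam.sdk.values.PCollection;

    public class ToStringExamples {
      public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

        // ToString.of(): one String per element, via Object#toString.
        PCollection<String> strings =
            p.apply("Longs", Create.of(1L, 2L, 3L)).apply(ToString.of());

        // ToString.kv(delimiter): "key<delimiter>value" per element.
        PCollection<String> tsv =
            p.apply("KVs", Create.of(KV.of("one", 1), KV.of("two", 2)))
                .apply(ToString.kv("\t"));

        p.run();
      }
    }

ToString.iterable() follows the same shape for PCollection<Iterable<?>> inputs, joining the items with the delimiter and no trailing separator.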


http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
index ef49267..d5c9784 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ToString.java
@@ -18,51 +18,181 @@
 
 package org.apache.beam.sdk.transforms;
 
+import java.util.Iterator;
+
+import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 
 /**
- * {@link PTransform PTransforms} for converting a {@link PCollection PCollection&lt;T&gt;} to a
- * {@link PCollection PCollection&lt;String&gt;}.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<Long> longs = ...;
- * PCollection<String> strings = longs.apply(ToString.<Long>element());
- * } </pre>
- *
+ * {@link PTransform PTransforms} for converting a {@link PCollection PCollection&lt;?&gt;},
+ * {@link PCollection PCollection&lt;KV&lt;?,?&gt;&gt;}, or
+ * {@link PCollection PCollection&lt;Iterable&lt;?&gt;&gt;}
+ * to a {@link PCollection PCollection&lt;String&gt;}.
  *
  * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your own
  * {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
  */
 public final class ToString {
+  private ToString() {
+    // do not instantiate
+  }
 
   /**
    * Returns a {@code PTransform<PCollection, PCollection<String>>} which transforms each
    * element of the input {@link PCollection} to a {@link String} using the
    * {@link Object#toString} method.
    */
-  public static PTransform<PCollection<?>, PCollection<String>> element() {
-    return new Default();
+  public static PTransform<PCollection<?>, PCollection<String>> of() {
+    return new SimpleToString();
   }
 
-  private ToString() {
+  /**
+   * Returns a {@code PTransform<PCollection<KV<?,?>>, PCollection<String>>} which transforms each
+   * element of the input {@link PCollection} to a {@link String} by using the
+   * {@link Object#toString} on the key followed by a "," followed by the {@link Object#toString}
+   * of the value.
+   */
+  public static PTransform<PCollection<? extends KV<?, ?>>, PCollection<String>> kv() {
+    return kv(",");
+  }
+
+  /**
+   * Returns a {@code PTransform<PCollection<KV<?,?>>, PCollection<String>>} which transforms each
+   * element of the input {@link PCollection} to a {@link String} by using the
+   * {@link Object#toString} on the key followed by the specified delimiter followed by the
+   * {@link Object#toString} of the value.
+   * @param delimiter The delimiter to put between the key and value
+   */
+  public static PTransform<PCollection<? extends KV<?, ?>>,
+          PCollection<String>> kv(String delimiter) {
+    return new KVToString(delimiter);
+  }
+
+  /**
+   * Returns a {@code PTransform<PCollection<Iterable<?>>, PCollection<String>>} which
+   * transforms each item in the iterable of the input {@link PCollection} to a {@link String}
+   * using the {@link Object#toString} method followed by a "," until
+   * the last element in the iterable. There is no trailing delimiter.
+   */
+  public static PTransform<PCollection<? extends Iterable<?>>, PCollection<String>> iterable() {
+    return iterable(",");
+  }
+
+  /**
+   * Returns a {@code PTransform<PCollection<Iterable<?>>, PCollection<String>>} which
+   * transforms each item in the iterable of the input {@link PCollection} to a {@link String}
+   * using the {@link Object#toString} method followed by the specified delimiter until
+   * the last element in the iterable. There is no trailing delimiter.
+   * @param delimiter The delimiter to put between the items in the iterable.
+   */
+  public static PTransform<PCollection<? extends Iterable<?>>,
+          PCollection<String>> iterable(String delimiter) {
+    return new IterablesToString(delimiter);
   }
 
   /**
    * A {@link PTransform} that converts a {@code PCollection} to a {@code PCollection<String>}
    * using the {@link  Object#toString} method.
+   *
+   * <p>Example of use:
+   * <pre>{@code
+   * PCollection<Long> longs = ...;
+   * PCollection<String> strings = longs.apply(ToString.of());
+   * }</pre>
+   *
+   *
+   * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your own
+   * {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
    */
-  private static final class Default extends PTransform<PCollection<?>, PCollection<String>> {
+  private static final class SimpleToString extends
+          PTransform<PCollection<?>, PCollection<String>> {
     @Override
     public PCollection<String> expand(PCollection<?> input) {
-      return input.apply(MapElements.via(new ToStringFunction<>()));
+      return input.apply(MapElements.via(new SimpleFunction<Object, String>() {
+        @Override
+        public String apply(Object input) {
+          return input.toString();
+        }
+      }));
     }
+  }
+
+  /**
+   * A {@link PTransform} that converts a {@code PCollection} of {@code KV} to a
+   * {@code PCollection<String>} using the {@link  Object#toString} method for
+   * the key and value and an optional delimiter.
+   *
+   * <p>Example of use:
+   * <pre>{@code
+   * PCollection<KV<String, Long>> nameToLong = ...;
+   * PCollection<String> strings = nameToLong.apply(ToString.kv());
+   * }</pre>
+   *
+   *
+   * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your
+   * own {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
+   */
+  private static final class KVToString extends
+          PTransform<PCollection<? extends KV<?, ?>>, PCollection<String>> {
+    private final String delimiter;
+
+    public KVToString(String delimiter) {
+      this.delimiter = delimiter;
+    }
+
+    @Override
+    public PCollection<String> expand(PCollection<? extends KV<?, ?>> input) {
+      return input.apply(MapElements.via(new SimpleFunction<KV<?, ?>, String>() {
+        @Override
+        public String apply(KV<?, ?> input) {
+          return input.getKey().toString() + delimiter + input.getValue().toString();
+        }
+      }));
+    }
+  }
+
+  /**
+   * A {@link PTransform} that converts a {@code PCollection} of {@link Iterable} to a
+   * {@code PCollection<String>} using the {@link  Object#toString} method and
+   * an optional delimiter.
+   *
+   * <p>Example of use:
+   * <pre>{@code
+   * PCollection<Iterable<Long>> longs = ...;
+   * PCollection<String> strings = longs.apply(ToString.iterable());
+   * }</pre>
+   *
+   *
+   * <p><b>Note</b>: For any custom string conversion and formatting, we recommend applying your
+   * own {@link SerializableFunction} using {@link MapElements#via(SerializableFunction)}
+   */
+  private static final class IterablesToString extends
+          PTransform<PCollection<? extends Iterable<?>>, PCollection<String>> {
+    private final String delimiter;
+
+    public IterablesToString(String delimiter) {
+      this.delimiter = delimiter;
+    }
+
+    @Override
+    public PCollection<String> expand(PCollection<? extends Iterable<?>> input) {
+      return input.apply(MapElements.via(new SimpleFunction<Iterable<?>, String>() {
+        @Override
+        public String apply(Iterable<?> input) {
+          StringBuilder builder = new StringBuilder();
+          Iterator<?> iterator = input.iterator();
+
+          while (iterator.hasNext()) {
+            builder.append(iterator.next().toString());
+
+            if (iterator.hasNext()) {
+              builder.append(delimiter);
+            }
+          }
 
-    private static class ToStringFunction<T> extends SimpleFunction<T, String> {
-      @Override
-      public String apply(T input) {
-        return input.toString();
-      }
+          return builder.toString();
+        }
+      }));
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
index 9772b9b..f81cc0c 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteTest.java
@@ -297,7 +297,7 @@ public class WriteTest {
   @Test
   public void testWriteUnbounded() {
     PCollection<String> unbounded = p.apply(CountingInput.unbounded())
-        .apply(ToString.element());
+        .apply(ToString.of());
 
     TestSink sink = new TestSink();
     thrown.expect(IllegalArgumentException.class);

http://git-wip-us.apache.org/repos/asf/beam/blob/e01ce864/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
index e5c9f05..ab984f1 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ToStringTest.java
@@ -20,10 +20,13 @@ package org.apache.beam.sdk.transforms;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
+
+import org.apache.beam.sdk.coders.IterableCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.RunnableOnService;
 import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.junit.Rule;
 import org.junit.Test;
@@ -41,19 +44,82 @@ public class ToStringTest {
 
   @Test
   @Category(RunnableOnService.class)
-  public void testToStringElement() {
+  public void testToStringOf() {
     Integer[] ints = {1, 2, 3, 4, 5};
+    String[] strings = {"1", "2", "3", "4", "5"};
     PCollection<Integer> input = p.apply(Create.of(Arrays.asList(ints)));
-    PCollection<String> output = input.apply(ToString.<Integer>element());
-    PAssert.that(output).containsInAnyOrder(toStringList(ints));
+    PCollection<String> output = input.apply(ToString.of());
+    PAssert.that(output).containsInAnyOrder(strings);
+    p.run();
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testToStringKV() {
+    ArrayList<KV<String, Integer>> kvs = new ArrayList<>();
+    kvs.add(KV.of("one", 1));
+    kvs.add(KV.of("two", 2));
+
+    ArrayList<String> expected = new ArrayList<>();
+    expected.add("one,1");
+    expected.add("two,2");
+
+    PCollection<KV<String, Integer>> input = p.apply(Create.of(kvs));
+    PCollection<String> output = input.apply(ToString.kv());
+    PAssert.that(output).containsInAnyOrder(expected);
     p.run();
   }
 
-  private List<String> toStringList(Object[] ints) {
-    List<String> ll = new ArrayList<>(ints.length);
-    for (Object i : ints) {
-      ll.add(i.toString());
-    }
-    return ll;
+  @Test
+  @Category(RunnableOnService.class)
+  public void testToStringKVWithDelimiter() {
+    ArrayList<KV<String, Integer>> kvs = new ArrayList<>();
+    kvs.add(KV.of("one", 1));
+    kvs.add(KV.of("two", 2));
+
+    ArrayList<String> expected = new ArrayList<>();
+    expected.add("one\t1");
+    expected.add("two\t2");
+
+    PCollection<KV<String, Integer>> input = p.apply(Create.of(kvs));
+    PCollection<String> output = input.apply(ToString.kv("\t"));
+    PAssert.that(output).containsInAnyOrder(expected);
+    p.run();
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testToStringIterable() {
+    ArrayList<Iterable<String>> iterables = new ArrayList<>();
+    iterables.add(Arrays.asList(new String[]{"one", "two", "three"}));
+    iterables.add(Arrays.asList(new String[]{"four", "five", "six"}));
+
+    ArrayList<String> expected = new ArrayList<>();
+    expected.add("one,two,three");
+    expected.add("four,five,six");
+
+    PCollection<Iterable<String>> input = p.apply(Create.of(iterables)
+            .withCoder(IterableCoder.of(StringUtf8Coder.of())));
+    PCollection<String> output = input.apply(ToString.iterable());
+    PAssert.that(output).containsInAnyOrder(expected);
+    p.run();
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testToStringIterableWithDelimiter() {
+    ArrayList<Iterable<String>> iterables = new ArrayList<>();
+    iterables.add(Arrays.asList(new String[]{"one", "two", "three"}));
+    iterables.add(Arrays.asList(new String[]{"four", "five", "six"}));
+
+    ArrayList<String> expected = new ArrayList<>();
+    expected.add("one\ttwo\tthree");
+    expected.add("four\tfive\tsix");
+
+    PCollection<Iterable<String>> input = p.apply(Create.of(iterables)
+            .withCoder(IterableCoder.of(StringUtf8Coder.of())));
+    PCollection<String> output = input.apply(ToString.iterable("\t"));
+    PAssert.that(output).containsInAnyOrder(expected);
+    p.run();
   }
 }


[43/50] beam git commit: [maven-release-plugin] prepare branch release-0.5.0

Posted by dh...@apache.org.
[maven-release-plugin] prepare branch release-0.5.0


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/da2dff90
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/da2dff90
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/da2dff90

Branch: refs/heads/python-sdk
Commit: da2dff90cb10e5881496ffd4efb368ba84544174
Parents: 47304d1
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:27:06 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:27:06 2017 +0100

----------------------------------------------------------------------
 pom.xml | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/da2dff90/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a96275c..2281f67 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
     <connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
     <url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
+    <tag>release-0.5.0</tag>
   </scm>
 
   <issueManagement>


[42/50] beam git commit: BEAM-980 Support configuration of Apex DAG through properties file.

Posted by dh...@apache.org.
BEAM-980 Support configuration of Apex DAG through properties file.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/31c63cb8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/31c63cb8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/31c63cb8

Branch: refs/heads/python-sdk
Commit: 31c63cb8c14ea71ed45376d19b4fd9f285d80763
Parents: 1c6e667
Author: Thomas Weise <th...@apache.org>
Authored: Wed Jan 25 22:22:36 2017 -0800
Committer: Thomas Weise <th...@apache.org>
Committed: Thu Jan 26 22:54:00 2017 -0800

----------------------------------------------------------------------
 .../beam/runners/apex/ApexPipelineOptions.java  |  7 +-
 .../apache/beam/runners/apex/ApexRunner.java    | 43 ++++++++---
 .../beam/runners/apex/ApexYarnLauncher.java     | 23 +++++-
 .../beam/runners/apex/ApexRunnerTest.java       | 75 ++++++++++++++++++++
 .../beam/runners/apex/ApexYarnLauncherTest.java |  9 ++-
 .../test/resources/beam-runners-apex.properties | 20 ++++++
 6 files changed, 161 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
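
A usage sketch of the new option (the properties file path below is hypothetical; the default, as the diff shows, is resolved from the classpath):

    import org.apache.beam.runners.apex.ApexPipelineOptions;
    import org.apache.beam.runners.apex.ApexRunner;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class ApexConfigFileExample {
      public static void main(String[] args) {
        ApexPipelineOptions options =
            PipelineOptionsFactory.fromArgs(args).as(ApexPipelineOptions.class);
        options.setRunner(ApexRunner.class);

        // Default is "classpath:/beam-runners-apex.properties"; a plain path is
        // resolved as a local file. This particular path is hypothetical.
        options.setConfigFile("/tmp/my-apex-engine.properties");

        // The file carries Apex engine settings, e.g. (from the test resource):
        //   dt.operator.<operator-name>.attr.MEMORY_MB=64
        Pipeline p = Pipeline.create(options);
        // transforms omitted in this sketch
        p.run();
      }
    }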


http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
index 54fdf76..f37e874 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java
@@ -56,5 +56,10 @@ public interface ApexPipelineOptions extends PipelineOptions, java.io.Serializab
   @Default.Long(0)
   long getRunMillis();
 
-}
+  @Description("configuration properties file for the Apex engine")
+  void setConfigFile(String name);
+
+  @Default.String("classpath:/beam-runners-apex.properties")
+  String getConfigFile();
 
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
index f12ebef..e220e6c 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java
@@ -22,10 +22,16 @@ import com.datatorrent.api.Context.DAGContext;
 import com.datatorrent.api.DAG;
 import com.datatorrent.api.StreamingApplication;
 import com.google.common.base.Throwables;
+
+import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Properties;
 import java.util.concurrent.atomic.AtomicReference;
 import org.apache.apex.api.EmbeddedAppLauncher;
 import org.apache.apex.api.Launcher;
@@ -64,6 +70,7 @@ import org.apache.hadoop.conf.Configuration;
 public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
 
   private final ApexPipelineOptions options;
+  public static final String CLASSPATH_SCHEME = "classpath";
 
   /**
    * TODO: this isn't thread safe and may cause issues when tests run in parallel
@@ -126,6 +133,31 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
       }
     };
 
+    Properties configProperties = new Properties();
+    try {
+      if (options.getConfigFile() != null) {
+        URI configURL = new URI(options.getConfigFile());
+        if (CLASSPATH_SCHEME.equals(configURL.getScheme())) {
+          InputStream is = this.getClass().getResourceAsStream(configURL.getPath());
+          if (is != null) {
+            configProperties.load(is);
+            is.close();
+          }
+        } else {
+          if (!configURL.isAbsolute()) {
+            // resolve as local file name
+            File f = new File(options.getConfigFile());
+            configURL = f.toURI();
+          }
+          try (InputStream is = configURL.toURL().openStream()) {
+            configProperties.load(is);
+          }
+        }
+      }
+    } catch (IOException | URISyntaxException ex) {
+      throw new RuntimeException("Error loading properties", ex);
+    }
+
     if (options.isEmbeddedExecution()) {
       Launcher<AppHandle> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
       Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
@@ -135,6 +167,7 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
         launchAttributes.put(EmbeddedAppLauncher.HEARTBEAT_MONITORING, false);
       }
       Configuration conf = new Configuration(false);
+      ApexYarnLauncher.addProperties(conf, configProperties);
       try {
         ApexRunner.ASSERTION_ERROR.set(null);
         AppHandle apexAppResult = launcher.launchApp(apexApp, conf, launchAttributes);
@@ -146,7 +179,7 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
     } else {
       try {
         ApexYarnLauncher yarnLauncher = new ApexYarnLauncher();
-        AppHandle apexAppResult = yarnLauncher.launchApp(apexApp);
+        AppHandle apexAppResult = yarnLauncher.launchApp(apexApp, configProperties);
         return new ApexRunnerResult(apexDAG.get(), apexAppResult);
       } catch (IOException e) {
         throw new RuntimeException("Failed to launch the application on YARN.", e);
@@ -155,14 +188,6 @@ public class ApexRunner extends PipelineRunner<ApexRunnerResult> {
 
   }
 
-  private static class IdentityFn<T> extends DoFn<T, T> {
-    private static final long serialVersionUID = 1L;
-    @ProcessElement
-    public void processElement(ProcessContext c) {
-      c.output(c.element());
-    }
-  }
-
 ////////////////////////////////////////////
 // Adapted from FlinkRunner for View support
 

http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
index a2d88f4..6bc42f0 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
@@ -52,6 +52,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.jar.JarFile;
 import java.util.jar.Manifest;
@@ -80,7 +81,8 @@ import org.slf4j.LoggerFactory;
 public class ApexYarnLauncher {
   private static final Logger LOG = LoggerFactory.getLogger(ApexYarnLauncher.class);
 
-  public AppHandle launchApp(StreamingApplication app) throws IOException {
+  public AppHandle launchApp(StreamingApplication app, Properties configProperties)
+      throws IOException {
 
     List<File> jarsToShip = getYarnDeployDependencies();
     StringBuilder classpath = new StringBuilder();
@@ -103,7 +105,7 @@ public class ApexYarnLauncher {
 
     Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
     launchAttributes.put(YarnAppLauncher.LIB_JARS, classpath.toString().replace(':', ','));
-    LaunchParams lp = new LaunchParams(dag, launchAttributes);
+    LaunchParams lp = new LaunchParams(dag, launchAttributes, configProperties);
     lp.cmd = "hadoop " + ApexYarnLauncher.class.getName();
     HashMap<String, String> env = new HashMap<>();
     env.put("HADOOP_USER_CLASSPATH_FIRST", "1");
@@ -292,6 +294,18 @@ public class ApexYarnLauncher {
   }
 
   /**
+   * Transfer the properties to the configuration object.
+   * @param conf the Hadoop configuration to populate
+   * @param props the properties to copy into the configuration
+   */
+  public static void addProperties(Configuration conf, Properties props) {
+    for (final String propertyName : props.stringPropertyNames()) {
+      String propertyValue = props.getProperty(propertyName);
+      conf.set(propertyName, propertyValue);
+    }
+  }
+
+  /**
    * The main method expects the serialized DAG and will launch the YARN application.
    * @param args location of launch parameters
    * @throws IOException when parameters cannot be read
@@ -309,6 +323,7 @@ public class ApexYarnLauncher {
       }
     };
     Configuration conf = new Configuration(); // configuration from Hadoop client
+    addProperties(conf, params.configProperties);
     AppHandle appHandle = params.getApexLauncher().launchApp(apexApp, conf,
         params.launchAttributes);
     if (appHandle == null) {
@@ -327,12 +342,14 @@ public class ApexYarnLauncher {
     private static final long serialVersionUID = 1L;
     private final DAG dag;
     private final Attribute.AttributeMap launchAttributes;
+    private final Properties configProperties;
     private HashMap<String, String> env;
     private String cmd;
 
-    protected LaunchParams(DAG dag, AttributeMap launchAttributes) {
+    protected LaunchParams(DAG dag, AttributeMap launchAttributes, Properties configProperties) {
       this.dag = dag;
       this.launchAttributes = launchAttributes;
+      this.configProperties = configProperties;
     }
 
     protected Launcher<?> getApexLauncher() {

http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
new file mode 100644
index 0000000..436c959
--- /dev/null
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.apex;
+
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.OperatorMeta;
+import com.datatorrent.stram.engine.OperatorContext;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.Collections;
+import java.util.Properties;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Create;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for the Apex runner.
+ */
+public class ApexRunnerTest {
+
+  @Test
+  public void testConfigProperties() throws Exception {
+
+    String operName = "testProperties";
+    ApexPipelineOptions options = PipelineOptionsFactory.create()
+        .as(ApexPipelineOptions.class);
+    options.setRunner(ApexRunner.class);
+
+    // default configuration from class path
+    Pipeline p = Pipeline.create(options);
+    p.apply(operName, Create.of(Collections.emptyList()));
+    ApexRunnerResult result = (ApexRunnerResult) p.run();
+    result.cancel();
+
+    DAG dag = result.getApexDAG();
+    OperatorMeta t1Meta = dag.getOperatorMeta(operName);
+    Assert.assertNotNull(t1Meta);
+    Assert.assertEquals(new Integer(32), t1Meta.getValue(OperatorContext.MEMORY_MB));
+
+    File tmp = File.createTempFile("beam-runners-apex-", ".properties");
+    tmp.deleteOnExit();
+    Properties props = new Properties();
+    props.setProperty("dt.operator." + operName + ".attr.MEMORY_MB", "64");
+    try (FileOutputStream fos = new FileOutputStream(tmp)) {
+      props.store(fos, "");
+    }
+    options.setConfigFile(tmp.getAbsolutePath());
+    result = (ApexRunnerResult) p.run();
+    result.cancel();
+    tmp.delete();
+    dag = result.getApexDAG();
+    t1Meta = dag.getOperatorMeta(operName);
+    Assert.assertNotNull(t1Meta);
+    Assert.assertEquals(new Integer(64), t1Meta.getValue(OperatorContext.MEMORY_MB));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
index 986818b..6ffb091 100644
--- a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java
@@ -35,6 +35,7 @@ import java.nio.file.Files;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.jar.JarFile;
 
 import org.apache.apex.api.EmbeddedAppLauncher;
@@ -78,15 +79,17 @@ public class ApexYarnLauncherTest {
     Configuration conf = new Configuration(false);
     DAG dag = embeddedLauncher.prepareDAG(app, conf);
     Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
+    Properties configProperties = new Properties();
     ApexYarnLauncher launcher = new ApexYarnLauncher();
-    launcher.launchApp(new MockApexYarnLauncherParams(dag, launchAttributes));
+    launcher.launchApp(new MockApexYarnLauncherParams(dag, launchAttributes, configProperties));
   }
 
   private static class MockApexYarnLauncherParams extends  ApexYarnLauncher.LaunchParams {
     private static final long serialVersionUID = 1L;
 
-    public MockApexYarnLauncherParams(DAG dag, AttributeMap launchAttributes) {
-      super(dag, launchAttributes);
+    public MockApexYarnLauncherParams(DAG dag, AttributeMap launchAttributes,
+        Properties properties) {
+      super(dag, launchAttributes, properties);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/31c63cb8/runners/apex/src/test/resources/beam-runners-apex.properties
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/resources/beam-runners-apex.properties b/runners/apex/src/test/resources/beam-runners-apex.properties
new file mode 100644
index 0000000..48f8b05
--- /dev/null
+++ b/runners/apex/src/test/resources/beam-runners-apex.properties
@@ -0,0 +1,20 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# properties for unit test
+dt.operator.testProperties.attr.MEMORY_MB=32


[31/50] beam git commit: This closes #1849

Posted by dh...@apache.org.
This closes #1849


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b4726d08
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b4726d08
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b4726d08

Branch: refs/heads/python-sdk
Commit: b4726d088faa2ea74ba3a7e29a7559f737ccf4f2
Parents: 1c6e667 b0b91c8
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 07:15:54 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 07:15:54 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 +
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 352 ++++++++++++-------
 .../runners/dataflow/util/PackageUtilTest.java  |  69 +++-
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 +
 6 files changed, 312 insertions(+), 148 deletions(-)
----------------------------------------------------------------------



[08/50] beam git commit: [BEAM-1071] Allow for BigQueryIO to write tables with CREATE_NEVER disposition

Posted by dh...@apache.org.
[BEAM-1071] Allow for BigQueryIO to write tables with CREATE_NEVER disposition


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/dc369522
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/dc369522
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/dc369522

Branch: refs/heads/python-sdk
Commit: dc369522d1cfa46ae9058919d93229de05db2b6a
Parents: 11c3cd7
Author: Sam McVeety <sg...@google.com>
Authored: Mon Dec 12 18:47:20 2016 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 14:41:39 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 51 ++++++++++++++------
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 36 ++++++++++++++
 2 files changed, 71 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
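
Based on the new testCreateNeverWithStreaming test in the diff below, a minimal sketch of the combination this change allows. A streaming Pipeline p and the usual BigQuery imports are assumed; the table spec and row contents are illustrative, and with CREATE_NEVER the destination table must already exist, so no schema is supplied:

    PCollection<TableRow> rows =
        p.apply(CountingInput.unbounded())
            .apply(MapElements.via(
                new SimpleFunction<Long, TableRow>() {
                  @Override
                  public TableRow apply(Long input) {
                    return new TableRow().set("id", input);
                  }
                }))
            .setCoder(TableRowJsonCoder.of());

    rows.apply(BigQueryIO.Write
        .to("my-project:dataset.sometable")
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
        .withoutValidation());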


http://git-wip-us.apache.org/repos/asf/beam/blob/dc369522/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index aff199a..fa49f55 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1925,10 +1925,17 @@ public class BigQueryIO {
 
         if (input.isBounded() == PCollection.IsBounded.UNBOUNDED || tableRefFunction != null) {
           // We will use BigQuery's streaming write API -- validate supported dispositions.
-          checkArgument(
-              createDisposition != CreateDisposition.CREATE_NEVER,
-              "CreateDisposition.CREATE_NEVER is not supported for an unbounded PCollection or when"
-                  + " using a tablespec function.");
+          if (tableRefFunction != null) {
+            checkArgument(
+                createDisposition != CreateDisposition.CREATE_NEVER,
+                "CreateDisposition.CREATE_NEVER is not supported when using a tablespec"
+                + " function.");
+          }
+          if (jsonSchema == null) {
+            checkArgument(
+                createDisposition == CreateDisposition.CREATE_NEVER,
+                "CreateDisposition.CREATE_NEVER must be used if jsonSchema is null.");
+          }
 
           checkArgument(
               writeDisposition != WriteDisposition.WRITE_TRUNCATE,
@@ -1965,7 +1972,9 @@ public class BigQueryIO {
         if (input.isBounded() == IsBounded.UNBOUNDED || tableRefFunction != null) {
           return input.apply(
               new StreamWithDeDup(getTable(), tableRefFunction,
-                  NestedValueProvider.of(jsonSchema, new JsonSchemaToTableSchema()), bqServices));
+                  jsonSchema == null ? null : NestedValueProvider.of(
+                      jsonSchema, new JsonSchemaToTableSchema()),
+                  createDisposition, bqServices));
         }
 
         ValueProvider<TableReference> table = getTableWithDefaultProject(options);
@@ -2608,16 +2617,19 @@ public class BigQueryIO {
    * Implementation of DoFn to perform streaming BigQuery write.
    */
   @SystemDoFnInternal
-  private static class StreamingWriteFn
+  @VisibleForTesting
+  static class StreamingWriteFn
       extends DoFn<KV<ShardedKey<String>, TableRowInfo>, Void> {
     /** TableSchema in JSON. Use String to make the class Serializable. */
-    private final ValueProvider<String> jsonTableSchema;
+    @Nullable private final ValueProvider<String> jsonTableSchema;
 
     private final BigQueryServices bqServices;
 
     /** JsonTableRows to accumulate BigQuery rows in order to batch writes. */
     private transient Map<String, List<TableRow>> tableRows;
 
+    private final Write.CreateDisposition createDisposition;
+
     /** The list of unique ids for each BigQuery table row. */
     private transient Map<String, List<String>> uniqueIdsForTableRows;
 
@@ -2631,9 +2643,12 @@ public class BigQueryIO {
         createAggregator("ByteCount", Sum.ofLongs());
 
     /** Constructor. */
-    StreamingWriteFn(ValueProvider<TableSchema> schema, BigQueryServices bqServices) {
-      this.jsonTableSchema =
+    StreamingWriteFn(@Nullable ValueProvider<TableSchema> schema,
+        Write.CreateDisposition createDisposition,
+        BigQueryServices bqServices) {
+      this.jsonTableSchema = schema == null ? null :
           NestedValueProvider.of(schema, new TableSchemaToJsonSchema());
+      this.createDisposition = createDisposition;
       this.bqServices = checkNotNull(bqServices, "bqServices");
     }
 
@@ -2689,7 +2704,8 @@ public class BigQueryIO {
     public TableReference getOrCreateTable(BigQueryOptions options, String tableSpec)
         throws InterruptedException, IOException {
       TableReference tableReference = parseTableSpec(tableSpec);
-      if (!createdTables.contains(tableSpec)) {
+      if (createDisposition != Write.CreateDisposition.CREATE_NEVER
+          && !createdTables.contains(tableSpec)) {
         synchronized (createdTables) {
           // Another thread may have succeeded in creating the table in the meanwhile, so
           // check again. This check isn't needed for correctness, but we add it to prevent
@@ -2945,19 +2961,22 @@ public class BigQueryIO {
   * it leverages BigQuery best effort de-dup mechanism.
    */
   private static class StreamWithDeDup extends PTransform<PCollection<TableRow>, PDone> {
-    private final transient ValueProvider<TableReference> tableReference;
-    private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
-    private final transient ValueProvider<TableSchema> tableSchema;
+    @Nullable private final transient ValueProvider<TableReference> tableReference;
+    @Nullable private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
+    @Nullable private final transient ValueProvider<TableSchema> tableSchema;
+    private final Write.CreateDisposition createDisposition;
     private final BigQueryServices bqServices;
 
     /** Constructor. */
     StreamWithDeDup(ValueProvider<TableReference> tableReference,
-        SerializableFunction<BoundedWindow, TableReference> tableRefFunction,
-        ValueProvider<TableSchema> tableSchema,
+        @Nullable SerializableFunction<BoundedWindow, TableReference> tableRefFunction,
+        @Nullable ValueProvider<TableSchema> tableSchema,
+        Write.CreateDisposition createDisposition,
         BigQueryServices bqServices) {
       this.tableReference = tableReference;
       this.tableRefFunction = tableRefFunction;
       this.tableSchema = tableSchema;
+      this.createDisposition = createDisposition;
       this.bqServices = checkNotNull(bqServices, "bqServices");
     }
 
@@ -2989,7 +3008,7 @@ public class BigQueryIO {
       tagged
           .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
           .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
-          .apply(ParDo.of(new StreamingWriteFn(tableSchema, bqServices)));
+          .apply(ParDo.of(new StreamingWriteFn(tableSchema, createDisposition, bqServices)));
 
       // Note that the implementation to return PDone here breaks the
       // implicit assumption about the job execution order. If a user

http://git-wip-us.apache.org/repos/asf/beam/blob/dc369522/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 3e8c2c9..ba7f44e 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -1523,6 +1523,42 @@ public class BigQueryIOTest implements Serializable {
   }
 
   @Test
+  public void testStreamingWriteFnCreateNever() throws Exception {
+    BigQueryIO.StreamingWriteFn fn = new BigQueryIO.StreamingWriteFn(
+        null, CreateDisposition.CREATE_NEVER, new FakeBigQueryServices());
+    assertEquals(BigQueryIO.parseTableSpec("dataset.table"),
+        fn.getOrCreateTable(null, "dataset.table"));
+  }
+
+  @Test
+  public void testCreateNeverWithStreaming() throws Exception {
+    BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
+    options.setProject("project");
+    options.setStreaming(true);
+    Pipeline p = TestPipeline.create(options);
+
+    TableReference tableRef = new TableReference();
+    tableRef.setDatasetId("dataset");
+    tableRef.setTableId("sometable");
+
+    PCollection<TableRow> tableRows =
+        p.apply(CountingInput.unbounded())
+        .apply(
+            MapElements.via(
+                new SimpleFunction<Long, TableRow>() {
+                  @Override
+                  public TableRow apply(Long input) {
+                    return null;
+                  }
+                }))
+        .setCoder(TableRowJsonCoder.of());
+    tableRows
+        .apply(BigQueryIO.Write.to(tableRef)
+            .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+            .withoutValidation());
+  }
+
+  @Test
   public void testTableParsing() {
     TableReference ref = BigQueryIO
         .parseTableSpec("my-project:data_set.table_name");


[18/50] beam git commit: This closes #1839

Posted by dh...@apache.org.
This closes #1839


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bf9d4542
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bf9d4542
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bf9d4542

Branch: refs/heads/python-sdk
Commit: bf9d454290bba7fac8829b2edeb416b7d9606062
Parents: 7402d76 a361b65
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 09:03:51 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 09:03:51 2017 -0800

----------------------------------------------------------------------
 .../runners/flink/translation/FlinkBatchTransformTranslators.java  | 2 +-
 .../flink/translation/FlinkStreamingTranslationContext.java        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[26/50] beam git commit: [BEAM-708] Using AutoValue in BoundedReadFromUnboundedSource

Posted by dh...@apache.org.
[BEAM-708] Using AutoValue in BoundedReadFromUnboundedSource

This closes #1794


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6413299a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6413299a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6413299a

Branch: refs/heads/python-sdk
Commit: 6413299a20be57de849684479134479fa1acee2d
Parents: 95beda6 a67ff91
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jan 25 14:22:56 2017 -0800
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jan 25 14:22:56 2017 -0800

----------------------------------------------------------------------
 .../sdk/io/BoundedReadFromUnboundedSource.java  | 69 +++++++++++++-------
 1 file changed, 44 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
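
For readers unfamiliar with the pattern, a generic AutoValue sketch (the type and fields are illustrative, not the actual BoundedReadFromUnboundedSource members):

    import com.google.auto.value.AutoValue;

    @AutoValue
    abstract class ReadLimits {
      abstract long maxNumRecords();
      abstract long maxReadTimeMillis();

      static ReadLimits of(long maxNumRecords, long maxReadTimeMillis) {
        // AutoValue generates AutoValue_ReadLimits at compile time, including
        // the constructor, equals(), hashCode(), and toString().
        return new AutoValue_ReadLimits(maxNumRecords, maxReadTimeMillis);
      }
    }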



[37/50] beam git commit: This closes #1856

Posted by dh...@apache.org.
This closes #1856


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2cbc08b5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2cbc08b5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2cbc08b5

Branch: refs/heads/python-sdk
Commit: 2cbc08b5870036c52a94bb1f1f1d081d387e4ae0
Parents: 9637724 4d0225e
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 14:42:04 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 14:42:04 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/examples/WindowedWordCountIT.java   | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
----------------------------------------------------------------------



[21/50] beam git commit: PackageUtil: parallelize staging of files

Posted by dh...@apache.org.
PackageUtil: parallelize staging of files

Proceeds in stages:
1. In parallel, hash and size all files.
2. Sort files by descending size.
3. In parallel, upload files.

Also a little cleanup for Dataflow 2.0:
* proper visibility
* removing some deprecated code
* refactoring into smaller methods.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3ecf7e70
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3ecf7e70
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3ecf7e70

Branch: refs/heads/python-sdk
Commit: 3ecf7e70bcc4775d804f096de647d13c407a8d52
Parents: 979c937
Author: Dan Halperin <dh...@google.com>
Authored: Mon Oct 24 17:27:23 2016 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 11:03:03 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 +
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
 .../runners/dataflow/util/PackageUtilTest.java  |  42 ++-
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 +
 6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
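
The three-stage pattern described above, as a generic sketch rather than the actual PackageUtil code (the attribute computation is reduced to file size, and the upload body is stubbed; Guava's ListeningExecutorService matches the imports the diff adds):

    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.ListenableFuture;
    import com.google.common.util.concurrent.ListeningExecutorService;
    import com.google.common.util.concurrent.MoreExecutors;
    import java.io.File;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.Executors;

    public class ParallelStagingSketch {
      // Minimal stand-in for PackageAttributes: the file plus its size.
      static class Attrs {
        final File file;
        final long size;
        Attrs(File file, long size) { this.file = file; this.size = size; }
      }

      public static void main(String[] args) throws Exception {
        List<File> files = new ArrayList<>();  // classpath elements to stage
        ListeningExecutorService pool =
            MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(8));

        // Stage 1: compute attributes (size here; size plus an MD5 hash in the
        // real code) for all files in parallel.
        List<ListenableFuture<Attrs>> attrFutures = new ArrayList<>();
        for (final File f : files) {
          attrFutures.add(pool.submit(new Callable<Attrs>() {
            @Override
            public Attrs call() {
              return new Attrs(f, f.length());
            }
          }));
        }
        List<Attrs> attrs = new ArrayList<>(Futures.allAsList(attrFutures).get());

        // Stage 2: sort by descending size so the largest uploads start first.
        Collections.sort(attrs, new Comparator<Attrs>() {
          @Override
          public int compare(Attrs a, Attrs b) {
            return Long.compare(b.size, a.size);
          }
        });

        // Stage 3: upload all files in parallel (copy-to-GCS body stubbed out).
        List<ListenableFuture<Void>> uploads = new ArrayList<>();
        for (final Attrs a : attrs) {
          uploads.add(pool.submit(new Callable<Void>() {
            @Override
            public Void call() {
              // copy a.file to the staging location here
              return null;
            }
          }));
        }
        Futures.allAsList(uploads).get();
        pool.shutdown();
      }
    }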


http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index eea5502..9858b3d 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,6 +203,11 @@
     </dependency>
 
     <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-storage</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>com.google.auth</groupId>
       <artifactId>google-auth-library-credentials</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 6ca4c3f..53822e3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,13 +17,19 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.storage.Storage;
 import java.util.List;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
+import org.apache.beam.sdk.util.Transport;
 
 /**
  * Utility class for staging files to GCS.
@@ -35,6 +41,7 @@ public class GcsStager implements Stager {
     this.options = options;
   }
 
+  @SuppressWarnings("unused")  // used via reflection
   public static GcsStager fromOptions(PipelineOptions options) {
     return new GcsStager(options.as(DataflowPipelineOptions.class));
   }
@@ -48,7 +55,16 @@ public class GcsStager implements Stager {
     if (windmillBinary != null) {
       filesToStage.add("windmill_main=" + windmillBinary);
     }
+    int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
+    checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
+    uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
+    Storage.Builder storageBuilder = Transport.newStorageClient(options);
+    GcsUtil util = GcsUtilFactory.create(
+        storageBuilder.build(),
+        storageBuilder.getHttpRequestInitializer(),
+        options.getExecutorService(),
+        uploadSizeBytes);
     return PackageUtil.stageClasspathElements(
-        options.getFilesToStage(), options.getStagingLocation());
+        options.getFilesToStage(), options.getStagingLocation(), util);
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index 6d910ba..fa8c94d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,53 +17,62 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
+import static com.google.common.base.Preconditions.checkArgument;
+
 import com.fasterxml.jackson.core.Base64Variants;
 import com.google.api.client.util.BackOff;
 import com.google.api.client.util.Sleeper;
 import com.google.api.services.dataflow.model.DataflowPackage;
 import com.google.cloud.hadoop.util.ApiErrorExtractor;
+import com.google.common.collect.Lists;
 import com.google.common.hash.Funnels;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
 import com.google.common.io.CountingOutputStream;
 import com.google.common.io.Files;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.channels.Channels;
 import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Objects;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.FluentBackoff;
+import org.apache.beam.sdk.util.GcsIOChannelFactory;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.IOChannelFactory;
 import org.apache.beam.sdk.util.IOChannelUtils;
 import org.apache.beam.sdk.util.MimeTypes;
 import org.apache.beam.sdk.util.ZipFiles;
+import org.apache.beam.sdk.util.gcsfs.GcsPath;
 import org.joda.time.Duration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** Helper routines for packages. */
-public class PackageUtil {
+class PackageUtil {
   private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
   /**
    * A reasonable upper bound on the number of jars required to launch a Dataflow job.
    */
-  public static final int SANE_CLASSPATH_SIZE = 1000;
-  /**
-   * The initial interval to use between package staging attempts.
-   */
-  private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
-  /**
-   * The maximum number of retries when staging a file.
-   */
-  private static final int MAX_RETRIES = 4;
+  private static final int SANE_CLASSPATH_SIZE = 1000;
 
   private static final FluentBackoff BACKOFF_FACTORY =
-      FluentBackoff.DEFAULT
-          .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
+      FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
 
   /**
    * Translates exceptions from API calls.
@@ -71,35 +80,18 @@ public class PackageUtil {
   private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
 
   /**
-   * Creates a DataflowPackage containing information about how a classpath element should be
-   * staged, including the staging destination as well as its size and hash.
-   *
-   * @param classpathElement The local path for the classpath element.
-   * @param stagingPath The base location for staged classpath elements.
-   * @param overridePackageName If non-null, use the given value as the package name
-   *                            instead of generating one automatically.
-   * @return The package.
-   */
-  @Deprecated
-  public static DataflowPackage createPackage(File classpathElement,
-      String stagingPath, String overridePackageName) {
-    return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
-        .getDataflowPackage();
-  }
-
-  /**
    * Compute and cache the attributes of a classpath element that we will need to stage it.
    *
-   * @param classpathElement the file or directory to be staged.
+   * @param source the file or directory to be staged.
    * @param stagingPath The base location for staged classpath elements.
    * @param overridePackageName If non-null, use the given value as the package name
    *                            instead of generating one automatically.
   * @return a {@link PackageAttributes} containing metadata about the object to be staged.
    */
-  static PackageAttributes createPackageAttributes(File classpathElement,
-      String stagingPath, String overridePackageName) {
+  static PackageAttributes createPackageAttributes(File source,
+      String stagingPath, @Nullable String overridePackageName) {
     try {
-      boolean directory = classpathElement.isDirectory();
+      boolean directory = source.isDirectory();
 
       // Compute size and hash in one pass over file or directory.
       Hasher hasher = Hashing.md5().newHasher();
@@ -108,142 +100,232 @@ public class PackageUtil {
 
       if (!directory) {
         // Files are staged as-is.
-        Files.asByteSource(classpathElement).copyTo(countingOutputStream);
+        Files.asByteSource(source).copyTo(countingOutputStream);
       } else {
         // Directories are recursively zipped.
-        ZipFiles.zipDirectory(classpathElement, countingOutputStream);
+        ZipFiles.zipDirectory(source, countingOutputStream);
       }
 
       long size = countingOutputStream.getCount();
       String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
 
       // Create the DataflowPackage with staging name and location.
-      String uniqueName = getUniqueContentName(classpathElement, hash);
+      String uniqueName = getUniqueContentName(source, hash);
       String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
       DataflowPackage target = new DataflowPackage();
       target.setName(overridePackageName != null ? overridePackageName : uniqueName);
       target.setLocation(resourcePath);
 
-      return new PackageAttributes(size, hash, directory, target);
+      return new PackageAttributes(size, hash, directory, target, source.getPath());
     } catch (IOException e) {
-      throw new RuntimeException("Package setup failure for " + classpathElement, e);
+      throw new RuntimeException("Package setup failure for " + source, e);
     }
   }
 
-  /**
-   * Transfers the classpath elements to the staging location.
-   *
-   * @param classpathElements The elements to stage.
-   * @param stagingPath The base location to stage the elements to.
-   * @return A list of cloud workflow packages, each representing a classpath element.
-   */
-  public static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath) {
-    return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
-  }
-
-  // Visible for testing.
-  static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath,
-      Sleeper retrySleeper) {
-    LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
-        + "prepare for execution.", classpathElements.size());
-
-    if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
-      LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
-          + "copies to all workers. Having this many entries on your classpath may be indicative "
-          + "of an issue in your pipeline. You may want to consider trimming the classpath to "
-          + "necessary dependencies only, using --filesToStage pipeline option to override "
-          + "what files are being staged, or bundling several dependencies into one.",
-          classpathElements.size());
-    }
-
-    ArrayList<DataflowPackage> packages = new ArrayList<>();
+  /** Utility comparator used in uploading packages efficiently. */
+  private static class PackageUploadOrder implements Comparator<PackageAttributes> {
+    @Override
+    public int compare(PackageAttributes o1, PackageAttributes o2) {
+      // Smaller size compares high so that bigger packages are uploaded first.
+      long sizeDiff = o2.getSize() - o1.getSize();
+      if (sizeDiff != 0) {
+        // returns sign of long
+        return Long.signum(sizeDiff);
+      }
 
-    if (stagingPath == null) {
-      throw new IllegalArgumentException(
-          "Can't stage classpath elements on because no staging location has been provided");
+      // Otherwise, choose arbitrarily based on hash.
+      return o1.getHash().compareTo(o2.getHash());
     }
+  }
 
-    int numUploaded = 0;
-    int numCached = 0;
+  /**
+   * Utility function that computes sizes and hashes of packages so that we can validate whether
+   * they have already been correctly staged.
+   */
+  private static List<PackageAttributes> computePackageAttributes(
+      Collection<String> classpathElements, final String stagingPath,
+      ListeningExecutorService executorService) {
+    List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
     for (String classpathElement : classpathElements) {
-      String packageName = null;
+      @Nullable String userPackageName = null;
       if (classpathElement.contains("=")) {
         String[] components = classpathElement.split("=", 2);
-        packageName = components[0];
+        userPackageName = components[0];
         classpathElement = components[1];
       }
+      @Nullable final String packageName = userPackageName;
 
-      File file = new File(classpathElement);
+      final File file = new File(classpathElement);
       if (!file.exists()) {
         LOG.warn("Skipping non-existent classpath element {} that was specified.",
             classpathElement);
         continue;
       }
 
-      PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+      ListenableFuture<PackageAttributes> future =
+          executorService.submit(new Callable<PackageAttributes>() {
+            @Override
+            public PackageAttributes call() throws Exception {
+              return createPackageAttributes(file, stagingPath, packageName);
+            }
+          });
+      futures.add(future);
+    }
+
+    try {
+      return Futures.allAsList(futures).get();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException("Interrupted while staging packages", e);
+    } catch (ExecutionException e) {
+      throw new RuntimeException("Error while staging packages", e.getCause());
+    }
+  }
+
+  private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
+      throws IOException {
+    IOChannelFactory factory = IOChannelUtils.getFactory(target);
+    if (factory instanceof GcsIOChannelFactory) {
+      return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
+    } else {
+      return factory.create(target, MimeTypes.BINARY);
+    }
+  }
 
-      DataflowPackage workflowPackage = attributes.getDataflowPackage();
-      packages.add(workflowPackage);
-      String target = workflowPackage.getLocation();
+  /**
+   * Utility to verify whether a package has already been staged and, if not, copy it to the
+   * staging location.
+   */
+  private static void stageOnePackage(
+      PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
+      Sleeper retrySleeper, GcsUtil gcsUtil) {
+    String source = attributes.getSourcePath();
+    String target = attributes.getDataflowPackage().getLocation();
 
-      // TODO: Should we attempt to detect the Mime type rather than
-      // always using MimeTypes.BINARY?
+    // TODO: Should we attempt to detect the Mime type rather than
+    // always using MimeTypes.BINARY?
+    try {
       try {
-        try {
-          long remoteLength = IOChannelUtils.getSizeBytes(target);
-          if (remoteLength == attributes.getSize()) {
-            LOG.debug("Skipping classpath element already staged: {} at {}",
-                classpathElement, target);
-            numCached++;
-            continue;
-          }
-        } catch (FileNotFoundException expected) {
-          // If the file doesn't exist, it means we need to upload it.
+        long remoteLength = IOChannelUtils.getSizeBytes(target);
+        if (remoteLength == attributes.getSize()) {
+          LOG.debug("Skipping classpath element already staged: {} at {}",
+              attributes.getSourcePath(), target);
+          numCached.incrementAndGet();
+          return;
         }
+      } catch (FileNotFoundException expected) {
+        // If the file doesn't exist, it means we need to upload it.
+      }
 
-        // Upload file, retrying on failure.
-        BackOff backoff = BACKOFF_FACTORY.backoff();
-        while (true) {
-          try {
-            LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
-            try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
-              copyContent(classpathElement, writer);
-            }
-            numUploaded++;
-            break;
-          } catch (IOException e) {
-            if (ERROR_EXTRACTOR.accessDenied(e)) {
-              String errorMessage = String.format(
-                  "Uploaded failed due to permissions error, will NOT retry staging "
-                  + "of classpath %s. Please verify credentials are valid and that you have "
-                  + "write access to %s. Stale credentials can be resolved by executing "
-                  + "'gcloud auth login'.", classpathElement, target);
-              LOG.error(errorMessage);
-              throw new IOException(errorMessage, e);
-            }
-            long sleep = backoff.nextBackOffMillis();
-            if (sleep == BackOff.STOP) {
-              // Rethrow last error, to be included as a cause in the catch below.
-              LOG.error("Upload failed, will NOT retry staging of classpath: {}",
-                  classpathElement, e);
-              throw e;
-            } else {
-              LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
-                  classpathElement, e);
-              retrySleeper.sleep(sleep);
-            }
+      // Upload file, retrying on failure.
+      BackOff backoff = BACKOFF_FACTORY.backoff();
+      while (true) {
+        try {
+          LOG.debug("Uploading classpath element {} to {}", source, target);
+          try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
+            copyContent(source, writer);
+          }
+          numUploaded.incrementAndGet();
+          break;
+        } catch (IOException e) {
+          if (ERROR_EXTRACTOR.accessDenied(e)) {
+            String errorMessage = String.format(
+                "Uploaded failed due to permissions error, will NOT retry staging "
+                    + "of classpath %s. Please verify credentials are valid and that you have "
+                    + "write access to %s. Stale credentials can be resolved by executing "
+                    + "'gcloud auth application-default login'.", source, target);
+            LOG.error(errorMessage);
+            throw new IOException(errorMessage, e);
+          }
+          long sleep = backoff.nextBackOffMillis();
+          if (sleep == BackOff.STOP) {
+            // Rethrow last error, to be included as a cause in the catch below.
+            LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+                source, e);
+            throw e;
+          } else {
+            LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+                source, e);
+            retrySleeper.sleep(sleep);
           }
         }
-      } catch (Exception e) {
-        throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
       }
+    } catch (Exception e) {
+      throw new RuntimeException("Could not stage classpath element: " + source, e);
     }
+  }
 
-    LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
-        + "{} files cached",
-        numUploaded, numCached);
+  /**
+   * Transfers the classpath elements to the staging location.
+   *
+   * @param classpathElements The elements to stage.
+   * @param stagingPath The base location to stage the elements to.
+   * @return A list of cloud workflow packages, each representing a classpath element.
+   */
+  static List<DataflowPackage> stageClasspathElements(
+      Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
+    ListeningExecutorService executorService =
+        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
+    try {
+      return stageClasspathElements(
+          classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
+    } finally {
+      executorService.shutdown();
+    }
+  }
+
+  // Visible for testing.
+  static List<DataflowPackage> stageClasspathElements(
+      Collection<String> classpathElements, final String stagingPath,
+      final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+    LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
+        + "prepare for execution.", classpathElements.size());
+
+    if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
+      LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
+            + "copies to all workers. Having this many entries on your classpath may be indicative "
+            + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+            + "necessary dependencies only, using --filesToStage pipeline option to override "
+            + "what files are being staged, or bundling several dependencies into one.",
+          classpathElements.size());
+    }
+
+    checkArgument(
+        stagingPath != null,
+        "Can't stage classpath elements because no staging location has been provided");
+
+    // Copy into a mutable list: computePackageAttributes returns an immutable list, and we sort it below.
+    List<PackageAttributes> packageAttributes =
+        new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
+    // Order package attributes in descending size order so that we upload the largest files first.
+    Collections.sort(packageAttributes, new PackageUploadOrder());
+
+    List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+    final AtomicInteger numUploaded = new AtomicInteger(0);
+    final AtomicInteger numCached = new AtomicInteger(0);
+
+    List<ListenableFuture<?>> futures = new LinkedList<>();
+    for (final PackageAttributes attributes : packageAttributes) {
+      packages.add(attributes.getDataflowPackage());
+      futures.add(executorService.submit(new Runnable() {
+        @Override
+        public void run() {
+          stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+        }
+      }));
+    }
+    try {
+      Futures.allAsList(futures).get();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException("Interrupted while staging packages", e);
+    } catch (ExecutionException e) {
+      throw new RuntimeException("Error while staging packages", e.getCause());
+    }
+
+    LOG.info(
+        "Staging files complete: {} files cached, {} files newly uploaded",
+        numUploaded.get(), numCached.get());
 
     return packages;
   }
@@ -293,13 +375,15 @@ public class PackageUtil {
     private final boolean directory;
     private final long size;
     private final String hash;
+    private final String sourcePath;
     private DataflowPackage dataflowPackage;
 
     public PackageAttributes(long size, String hash, boolean directory,
-        DataflowPackage dataflowPackage) {
+        DataflowPackage dataflowPackage, String sourcePath) {
       this.size = size;
       this.hash = Objects.requireNonNull(hash, "hash");
       this.directory = directory;
+      this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
       this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
     }
 
@@ -330,5 +414,12 @@ public class PackageUtil {
     public String getHash() {
       return hash;
     }
+
+    /**
+     * @return the path of the file to be uploaded
+     */
+    public String getSourcePath() {
+      return sourcePath;
+    }
   }
 }
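
Pulling the pieces of the new PackageUtil together: attributes are computed in parallel, sorted largest-first, and each package is then staged on the same 32-thread pool, with Futures.allAsList providing the fan-in. A minimal, runnable sketch of that fan-out/fan-in shape, with a trivial Callable standing in for the real staging work:

    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.ListenableFuture;
    import com.google.common.util.concurrent.ListeningExecutorService;
    import com.google.common.util.concurrent.MoreExecutors;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Executors;

    public class ParallelStagingSketch {
      public static void main(String[] args) throws InterruptedException {
        ListeningExecutorService pool =
            MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
        List<ListenableFuture<String>> futures = new LinkedList<>();
        // Fan-out: one task per package, largest-first in the real code.
        for (final String pkg : new String[] {"big.jar", "medium.jar", "small.jar"}) {
          futures.add(pool.submit(new Callable<String>() {
            @Override
            public String call() {
              return pkg + " staged";  // stands in for stageOnePackage(...)
            }
          }));
        }
        try {
          // Fan-in: blocks for all tasks; fails with the first task's cause.
          System.out.println(Futures.allAsList(futures).get());
        } catch (ExecutionException e) {
          throw new RuntimeException("Error while staging packages", e.getCause());
        } finally {
          pool.shutdown();
        }
      }
    }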

http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 05a87dd..3828415 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
 package org.apache.beam.runners.dataflow.util;
 
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyString;
@@ -53,6 +53,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.io.Files;
 import com.google.common.io.LineReader;
+import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -235,7 +236,7 @@ public class PackageUtilTest {
       classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
     }
 
-    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
+    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
 
     logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
   }
@@ -250,7 +251,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -277,7 +278,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -304,7 +305,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -327,7 +328,8 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper);
+          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+          mockGcsUtil);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -348,16 +350,20 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper);
+          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+          mockGcsUtil);
       fail("Expected RuntimeException");
     } catch (RuntimeException e) {
-      assertTrue("Expected IOException containing detailed message.",
-          e.getCause() instanceof IOException);
-      assertThat(e.getCause().getMessage(),
+      assertThat("Expected RuntimeException wrapping IOException.",
+          e.getCause(), instanceOf(RuntimeException.class));
+      assertThat("Expected IOException containing detailed message.",
+          e.getCause().getCause(), instanceOf(IOException.class));
+      assertThat(e.getCause().getCause().getMessage(),
           Matchers.allOf(
               Matchers.containsString("Uploaded failed due to permissions error"),
               Matchers.containsString(
-                  "Stale credentials can be resolved by executing 'gcloud auth login'")));
+                  "Stale credentials can be resolved by executing 'gcloud auth application-default "
+                      + "login'")));
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -377,9 +383,8 @@ public class PackageUtilTest {
 
     try {
       PackageUtil.stageClasspathElements(
-                                              ImmutableList.of(tmpFile.getAbsolutePath()),
-                                              STAGING_PATH,
-                                              fastNanoClockAndSleeper);
+          ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
+          MoreExecutors.newDirectExecutorService(), mockGcsUtil);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -393,7 +398,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verifyNoMoreInteractions(mockGcsUtil);
@@ -411,7 +416,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -429,7 +434,8 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
+        mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -446,7 +452,7 @@ public class PackageUtilTest {
     String nonExistentFile =
         IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
     assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
-        ImmutableList.of(nonExistentFile), STAGING_PATH));
+        ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
   }
 
   /**
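
The retry behavior these tests pin down (five create() calls on a persistent failure, two on a transient one) comes from the BACKOFF_FACTORY loop in PackageUtil. A self-contained sketch of that loop under the same constants; attemptUpload() is a hypothetical stand-in for one staging attempt:

    import com.google.api.client.util.BackOff;
    import com.google.api.client.util.Sleeper;
    import java.io.IOException;
    import org.apache.beam.sdk.util.FluentBackoff;
    import org.joda.time.Duration;

    class RetrySketch {
      // Hypothetical stand-in for a single staging attempt.
      static void attemptUpload() throws IOException {}

      // Five attempts total -- the initial try plus four retries -- starting
      // at a five-second backoff, matching BACKOFF_FACTORY above.
      static void uploadWithRetries() throws IOException, InterruptedException {
        BackOff backoff = FluentBackoff.DEFAULT
            .withMaxRetries(4)
            .withInitialBackoff(Duration.standardSeconds(5))
            .backoff();
        while (true) {
          try {
            attemptUpload();
            return;
          } catch (IOException e) {
            long sleepMillis = backoff.nextBackOffMillis();
            if (sleepMillis == BackOff.STOP) {
              throw e;  // Retries exhausted; surface the last failure.
            }
            Sleeper.DEFAULT.sleep(sleepMillis);
          }
        }
      }
    }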

http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 0553efc..72e106d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,6 +25,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.AppEngineEnvironment;
 import org.apache.beam.sdk.util.GcsPathValidator;
 import org.apache.beam.sdk.util.GcsUtil;
@@ -81,8 +82,9 @@ public interface GcsOptions extends
       + "information on the restrictions and performance implications of this value.\n\n"
       + "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
       + "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
+  @Nullable
   Integer getGcsUploadBufferSizeBytes();
-  void setGcsUploadBufferSizeBytes(Integer bytes);
+  void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
 
   /**
    * The class of the validator that should be created and used to validate paths.

http://git-wip-us.apache.org/repos/asf/beam/blob/3ecf7e70/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index a10ea28..5e83584 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,6 +101,18 @@ public class GcsUtil {
           gcsOptions.getExecutorService(),
           gcsOptions.getGcsUploadBufferSizeBytes());
     }
+
+    /**
+     * Returns an instance of {@link GcsUtil} based on the given parameters.
+     */
+    public static GcsUtil create(
+        Storage storageClient,
+        HttpRequestInitializer httpRequestInitializer,
+        ExecutorService executorService,
+        @Nullable Integer uploadBufferSizeBytes) {
+      return new GcsUtil(
+          storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
+    }
   }
 
   private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
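
Taken together with the GcsStager change at the top of this digest, the new factory is used roughly like this (a sketch; the bucket path is illustrative, and the 1 MiB buffer matches the stager's cap):

    import com.google.api.services.storage.Storage;
    import java.io.IOException;
    import java.nio.channels.WritableByteChannel;
    import org.apache.beam.sdk.options.GcsOptions;
    import org.apache.beam.sdk.util.GcsUtil;
    import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
    import org.apache.beam.sdk.util.MimeTypes;
    import org.apache.beam.sdk.util.Transport;
    import org.apache.beam.sdk.util.gcsfs.GcsPath;

    class StagingWriterSketch {
      // Builds a GcsUtil with an explicit upload buffer and opens a write
      // channel for one staged file; the bucket path is illustrative only.
      static WritableByteChannel openStagingChannel(GcsOptions options)
          throws IOException {
        Storage.Builder storageBuilder = Transport.newStorageClient(options);
        GcsUtil util = GcsUtilFactory.create(
            storageBuilder.build(),
            storageBuilder.getHttpRequestInitializer(),
            options.getExecutorService(),
            1024 * 1024 /* 1 MiB, the stager's cap */);
        return util.create(
            GcsPath.fromUri("gs://my-bucket/staging/foo.jar"), MimeTypes.BINARY);
      }
    }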


[10/50] beam git commit: Add a UsesUnboundedPCollections category and exclude it from DataflowRunner batch tests

Posted by dh...@apache.org.
Add a UsesUnboundedPCollections category and exclude it from DataflowRunner batch tests


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bffe80d5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bffe80d5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bffe80d5

Branch: refs/heads/python-sdk
Commit: bffe80d55d54592b8146e1b185c72eac38751f33
Parents: f2389ab
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 13:57:02 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:51:13 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |  3 ++-
 .../sdk/testing/UsesUnboundedPCollections.java  | 23 ++++++++++++++++++++
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |  4 ++--
 3 files changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 7bf2089..1d05193 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -78,9 +78,10 @@
             <id>runnable-on-service-tests</id>
             <configuration>
               <excludedGroups>
+                org.apache.beam.sdk.testing.UsesMetrics,
                 org.apache.beam.sdk.testing.UsesTimersInParDo,
                 org.apache.beam.sdk.testing.UsesSplittableParDo,
-                org.apache.beam.sdk.testing.UsesMetrics
+                org.apache.beam.sdk.testing.UsesUnboundedPCollections,
               </excludedGroups>
               <excludes>
                 <exclude>org.apache.beam.sdk.transforms.FlattenTest</exclude>

http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
new file mode 100644
index 0000000..d2caf4a
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesUnboundedPCollections.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.testing;
+
+/**
+ * Category tag for validation tests which utilize at least one unbounded {@code PCollection}.
+ */
+public interface UsesUnboundedPCollections {}

http://git-wip-us.apache.org/repos/asf/beam/blob/bffe80d5/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
index a0d58ea..5ec08b4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/PubsubIOTest.java
@@ -25,10 +25,10 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
 
 import java.util.Set;
-
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.testing.RunnableOnService;
+import org.apache.beam.sdk.testing.UsesUnboundedPCollections;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
 import org.joda.time.Duration;
@@ -150,7 +150,7 @@ public class PubsubIOTest {
   }
 
   @Test
-  @Category(RunnableOnService.class)
+  @Category({RunnableOnService.class, UsesUnboundedPCollections.class})
   public void testPrimitiveReadDisplayData() {
     DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
     Set<DisplayData> displayData;
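
For completeness, the Surefire <excludedGroups> entry above has a direct JUnit 4 analogue. A hypothetical suite, not part of this patch, showing the same exclusion expressed with the Categories runner:

    import org.apache.beam.sdk.io.PubsubIOTest;
    import org.apache.beam.sdk.testing.UsesUnboundedPCollections;
    import org.junit.experimental.categories.Categories;
    import org.junit.runner.RunWith;
    import org.junit.runners.Suite;

    // Any test method tagged with UsesUnboundedPCollections (such as
    // testPrimitiveReadDisplayData above) is skipped when run via this suite.
    @RunWith(Categories.class)
    @Categories.ExcludeCategory(UsesUnboundedPCollections.class)
    @Suite.SuiteClasses({PubsubIOTest.class})
    public class BatchSafeTests {}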


[46/50] beam git commit: This closes #1859

Posted by dh...@apache.org.
This closes #1859


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b21bdf47
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b21bdf47
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b21bdf47

Branch: refs/heads/python-sdk
Commit: b21bdf4755363191209e05f96ca8044731a346ed
Parents: 4a29131 9c11815
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 21:18:45 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 21:18:45 2017 +0100

----------------------------------------------------------------------
 .../examples-java8/src/main/resources/archetype-resources/pom.xml  | 2 +-
 .../examples/src/main/resources/archetype-resources/pom.xml        | 2 +-
 .../starter/src/main/resources/archetype-resources/pom.xml         | 2 +-
 .../starter/src/test/resources/projects/basic/reference/pom.xml    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------



[13/50] beam git commit: This closes #1826

Posted by dh...@apache.org.
This closes #1826


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e77de7c6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e77de7c6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e77de7c6

Branch: refs/heads/python-sdk
Commit: e77de7c61daf6aaa5d0562440cfd2f34cd456424
Parents: 1148be6 4cdd877
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 15:55:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:55:09 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/FileSystems.java     | 32 +++++++------------
 .../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +++-----------------
 2 files changed, 15 insertions(+), 50 deletions(-)
----------------------------------------------------------------------



[12/50] beam git commit: [BEAM-59] Beam FileSystem.setDefaultConfig: remove scheme from the signature.

Posted by dh...@apache.org.
[BEAM-59] Beam FileSystem.setDefaultConfig: remove scheme from the signature.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4cdd8771
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4cdd8771
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4cdd8771

Branch: refs/heads/python-sdk
Commit: 4cdd87718c3d0719b7c0e421b9cbaf4eb902672e
Parents: 1148be6
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 18:08:44 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 15:54:53 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/FileSystems.java     | 32 +++++++------------
 .../org/apache/beam/sdk/io/FileSystemsTest.java | 33 +++-----------------
 2 files changed, 15 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4cdd8771/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
index d086ec6..e19c1e4 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java
@@ -17,8 +17,8 @@
  */
 package org.apache.beam.sdk.io;
 
-import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
@@ -53,6 +53,8 @@ public class FileSystems {
   private static final Map<String, FileSystemRegistrar> SCHEME_TO_REGISTRAR =
       new ConcurrentHashMap<>();
 
+  private static PipelineOptions defaultConfig;
+
   private static final Map<String, PipelineOptions> SCHEME_TO_DEFAULT_CONFIG =
       new ConcurrentHashMap<>();
 
@@ -78,27 +80,12 @@ public class FileSystems {
   }
 
   /**
-   * Sets the default configuration to be used with a {@link FileSystemRegistrar} for the provided
-   * {@code scheme}.
+   * Sets the default configuration in workers.
    *
-   * <p>Syntax: <pre>scheme = alpha *( alpha | digit | "+" | "-" | "." )</pre>
-   * Upper case letters are treated as the same as lower case letters.
+   * <p>It will be used in {@link FileSystemRegistrar FileSystemRegistrars} for all schemes.
    */
-  public static void setDefaultConfig(String scheme, PipelineOptions options) {
-    String lowerCaseScheme = checkNotNull(scheme, "scheme").toLowerCase();
-    checkArgument(
-        URI_SCHEME_PATTERN.matcher(lowerCaseScheme).matches(),
-        String.format("Scheme: [%s] doesn't match URI syntax: %s",
-            lowerCaseScheme, URI_SCHEME_PATTERN.pattern()));
-    checkArgument(
-        SCHEME_TO_REGISTRAR.containsKey(lowerCaseScheme),
-        String.format("No FileSystemRegistrar found for scheme: [%s].", lowerCaseScheme));
-    SCHEME_TO_DEFAULT_CONFIG.put(lowerCaseScheme, checkNotNull(options, "options"));
-  }
-
-  @VisibleForTesting
-  static PipelineOptions getDefaultConfig(String scheme) {
-    return SCHEME_TO_DEFAULT_CONFIG.get(scheme.toLowerCase());
+  public static void setDefaultConfigInWorkers(PipelineOptions options) {
+    defaultConfig = checkNotNull(options, "options");
   }
 
   /**
@@ -106,9 +93,12 @@ public class FileSystems {
    */
   @VisibleForTesting
   static FileSystem getFileSystemInternal(URI uri) {
+    checkState(
+        defaultConfig != null,
+        "Expect the runner have called setDefaultConfigInWorkers().");
     String lowerCaseScheme = (uri.getScheme() != null
         ? uri.getScheme().toLowerCase() : LocalFileSystemRegistrar.LOCAL_FILE_SCHEME);
-    return getRegistrarInternal(lowerCaseScheme).fromOptions(getDefaultConfig(lowerCaseScheme));
+    return getRegistrarInternal(lowerCaseScheme).fromOptions(defaultConfig);
   }
 
   /**
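
The checkState added above implies an initialization contract: the runner's worker harness installs the configuration once, before any FileSystem lookup. A minimal sketch of that call order (the harness hook shown is hypothetical):

    import org.apache.beam.sdk.io.FileSystems;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    class WorkerStartupSketch {
      // Install the worker-wide configuration exactly once, before any
      // FileSystem is resolved; after this change, getFileSystemInternal()
      // fails with an IllegalStateException otherwise.
      static void initializeFileSystems() {
        FileSystems.setDefaultConfigInWorkers(PipelineOptionsFactory.create());
      }
    }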

http://git-wip-us.apache.org/repos/asf/beam/blob/4cdd8771/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
index 9b41b98..113a562 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileSystemsTest.java
@@ -17,8 +17,6 @@
  */
 package org.apache.beam.sdk.io;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
 import com.google.common.collect.Sets;
@@ -26,6 +24,7 @@ import java.net.URI;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
@@ -41,33 +40,9 @@ public class FileSystemsTest {
   @Rule
   public ExpectedException thrown = ExpectedException.none();
 
-  @Test
-  public void testSetDefaultConfig() throws Exception {
-    PipelineOptions first = PipelineOptionsFactory.create();
-    PipelineOptions second = PipelineOptionsFactory.create();
-    FileSystems.setDefaultConfig("file", first);
-    assertEquals(first, FileSystems.getDefaultConfig("file"));
-    assertEquals(first, FileSystems.getDefaultConfig("FILE"));
-
-    FileSystems.setDefaultConfig("FILE", second);
-    assertNotEquals(first, FileSystems.getDefaultConfig("file"));
-    assertNotEquals(first, FileSystems.getDefaultConfig("FILE"));
-    assertEquals(second, FileSystems.getDefaultConfig("file"));
-    assertEquals(second, FileSystems.getDefaultConfig("FILE"));
-  }
-
-  @Test
-  public void testSetDefaultConfigNotFound() throws Exception {
-    thrown.expect(IllegalArgumentException.class);
-    thrown.expectMessage("No FileSystemRegistrar found for scheme: [gs-s3].");
-    FileSystems.setDefaultConfig("gs-s3", PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSetDefaultConfigInvalidScheme() throws Exception {
-    thrown.expect(IllegalArgumentException.class);
-    thrown.expectMessage("Scheme: [gs:] doesn't match URI syntax");
-    FileSystems.setDefaultConfig("gs:", PipelineOptionsFactory.create());
+  @Before
+  public void setup() {
+    FileSystems.setDefaultConfigInWorkers(PipelineOptionsFactory.create());
   }
 
   @Test


[36/50] beam git commit: Add prefix and suffix to WindowedWordCountIT output location

Posted by dh...@apache.org.
Add prefix and suffix to WindowedWordCountIT output location


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4d0225e8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4d0225e8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4d0225e8

Branch: refs/heads/python-sdk
Commit: 4d0225e8e29047dc7b4e0f5cea2414eaef4b038c
Parents: 9637724
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jan 26 13:42:58 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Thu Jan 26 13:47:08 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/examples/WindowedWordCountIT.java   | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4d0225e8/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
index e4570ac..703f836 100644
--- a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
+++ b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
@@ -28,6 +28,7 @@ import java.util.Date;
 import java.util.List;
 import java.util.SortedMap;
 import java.util.TreeMap;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.beam.examples.common.WriteWindowedFilesDoFn;
 import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
@@ -47,8 +48,10 @@ import org.hamcrest.TypeSafeMatcher;
 import org.joda.time.Duration;
 import org.joda.time.Instant;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 import org.slf4j.Logger;
@@ -58,6 +61,8 @@ import org.slf4j.LoggerFactory;
 @RunWith(JUnit4.class)
 public class WindowedWordCountIT {
 
+  @Rule public TestName testName = new TestName();
+
   private static final String DEFAULT_INPUT =
       "gs://apache-beam-samples/shakespeare/winterstale-personae";
   static final int MAX_READ_RETRIES = 4;
@@ -100,7 +105,9 @@ public class WindowedWordCountIT {
     options.setOutput(
         IOChannelUtils.resolve(
             options.getTempRoot(),
-            String.format("WindowedWordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date()),
+            String.format(
+                "WindowedWordCountIT.%s-%tFT%<tH:%<tM:%<tS.%<tL+%s",
+                testName.getMethodName(), new Date(), ThreadLocalRandom.current().nextInt()),
             "output",
             "results"));
     return options;
@@ -133,8 +140,7 @@ public class WindowedWordCountIT {
               new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(10)))));
     }
 
-    ShardedFile inputFile =
-        new ExplicitShardedFile(Collections.singleton(options.getInputFile()));
+    ShardedFile inputFile = new ExplicitShardedFile(Collections.singleton(options.getInputFile()));
 
     // For this integration test, input is tiny and we can build the expected counts
     SortedMap<String, Long> expectedWordCounts = new TreeMap<>();
@@ -144,8 +150,8 @@ public class WindowedWordCountIT {
 
       for (String word : words) {
         if (!word.isEmpty()) {
-          expectedWordCounts.put(word,
-              MoreObjects.firstNonNull(expectedWordCounts.get(word), 0L) + 1L);
+          expectedWordCounts.put(
+              word, MoreObjects.firstNonNull(expectedWordCounts.get(word), 0L) + 1L);
         }
       }
     }
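
The new output location is built from the test method name, a millisecond timestamp, and a random suffix; the '%<' conversions reuse the single Date argument across all time fields. A runnable sketch of just the naming, with the method name hard-coded where the test uses testName.getMethodName():

    import java.util.Date;
    import java.util.concurrent.ThreadLocalRandom;

    public class UniqueOutputNameSketch {
      public static void main(String[] args) {
        // One Date argument serves %tF and every %<t conversion after it;
        // the random int keeps two runs in the same millisecond distinct.
        String unique = String.format(
            "WindowedWordCountIT.%s-%tFT%<tH:%<tM:%<tS.%<tL+%s",
            "testWordCount", new Date(), ThreadLocalRandom.current().nextInt());
        System.out.println(unique);
      }
    }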


[27/50] beam git commit: Revert "This closes #1184"

Posted by dh...@apache.org.
Revert "This closes #1184"

This reverts commit c525783704e0cc47845df8cdec1715e1f1c74008, reversing
changes made to 979c9376f820577bad43c18cc1a7ee86fab9d942.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fee029f7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fee029f7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fee029f7

Branch: refs/heads/python-sdk
Commit: fee029f7f9963c9de821ff5792d7f45fabe6cb5d
Parents: 6413299
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 15:54:26 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 15:54:26 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 -
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 349 +++++++------------
 .../runners/dataflow/util/PackageUtilTest.java  |  42 +--
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 -
 6 files changed, 149 insertions(+), 281 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 9858b3d..eea5502 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,11 +203,6 @@
     </dependency>
 
     <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-storage</artifactId>
-    </dependency>
-
-    <dependency>
       <groupId>com.google.auth</groupId>
       <artifactId>google-auth-library-credentials</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 53822e3..6ca4c3f 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,19 +17,13 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.api.services.storage.Storage;
 import java.util.List;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.GcsUtil;
-import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
-import org.apache.beam.sdk.util.Transport;
 
 /**
  * Utility class for staging files to GCS.
@@ -41,7 +35,6 @@ public class GcsStager implements Stager {
     this.options = options;
   }
 
-  @SuppressWarnings("unused")  // used via reflection
   public static GcsStager fromOptions(PipelineOptions options) {
     return new GcsStager(options.as(DataflowPipelineOptions.class));
   }
@@ -55,16 +48,7 @@ public class GcsStager implements Stager {
     if (windmillBinary != null) {
       filesToStage.add("windmill_main=" + windmillBinary);
     }
-    int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
-    checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
-    uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
-    Storage.Builder storageBuilder = Transport.newStorageClient(options);
-    GcsUtil util = GcsUtilFactory.create(
-        storageBuilder.build(),
-        storageBuilder.getHttpRequestInitializer(),
-        options.getExecutorService(),
-        uploadSizeBytes);
     return PackageUtil.stageClasspathElements(
-        options.getFilesToStage(), options.getStagingLocation(), util);
+        options.getFilesToStage(), options.getStagingLocation());
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index fa8c94d..6d910ba 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,62 +17,53 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
-import static com.google.common.base.Preconditions.checkArgument;
-
 import com.fasterxml.jackson.core.Base64Variants;
 import com.google.api.client.util.BackOff;
 import com.google.api.client.util.Sleeper;
 import com.google.api.services.dataflow.model.DataflowPackage;
 import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.collect.Lists;
 import com.google.common.hash.Funnels;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
 import com.google.common.io.CountingOutputStream;
 import com.google.common.io.Files;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListeningExecutorService;
-import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.channels.Channels;
 import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Objects;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executors;
-import java.util.concurrent.atomic.AtomicInteger;
-import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.FluentBackoff;
-import org.apache.beam.sdk.util.GcsIOChannelFactory;
-import org.apache.beam.sdk.util.GcsUtil;
-import org.apache.beam.sdk.util.IOChannelFactory;
 import org.apache.beam.sdk.util.IOChannelUtils;
 import org.apache.beam.sdk.util.MimeTypes;
 import org.apache.beam.sdk.util.ZipFiles;
-import org.apache.beam.sdk.util.gcsfs.GcsPath;
 import org.joda.time.Duration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** Helper routines for packages. */
-class PackageUtil {
+public class PackageUtil {
   private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
   /**
    * A reasonable upper bound on the number of jars required to launch a Dataflow job.
    */
-  private static final int SANE_CLASSPATH_SIZE = 1000;
+  public static final int SANE_CLASSPATH_SIZE = 1000;
+  /**
+   * The initial interval to use between package staging attempts.
+   */
+  private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
+  /**
+   * The maximum number of retries when staging a file.
+   */
+  private static final int MAX_RETRIES = 4;
 
   private static final FluentBackoff BACKOFF_FACTORY =
-      FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
+      FluentBackoff.DEFAULT
+          .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
 
   /**
    * Translates exceptions from API calls.
@@ -80,18 +71,35 @@ class PackageUtil {
   private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
 
   /**
+   * Creates a DataflowPackage containing information about how a classpath element should be
+   * staged, including the staging destination as well as its size and hash.
+   *
+   * @param classpathElement The local path for the classpath element.
+   * @param stagingPath The base location for staged classpath elements.
+   * @param overridePackageName If non-null, use the given value as the package name
+   *                            instead of generating one automatically.
+   * @return The package.
+   */
+  @Deprecated
+  public static DataflowPackage createPackage(File classpathElement,
+      String stagingPath, String overridePackageName) {
+    return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
+        .getDataflowPackage();
+  }
+
+  /**
    * Compute and cache the attributes of a classpath element that we will need to stage it.
    *
-   * @param source the file or directory to be staged.
+   * @param classpathElement the file or directory to be staged.
    * @param stagingPath The base location for staged classpath elements.
    * @param overridePackageName If non-null, use the given value as the package name
    *                            instead of generating one automatically.
   * @return a {@link PackageAttributes} containing metadata about the object to be staged.
    */
-  static PackageAttributes createPackageAttributes(File source,
-      String stagingPath, @Nullable String overridePackageName) {
+  static PackageAttributes createPackageAttributes(File classpathElement,
+      String stagingPath, String overridePackageName) {
     try {
-      boolean directory = source.isDirectory();
+      boolean directory = classpathElement.isDirectory();
 
       // Compute size and hash in one pass over file or directory.
       Hasher hasher = Hashing.md5().newHasher();
@@ -100,158 +108,25 @@ class PackageUtil {
 
       if (!directory) {
         // Files are staged as-is.
-        Files.asByteSource(source).copyTo(countingOutputStream);
+        Files.asByteSource(classpathElement).copyTo(countingOutputStream);
       } else {
         // Directories are recursively zipped.
-        ZipFiles.zipDirectory(source, countingOutputStream);
+        ZipFiles.zipDirectory(classpathElement, countingOutputStream);
       }
 
       long size = countingOutputStream.getCount();
       String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
 
       // Create the DataflowPackage with staging name and location.
-      String uniqueName = getUniqueContentName(source, hash);
+      String uniqueName = getUniqueContentName(classpathElement, hash);
       String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
       DataflowPackage target = new DataflowPackage();
       target.setName(overridePackageName != null ? overridePackageName : uniqueName);
       target.setLocation(resourcePath);
 
-      return new PackageAttributes(size, hash, directory, target, source.getPath());
+      return new PackageAttributes(size, hash, directory, target);
     } catch (IOException e) {
-      throw new RuntimeException("Package setup failure for " + source, e);
-    }
-  }
-
-  /** Utility comparator used in uploading packages efficiently. */
-  private static class PackageUploadOrder implements Comparator<PackageAttributes> {
-    @Override
-    public int compare(PackageAttributes o1, PackageAttributes o2) {
-      // Smaller size compares high so that bigger packages are uploaded first.
-      long sizeDiff = o2.getSize() - o1.getSize();
-      if (sizeDiff != 0) {
-        // returns sign of long
-        return Long.signum(sizeDiff);
-      }
-
-      // Otherwise, choose arbitrarily based on hash.
-      return o1.getHash().compareTo(o2.getHash());
-    }
-  }
-
-  /**
-   * Utility function that computes sizes and hashes of packages so that we can validate whether
-   * they have already been correctly staged.
-   */
-  private static List<PackageAttributes> computePackageAttributes(
-      Collection<String> classpathElements, final String stagingPath,
-      ListeningExecutorService executorService) {
-    List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
-    for (String classpathElement : classpathElements) {
-      @Nullable String userPackageName = null;
-      if (classpathElement.contains("=")) {
-        String[] components = classpathElement.split("=", 2);
-        userPackageName = components[0];
-        classpathElement = components[1];
-      }
-      @Nullable final String packageName = userPackageName;
-
-      final File file = new File(classpathElement);
-      if (!file.exists()) {
-        LOG.warn("Skipping non-existent classpath element {} that was specified.",
-            classpathElement);
-        continue;
-      }
-
-      ListenableFuture<PackageAttributes> future =
-          executorService.submit(new Callable<PackageAttributes>() {
-            @Override
-            public PackageAttributes call() throws Exception {
-              return createPackageAttributes(file, stagingPath, packageName);
-            }
-          });
-      futures.add(future);
-    }
-
-    try {
-      return Futures.allAsList(futures).get();
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new RuntimeException("Interrupted while staging packages", e);
-    } catch (ExecutionException e) {
-      throw new RuntimeException("Error while staging packages", e.getCause());
-    }
-  }
-
-  private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
-      throws IOException {
-    IOChannelFactory factory = IOChannelUtils.getFactory(target);
-    if (factory instanceof GcsIOChannelFactory) {
-      return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
-    } else {
-      return factory.create(target, MimeTypes.BINARY);
-    }
-  }
-
-  /**
-   * Utility to verify whether a package has already been staged and, if not, copy it to the
-   * staging location.
-   */
-  private static void stageOnePackage(
-      PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
-      Sleeper retrySleeper, GcsUtil gcsUtil) {
-    String source = attributes.getSourcePath();
-    String target = attributes.getDataflowPackage().getLocation();
-
-    // TODO: Should we attempt to detect the Mime type rather than
-    // always using MimeTypes.BINARY?
-    try {
-      try {
-        long remoteLength = IOChannelUtils.getSizeBytes(target);
-        if (remoteLength == attributes.getSize()) {
-          LOG.debug("Skipping classpath element already staged: {} at {}",
-              attributes.getSourcePath(), target);
-          numCached.incrementAndGet();
-          return;
-        }
-      } catch (FileNotFoundException expected) {
-        // If the file doesn't exist, it means we need to upload it.
-      }
-
-      // Upload file, retrying on failure.
-      BackOff backoff = BACKOFF_FACTORY.backoff();
-      while (true) {
-        try {
-          LOG.debug("Uploading classpath element {} to {}", source, target);
-          try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
-            copyContent(source, writer);
-          }
-          numUploaded.incrementAndGet();
-          break;
-        } catch (IOException e) {
-          if (ERROR_EXTRACTOR.accessDenied(e)) {
-            String errorMessage = String.format(
-                "Upload failed due to permissions error, will NOT retry staging "
-                    + "of classpath %s. Please verify credentials are valid and that you have "
-                    + "write access to %s. Stale credentials can be resolved by executing "
-                    + "'gcloud auth application-default login'.", source, target);
-            LOG.error(errorMessage);
-            throw new IOException(errorMessage, e);
-          }
-          long sleep = backoff.nextBackOffMillis();
-          if (sleep == BackOff.STOP) {
-            // Rethrow last error, to be included as a cause in the catch below.
-            LOG.error("Upload failed, will NOT retry staging of classpath: {}",
-                source, e);
-            throw e;
-          } else {
-            LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
-                source, e);
-            retrySleeper.sleep(sleep);
-          }
-        }
-      }
-    } catch (Exception e) {
-      throw new RuntimeException("Could not stage classpath element: " + source, e);
+      throw new RuntimeException("Package setup failure for " + classpathElement, e);
     }
   }
 
@@ -262,70 +137,113 @@ class PackageUtil {
    * @param stagingPath The base location to stage the elements to.
    * @return A list of cloud workflow packages, each representing a classpath element.
    */
-  static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
-    ListeningExecutorService executorService =
-        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
-    try {
-      return stageClasspathElements(
-          classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
-    } finally {
-      executorService.shutdown();
-    }
+  public static List<DataflowPackage> stageClasspathElements(
+      Collection<String> classpathElements, String stagingPath) {
+    return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
   }
 
   // Visible for testing.
   static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, final String stagingPath,
-      final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+      Collection<String> classpathElements, String stagingPath,
+      Sleeper retrySleeper) {
     LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
         + "prepare for execution.", classpathElements.size());
 
     if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
       LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
-            + "copies to all workers. Having this many entries on your classpath may be indicative "
-            + "of an issue in your pipeline. You may want to consider trimming the classpath to "
-            + "necessary dependencies only, using --filesToStage pipeline option to override "
-            + "what files are being staged, or bundling several dependencies into one.",
+          + "copies to all workers. Having this many entries on your classpath may be indicative "
+          + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+          + "necessary dependencies only, using --filesToStage pipeline option to override "
+          + "what files are being staged, or bundling several dependencies into one.",
           classpathElements.size());
     }
 
-    checkArgument(
-        stagingPath != null,
-        "Can't stage classpath elements because no staging location has been provided");
+    ArrayList<DataflowPackage> packages = new ArrayList<>();
 
-    // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
-    List<PackageAttributes> packageAttributes =
-        new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
-    // Order package attributes in descending size order so that we upload the largest files first.
-    Collections.sort(packageAttributes, new PackageUploadOrder());
+    if (stagingPath == null) {
+      throw new IllegalArgumentException(
+          "Can't stage classpath elements because no staging location has been provided");
+    }
 
-    List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
-    final AtomicInteger numUploaded = new AtomicInteger(0);
-    final AtomicInteger numCached = new AtomicInteger(0);
+    int numUploaded = 0;
+    int numCached = 0;
+    for (String classpathElement : classpathElements) {
+      String packageName = null;
+      if (classpathElement.contains("=")) {
+        String[] components = classpathElement.split("=", 2);
+        packageName = components[0];
+        classpathElement = components[1];
+      }
 
-    List<ListenableFuture<?>> futures = new LinkedList<>();
-    for (final PackageAttributes attributes : packageAttributes) {
-      packages.add(attributes.getDataflowPackage());
-      futures.add(executorService.submit(new Runnable() {
-        @Override
-        public void run() {
-          stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+      File file = new File(classpathElement);
+      if (!file.exists()) {
+        LOG.warn("Skipping non-existent classpath element {} that was specified.",
+            classpathElement);
+        continue;
+      }
+
+      PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+
+      DataflowPackage workflowPackage = attributes.getDataflowPackage();
+      packages.add(workflowPackage);
+      String target = workflowPackage.getLocation();
+
+      // TODO: Should we attempt to detect the Mime type rather than
+      // always using MimeTypes.BINARY?
+      try {
+        try {
+          long remoteLength = IOChannelUtils.getSizeBytes(target);
+          if (remoteLength == attributes.getSize()) {
+            LOG.debug("Skipping classpath element already staged: {} at {}",
+                classpathElement, target);
+            numCached++;
+            continue;
+          }
+        } catch (FileNotFoundException expected) {
+          // If the file doesn't exist, it means we need to upload it.
         }
-      }));
-    }
-    try {
-      Futures.allAsList(futures).get();
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new RuntimeException("Interrupted while staging packages", e);
-    } catch (ExecutionException e) {
-      throw new RuntimeException("Error while staging packages", e.getCause());
+
+        // Upload file, retrying on failure.
+        BackOff backoff = BACKOFF_FACTORY.backoff();
+        while (true) {
+          try {
+            LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
+            try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
+              copyContent(classpathElement, writer);
+            }
+            numUploaded++;
+            break;
+          } catch (IOException e) {
+            if (ERROR_EXTRACTOR.accessDenied(e)) {
+              String errorMessage = String.format(
+                  "Upload failed due to permissions error, will NOT retry staging "
+                  + "of classpath %s. Please verify credentials are valid and that you have "
+                  + "write access to %s. Stale credentials can be resolved by executing "
+                  + "'gcloud auth login'.", classpathElement, target);
+              LOG.error(errorMessage);
+              throw new IOException(errorMessage, e);
+            }
+            long sleep = backoff.nextBackOffMillis();
+            if (sleep == BackOff.STOP) {
+              // Rethrow last error, to be included as a cause in the catch below.
+              LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+                  classpathElement, e);
+              throw e;
+            } else {
+              LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+                  classpathElement, e);
+              retrySleeper.sleep(sleep);
+            }
+          }
+        }
+      } catch (Exception e) {
+        throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
+      }
     }
 
-    LOG.info(
-        "Staging files complete: {} files cached, {} files newly uploaded",
-        numCached.get(), numUploaded.get());
+    LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
+        + "{} files cached",
+        numUploaded, numCached);
 
     return packages;
   }
@@ -375,15 +293,13 @@ class PackageUtil {
     private final boolean directory;
     private final long size;
     private final String hash;
-    private final String sourcePath;
     private DataflowPackage dataflowPackage;
 
     public PackageAttributes(long size, String hash, boolean directory,
-        DataflowPackage dataflowPackage, String sourcePath) {
+        DataflowPackage dataflowPackage) {
       this.size = size;
       this.hash = Objects.requireNonNull(hash, "hash");
       this.directory = directory;
-      this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
       this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
     }
 
@@ -414,12 +330,5 @@ class PackageUtil {
     public String getHash() {
       return hash;
     }
-
-    /**
-     * @return the file to be uploaded
-     */
-    public String getSourcePath() {
-      return sourcePath;
-    }
   }
 }
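
Both versions of PackageUtil above share the same retry skeleton around each upload. Below is a minimal standalone sketch of that pattern, using only the FluentBackoff, BackOff, and Sleeper calls visible in the diff; the RetrySketch class and its Runnable attempt parameter are illustrative, not PackageUtil API:

    import com.google.api.client.util.BackOff;
    import com.google.api.client.util.Sleeper;
    import org.apache.beam.sdk.util.FluentBackoff;
    import org.joda.time.Duration;

    class RetrySketch {
      private static final FluentBackoff BACKOFF_FACTORY =
          FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));

      /** Runs {@code attempt}, sleeping and retrying on failure until retries are exhausted. */
      static void runWithRetries(Runnable attempt, Sleeper sleeper) throws Exception {
        BackOff backoff = BACKOFF_FACTORY.backoff();
        while (true) {
          try {
            attempt.run();  // stand-in for the actual upload
            return;
          } catch (RuntimeException e) {
            long sleep = backoff.nextBackOffMillis();
            if (sleep == BackOff.STOP) {
              throw e;  // out of retries; surface the last failure
            }
            sleeper.sleep(sleep);
          }
        }
      }
    }

Five attempts in total (one initial plus four retries) starting at a five-second interval matches the BACKOFF_FACTORY constants above and the times(5) verification in the test diff.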

http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 3828415..05a87dd 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
 package org.apache.beam.runners.dataflow.util;
 
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyString;
@@ -53,7 +53,6 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.io.Files;
 import com.google.common.io.LineReader;
-import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -236,7 +235,7 @@ public class PackageUtilTest {
       classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
     }
 
-    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
+    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
 
     logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
   }
@@ -251,7 +250,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -278,7 +277,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -305,7 +304,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -328,8 +327,7 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
-          mockGcsUtil);
+          STAGING_PATH, fastNanoClockAndSleeper);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -350,20 +348,16 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
-          mockGcsUtil);
+          STAGING_PATH, fastNanoClockAndSleeper);
       fail("Expected RuntimeException");
     } catch (RuntimeException e) {
-      assertThat("Expected RuntimeException wrapping IOException.",
-          e.getCause(), instanceOf(RuntimeException.class));
-      assertThat("Expected IOException containing detailed message.",
-          e.getCause().getCause(), instanceOf(IOException.class));
-      assertThat(e.getCause().getCause().getMessage(),
+      assertTrue("Expected IOException containing detailed message.",
+          e.getCause() instanceof IOException);
+      assertThat(e.getCause().getMessage(),
           Matchers.allOf(
               Matchers.containsString("Uploaded failed due to permissions error"),
               Matchers.containsString(
-                  "Stale credentials can be resolved by executing 'gcloud auth application-default "
-                      + "login'")));
+                  "Stale credentials can be resolved by executing 'gcloud auth login'")));
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -383,8 +377,9 @@ public class PackageUtilTest {
 
     try {
       PackageUtil.stageClasspathElements(
-          ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
-          MoreExecutors.newDirectExecutorService(), mockGcsUtil);
+                                              ImmutableList.of(tmpFile.getAbsolutePath()),
+                                              STAGING_PATH,
+                                              fastNanoClockAndSleeper);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -398,7 +393,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verifyNoMoreInteractions(mockGcsUtil);
@@ -416,7 +411,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -434,8 +429,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
-        mockGcsUtil);
+        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -452,7 +446,7 @@ public class PackageUtilTest {
     String nonExistentFile =
         IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
     assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
-        ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
+        ImmutableList.of(nonExistentFile), STAGING_PATH));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 72e106d..0553efc 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,7 +25,6 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
-import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.AppEngineEnvironment;
 import org.apache.beam.sdk.util.GcsPathValidator;
 import org.apache.beam.sdk.util.GcsUtil;
@@ -82,9 +81,8 @@ public interface GcsOptions extends
       + "information on the restrictions and performance implications of this value.\n\n"
       + "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
       + "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
-  @Nullable
   Integer getGcsUploadBufferSizeBytes();
-  void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
+  void setGcsUploadBufferSizeBytes(Integer bytes);
 
   /**
    * The class of the validator that should be created and used to validate paths.

http://git-wip-us.apache.org/repos/asf/beam/blob/fee029f7/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index 5e83584..a10ea28 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,18 +101,6 @@ public class GcsUtil {
           gcsOptions.getExecutorService(),
           gcsOptions.getGcsUploadBufferSizeBytes());
     }
-
-    /**
-     * Returns an instance of {@link GcsUtil} based on the given parameters.
-     */
-    public static GcsUtil create(
-        Storage storageClient,
-        HttpRequestInitializer httpRequestInitializer,
-        ExecutorService executorService,
-        @Nullable Integer uploadBufferSizeBytes) {
-      return new GcsUtil(
-          storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
-    }
   }
 
   private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);


[29/50] beam git commit: Recommit "DataflowRunner: parallelize staging of files"

Posted by dh...@apache.org.
Recommit "DataflowRunner: parallelize staging of files"

Revert "This closes #1847"

This reverts commit 1c6e667414788fe99f583fac39d458a4984ae162, reversing
changes made to 6413299a20be57de849684479134479fa1acee2d.
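
The reinstated change fans per-file staging out over a fixed 32-thread pool and joins on a combined future, as the PackageUtil diff below shows. A minimal sketch of that Guava pattern follows; the stageAll wrapper and its Runnable uploads are illustrative stand-ins for the real staging work:

    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.ListenableFuture;
    import com.google.common.util.concurrent.ListeningExecutorService;
    import com.google.common.util.concurrent.MoreExecutors;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Executors;

    class ParallelStagingSketch {
      static void stageAll(List<Runnable> uploads) {
        ListeningExecutorService executor =
            MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
        try {
          List<ListenableFuture<?>> futures = new LinkedList<>();
          for (Runnable upload : uploads) {
            futures.add(executor.submit(upload));  // each upload runs on the pool
          }
          // Block until every upload finishes; the first failure aborts the join.
          Futures.allAsList(futures).get();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new RuntimeException("Interrupted while staging packages", e);
        } catch (ExecutionException e) {
          throw new RuntimeException("Error while staging packages", e.getCause());
        } finally {
          executor.shutdown();
        }
      }
    }

Submitting the largest packages first (see the PackageUploadOrder comparator in the diff) presumably keeps the slowest transfer from becoming the tail of the join.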


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/23e2b913
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/23e2b913
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/23e2b913

Branch: refs/heads/python-sdk
Commit: 23e2b913946acb2690fbac2d751a5672d80121aa
Parents: 1c6e667
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 21:04:20 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 21:04:27 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 +
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 349 ++++++++++++-------
 .../runners/dataflow/util/PackageUtilTest.java  |  42 ++-
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 +
 6 files changed, 281 insertions(+), 149 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index eea5502..9858b3d 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -203,6 +203,11 @@
     </dependency>
 
     <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-storage</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>com.google.auth</groupId>
       <artifactId>google-auth-library-credentials</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
index 6ca4c3f..53822e3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/GcsStager.java
@@ -17,13 +17,19 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.storage.Storage;
 import java.util.List;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
+import org.apache.beam.sdk.util.Transport;
 
 /**
  * Utility class for staging files to GCS.
@@ -35,6 +41,7 @@ public class GcsStager implements Stager {
     this.options = options;
   }
 
+  @SuppressWarnings("unused")  // used via reflection
   public static GcsStager fromOptions(PipelineOptions options) {
     return new GcsStager(options.as(DataflowPipelineOptions.class));
   }
@@ -48,7 +55,16 @@ public class GcsStager implements Stager {
     if (windmillBinary != null) {
       filesToStage.add("windmill_main=" + windmillBinary);
     }
+    int uploadSizeBytes = firstNonNull(options.getGcsUploadBufferSizeBytes(), 1024 * 1024);
+    checkArgument(uploadSizeBytes > 0, "gcsUploadBufferSizeBytes must be > 0");
+    uploadSizeBytes = Math.min(uploadSizeBytes, 1024 * 1024);
+    Storage.Builder storageBuilder = Transport.newStorageClient(options);
+    GcsUtil util = GcsUtilFactory.create(
+        storageBuilder.build(),
+        storageBuilder.getHttpRequestInitializer(),
+        options.getExecutorService(),
+        uploadSizeBytes);
     return PackageUtil.stageClasspathElements(
-        options.getFilesToStage(), options.getStagingLocation());
+        options.getFilesToStage(), options.getStagingLocation(), util);
   }
 }
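
A note on the buffer arithmetic above: firstNonNull supplies a 1 MiB default, checkArgument rejects non-positive overrides, and Math.min then caps any user-supplied value at 1 MiB as well, so larger gcsUploadBufferSizeBytes settings are silently reduced for staging. With the 32-thread staging pool introduced in PackageUtil below, that bounds upload buffers to roughly 32 MiB in total; this reading of the intent is inferred from the code, not stated in the commit message.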

http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index 6d910ba..fa8c94d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -17,53 +17,62 @@
  */
 package org.apache.beam.runners.dataflow.util;
 
+import static com.google.common.base.Preconditions.checkArgument;
+
 import com.fasterxml.jackson.core.Base64Variants;
 import com.google.api.client.util.BackOff;
 import com.google.api.client.util.Sleeper;
 import com.google.api.services.dataflow.model.DataflowPackage;
 import com.google.cloud.hadoop.util.ApiErrorExtractor;
+import com.google.common.collect.Lists;
 import com.google.common.hash.Funnels;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
 import com.google.common.io.CountingOutputStream;
 import com.google.common.io.Files;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.channels.Channels;
 import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Objects;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.FluentBackoff;
+import org.apache.beam.sdk.util.GcsIOChannelFactory;
+import org.apache.beam.sdk.util.GcsUtil;
+import org.apache.beam.sdk.util.IOChannelFactory;
 import org.apache.beam.sdk.util.IOChannelUtils;
 import org.apache.beam.sdk.util.MimeTypes;
 import org.apache.beam.sdk.util.ZipFiles;
+import org.apache.beam.sdk.util.gcsfs.GcsPath;
 import org.joda.time.Duration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** Helper routines for packages. */
-public class PackageUtil {
+class PackageUtil {
   private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
   /**
    * A reasonable upper bound on the number of jars required to launch a Dataflow job.
    */
-  public static final int SANE_CLASSPATH_SIZE = 1000;
-  /**
-   * The initial interval to use between package staging attempts.
-   */
-  private static final Duration INITIAL_BACKOFF_INTERVAL = Duration.standardSeconds(5);
-  /**
-   * The maximum number of retries when staging a file.
-   */
-  private static final int MAX_RETRIES = 4;
+  private static final int SANE_CLASSPATH_SIZE = 1000;
 
   private static final FluentBackoff BACKOFF_FACTORY =
-      FluentBackoff.DEFAULT
-          .withMaxRetries(MAX_RETRIES).withInitialBackoff(INITIAL_BACKOFF_INTERVAL);
+      FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.standardSeconds(5));
 
   /**
    * Translates exceptions from API calls.
@@ -71,35 +80,18 @@ public class PackageUtil {
   private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
 
   /**
-   * Creates a DataflowPackage containing information about how a classpath element should be
-   * staged, including the staging destination as well as its size and hash.
-   *
-   * @param classpathElement The local path for the classpath element.
-   * @param stagingPath The base location for staged classpath elements.
-   * @param overridePackageName If non-null, use the given value as the package name
-   *                            instead of generating one automatically.
-   * @return The package.
-   */
-  @Deprecated
-  public static DataflowPackage createPackage(File classpathElement,
-      String stagingPath, String overridePackageName) {
-    return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
-        .getDataflowPackage();
-  }
-
-  /**
   * Compute and cache the attributes of a classpath element that we will need in order to stage it.
    *
-   * @param classpathElement the file or directory to be staged.
+   * @param source the file or directory to be staged.
    * @param stagingPath The base location for staged classpath elements.
    * @param overridePackageName If non-null, use the given value as the package name
    *                            instead of generating one automatically.
   * @return a {@link PackageAttributes} containing metadata about the object to be staged.
    */
-  static PackageAttributes createPackageAttributes(File classpathElement,
-      String stagingPath, String overridePackageName) {
+  static PackageAttributes createPackageAttributes(File source,
+      String stagingPath, @Nullable String overridePackageName) {
     try {
-      boolean directory = classpathElement.isDirectory();
+      boolean directory = source.isDirectory();
 
       // Compute size and hash in one pass over file or directory.
       Hasher hasher = Hashing.md5().newHasher();
@@ -108,142 +100,232 @@ public class PackageUtil {
 
       if (!directory) {
         // Files are staged as-is.
-        Files.asByteSource(classpathElement).copyTo(countingOutputStream);
+        Files.asByteSource(source).copyTo(countingOutputStream);
       } else {
         // Directories are recursively zipped.
-        ZipFiles.zipDirectory(classpathElement, countingOutputStream);
+        ZipFiles.zipDirectory(source, countingOutputStream);
       }
 
       long size = countingOutputStream.getCount();
       String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
 
       // Create the DataflowPackage with staging name and location.
-      String uniqueName = getUniqueContentName(classpathElement, hash);
+      String uniqueName = getUniqueContentName(source, hash);
       String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
       DataflowPackage target = new DataflowPackage();
       target.setName(overridePackageName != null ? overridePackageName : uniqueName);
       target.setLocation(resourcePath);
 
-      return new PackageAttributes(size, hash, directory, target);
+      return new PackageAttributes(size, hash, directory, target, source.getPath());
     } catch (IOException e) {
-      throw new RuntimeException("Package setup failure for " + classpathElement, e);
+      throw new RuntimeException("Package setup failure for " + source, e);
     }
   }
 
-  /**
-   * Transfers the classpath elements to the staging location.
-   *
-   * @param classpathElements The elements to stage.
-   * @param stagingPath The base location to stage the elements to.
-   * @return A list of cloud workflow packages, each representing a classpath element.
-   */
-  public static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath) {
-    return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
-  }
-
-  // Visible for testing.
-  static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath,
-      Sleeper retrySleeper) {
-    LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
-        + "prepare for execution.", classpathElements.size());
-
-    if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
-      LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
-          + "copies to all workers. Having this many entries on your classpath may be indicative "
-          + "of an issue in your pipeline. You may want to consider trimming the classpath to "
-          + "necessary dependencies only, using --filesToStage pipeline option to override "
-          + "what files are being staged, or bundling several dependencies into one.",
-          classpathElements.size());
-    }
-
-    ArrayList<DataflowPackage> packages = new ArrayList<>();
+  /** Utility comparator used in uploading packages efficiently. */
+  private static class PackageUploadOrder implements Comparator<PackageAttributes> {
+    @Override
+    public int compare(PackageAttributes o1, PackageAttributes o2) {
+      // Smaller size compares high so that bigger packages are uploaded first.
+      long sizeDiff = o2.getSize() - o1.getSize();
+      if (sizeDiff != 0) {
+        // returns sign of long
+        return Long.signum(sizeDiff);
+      }
 
-    if (stagingPath == null) {
-      throw new IllegalArgumentException(
-          "Can't stage classpath elements because no staging location has been provided");
+      // Otherwise, choose arbitrarily based on hash.
+      return o1.getHash().compareTo(o2.getHash());
     }
+  }
 
-    int numUploaded = 0;
-    int numCached = 0;
+  /**
+   * Utility function that computes sizes and hashes of packages so that we can validate whether
+   * they have already been correctly staged.
+   */
+  private static List<PackageAttributes> computePackageAttributes(
+      Collection<String> classpathElements, final String stagingPath,
+      ListeningExecutorService executorService) {
+    List<ListenableFuture<PackageAttributes>> futures = new LinkedList<>();
     for (String classpathElement : classpathElements) {
-      String packageName = null;
+      @Nullable String userPackageName = null;
       if (classpathElement.contains("=")) {
         String[] components = classpathElement.split("=", 2);
-        packageName = components[0];
+        userPackageName = components[0];
         classpathElement = components[1];
       }
+      @Nullable final String packageName = userPackageName;
 
-      File file = new File(classpathElement);
+      final File file = new File(classpathElement);
       if (!file.exists()) {
         LOG.warn("Skipping non-existent classpath element {} that was specified.",
             classpathElement);
         continue;
       }
 
-      PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
+      ListenableFuture<PackageAttributes> future =
+          executorService.submit(new Callable<PackageAttributes>() {
+            @Override
+            public PackageAttributes call() throws Exception {
+              return createPackageAttributes(file, stagingPath, packageName);
+            }
+          });
+      futures.add(future);
+    }
+
+    try {
+      return Futures.allAsList(futures).get();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException("Interrupted while staging packages", e);
+    } catch (ExecutionException e) {
+      throw new RuntimeException("Error while staging packages", e.getCause());
+    }
+  }
+
+  private static WritableByteChannel makeWriter(String target, GcsUtil gcsUtil)
+      throws IOException {
+    IOChannelFactory factory = IOChannelUtils.getFactory(target);
+    if (factory instanceof GcsIOChannelFactory) {
+      return gcsUtil.create(GcsPath.fromUri(target), MimeTypes.BINARY);
+    } else {
+      return factory.create(target, MimeTypes.BINARY);
+    }
+  }
 
-      DataflowPackage workflowPackage = attributes.getDataflowPackage();
-      packages.add(workflowPackage);
-      String target = workflowPackage.getLocation();
+  /**
+   * Utility to verify whether a package has already been staged and, if not, copy it to the
+   * staging location.
+   */
+  private static void stageOnePackage(
+      PackageAttributes attributes, AtomicInteger numUploaded, AtomicInteger numCached,
+      Sleeper retrySleeper, GcsUtil gcsUtil) {
+    String source = attributes.getSourcePath();
+    String target = attributes.getDataflowPackage().getLocation();
 
-      // TODO: Should we attempt to detect the Mime type rather than
-      // always using MimeTypes.BINARY?
+    // TODO: Should we attempt to detect the Mime type rather than
+    // always using MimeTypes.BINARY?
+    try {
       try {
-        try {
-          long remoteLength = IOChannelUtils.getSizeBytes(target);
-          if (remoteLength == attributes.getSize()) {
-            LOG.debug("Skipping classpath element already staged: {} at {}",
-                classpathElement, target);
-            numCached++;
-            continue;
-          }
-        } catch (FileNotFoundException expected) {
-          // If the file doesn't exist, it means we need to upload it.
+        long remoteLength = IOChannelUtils.getSizeBytes(target);
+        if (remoteLength == attributes.getSize()) {
+          LOG.debug("Skipping classpath element already staged: {} at {}",
+              attributes.getSourcePath(), target);
+          numCached.incrementAndGet();
+          return;
         }
+      } catch (FileNotFoundException expected) {
+        // If the file doesn't exist, it means we need to upload it.
+      }
 
-        // Upload file, retrying on failure.
-        BackOff backoff = BACKOFF_FACTORY.backoff();
-        while (true) {
-          try {
-            LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
-            try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
-              copyContent(classpathElement, writer);
-            }
-            numUploaded++;
-            break;
-          } catch (IOException e) {
-            if (ERROR_EXTRACTOR.accessDenied(e)) {
-              String errorMessage = String.format(
-                  "Uploaded failed due to permissions error, will NOT retry staging "
-                  "Upload failed due to permissions error, will NOT retry staging "
-                  + "write access to %s. Stale credentials can be resolved by executing "
-                  + "'gcloud auth login'.", classpathElement, target);
-              LOG.error(errorMessage);
-              throw new IOException(errorMessage, e);
-            }
-            long sleep = backoff.nextBackOffMillis();
-            if (sleep == BackOff.STOP) {
-              // Rethrow last error, to be included as a cause in the catch below.
-              LOG.error("Upload failed, will NOT retry staging of classpath: {}",
-                  classpathElement, e);
-              throw e;
-            } else {
-              LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
-                  classpathElement, e);
-              retrySleeper.sleep(sleep);
-            }
+      // Upload file, retrying on failure.
+      BackOff backoff = BACKOFF_FACTORY.backoff();
+      while (true) {
+        try {
+          LOG.debug("Uploading classpath element {} to {}", source, target);
+          try (WritableByteChannel writer = makeWriter(target, gcsUtil)) {
+            copyContent(source, writer);
+          }
+          numUploaded.incrementAndGet();
+          break;
+        } catch (IOException e) {
+          if (ERROR_EXTRACTOR.accessDenied(e)) {
+            String errorMessage = String.format(
+                "Upload failed due to permissions error, will NOT retry staging "
+                    + "of classpath %s. Please verify credentials are valid and that you have "
+                    + "write access to %s. Stale credentials can be resolved by executing "
+                    + "'gcloud auth application-default login'.", source, target);
+            LOG.error(errorMessage);
+            throw new IOException(errorMessage, e);
+          }
+          long sleep = backoff.nextBackOffMillis();
+          if (sleep == BackOff.STOP) {
+            // Rethrow last error, to be included as a cause in the catch below.
+            LOG.error("Upload failed, will NOT retry staging of classpath: {}",
+                source, e);
+            throw e;
+          } else {
+            LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
+                source, e);
+            retrySleeper.sleep(sleep);
           }
         }
-      } catch (Exception e) {
-        throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
       }
+    } catch (Exception e) {
+      throw new RuntimeException("Could not stage classpath element: " + source, e);
     }
+  }
 
-    LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
-        + "{} files cached",
-        numUploaded, numCached);
+  /**
+   * Transfers the classpath elements to the staging location.
+   *
+   * @param classpathElements The elements to stage.
+   * @param stagingPath The base location to stage the elements to.
+   * @return A list of cloud workflow packages, each representing a classpath element.
+   */
+  static List<DataflowPackage> stageClasspathElements(
+      Collection<String> classpathElements, String stagingPath, GcsUtil gcsUtil) {
+    ListeningExecutorService executorService =
+        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(32));
+    try {
+      return stageClasspathElements(
+          classpathElements, stagingPath, Sleeper.DEFAULT, executorService, gcsUtil);
+    } finally {
+      executorService.shutdown();
+    }
+  }
+
+  // Visible for testing.
+  static List<DataflowPackage> stageClasspathElements(
+      Collection<String> classpathElements, final String stagingPath,
+      final Sleeper retrySleeper, ListeningExecutorService executorService, final GcsUtil gcsUtil) {
+    LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
+        + "prepare for execution.", classpathElements.size());
+
+    if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
+      LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
+            + "copies to all workers. Having this many entries on your classpath may be indicative "
+            + "of an issue in your pipeline. You may want to consider trimming the classpath to "
+            + "necessary dependencies only, using --filesToStage pipeline option to override "
+            + "what files are being staged, or bundling several dependencies into one.",
+          classpathElements.size());
+    }
+
+    checkArgument(
+        stagingPath != null,
+        "Can't stage classpath elements because no staging location has been provided");
+
+    // Inline a copy here because the inner code returns an immutable list and we want to mutate it.
+    List<PackageAttributes> packageAttributes =
+        new LinkedList<>(computePackageAttributes(classpathElements, stagingPath, executorService));
+    // Order package attributes in descending size order so that we upload the largest files first.
+    Collections.sort(packageAttributes, new PackageUploadOrder());
+
+    List<DataflowPackage> packages = Lists.newArrayListWithExpectedSize(packageAttributes.size());
+    final AtomicInteger numUploaded = new AtomicInteger(0);
+    final AtomicInteger numCached = new AtomicInteger(0);
+
+    List<ListenableFuture<?>> futures = new LinkedList<>();
+    for (final PackageAttributes attributes : packageAttributes) {
+      packages.add(attributes.getDataflowPackage());
+      futures.add(executorService.submit(new Runnable() {
+        @Override
+        public void run() {
+          stageOnePackage(attributes, numUploaded, numCached, retrySleeper, gcsUtil);
+        }
+      }));
+    }
+    try {
+      Futures.allAsList(futures).get();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException("Interrupted while staging packages", e);
+    } catch (ExecutionException e) {
+      throw new RuntimeException("Error while staging packages", e.getCause());
+    }
+
+    LOG.info(
+        "Staging files complete: {} files cached, {} files newly uploaded",
+        numCached.get(), numUploaded.get());
 
     return packages;
   }
@@ -293,13 +375,15 @@ public class PackageUtil {
     private final boolean directory;
     private final long size;
     private final String hash;
+    private final String sourcePath;
     private DataflowPackage dataflowPackage;
 
     public PackageAttributes(long size, String hash, boolean directory,
-        DataflowPackage dataflowPackage) {
+        DataflowPackage dataflowPackage, String sourcePath) {
       this.size = size;
       this.hash = Objects.requireNonNull(hash, "hash");
       this.directory = directory;
+      this.sourcePath = Objects.requireNonNull(sourcePath, "sourcePath");
       this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
     }
 
@@ -330,5 +414,12 @@ public class PackageUtil {
     public String getHash() {
       return hash;
     }
+
+    /**
+     * @return the file to be uploaded
+     */
+    public String getSourcePath() {
+      return sourcePath;
+    }
   }
 }
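
As a quick standalone illustration of the PackageUploadOrder rule added above (descending by size, hash as tie-break), applied here to bare sizes rather than real PackageAttributes objects:

    import java.util.Arrays;
    import java.util.Comparator;

    class UploadOrderDemo {
      public static void main(String[] args) {
        Long[] sizes = {10L, 500L, 42L};  // hypothetical package sizes in bytes
        Arrays.sort(sizes, new Comparator<Long>() {
          @Override
          public int compare(Long a, Long b) {
            return Long.signum(b - a);  // bigger sizes compare low, so they sort first
          }
        });
        System.out.println(Arrays.toString(sizes));  // prints [500, 42, 10]
      }
    }

Uploading the biggest files first means the long-pole transfer starts as early as possible, which is presumably the efficiency the comparator's "uploading packages efficiently" comment refers to.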

http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 05a87dd..3828415 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -18,12 +18,12 @@
 package org.apache.beam.runners.dataflow.util;
 
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyString;
@@ -53,6 +53,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.io.Files;
 import com.google.common.io.LineReader;
+import com.google.common.util.concurrent.MoreExecutors;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -235,7 +236,7 @@ public class PackageUtilTest {
       classpathElements.add(eltName + '=' + tmpFile.getAbsolutePath());
     }
 
-    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH);
+    PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, mockGcsUtil);
 
     logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
   }
@@ -250,7 +251,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -277,7 +278,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -304,7 +305,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -327,7 +328,8 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper);
+          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+          mockGcsUtil);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
@@ -348,16 +350,20 @@ public class PackageUtilTest {
     try {
       PackageUtil.stageClasspathElements(
           ImmutableList.of(tmpFile.getAbsolutePath()),
-          STAGING_PATH, fastNanoClockAndSleeper);
+          STAGING_PATH, fastNanoClockAndSleeper, MoreExecutors.newDirectExecutorService(),
+          mockGcsUtil);
       fail("Expected RuntimeException");
     } catch (RuntimeException e) {
-      assertTrue("Expected IOException containing detailed message.",
-          e.getCause() instanceof IOException);
-      assertThat(e.getCause().getMessage(),
+      assertThat("Expected RuntimeException wrapping IOException.",
+          e.getCause(), instanceOf(RuntimeException.class));
+      assertThat("Expected IOException containing detailed message.",
+          e.getCause().getCause(), instanceOf(IOException.class));
+      assertThat(e.getCause().getCause().getMessage(),
           Matchers.allOf(
               Matchers.containsString("Uploaded failed due to permissions error"),
               Matchers.containsString(
-                  "Stale credentials can be resolved by executing 'gcloud auth login'")));
+                  "Stale credentials can be resolved by executing 'gcloud auth application-default "
+                      + "login'")));
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -377,9 +383,8 @@ public class PackageUtilTest {
 
     try {
       PackageUtil.stageClasspathElements(
-                                              ImmutableList.of(tmpFile.getAbsolutePath()),
-                                              STAGING_PATH,
-                                              fastNanoClockAndSleeper);
+          ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper,
+          MoreExecutors.newDirectExecutorService(), mockGcsUtil);
     } finally {
       verify(mockGcsUtil).fileSize(any(GcsPath.class));
       verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
@@ -393,7 +398,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verifyNoMoreInteractions(mockGcsUtil);
@@ -411,7 +416,7 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     PackageUtil.stageClasspathElements(
-        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(tmpDirectory.getAbsolutePath()), STAGING_PATH, mockGcsUtil);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
     verify(mockGcsUtil).create(any(GcsPath.class), anyString());
@@ -429,7 +434,8 @@ public class PackageUtilTest {
     when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
 
     List<DataflowPackage> targets = PackageUtil.stageClasspathElements(
-        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH);
+        ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), STAGING_PATH,
+        mockGcsUtil);
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).fileSize(any(GcsPath.class));
@@ -446,7 +452,7 @@ public class PackageUtilTest {
     String nonExistentFile =
         IOChannelUtils.resolve(tmpFolder.getRoot().getPath(), "non-existent-file");
     assertEquals(Collections.EMPTY_LIST, PackageUtil.stageClasspathElements(
-        ImmutableList.of(nonExistentFile), STAGING_PATH));
+        ImmutableList.of(nonExistentFile), STAGING_PATH, mockGcsUtil));
   }
 
   /**
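The test changes above illustrate the new call shapes: a GcsUtil instance is now
passed explicitly to PackageUtil.stageClasspathElements rather than derived inside
the method. A minimal sketch, assuming a classpath list, staging path, and GcsUtil
like those in the tests (names are illustrative):

  // Common form: classpath elements, staging location, and an explicit GcsUtil.
  List<DataflowPackage> packages =
      PackageUtil.stageClasspathElements(classpathElements, STAGING_PATH, gcsUtil);

  // Retry-oriented overload exercised by the tests above, with an injectable
  // clock/sleeper and executor service.
  PackageUtil.stageClasspathElements(
      classpathElements, STAGING_PATH, fastNanoClockAndSleeper,
      MoreExecutors.newDirectExecutorService(), gcsUtil);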

http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
index 0553efc..72e106d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcsOptions.java
@@ -25,6 +25,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
 import org.apache.beam.sdk.util.AppEngineEnvironment;
 import org.apache.beam.sdk.util.GcsPathValidator;
 import org.apache.beam.sdk.util.GcsUtil;
@@ -81,8 +82,9 @@ public interface GcsOptions extends
       + "information on the restrictions and performance implications of this value.\n\n"
       + "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
       + "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
+  @Nullable
   Integer getGcsUploadBufferSizeBytes();
-  void setGcsUploadBufferSizeBytes(Integer bytes);
+  void setGcsUploadBufferSizeBytes(@Nullable Integer bytes);
 
   /**
    * The class of the validator that should be created and used to validate paths.
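Because the getter is now @Nullable, callers can no longer assume a non-null
value. A consumer-side sketch of the defaulting pattern (the 8 MB fallback below
is an assumption for illustration, not the SDK's documented default):

  // gcsOptions is a GcsOptions instance; a null return means the user left it unset.
  Integer configured = gcsOptions.getGcsUploadBufferSizeBytes();
  int uploadBufferSizeBytes =
      (configured == null) ? 8 * 1024 * 1024 : configured;  // hypothetical default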

http://git-wip-us.apache.org/repos/asf/beam/blob/23e2b913/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
index a10ea28..5e83584 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/GcsUtil.java
@@ -101,6 +101,18 @@ public class GcsUtil {
           gcsOptions.getExecutorService(),
           gcsOptions.getGcsUploadBufferSizeBytes());
     }
+
+    /**
+     * Returns an instance of {@link GcsUtil} based on the given parameters.
+     */
+    public static GcsUtil create(
+        Storage storageClient,
+        HttpRequestInitializer httpRequestInitializer,
+        ExecutorService executorService,
+        @Nullable Integer uploadBufferSizeBytes) {
+      return new GcsUtil(
+          storageClient, httpRequestInitializer, executorService, uploadBufferSizeBytes);
+    }
   }
 
   private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
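The new static create method makes a GcsUtil constructible without going through
PipelineOptions, which is what lets the PackageUtilTest changes above inject a
mock. A hedged usage sketch (assuming the enclosing nested class is
GcsUtilFactory, and that storageClient and httpRequestInitializer are configured
elsewhere):

  GcsUtil gcsUtil = GcsUtil.GcsUtilFactory.create(
      storageClient,                             // Storage client for GCS
      httpRequestInitializer,                    // request initializer (credentials etc.)
      MoreExecutors.newDirectExecutorService(),  // executor, as in PackageUtilTest
      null);                                     // @Nullable buffer size: use the default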


[06/50] beam git commit: Removes ReduceFnExecutor interface

Posted by dh...@apache.org.
Removes ReduceFnExecutor interface


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8989473b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8989473b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8989473b

Branch: refs/heads/python-sdk
Commit: 8989473b8e379a40b888565aadead001379c9398
Parents: b333487
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jan 24 13:32:24 2017 -0800
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jan 24 13:32:24 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/runners/core/DoFnRunner.java    | 20 --------------------
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |  5 +----
 .../beam/runners/direct/ParDoEvaluator.java     |  2 --
 .../runners/spark/translation/DoFnFunction.java |  2 --
 .../spark/translation/MultiDoFnFunction.java    |  2 --
 5 files changed, 1 insertion(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
index 66f95db..b29adcc 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
@@ -17,12 +17,10 @@
  */
 package org.apache.beam.runners.core;
 
-import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.util.TimeDomain;
 import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
 import org.joda.time.Instant;
 
 /**
@@ -51,22 +49,4 @@ public interface DoFnRunner<InputT, OutputT> {
    * additional tasks, such as flushing in-memory states.
    */
   void finishBundle();
-
-  /**
-   * An internal interface for signaling that a {@link OldDoFn} requires late data dropping.
-   */
-  public interface ReduceFnExecutor<K, InputT, OutputT, W> {
-    /**
-     * Gets this object as a {@link OldDoFn}.
-     *
-     * <p>Most implementors of this interface are expected to be {@link OldDoFn} instances, and will
-     * return themselves.
-     */
-    OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn();
-
-    /**
-     * Returns an aggregator that tracks elements that are dropped due to being late.
-     */
-    Aggregator<Long, Long> getDroppedDueToLatenessAggregator();
-  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
index ecce4fc..d0387cf 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetDoFn.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.runners.core;
 
-import org.apache.beam.runners.core.DoFnRunner.ReduceFnExecutor;
 import org.apache.beam.runners.core.triggers.ExecutableTriggerStateMachine;
 import org.apache.beam.runners.core.triggers.TriggerStateMachines;
 import org.apache.beam.sdk.transforms.Aggregator;
@@ -37,7 +36,7 @@ import org.apache.beam.sdk.values.KV;
 @SystemDoFnInternal
 public class GroupAlsoByWindowViaWindowSetDoFn<
         K, InputT, OutputT, W extends BoundedWindow, RinT extends KeyedWorkItem<K, InputT>>
-    extends OldDoFn<RinT, KV<K, OutputT>> implements ReduceFnExecutor<K, InputT, OutputT, W> {
+    extends OldDoFn<RinT, KV<K, OutputT>> {
 
   public static <K, InputT, OutputT, W extends BoundedWindow>
       OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
@@ -95,7 +94,6 @@ public class GroupAlsoByWindowViaWindowSetDoFn<
     reduceFnRunner.persist();
   }
 
-  @Override
   public OldDoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn() {
     // Safe contravariant cast
     @SuppressWarnings("unchecked")
@@ -104,7 +102,6 @@ public class GroupAlsoByWindowViaWindowSetDoFn<
     return asFn;
   }
 
-  @Override
   public Aggregator<Long, Long> getDroppedDueToLatenessAggregator() {
     return droppedDueToLateness;
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index 97d5360..48f0f8d 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -73,8 +73,6 @@ class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
     ReadyCheckingSideInputReader sideInputReader =
         evaluationContext.createSideInputReader(sideInputs);
 
-    // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
-    // and window-exploded processing is achieved within the simple runner
     DoFnRunner<InputT, OutputT> underlying =
         DoFnRunners.simpleRunner(
             evaluationContext.getPipelineOptions(),

http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
index bd6cfbe..4fd5e51 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
@@ -81,8 +81,6 @@ public class DoFnFunction<InputT, OutputT>
 
     DoFnOutputManager outputManager = new DoFnOutputManager();
 
-    // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
-    // and window-exploded processing is achieved within the simple runner
     DoFnRunner<InputT, OutputT> doFnRunner =
         DoFnRunners.simpleRunner(
             runtimeContext.getPipelineOptions(),

http://git-wip-us.apache.org/repos/asf/beam/blob/8989473b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
index cceffc8..911e6c5 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/MultiDoFnFunction.java
@@ -88,8 +88,6 @@ public class MultiDoFnFunction<InputT, OutputT>
 
     DoFnOutputManager outputManager = new DoFnOutputManager();
 
-    // Unlike for OldDoFn, there is no ReduceFnExecutor that is a new DoFn,
-    // and window-exploded processing is achieved within the simple runner
     DoFnRunner<InputT, OutputT> doFnRunner =
         DoFnRunners.simpleRunner(
             runtimeContext.getPipelineOptions(),


[25/50] beam git commit: fixup! Hide visibility of internal implementation class

Posted by dh...@apache.org.
fixup! Hide visibility of internal implementation class


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a67ff91e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a67ff91e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a67ff91e

Branch: refs/heads/python-sdk
Commit: a67ff91e546cb77ad050e6b7573a884f190840cb
Parents: 968c311
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jan 25 14:13:55 2017 -0800
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jan 25 14:13:55 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a67ff91e/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
index f52b822..04e1755 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
@@ -145,7 +145,7 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
    * number of records and read time into a {@link BoundedSource}.
    */
   @AutoValue
-  public abstract static class UnboundedToBoundedSourceAdapter<T>
+  abstract static class UnboundedToBoundedSourceAdapter<T>
       extends BoundedSource<ValueWithRecordId<T>> {
     @Nullable abstract UnboundedSource<T, ?> getSource();
     @Nullable abstract long getMaxNumRecords();


[09/50] beam git commit: This closes #1590

Posted by dh...@apache.org.
This closes #1590


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f2389ab7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f2389ab7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f2389ab7

Branch: refs/heads/python-sdk
Commit: f2389ab7ba1d562d23420d7e2ecd638524439dc6
Parents: 11c3cd7 dc36952
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 14:41:55 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 14:41:55 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 51 ++++++++++++++------
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 36 ++++++++++++++
 2 files changed, 71 insertions(+), 16 deletions(-)
----------------------------------------------------------------------



[44/50] beam git commit: [maven-release-plugin] prepare for next development iteration

Posted by dh...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4a29131d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4a29131d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4a29131d

Branch: refs/heads/python-sdk
Commit: 4a29131d3c0f490d01820e92e028ec07eaffe927
Parents: da2dff9
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Jan 27 18:27:16 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Jan 27 18:27:16 2017 +0100

----------------------------------------------------------------------
 examples/java/pom.xml                             | 2 +-
 examples/java8/pom.xml                            | 2 +-
 examples/pom.xml                                  | 2 +-
 pom.xml                                           | 4 ++--
 runners/apex/pom.xml                              | 2 +-
 runners/core-java/pom.xml                         | 2 +-
 runners/direct-java/pom.xml                       | 2 +-
 runners/flink/examples/pom.xml                    | 2 +-
 runners/flink/pom.xml                             | 2 +-
 runners/flink/runner/pom.xml                      | 2 +-
 runners/google-cloud-dataflow-java/pom.xml        | 2 +-
 runners/pom.xml                                   | 2 +-
 runners/spark/pom.xml                             | 2 +-
 sdks/java/build-tools/pom.xml                     | 2 +-
 sdks/java/core/pom.xml                            | 2 +-
 sdks/java/extensions/join-library/pom.xml         | 2 +-
 sdks/java/extensions/pom.xml                      | 2 +-
 sdks/java/extensions/sorter/pom.xml               | 2 +-
 sdks/java/io/elasticsearch/pom.xml                | 2 +-
 sdks/java/io/google-cloud-platform/pom.xml        | 2 +-
 sdks/java/io/hdfs/pom.xml                         | 2 +-
 sdks/java/io/jdbc/pom.xml                         | 2 +-
 sdks/java/io/jms/pom.xml                          | 2 +-
 sdks/java/io/kafka/pom.xml                        | 2 +-
 sdks/java/io/kinesis/pom.xml                      | 2 +-
 sdks/java/io/mongodb/pom.xml                      | 2 +-
 sdks/java/io/mqtt/pom.xml                         | 2 +-
 sdks/java/io/pom.xml                              | 2 +-
 sdks/java/java8tests/pom.xml                      | 2 +-
 sdks/java/maven-archetypes/examples-java8/pom.xml | 2 +-
 sdks/java/maven-archetypes/examples/pom.xml       | 2 +-
 sdks/java/maven-archetypes/pom.xml                | 2 +-
 sdks/java/maven-archetypes/starter/pom.xml        | 2 +-
 sdks/java/pom.xml                                 | 2 +-
 sdks/pom.xml                                      | 2 +-
 35 files changed, 36 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
index e001d1c..9da814b 100644
--- a/examples/java/pom.xml
+++ b/examples/java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-examples-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
index 370d79f..d0042e3 100644
--- a/examples/java8/pom.xml
+++ b/examples/java8/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-examples-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index 4294c2d..550578b 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2281f67..d09bf59 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,7 +34,7 @@
   <url>http://beam.apache.org/</url>
   <inceptionYear>2016</inceptionYear>
 
-  <version>0.5.0-SNAPSHOT</version>
+  <version>0.6.0-SNAPSHOT</version>
 
   <licenses>
     <license>
@@ -48,7 +48,7 @@
     <connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
     <url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
-    <tag>release-0.5.0</tag>
+    <tag>HEAD</tag>
   </scm>
 
   <issueManagement>

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/apex/pom.xml
----------------------------------------------------------------------
diff --git a/runners/apex/pom.xml b/runners/apex/pom.xml
index 47139a6..7ae07e2 100644
--- a/runners/apex/pom.xml
+++ b/runners/apex/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/core-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/core-java/pom.xml b/runners/core-java/pom.xml
index 9e8393d..d8706b1 100644
--- a/runners/core-java/pom.xml
+++ b/runners/core-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 0d44136..53281be 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/examples/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/examples/pom.xml b/runners/flink/examples/pom.xml
index e424042..1d426bd 100644
--- a/runners/flink/examples/pom.xml
+++ b/runners/flink/examples/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-flink-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/pom.xml b/runners/flink/pom.xml
index 7eab021..6f4236e 100644
--- a/runners/flink/pom.xml
+++ b/runners/flink/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index 8c73385..6a7cbff 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-flink-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index b446b7b..f17eb78 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index ceaedfe..6513a33 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index 196b5bb..5d46f8d 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/build-tools/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/build-tools/pom.xml b/sdks/java/build-tools/pom.xml
index 4c0a749..545f394 100644
--- a/sdks/java/build-tools/pom.xml
+++ b/sdks/java/build-tools/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index b02cd1d..bb019c1 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/join-library/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/join-library/pom.xml b/sdks/java/extensions/join-library/pom.xml
index def0340..562f921 100644
--- a/sdks/java/extensions/join-library/pom.xml
+++ b/sdks/java/extensions/join-library/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/pom.xml b/sdks/java/extensions/pom.xml
index 01645d2..99e0cb6 100644
--- a/sdks/java/extensions/pom.xml
+++ b/sdks/java/extensions/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/extensions/sorter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/sorter/pom.xml b/sdks/java/extensions/sorter/pom.xml
index 1961452..9d03ba2 100644
--- a/sdks/java/extensions/sorter/pom.xml
+++ b/sdks/java/extensions/sorter/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/elasticsearch/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/elasticsearch/pom.xml b/sdks/java/io/elasticsearch/pom.xml
index 94e8c6c..bc2cdb7 100644
--- a/sdks/java/io/elasticsearch/pom.xml
+++ b/sdks/java/io/elasticsearch/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/google-cloud-platform/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/pom.xml b/sdks/java/io/google-cloud-platform/pom.xml
index d800d34..8f9067f 100644
--- a/sdks/java/io/google-cloud-platform/pom.xml
+++ b/sdks/java/io/google-cloud-platform/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/pom.xml b/sdks/java/io/hdfs/pom.xml
index a8be68a..48c269f 100644
--- a/sdks/java/io/hdfs/pom.xml
+++ b/sdks/java/io/hdfs/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index 44c7abd..afe236a 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/jms/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jms/pom.xml b/sdks/java/io/jms/pom.xml
index 4e03a19..80d1f6c 100644
--- a/sdks/java/io/jms/pom.xml
+++ b/sdks/java/io/jms/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/kafka/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/pom.xml b/sdks/java/io/kafka/pom.xml
index 2637449..2dd775e 100644
--- a/sdks/java/io/kafka/pom.xml
+++ b/sdks/java/io/kafka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/kinesis/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/pom.xml b/sdks/java/io/kinesis/pom.xml
index 29e9e5f..187d0c0 100644
--- a/sdks/java/io/kinesis/pom.xml
+++ b/sdks/java/io/kinesis/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/mongodb/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mongodb/pom.xml b/sdks/java/io/mongodb/pom.xml
index 56de6b3..19d9d18 100644
--- a/sdks/java/io/mongodb/pom.xml
+++ b/sdks/java/io/mongodb/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mqtt/pom.xml b/sdks/java/io/mqtt/pom.xml
index da19028..2547c78 100644
--- a/sdks/java/io/mqtt/pom.xml
+++ b/sdks/java/io/mqtt/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/io/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/pom.xml b/sdks/java/io/pom.xml
index ffe3c02..70ccf9d 100644
--- a/sdks/java/io/pom.xml
+++ b/sdks/java/io/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/java8tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/pom.xml b/sdks/java/java8tests/pom.xml
index 8545de4..c503d37 100644
--- a/sdks/java/java8tests/pom.xml
+++ b/sdks/java/java8tests/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/examples-java8/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/pom.xml b/sdks/java/maven-archetypes/examples-java8/pom.xml
index 5f27207..2632d6d 100644
--- a/sdks/java/maven-archetypes/examples-java8/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/examples/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/pom.xml b/sdks/java/maven-archetypes/examples/pom.xml
index e819c8c..09e5428 100644
--- a/sdks/java/maven-archetypes/examples/pom.xml
+++ b/sdks/java/maven-archetypes/examples/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/pom.xml b/sdks/java/maven-archetypes/pom.xml
index 28e9fe9..194e5bd 100644
--- a/sdks/java/maven-archetypes/pom.xml
+++ b/sdks/java/maven-archetypes/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 5b21407..092995a 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/java/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/pom.xml b/sdks/java/pom.xml
index 1ab3452..555fdd4 100644
--- a/sdks/java/pom.xml
+++ b/sdks/java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4a29131d/sdks/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/pom.xml b/sdks/pom.xml
index 2682728..06dbb9b 100644
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>0.5.0-SNAPSHOT</version>
+    <version>0.6.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[39/50] beam git commit: This closes #1823

Posted by dh...@apache.org.
This closes #1823


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/83f8c460
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/83f8c460
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/83f8c460

Branch: refs/heads/python-sdk
Commit: 83f8c460c93501903864c8e09b4dbcff6903a5ae
Parents: 2cbc08b 6531545
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 17:22:55 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 17:22:55 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 63 ++++++++++++-------
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |  1 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 64 +++++++++++++-------
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  2 +
 4 files changed, 87 insertions(+), 43 deletions(-)
----------------------------------------------------------------------



[23/50] beam git commit: [BEAM-246] re-enable Checkstyle by default

Posted by dh...@apache.org.
[BEAM-246] re-enable Checkstyle by default

This adds 50%+ overhead to a clean build (with testing disabled), but
per the dev@ discussion it is a huge usability win for contributors and
committers alike.

https://lists.apache.org/thread.html/CAA8k_FKafuon8GEA3CXwR2MZh2kAXEFZQK=BgX5tk2fZJebrag@mail.gmail.com


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f05c5d32
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f05c5d32
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f05c5d32

Branch: refs/heads/python-sdk
Commit: f05c5d32cb5dbee6de4247a803d7b7c7fbe52173
Parents: c525783
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 13:52:06 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 12:13:33 2017 -0800

----------------------------------------------------------------------
 examples/pom.xml | 14 +++++++++-----
 runners/pom.xml  | 14 +++++++++-----
 sdks/pom.xml     | 13 +++++++------
 3 files changed, 25 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index adfbaa9..4294c2d 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -51,11 +51,6 @@
       <build>
         <plugins>
           <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-checkstyle-plugin</artifactId>
-          </plugin>
-
-          <plugin>
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>findbugs-maven-plugin</artifactId>
           </plugin>
@@ -64,4 +59,13 @@
     </profile>
   </profiles>
 
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
 </project>

http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index fb84164..ceaedfe 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -47,11 +47,6 @@
       <build>
         <plugins>
           <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-checkstyle-plugin</artifactId>
-          </plugin>
-
-          <plugin>
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>findbugs-maven-plugin</artifactId>
           </plugin>
@@ -99,4 +94,13 @@
       </build>
     </profile>
   </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>

http://git-wip-us.apache.org/repos/asf/beam/blob/f05c5d32/sdks/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/pom.xml b/sdks/pom.xml
index bfdfcd9..2682728 100644
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@ -42,11 +42,6 @@
       <build>
         <plugins>
           <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-checkstyle-plugin</artifactId>
-          </plugin>
-
-          <plugin>
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>findbugs-maven-plugin</artifactId>
           </plugin>
@@ -58,7 +53,6 @@
   <build>
     <pluginManagement>
       <plugins>
-
         <!-- SDKs will generally offer test suites for runners, as sdks/java does. -->
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
@@ -74,6 +68,13 @@
         </plugin>
       </plugins>
     </pluginManagement>
+
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+    </plugins>
   </build>
 
 </project>


[02/50] beam git commit: [BEAM-1258] demote retry log messages to info level.

Posted by dh...@apache.org.
[BEAM-1258] demote retry log messages to info level.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3afdc5c0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3afdc5c0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3afdc5c0

Branch: refs/heads/python-sdk
Commit: 3afdc5c0ef37e48b1750f70e54cd64f5063da83b
Parents: 2a23e8b
Author: Pei He <pe...@google.com>
Authored: Tue Jan 24 11:09:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800

----------------------------------------------------------------------
 .../sdk/io/gcp/bigquery/BigQueryServicesImpl.java   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3afdc5c0/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index c9edf7c..c524ce4 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -220,11 +220,11 @@ class BigQueryServicesImpl implements BigQueryServices {
             return; // SUCCEEDED
           }
           // ignore and retry
-          LOG.warn("Ignore the error and retry inserting the job.", e);
+          LOG.info("Ignore the error and retry inserting the job.", e);
           lastException = e;
         } catch (IOException e) {
           // ignore and retry
-          LOG.warn("Ignore the error and retry inserting the job.", e);
+          LOG.info("Ignore the error and retry inserting the job.", e);
           lastException = e;
         }
       } while (nextBackOff(sleeper, backoff));
@@ -261,7 +261,7 @@ class BigQueryServicesImpl implements BigQueryServices {
           // The job is not DONE, wait longer and retry.
         } catch (IOException e) {
           // ignore and retry
-          LOG.warn("Ignore the error and retry polling job status.", e);
+          LOG.info("Ignore the error and retry polling job status.", e);
         }
       } while (nextBackOff(sleeper, backoff));
       LOG.warn("Unable to poll job status: {}, aborting after reached max .", jobRef.getJobId());
@@ -316,12 +316,12 @@ class BigQueryServicesImpl implements BigQueryServices {
             LOG.info("No BigQuery job with job id {} found.", jobId);
             return null;
           }
-          LOG.warn(
+          LOG.info(
               "Ignoring the error encountered while trying to query the BigQuery job {}",
               jobId, e);
           lastException = e;
         } catch (IOException e) {
-          LOG.warn(
+          LOG.info(
               "Ignoring the error encountered while trying to query the BigQuery job {}",
               jobId, e);
           lastException = e;
@@ -618,10 +618,10 @@ class BigQueryServicesImpl implements BigQueryServices {
             return; // SUCCEEDED
           }
           // ignore and retry
-          LOG.warn("Ignore the error and retry creating the dataset.", e);
+          LOG.info("Ignore the error and retry creating the dataset.", e);
           lastException = e;
         } catch (IOException e) {
-          LOG.warn("Ignore the error and retry creating the dataset.", e);
+          LOG.info("Ignore the error and retry creating the dataset.", e);
           lastException = e;
         }
       } while (nextBackOff(sleeper, backoff));
@@ -891,7 +891,7 @@ class BigQueryServicesImpl implements BigQueryServices {
         if (!shouldRetry.apply(e)) {
           break;
         }
-        LOG.warn("Ignore the error and retry the request.", e);
+        LOG.info("Ignore the error and retry the request.", e);
       }
     } while (nextBackOff(sleeper, backoff));
     throw new IOException(


[14/50] beam git commit: Refactor BigQueryServices to have TableReference in method signatures

Posted by dh...@apache.org.
Refactor BigQueryServices to have TableReference in method signatures


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f9d1d682
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f9d1d682
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f9d1d682

Branch: refs/heads/python-sdk
Commit: f9d1d682340fa3083bc18723605bf3d0aa6d76cd
Parents: e77de7c
Author: Pei He <pe...@google.com>
Authored: Tue Jan 24 16:45:16 2017 -0800
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jan 24 18:00:40 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 40 +++++--------------
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |  9 ++---
 .../io/gcp/bigquery/BigQueryServicesImpl.java   | 23 ++++-------
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 41 ++++++++------------
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |  3 +-
 5 files changed, 40 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index fa49f55..b6f9fb0 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -997,8 +997,7 @@ public class BigQueryIO {
         TableReference table = JSON_FACTORY.fromString(jsonTable.get(), TableReference.class);
 
         Long numBytes = bqServices.getDatasetService(options.as(BigQueryOptions.class))
-            .getTable(table.getProjectId(), table.getDatasetId(), table.getTableId())
-            .getNumBytes();
+            .getTable(table).getNumBytes();
         tableSizeBytes.compareAndSet(null, numBytes);
       }
       return tableSizeBytes.get();
@@ -1088,10 +1087,7 @@ public class BigQueryIO {
       DatasetService tableService = bqServices.getDatasetService(bqOptions);
       if (referencedTables != null && !referencedTables.isEmpty()) {
         TableReference queryTable = referencedTables.get(0);
-        location = tableService.getTable(
-            queryTable.getProjectId(),
-            queryTable.getDatasetId(),
-            queryTable.getTableId()).getLocation();
+        location = tableService.getTable(queryTable).getLocation();
       }
 
       // 2. Create the temporary dataset in the query location.
@@ -1120,10 +1116,7 @@ public class BigQueryIO {
           JSON_FACTORY.fromString(jsonQueryTempTable.get(), TableReference.class);
 
       DatasetService tableService = bqServices.getDatasetService(bqOptions);
-      tableService.deleteTable(
-          tableToRemove.getProjectId(),
-          tableToRemove.getDatasetId(),
-          tableToRemove.getTableId());
+      tableService.deleteTable(tableToRemove);
       tableService.deleteDataset(tableToRemove.getProjectId(), tableToRemove.getDatasetId());
     }
 
@@ -1227,10 +1220,8 @@ public class BigQueryIO {
       String extractJobId = getExtractJobId(jobIdToken);
       List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
 
-      TableSchema tableSchema = bqServices.getDatasetService(bqOptions).getTable(
-          tableToExtract.getProjectId(),
-          tableToExtract.getDatasetId(),
-          tableToExtract.getTableId()).getSchema();
+      TableSchema tableSchema = bqServices.getDatasetService(bqOptions)
+          .getTable(tableToExtract).getSchema();
 
       cleanupTempResource(bqOptions);
       return createSources(tempFiles, tableSchema);
@@ -1867,13 +1858,9 @@ public class BigQueryIO {
           DatasetService datasetService,
           TableReference tableRef) {
         try {
-          if (datasetService.getTable(
-              tableRef.getProjectId(),
-              tableRef.getDatasetId(),
-              tableRef.getTableId()) != null) {
+          if (datasetService.getTable(tableRef) != null) {
             checkState(
-                datasetService.isTableEmpty(
-                    tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
+                datasetService.isTableEmpty(tableRef),
                 "BigQuery table is not empty: %s.",
                 BigQueryIO.toTableSpec(tableRef));
           }
@@ -2535,10 +2522,7 @@ public class BigQueryIO {
         for (TableReference tableRef : tempTables) {
           try {
             LOG.debug("Deleting table {}", toJsonString(tableRef));
-            tableService.deleteTable(
-                tableRef.getProjectId(),
-                tableRef.getDatasetId(),
-                tableRef.getTableId());
+            tableService.deleteTable(tableRef);
           } catch (Exception e) {
             LOG.warn("Failed to delete the table {}", toJsonString(tableRef), e);
           }
@@ -2587,7 +2571,7 @@ public class BigQueryIO {
 
   private static void verifyTablePresence(DatasetService datasetService, TableReference table) {
     try {
-      datasetService.getTable(table.getProjectId(), table.getDatasetId(), table.getTableId());
+      datasetService.getTable(table);
     } catch (Exception e) {
       ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
       if ((e instanceof IOException) && errorExtractor.itemNotFound((IOException) e)) {
@@ -2712,11 +2696,7 @@ public class BigQueryIO {
           // every thread from attempting a create and overwhelming our BigQuery quota.
           DatasetService datasetService = bqServices.getDatasetService(options);
           if (!createdTables.contains(tableSpec)) {
-            Table table = datasetService.getTable(
-                tableReference.getProjectId(),
-                tableReference.getDatasetId(),
-                tableReference.getTableId());
-            if (table == null) {
+            if (datasetService.getTable(tableReference) == null) {
               TableSchema tableSchema = JSON_FACTORY.fromString(
                   jsonTableSchema.get(), TableSchema.class);
               datasetService.createTable(

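The hunks above collapse each (projectId, datasetId, tableId) triple into a single TableReference argument. A minimal sketch of the resulting call pattern, with hypothetical project, dataset, and table names; it assumes the code sits in org.apache.beam.sdk.io.gcp.bigquery so the package-private DatasetService interface is visible:

    import com.google.api.services.bigquery.model.Table;
    import com.google.api.services.bigquery.model.TableReference;
    import java.io.IOException;

    // Sketch only; DatasetService comes from the package-private
    // BigQueryServices interface shown in the next file below.
    static void deleteIfEmpty(DatasetService datasetService)
        throws IOException, InterruptedException {
      // Build the reference once; every call below reuses it.
      TableReference tableRef = new TableReference()
          .setProjectId("my-project")   // hypothetical project
          .setDatasetId("my_dataset")   // hypothetical dataset
          .setTableId("my_table");      // hypothetical table
      Table table = datasetService.getTable(tableRef);  // null if absent
      if (table != null && datasetService.isTableEmpty(tableRef)) {
        datasetService.deleteTable(tableRef);
      }
    }
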
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
index 32cf46d..03e4391 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
@@ -119,8 +119,7 @@ interface BigQueryServices extends Serializable {
      * <p>Returns null if the table is not found.
      */
     @Nullable
-    Table getTable(String projectId, String datasetId, String tableId)
-        throws InterruptedException, IOException;
+    Table getTable(TableReference tableRef) throws InterruptedException, IOException;
 
     /**
      * Creates the specified table if it does not exist.
@@ -131,16 +130,14 @@ interface BigQueryServices extends Serializable {
      * Deletes the table specified by tableId from the dataset.
      * If the table contains data, all the data will be deleted.
      */
-    void deleteTable(String projectId, String datasetId, String tableId)
-        throws IOException, InterruptedException;
+    void deleteTable(TableReference tableRef) throws IOException, InterruptedException;
 
     /**
      * Returns true if the table is empty.
      *
      * @throws IOException if the table is not found.
      */
-    boolean isTableEmpty(String projectId, String datasetId, String tableId)
-        throws IOException, InterruptedException;
+    boolean isTableEmpty(TableReference tableRef) throws IOException, InterruptedException;
 
     /**
      * Gets the specified {@link Dataset} resource by dataset ID.

http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index c524ce4..75796ab 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -394,15 +394,12 @@ class BigQueryServicesImpl implements BigQueryServices {
      */
     @Override
     @Nullable
-    public Table getTable(String projectId, String datasetId, String tableId)
+    public Table getTable(TableReference tableRef)
         throws IOException, InterruptedException {
       BackOff backoff =
           FluentBackoff.DEFAULT
               .withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
-      return getTable(
-          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId),
-          backoff,
-          Sleeper.DEFAULT);
+      return getTable(tableRef, backoff, Sleeper.DEFAULT);
     }
 
     @VisibleForTesting
@@ -506,31 +503,27 @@ class BigQueryServicesImpl implements BigQueryServices {
      * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
      */
     @Override
-    public void deleteTable(String projectId, String datasetId, String tableId)
-        throws IOException, InterruptedException {
+    public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
       BackOff backoff =
           FluentBackoff.DEFAULT
               .withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
       executeWithRetries(
-          client.tables().delete(projectId, datasetId, tableId),
+          client.tables().delete(
+              tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
           String.format(
               "Unable to delete table: %s, aborting after %d retries.",
-              tableId, MAX_RPC_RETRIES),
+              tableRef.getTableId(), MAX_RPC_RETRIES),
           Sleeper.DEFAULT,
           backoff,
           ALWAYS_RETRY);
     }
 
     @Override
-    public boolean isTableEmpty(String projectId, String datasetId, String tableId)
-        throws IOException, InterruptedException {
+    public boolean isTableEmpty(TableReference tableRef) throws IOException, InterruptedException {
       BackOff backoff =
           FluentBackoff.DEFAULT
               .withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF).backoff();
-      return isTableEmpty(
-          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId),
-          backoff,
-          Sleeper.DEFAULT);
+      return isTableEmpty(tableRef, backoff, Sleeper.DEFAULT);
     }
 
     @VisibleForTesting

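Both refactored methods keep the FluentBackoff retry scaffolding visible in the hunk above. A condensed sketch of that scaffolding as a generic helper; the retry limits are hypothetical stand-ins for MAX_RPC_RETRIES and INITIAL_RPC_BACKOFF, and the Callable stands in for the concrete RPC:

    import com.google.api.client.util.BackOff;
    import com.google.api.client.util.BackOffUtils;
    import com.google.api.client.util.Sleeper;
    import java.io.IOException;
    import java.util.concurrent.Callable;
    import org.apache.beam.sdk.util.FluentBackoff;
    import org.joda.time.Duration;

    class RetrySketch {
      // Retries rpc until it succeeds or the backoff budget is spent,
      // mirroring the executeWithRetries(...) calls above.
      static <T> T withRetries(Callable<T> rpc) throws Exception {
        BackOff backoff = FluentBackoff.DEFAULT
            .withMaxRetries(5)                          // hypothetical limit
            .withInitialBackoff(Duration.standardSeconds(1))
            .backoff();
        while (true) {
          try {
            return rpc.call();
          } catch (IOException e) {
            // next() sleeps, then returns false once retries are exhausted.
            if (!BackOffUtils.next(Sleeper.DEFAULT, backoff)) {
              throw e;
            }
          }
        }
      }
    }
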
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index ba7f44e..0b8d60d 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -32,6 +32,7 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.doNothing;
@@ -526,18 +527,18 @@ public class BigQueryIOTest implements Serializable {
   private static class FakeDatasetService implements DatasetService, Serializable {
 
     @Override
-    public Table getTable(String projectId, String datasetId, String tableId)
+    public Table getTable(TableReference tableRef)
         throws InterruptedException, IOException {
       synchronized (tables) {
         Map<String, TableContainer> dataset =
             checkNotNull(
-                tables.get(projectId, datasetId),
+                tables.get(tableRef.getProjectId(), tableRef.getDatasetId()),
                 "Tried to get a dataset %s:%s from %s, but no such dataset was set",
-                projectId,
-                datasetId,
-                tableId,
+                tableRef.getProjectId(),
+                tableRef.getDatasetId(),
+                tableRef.getTableId(),
                 FakeDatasetService.class.getSimpleName());
-        TableContainer tableContainer = dataset.get(tableId);
+        TableContainer tableContainer = dataset.get(tableRef.getTableId());
         return tableContainer == null ? null : tableContainer.getTable();
       }
     }
@@ -569,8 +570,7 @@ public class BigQueryIOTest implements Serializable {
     }
 
     @Override
-    public void deleteTable(String projectId, String datasetId, String tableId)
-        throws IOException, InterruptedException {
+    public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
       throw new UnsupportedOperationException("Unsupported");
     }
 
@@ -595,9 +595,9 @@ public class BigQueryIOTest implements Serializable {
     }
 
     @Override
-    public boolean isTableEmpty(String projectId, String datasetId, String tableId)
+    public boolean isTableEmpty(TableReference tableRef)
         throws IOException, InterruptedException {
-      Long numBytes = getTable(projectId, datasetId, tableId).getNumBytes();
+      Long numBytes = getTable(tableRef).getNumBytes();
       return numBytes == null || numBytes == 0L;
     }
 
@@ -1738,7 +1738,7 @@ public class BigQueryIOTest implements Serializable {
     IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
     when(mockIOChannelFactory.resolve(anyString(), anyString()))
         .thenReturn("mock://tempLocation/output");
-    when(mockDatasetService.getTable(anyString(), anyString(), anyString()))
+    when(mockDatasetService.getTable(any(TableReference.class)))
         .thenReturn(new Table().setSchema(new TableSchema()));
 
     Assert.assertThat(
@@ -1810,13 +1810,9 @@ public class BigQueryIOTest implements Serializable {
             new JobStatistics2()
                 .setTotalBytesProcessed(100L)
                 .setReferencedTables(ImmutableList.of(queryTable))));
-    when(mockDatasetService.getTable(
-        eq(queryTable.getProjectId()), eq(queryTable.getDatasetId()), eq(queryTable.getTableId())))
+    when(mockDatasetService.getTable(eq(queryTable)))
         .thenReturn(new Table().setSchema(new TableSchema()));
-    when(mockDatasetService.getTable(
-        eq(destinationTable.getProjectId()),
-        eq(destinationTable.getDatasetId()),
-        eq(destinationTable.getTableId())))
+    when(mockDatasetService.getTable(eq(destinationTable)))
         .thenReturn(new Table().setSchema(new TableSchema()));
     IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
     when(mockIOChannelFactory.resolve(anyString(), anyString()))
@@ -1898,10 +1894,7 @@ public class BigQueryIOTest implements Serializable {
         .thenReturn(new JobStatistics().setQuery(
             new JobStatistics2()
                 .setTotalBytesProcessed(100L)));
-    when(mockDatasetService.getTable(
-        eq(destinationTable.getProjectId()),
-        eq(destinationTable.getDatasetId()),
-        eq(destinationTable.getTableId())))
+    when(mockDatasetService.getTable(eq(destinationTable)))
         .thenReturn(new Table().setSchema(new TableSchema()));
     IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
     when(mockIOChannelFactory.resolve(anyString(), anyString()))
@@ -2263,9 +2256,9 @@ public class BigQueryIOTest implements Serializable {
         BigQueryIO.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, tables.get(2))));
 
     doThrow(new IOException("Unable to delete table"))
-        .when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(0));
-    doNothing().when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(1));
-    doNothing().when(mockDatasetService).deleteTable(projectId, datasetId, tables.get(2));
+        .when(mockDatasetService).deleteTable(tableRefs.get(0));
+    doNothing().when(mockDatasetService).deleteTable(tableRefs.get(1));
+    doNothing().when(mockDatasetService).deleteTable(tableRefs.get(2));
 
     WriteRename.removeTemporaryTables(mockDatasetService, tableRefs);
 

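With the signature change, the Mockito stubs in these tests switch from three anyString() matchers to a single TableReference matcher. A brief sketch of the two stubbing styles used above, written as lines inside a test method (declared to throw Exception) in the same package, with queryTable a TableReference defined elsewhere:

    import static org.mockito.Matchers.any;
    import static org.mockito.Matchers.eq;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import com.google.api.services.bigquery.model.Table;
    import com.google.api.services.bigquery.model.TableReference;
    import com.google.api.services.bigquery.model.TableSchema;

    DatasetService mockDatasetService = mock(DatasetService.class);

    // Accept any reference when the test does not care which table is fetched.
    when(mockDatasetService.getTable(any(TableReference.class)))
        .thenReturn(new Table().setSchema(new TableSchema()));

    // Pin the stub to one specific reference when it does care.
    when(mockDatasetService.getTable(eq(queryTable)))
        .thenReturn(new Table().setSchema(new TableSchema()));
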
http://git-wip-us.apache.org/repos/asf/beam/blob/f9d1d682/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
index 8130238..7b5b226 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
@@ -370,7 +370,8 @@ public class BigQueryUtilTest {
     BigQueryServicesImpl.DatasetServiceImpl services =
             new BigQueryServicesImpl.DatasetServiceImpl(mockClient, options);
 
-    services.getTable("project", "dataset", "table");
+    services.getTable(
+        new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table"));
 
     verifyTableGet();
   }


[47/50] beam git commit: This closes #1850

Posted by dh...@apache.org.
This closes #1850


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/34b4a6d9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/34b4a6d9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/34b4a6d9

Branch: refs/heads/python-sdk
Commit: 34b4a6d9dc2cf5e8da43346077a36b460501afe2
Parents: b21bdf4 31c63cb
Author: Thomas Weise <th...@apache.org>
Authored: Fri Jan 27 14:01:09 2017 -0800
Committer: Thomas Weise <th...@apache.org>
Committed: Fri Jan 27 14:01:09 2017 -0800

----------------------------------------------------------------------
 .../beam/runners/apex/ApexPipelineOptions.java  |  7 +-
 .../apache/beam/runners/apex/ApexRunner.java    | 43 ++++++++---
 .../beam/runners/apex/ApexYarnLauncher.java     | 23 +++++-
 .../beam/runners/apex/ApexRunnerTest.java       | 75 ++++++++++++++++++++
 .../beam/runners/apex/ApexYarnLauncherTest.java |  9 ++-
 .../test/resources/beam-runners-apex.properties | 20 ++++++
 6 files changed, 161 insertions(+), 16 deletions(-)
----------------------------------------------------------------------



[32/50] beam git commit: DataflowRunner: upgrade worker with Pubsub attribute changes

Posted by dh...@apache.org.
DataflowRunner: upgrade worker with Pubsub attribute changes


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e591d8b9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e591d8b9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e591d8b9

Branch: refs/heads/python-sdk
Commit: e591d8b91ac81c86c0e41af58422a4ea27c9727e
Parents: b4726d0
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 06:56:09 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 09:37:59 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e591d8b9/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 9858b3d..b446b7b 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,7 +33,7 @@
   <packaging>jar</packaging>
 
   <properties>
-    <dataflow.container_version>beam-master-20170120</dataflow.container_version>
+    <dataflow.container_version>beam-master-20170126</dataflow.container_version>
     <dataflow.environment_major_version>6</dataflow.environment_major_version>
   </properties>
 


[38/50] beam git commit: [BEAM-1235] BigQueryIO.Write: log failed load/copy jobs.

Posted by dh...@apache.org.
[BEAM-1235] BigQueryIO.Write: log failed load/copy jobs.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6531545e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6531545e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6531545e

Branch: refs/heads/python-sdk
Commit: 6531545e647f98870a69bd46fabbbadb727969e5
Parents: 2cbc08b
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 16:25:43 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 17:22:52 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 63 ++++++++++++-------
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |  1 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     | 64 +++++++++++++-------
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  2 +
 4 files changed, 87 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index b6f9fb0..4ace985 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1155,7 +1155,8 @@ public class BigQueryIO {
       jobService.startQueryJob(jobRef, queryConfig);
       Job job = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
       if (parseStatus(job) != Status.SUCCEEDED) {
-        throw new IOException("Query job failed: " + jobId);
+        throw new IOException(String.format(
+            "Query job %s failed, status: %s.", jobId, statusToPrettyString(job.getStatus())));
       }
     }
 
@@ -1260,8 +1261,8 @@ public class BigQueryIO {
           jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
       if (parseStatus(extractJob) != Status.SUCCEEDED) {
         throw new IOException(String.format(
-            "Extract job %s failed, status: %s",
-            extractJob.getJobReference().getJobId(), extractJob.getStatus()));
+            "Extract job %s failed, status: %s.",
+            extractJob.getJobReference().getJobId(), statusToPrettyString(extractJob.getStatus())));
       }
 
       List<String> tempFiles = getExtractFilePaths(extractDestinationDir, extractJob);
@@ -2361,30 +2362,36 @@ public class BigQueryIO {
             .setSourceFormat("NEWLINE_DELIMITED_JSON");
 
         String projectId = ref.getProjectId();
+        Job lastFailedLoadJob = null;
         for (int i = 0; i < Bound.MAX_RETRY_JOBS; ++i) {
           String jobId = jobIdPrefix + "-" + i;
-          LOG.info("Starting BigQuery load job {}: try {}/{}", jobId, i, Bound.MAX_RETRY_JOBS);
           JobReference jobRef = new JobReference()
               .setProjectId(projectId)
               .setJobId(jobId);
           jobService.startLoadJob(jobRef, loadConfig);
-          Status jobStatus =
-              parseStatus(jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES));
+          Job loadJob = jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES);
+          Status jobStatus = parseStatus(loadJob);
           switch (jobStatus) {
             case SUCCEEDED:
               return;
             case UNKNOWN:
-              throw new RuntimeException("Failed to poll the load job status of job " + jobId);
+              throw new RuntimeException(String.format(
+                  "UNKNOWN status of load job [%s]: %s.", jobId, jobToPrettyString(loadJob)));
             case FAILED:
-              LOG.info("BigQuery load job failed: {}", jobId);
+              lastFailedLoadJob = loadJob;
               continue;
             default:
-              throw new IllegalStateException(String.format("Unexpected job status: %s of job %s",
-                  jobStatus, jobId));
+              throw new IllegalStateException(String.format(
+                  "Unexpected status [%s] of load job: %s.",
+                  jobStatus, jobToPrettyString(loadJob)));
           }
         }
-        throw new RuntimeException(String.format("Failed to create the load job %s, reached max "
-            + "retries: %d", jobIdPrefix, Bound.MAX_RETRY_JOBS));
+        throw new RuntimeException(String.format(
+            "Failed to create load job with id prefix %s, "
+                + "reached max retries: %d, last failed load job: %s.",
+            jobIdPrefix,
+            Bound.MAX_RETRY_JOBS,
+            jobToPrettyString(lastFailedLoadJob)));
       }
 
       static void removeTemporaryFiles(
@@ -2491,30 +2498,36 @@ public class BigQueryIO {
             .setCreateDisposition(createDisposition.name());
 
         String projectId = ref.getProjectId();
+        Job lastFailedCopyJob = null;
         for (int i = 0; i < Bound.MAX_RETRY_JOBS; ++i) {
           String jobId = jobIdPrefix + "-" + i;
-          LOG.info("Starting BigQuery copy job {}: try {}/{}", jobId, i, Bound.MAX_RETRY_JOBS);
           JobReference jobRef = new JobReference()
               .setProjectId(projectId)
               .setJobId(jobId);
           jobService.startCopyJob(jobRef, copyConfig);
-          Status jobStatus =
-              parseStatus(jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES));
+          Job copyJob = jobService.pollJob(jobRef, Bound.LOAD_JOB_POLL_MAX_RETRIES);
+          Status jobStatus = parseStatus(copyJob);
           switch (jobStatus) {
             case SUCCEEDED:
               return;
             case UNKNOWN:
-              throw new RuntimeException("Failed to poll the copy job status of job " + jobId);
+              throw new RuntimeException(String.format(
+                  "UNKNOWN status of copy job [%s]: %s.", jobId, jobToPrettyString(copyJob)));
             case FAILED:
-              LOG.info("BigQuery copy job failed: {}", jobId);
+              lastFailedCopyJob = copyJob;
               continue;
             default:
-              throw new IllegalStateException(String.format("Unexpected job status: %s of job %s",
-                  jobStatus, jobId));
+              throw new IllegalStateException(String.format(
+                  "Unexpected status [%s] of load job: %s.",
+                  jobStatus, jobToPrettyString(copyJob)));
           }
         }
-        throw new RuntimeException(String.format("Failed to create the copy job %s, reached max "
-            + "retries: %d", jobIdPrefix, Bound.MAX_RETRY_JOBS));
+        throw new RuntimeException(String.format(
+            "Failed to create copy job with id prefix %s, "
+                + "reached max retries: %d, last failed copy job: %s.",
+            jobIdPrefix,
+            Bound.MAX_RETRY_JOBS,
+            jobToPrettyString(lastFailedCopyJob)));
       }
 
       static void removeTemporaryTables(DatasetService tableService,
@@ -2549,6 +2562,14 @@ public class BigQueryIO {
     private Write() {}
   }
 
+  private static String jobToPrettyString(@Nullable Job job) throws IOException {
+    return job == null ? "null" : job.toPrettyString();
+  }
+
+  private static String statusToPrettyString(@Nullable JobStatus status) throws IOException {
+    return status == null ? "Unknown status: null." : status.toPrettyString();
+  }
+
   private static void verifyDatasetPresence(DatasetService datasetService, TableReference table) {
     try {
       datasetService.getDataset(table.getProjectId(), table.getDatasetId());

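The heart of this change is that the retry loops now hold on to the last failed Job so the terminal exception can describe it, instead of reporting only "reached max retries". The same idea in a generic, self-contained form; runWithAttempts and its limit are hypothetical, not Beam API:

    import java.io.IOException;
    import java.util.concurrent.Callable;

    // Retry up to maxAttempts, remember the most recent failure, and surface
    // it in the final exception for easier diagnosis.
    static <T> T runWithAttempts(Callable<T> attempt, int maxAttempts)
        throws Exception {
      Exception lastFailure = null;
      for (int i = 0; i < maxAttempts; ++i) {
        try {
          return attempt.call();
        } catch (IOException e) {
          lastFailure = e;  // kept for the terminal diagnostic below
        }
      }
      throw new RuntimeException(
          "Failed after " + maxAttempts + " attempts, last failure: " + lastFailure,
          lastFailure);
    }
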
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 75796ab..7c3edbe 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -214,6 +214,7 @@ class BigQueryServicesImpl implements BigQueryServices {
       do {
         try {
           client.jobs().insert(jobRef.getProjectId(), job).execute();
+          LOG.info("Started BigQuery job: {}.", jobRef);
           return; // SUCCEEDED
         } catch (GoogleJsonResponseException e) {
           if (errorExtractor.itemAlreadyExists(e)) {

http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 0b8d60d..bbfc2ce 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -988,12 +988,6 @@ public class BigQueryIOTest implements Serializable {
         .withoutValidation());
     p.run();
 
-    logged.verifyInfo("Starting BigQuery load job");
-    logged.verifyInfo("BigQuery load job failed");
-    logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyInfo("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyNotLogged("try 3/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
     File tempDir = new File(bqOptions.getTempLocation());
     testNumFiles(tempDir, 0);
   }
@@ -1232,11 +1226,49 @@ public class BigQueryIOTest implements Serializable {
         .withoutValidation());
 
     thrown.expect(RuntimeException.class);
-    thrown.expectMessage("Failed to poll the load job status");
-    p.run();
+    thrown.expectMessage("UNKNOWN status of load job");
+    try {
+      p.run();
+    } finally {
+      File tempDir = new File(bqOptions.getTempLocation());
+      testNumFiles(tempDir, 0);
+    }
+  }
 
-    File tempDir = new File(bqOptions.getTempLocation());
-    testNumFiles(tempDir, 0);
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteFailedJobs() throws Exception {
+    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
+    bqOptions.setProject("defaultProject");
+    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
+
+    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
+        .withJobService(new FakeJobService()
+            .startJobReturns("done", "done", "done")
+            .pollJobReturns(Status.FAILED, Status.FAILED, Status.FAILED));
+
+    Pipeline p = TestPipeline.create(bqOptions);
+    p.apply(Create.of(
+        new TableRow().set("name", "a").set("number", 1),
+        new TableRow().set("name", "b").set("number", 2),
+        new TableRow().set("name", "c").set("number", 3))
+        .withCoder(TableRowJsonCoder.of()))
+        .apply(BigQueryIO.Write.to("dataset-id.table-id")
+            .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+            .withTestServices(fakeBqServices)
+            .withoutValidation());
+
+    thrown.expect(RuntimeException.class);
+    thrown.expectMessage("Failed to create load job with id prefix");
+    thrown.expectMessage("reached max retries");
+    thrown.expectMessage("last failed load job");
+
+    try {
+      p.run();
+    } finally {
+      File tempDir = new File(bqOptions.getTempLocation());
+      testNumFiles(tempDir, 0);
+    }
   }
 
   @Test
@@ -2164,12 +2196,6 @@ public class BigQueryIOTest implements Serializable {
 
     List<String> tempTables = tester.takeOutputElements();
 
-    logged.verifyInfo("Starting BigQuery load job");
-    logged.verifyInfo("BigQuery load job failed");
-    logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyNotLogged("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-
     assertEquals(expectedTempTables, tempTables);
   }
 
@@ -2237,12 +2263,6 @@ public class BigQueryIOTest implements Serializable {
     DoFnTester<String, Void> tester = DoFnTester.of(writeRename);
     tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
     tester.processElement(null);
-
-    logged.verifyInfo("Starting BigQuery copy job");
-    logged.verifyInfo("BigQuery copy job failed");
-    logged.verifyInfo("try 0/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyInfo("try 1/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
-    logged.verifyNotLogged("try 2/" + BigQueryIO.Write.Bound.MAX_RETRY_JOBS);
   }
 
   @Test

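The new testWriteFailedJobs above relies on JUnit's ExpectedException rule, where each expectMessage(...) call adds a substring that the thrown message must contain. A self-contained illustration of that idiom; the class and message here are hypothetical:

    import org.junit.Rule;
    import org.junit.Test;
    import org.junit.rules.ExpectedException;

    public class FailureMessageTest {
      @Rule public ExpectedException thrown = ExpectedException.none();

      @Test
      public void failureMessageCarriesLastJob() {
        // All three substrings must appear in the thrown message.
        thrown.expect(RuntimeException.class);
        thrown.expectMessage("Failed to create load job with id prefix");
        thrown.expectMessage("reached max retries");
        thrown.expectMessage("last failed load job");
        throw new RuntimeException(
            "Failed to create load job with id prefix x-1, "
                + "reached max retries: 3, last failed load job: null.");
      }
    }
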
http://git-wip-us.apache.org/repos/asf/beam/blob/6531545e/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
index 1ce10f1..ef51650 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
@@ -138,6 +138,7 @@ public class BigQueryServicesImplTest {
     verify(response, times(1)).getStatusCode();
     verify(response, times(1)).getContent();
     verify(response, times(1)).getContentType();
+    expectedLogs.verifyInfo(String.format("Started BigQuery job: %s", jobRef));
   }
 
   /**
@@ -161,6 +162,7 @@ public class BigQueryServicesImplTest {
     verify(response, times(1)).getStatusCode();
     verify(response, times(1)).getContent();
     verify(response, times(1)).getContentType();
+    expectedLogs.verifyNotLogged("Started BigQuery job");
   }
 
   /**

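The expectedLogs object used above is Beam's ExpectedLogs JUnit rule, which captures log output from a named class during the test so assertions like verifyInfo and verifyNotLogged can inspect it. A sketch of the typical declaration, assuming the ExpectedLogs.none(Class) factory used elsewhere in the Beam test suite:

    import org.apache.beam.sdk.testing.ExpectedLogs;
    import org.junit.Rule;

    // Captures log records emitted by BigQueryServicesImpl during each test;
    // verifyInfo("...") then asserts an INFO message containing the substring
    // was logged, and verifyNotLogged("...") asserts the opposite.
    @Rule
    public ExpectedLogs expectedLogs = ExpectedLogs.none(BigQueryServicesImpl.class);
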

[05/50] beam git commit: This closes #1825

Posted by dh...@apache.org.
This closes #1825


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b3334879
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b3334879
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b3334879

Branch: refs/heads/python-sdk
Commit: b3334879fb75150b6f07c24a138fb1d92e1d7def
Parents: cb6e0a8 3afdc5c
Author: Dan Halperin <dh...@google.com>
Authored: Tue Jan 24 12:25:28 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:28 2017 -0800

----------------------------------------------------------------------
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    | 32 +++++++++++---------
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |  2 ++
 .../io/gcp/bigquery/BigQueryServicesImpl.java   | 16 +++++-----
 3 files changed, 27 insertions(+), 23 deletions(-)
----------------------------------------------------------------------



[50/50] beam git commit: Closes #1861

Posted by dh...@apache.org.
Closes #1861


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/27cf68ee
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/27cf68ee
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/27cf68ee

Branch: refs/heads/python-sdk
Commit: 27cf68ee72bd58475c170712f7afe20102601606
Parents: 1bc6859 f1b8679
Author: Dan Halperin <dh...@google.com>
Authored: Sun Jan 29 08:21:18 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Sun Jan 29 08:21:18 2017 -0800

----------------------------------------------------------------------
 .jenkins/common_job_properties.groovy           |    9 +-
 ...job_beam_PostCommit_Java_MavenInstall.groovy |    2 +-
 .../job_beam_PreCommit_Java_MavenInstall.groovy |    2 +-
 .../job_beam_Release_NightlySnapshot.groovy     |    2 +-
 .jenkins/job_seed.groovy                        |    2 +-
 .travis/README.md                               |    2 +-
 DISCLAIMER                                      |   10 -
 NOTICE                                          |    4 +-
 README.md                                       |   46 +-
 examples/java/README.md                         |   16 +-
 examples/java/pom.xml                           |   21 +-
 .../beam/examples/DebuggingWordCount.java       |    4 +-
 .../org/apache/beam/examples/WordCount.java     |    6 +-
 .../beam/examples/complete/AutoComplete.java    |    2 +-
 .../org/apache/beam/examples/complete/README.md |   14 +-
 .../apache/beam/examples/complete/TfIdf.java    |    2 +-
 .../examples/complete/TopWikipediaSessions.java |    2 +-
 .../examples/complete/TrafficMaxLaneFlow.java   |    2 +-
 .../beam/examples/complete/TrafficRoutes.java   |    2 +-
 .../examples/cookbook/BigQueryTornadoes.java    |    2 +-
 .../cookbook/CombinePerKeyExamples.java         |    2 +-
 .../org/apache/beam/examples/cookbook/README.md |   14 +-
 .../beam/examples/cookbook/TriggerExample.java  |    4 +-
 .../beam/examples/WindowedWordCountIT.java      |   16 +-
 examples/java8/pom.xml                          |    2 +-
 .../beam/examples/complete/game/GameStats.java  |    7 +-
 .../examples/complete/game/LeaderBoard.java     |    5 +-
 .../beam/examples/complete/game/UserScore.java  |    2 +-
 examples/pom.xml                                |   16 +-
 pom.xml                                         |   41 +-
 runners/apex/README.md                          |    4 +-
 runners/apex/pom.xml                            |    3 +-
 .../beam/runners/apex/ApexPipelineOptions.java  |    7 +-
 .../apache/beam/runners/apex/ApexRunner.java    |   43 +-
 .../beam/runners/apex/ApexYarnLauncher.java     |   23 +-
 .../translation/CreateValuesTranslator.java     |   18 +-
 .../FlattenPCollectionTranslator.java           |   28 +-
 .../apex/translation/GroupByKeyTranslator.java  |    2 +-
 .../translation/ParDoBoundMultiTranslator.java  |   27 +-
 .../apex/translation/ParDoBoundTranslator.java  |    4 +-
 .../apex/translation/TranslationContext.java    |   27 +-
 .../apex/translation/WindowBoundTranslator.java |    8 +-
 .../operators/ApexGroupByKeyOperator.java       |    4 +-
 .../operators/ApexParDoOperator.java            |    6 +-
 .../ApexReadUnboundedInputOperator.java         |   17 +-
 .../beam/runners/apex/ApexRunnerTest.java       |   75 ++
 .../beam/runners/apex/ApexYarnLauncherTest.java |    9 +-
 .../runners/apex/examples/WordCountTest.java    |    2 +-
 .../translation/ParDoBoundTranslatorTest.java   |    6 +-
 .../translation/ReadUnboundTranslatorTest.java  |    8 +-
 .../utils/ApexStateInternalsTest.java           |    2 +-
 .../test/resources/beam-runners-apex.properties |   20 +
 runners/core-java/pom.xml                       |    2 +-
 .../beam/runners/core/AssignWindowsDoFn.java    |    3 +-
 .../apache/beam/runners/core/DoFnAdapters.java  |  343 ++++++
 .../apache/beam/runners/core/DoFnRunner.java    |   21 -
 .../apache/beam/runners/core/DoFnRunners.java   |  138 +--
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |   10 +-
 .../runners/core/GroupAlsoByWindowsDoFn.java    |    5 +-
 .../beam/runners/core/KeyedWorkItemCoder.java   |    4 +-
 .../core/LateDataDroppingDoFnRunner.java        |    1 -
 .../apache/beam/runners/core/NonEmptyPanes.java |    2 +-
 .../org/apache/beam/runners/core/OldDoFn.java   |  472 ++++++++
 .../runners/core/PerKeyCombineFnRunner.java     |   70 --
 .../runners/core/PerKeyCombineFnRunners.java    |  101 --
 .../beam/runners/core/SimpleDoFnRunner.java     |   63 -
 .../beam/runners/core/SimpleOldDoFnRunner.java  |    7 +-
 .../beam/runners/core/SplittableParDo.java      |    7 -
 .../core/UnboundedReadFromBoundedSource.java    |   14 +-
 .../AfterDelayFromFirstElementStateMachine.java |    2 +-
 .../core/triggers/AfterPaneStateMachine.java    |    2 +-
 .../core/DoFnDelegatingAggregatorTest.java      |  144 +++
 .../core/GroupAlsoByWindowsProperties.java      |    2 +-
 .../runners/core/KeyedWorkItemCoderTest.java    |    6 +
 .../core/LateDataDroppingDoFnRunnerTest.java    |    2 +-
 .../apache/beam/runners/core/NoOpOldDoFn.java   |   72 ++
 .../beam/runners/core/OldDoFnContextTest.java   |   72 ++
 .../apache/beam/runners/core/OldDoFnTest.java   |  192 +++
 .../beam/runners/core/ReduceFnRunnerTest.java   |   12 +-
 .../beam/runners/core/ReduceFnTester.java       |    2 +-
 .../runners/core/SimpleOldDoFnRunnerTest.java   |    2 +-
 .../UnboundedReadFromBoundedSourceTest.java     |   12 +-
 runners/direct-java/pom.xml                     |    3 +-
 .../direct/BoundedReadEvaluatorFactory.java     |   10 +-
 ...ecycleManagerRemovingTransformEvaluator.java |   19 +-
 .../beam/runners/direct/EvaluationContext.java  |    2 +-
 .../direct/ExecutorServiceParallelExecutor.java |    4 +-
 .../runners/direct/FlattenEvaluatorFactory.java |    4 +-
 .../GroupAlsoByWindowEvaluatorFactory.java      |    9 +-
 .../direct/GroupByKeyOnlyEvaluatorFactory.java  |   10 +-
 .../beam/runners/direct/ParDoEvaluator.java     |   16 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |   19 +-
 .../direct/ParDoMultiOverrideFactory.java       |   13 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |   27 +-
 .../direct/TestStreamEvaluatorFactory.java      |    5 +-
 .../direct/UnboundedReadEvaluatorFactory.java   |   22 +-
 .../runners/direct/ViewEvaluatorFactory.java    |    8 +-
 .../beam/runners/direct/WatermarkManager.java   |    6 +-
 .../runners/direct/WindowEvaluatorFactory.java  |    3 +-
 .../runners/direct/AggregatorContainerTest.java |   16 +-
 .../direct/BoundedReadEvaluatorFactoryTest.java |    5 -
 .../CopyOnAccessInMemoryStateInternalsTest.java |    4 +-
 .../runners/direct/DirectGraphVisitorTest.java  |   16 +-
 .../beam/runners/direct/DirectRunnerTest.java   |    5 -
 ...leManagerRemovingTransformEvaluatorTest.java |  103 +-
 .../runners/direct/EvaluationContextTest.java   |    6 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |    3 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |    4 +-
 .../UnboundedReadEvaluatorFactoryTest.java      |   13 +-
 runners/flink/README.md                         |    6 +-
 runners/flink/examples/pom.xml                  |    2 +-
 .../beam/runners/flink/examples/WordCount.java  |    2 +-
 .../flink/examples/streaming/AutoComplete.java  |    2 +-
 .../examples/streaming/KafkaIOExamples.java     |    4 +-
 .../KafkaWindowedWordCountExample.java          |    2 +-
 .../examples/streaming/WindowedWordCount.java   |    2 +-
 runners/flink/pom.xml                           |    2 +-
 runners/flink/runner/pom.xml                    |    4 +-
 .../runners/flink/FlinkPipelineOptions.java     |    6 +-
 .../runners/flink/OldPerKeyCombineFnRunner.java |   62 +
 .../flink/OldPerKeyCombineFnRunners.java        |  155 +++
 .../FlinkBatchTransformTranslators.java         |   40 +-
 .../FlinkBatchTranslationContext.java           |   21 +-
 .../FlinkStreamingTransformTranslators.java     |   46 +-
 .../FlinkStreamingTranslationContext.java       |   20 +-
 .../functions/FlinkDoFnFunction.java            |    4 +-
 .../FlinkMergingNonShuffleReduceFunction.java   |   10 +-
 .../FlinkMergingPartialReduceFunction.java      |    8 +-
 .../functions/FlinkMergingReduceFunction.java   |    8 +-
 .../functions/FlinkMultiOutputDoFnFunction.java |    4 +-
 .../FlinkMultiOutputProcessContext.java         |    2 +-
 .../functions/FlinkNoElementAssignContext.java  |    2 +-
 .../functions/FlinkPartialReduceFunction.java   |   10 +-
 .../functions/FlinkProcessContextBase.java      |    4 +-
 .../functions/FlinkReduceFunction.java          |   10 +-
 .../FlinkSingleOutputProcessContext.java        |    2 +-
 .../wrappers/streaming/DoFnOperator.java        |    6 +-
 .../streaming/SingletonKeyedWorkItemCoder.java  |   10 +-
 .../wrappers/streaming/WindowDoFnOperator.java  |    2 +-
 .../streaming/io/BoundedSourceWrapper.java      |    2 +-
 .../streaming/io/UnboundedFlinkSink.java        |    6 +
 .../streaming/io/UnboundedSourceWrapper.java    |    2 +-
 .../beam/runners/flink/PipelineOptionsTest.java |   13 +
 .../streaming/FlinkStateInternalsTest.java      |    2 +-
 .../streaming/UnboundedSourceWrapperTest.java   |  464 +++----
 runners/google-cloud-dataflow-java/pom.xml      |   15 +-
 .../beam/runners/dataflow/AssignWindows.java    |   89 ++
 .../dataflow/DataflowAggregatorTransforms.java  |   79 ++
 .../dataflow/DataflowMetricUpdateExtractor.java |  109 ++
 .../runners/dataflow/DataflowPipelineJob.java   |    2 -
 .../dataflow/DataflowPipelineTranslator.java    |  510 +++-----
 .../beam/runners/dataflow/DataflowRunner.java   |  109 +-
 .../DataflowUnboundedReadFromBoundedSource.java |  547 +++++++++
 .../beam/runners/dataflow/ReadTranslator.java   |  102 ++
 .../runners/dataflow/TransformTranslator.java   |  120 ++
 .../dataflow/internal/AssignWindows.java        |   89 --
 .../dataflow/internal/CustomSources.java        |    5 -
 .../internal/DataflowAggregatorTransforms.java  |   79 --
 .../internal/DataflowMetricUpdateExtractor.java |  109 --
 .../DataflowUnboundedReadFromBoundedSource.java |  556 ---------
 .../runners/dataflow/internal/IsmFormat.java    |   20 +-
 .../dataflow/internal/ReadTranslator.java       |  107 --
 .../DataflowPipelineWorkerPoolOptions.java      |   16 +-
 .../beam/runners/dataflow/util/DoFnInfo.java    |   66 +-
 .../beam/runners/dataflow/util/GcsStager.java   |   18 +-
 .../beam/runners/dataflow/util/PackageUtil.java |  352 ++++--
 .../beam/runners/dataflow/dataflow.properties   |    6 +-
 .../dataflow/DataflowPipelineJobTest.java       |   38 +-
 .../DataflowPipelineTranslatorTest.java         |    3 +-
 .../runners/dataflow/DataflowRunnerTest.java    |    8 +-
 ...aflowUnboundedReadFromBoundedSourceTest.java |   79 ++
 ...aflowUnboundedReadFromBoundedSourceTest.java |   83 --
 .../DataflowPipelineDebugOptionsTest.java       |    2 +-
 .../options/DataflowPipelineOptionsTest.java    |    4 +-
 .../options/DataflowProfilingOptionsTest.java   |    4 +-
 .../runners/dataflow/util/PackageUtilTest.java  |   69 +-
 runners/pom.xml                                 |   16 +-
 runners/spark/README.md                         |    8 +-
 runners/spark/pom.xml                           |   29 +-
 .../spark/aggregators/NamedAggregators.java     |    4 +-
 .../coders/BeamSparkRunnerRegistrator.java      |   48 +-
 .../spark/coders/StatelessJavaSerializer.java   |   97 ++
 .../runners/spark/coders/WritableCoder.java     |    4 +-
 .../beam/runners/spark/examples/WordCount.java  |    2 +-
 .../beam/runners/spark/io/MicrobatchSource.java |    9 +-
 .../runners/spark/io/SparkUnboundedSource.java  |  127 +-
 .../spark/stateful/StateSpecFunctions.java      |   37 +-
 .../runners/spark/translation/DoFnFunction.java |   15 +-
 .../spark/translation/EvaluationContext.java    |   83 +-
 .../translation/GroupCombineFunctions.java      |    8 +-
 .../spark/translation/MultiDoFnFunction.java    |   14 +-
 .../translation/SparkAbstractCombineFn.java     |   12 +-
 .../spark/translation/SparkGlobalCombineFn.java |   13 +-
 .../translation/SparkGroupAlsoByWindowFn.java   |    2 +-
 .../spark/translation/SparkKeyedCombineFn.java  |   13 +-
 .../spark/translation/SparkPCollectionView.java |   99 ++
 .../spark/translation/SparkRuntimeContext.java  |   63 +-
 .../spark/translation/TransformTranslator.java  |   67 +-
 .../spark/translation/TranslationUtils.java     |   37 +-
 .../streaming/StreamingTransformTranslator.java |  114 +-
 .../runners/spark/util/BroadcastHelper.java     |  127 --
 .../runners/spark/util/SideInputBroadcast.java  |   77 ++
 .../spark/util/SparkSideInputReader.java        |    8 +-
 .../coders/BeamSparkRunnerRegistratorTest.java  |   57 -
 .../streaming/KafkaStreamingTest.java           |   57 +-
 .../ResumeFromCheckpointStreamingTest.java      |   20 +-
 .../streaming/utils/PAssertStreaming.java       |    4 +-
 sdks/java/build-tools/pom.xml                   |    2 +-
 .../src/main/resources/beam/findbugs-filter.xml |   26 -
 sdks/java/core/pom.xml                          |    2 +-
 .../beam/sdk/annotations/Experimental.java      |    5 +-
 .../org/apache/beam/sdk/coders/AtomicCoder.java |    2 +-
 .../org/apache/beam/sdk/coders/AvroCoder.java   |   30 +-
 .../apache/beam/sdk/coders/BigDecimalCoder.java |    6 +-
 .../beam/sdk/coders/BigEndianIntegerCoder.java  |    7 +
 .../beam/sdk/coders/BigEndianLongCoder.java     |    7 +
 .../apache/beam/sdk/coders/ByteArrayCoder.java  |    7 +
 .../org/apache/beam/sdk/coders/ByteCoder.java   |    7 +
 .../apache/beam/sdk/coders/ByteStringCoder.java |    8 +
 .../java/org/apache/beam/sdk/coders/Coder.java  |    7 +
 .../apache/beam/sdk/coders/CollectionCoder.java |   12 +-
 .../org/apache/beam/sdk/coders/CustomCoder.java |   18 +-
 .../apache/beam/sdk/coders/DelegateCoder.java   |   29 +-
 .../org/apache/beam/sdk/coders/DoubleCoder.java |    7 +
 .../apache/beam/sdk/coders/DurationCoder.java   |    8 +
 .../apache/beam/sdk/coders/InstantCoder.java    |    7 +
 .../apache/beam/sdk/coders/IterableCoder.java   |   12 +-
 .../org/apache/beam/sdk/coders/JAXBCoder.java   |   48 +-
 .../org/apache/beam/sdk/coders/KvCoder.java     |   35 +-
 .../beam/sdk/coders/LengthPrefixCoder.java      |  145 +++
 .../org/apache/beam/sdk/coders/ListCoder.java   |    7 +
 .../org/apache/beam/sdk/coders/MapCoder.java    |   62 +-
 .../apache/beam/sdk/coders/NullableCoder.java   |    6 +
 .../beam/sdk/coders/SerializableCoder.java      |   17 +-
 .../org/apache/beam/sdk/coders/SetCoder.java    |   12 +-
 .../apache/beam/sdk/coders/StandardCoder.java   |   39 +-
 .../beam/sdk/coders/StringDelegateCoder.java    |   16 +-
 .../apache/beam/sdk/coders/StringUtf8Coder.java |    7 +
 .../beam/sdk/coders/TableRowJsonCoder.java      |    7 +
 .../beam/sdk/coders/TextualIntegerCoder.java    |    8 +
 .../org/apache/beam/sdk/coders/VarIntCoder.java |   10 +-
 .../apache/beam/sdk/coders/VarLongCoder.java    |    7 +
 .../org/apache/beam/sdk/coders/VoidCoder.java   |    7 +
 .../beam/sdk/coders/protobuf/ProtoCoder.java    |    8 +-
 .../java/org/apache/beam/sdk/io/AvroSource.java |    5 -
 .../sdk/io/BoundedReadFromUnboundedSource.java  |   79 +-
 .../org/apache/beam/sdk/io/BoundedSource.java   |    8 -
 .../apache/beam/sdk/io/CompressedSource.java    |    8 -
 .../org/apache/beam/sdk/io/CountingSource.java  |    5 -
 .../org/apache/beam/sdk/io/FileSystems.java     |   32 +-
 .../java/org/apache/beam/sdk/io/PubsubIO.java   | 1142 +++++++++---------
 .../apache/beam/sdk/io/PubsubUnboundedSink.java |   88 +-
 .../beam/sdk/io/PubsubUnboundedSource.java      |  104 +-
 .../main/java/org/apache/beam/sdk/io/Read.java  |    7 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     |    5 -
 .../java/org/apache/beam/sdk/io/XmlSource.java  |    5 -
 .../org/apache/beam/sdk/options/GcpOptions.java |   36 +-
 .../org/apache/beam/sdk/options/GcsOptions.java |    4 +-
 .../beam/sdk/options/PipelineOptions.java       |    2 +-
 .../sdk/options/PipelineOptionsFactory.java     |   10 +-
 .../apache/beam/sdk/options/ValueProvider.java  |    6 +-
 .../beam/sdk/runners/TransformHierarchy.java    |   33 +-
 .../testing/FlattenWithHeterogeneousCoders.java |   29 +
 .../org/apache/beam/sdk/testing/PAssert.java    |   12 +-
 .../beam/sdk/testing/RunnableOnService.java     |   14 +-
 .../beam/sdk/testing/SourceTestUtils.java       |    5 -
 .../org/apache/beam/sdk/testing/TestStream.java |    8 +
 .../sdk/testing/UsesUnboundedPCollections.java  |   23 +
 .../beam/sdk/testing/ValueInSingleWindow.java   |    6 +-
 .../sdk/transforms/AggregatorRetriever.java     |   13 +-
 .../beam/sdk/transforms/AppliedPTransform.java  |   11 +-
 .../org/apache/beam/sdk/transforms/Combine.java |  197 +--
 .../apache/beam/sdk/transforms/CombineFns.java  |   14 +-
 .../org/apache/beam/sdk/transforms/Count.java   |    4 +-
 .../org/apache/beam/sdk/transforms/Create.java  |    5 -
 .../sdk/transforms/DelegatingAggregator.java    |    2 +-
 .../beam/sdk/transforms/DoFnAdapters.java       |  504 --------
 .../apache/beam/sdk/transforms/DoFnTester.java  |    7 -
 .../apache/beam/sdk/transforms/GroupByKey.java  |    2 +-
 .../org/apache/beam/sdk/transforms/Max.java     |  124 +-
 .../org/apache/beam/sdk/transforms/Mean.java    |   27 +-
 .../org/apache/beam/sdk/transforms/Min.java     |  122 +-
 .../org/apache/beam/sdk/transforms/OldDoFn.java |  758 ------------
 .../apache/beam/sdk/transforms/PTransform.java  |    9 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |   46 +-
 .../org/apache/beam/sdk/transforms/Regex.java   |  589 ++++++++-
 .../org/apache/beam/sdk/transforms/Sum.java     |   57 +-
 .../apache/beam/sdk/transforms/ToString.java    |  198 +++
 .../org/apache/beam/sdk/transforms/Top.java     |   27 +-
 .../beam/sdk/transforms/join/CoGbkResult.java   |   35 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   20 -
 .../sdk/transforms/reflect/DoFnInvokers.java    |  142 +--
 .../sdk/transforms/reflect/DoFnSignature.java   |   15 +-
 .../windowing/AfterDelayFromFirstElement.java   |    2 +-
 .../sdk/transforms/windowing/AfterPane.java     |    2 +-
 .../sdk/transforms/windowing/GlobalWindow.java  |    6 +
 .../transforms/windowing/IntervalWindow.java    |    4 +-
 .../beam/sdk/transforms/windowing/Window.java   |    3 +-
 .../org/apache/beam/sdk/util/CoderUtils.java    |   28 +-
 .../beam/sdk/util/CombineContextFactory.java    |   18 -
 .../org/apache/beam/sdk/util/DefaultBucket.java |  105 ++
 .../util/EmptyOnDeserializationThreadLocal.java |   39 +
 .../apache/beam/sdk/util/GcpProjectUtil.java    |    2 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |   36 +-
 .../org/apache/beam/sdk/util/NameUtils.java     |  162 +++
 .../org/apache/beam/sdk/util/PropertyNames.java |    1 +
 .../org/apache/beam/sdk/util/PubsubClient.java  |   28 +-
 .../apache/beam/sdk/util/PubsubGrpcClient.java  |    6 +-
 .../apache/beam/sdk/util/PubsubJsonClient.java  |    4 +-
 .../apache/beam/sdk/util/PubsubTestClient.java  |    6 +-
 .../org/apache/beam/sdk/util/StringUtils.java   |  100 --
 .../apache/beam/sdk/util/TimerInternals.java    |    4 +-
 .../org/apache/beam/sdk/util/WindowedValue.java |   23 +-
 .../beam/sdk/util/state/StateContexts.java      |    4 +-
 .../org/apache/beam/sdk/values/PValueBase.java  |    4 +-
 .../beam/sdk/values/TimestampedValue.java       |   10 +-
 .../sdk/AggregatorPipelineExtractorTest.java    |   16 +-
 .../apache/beam/sdk/coders/AvroCoderTest.java   |    7 +
 .../beam/sdk/coders/BigDecimalCoderTest.java    |   46 +-
 .../sdk/coders/BigEndianIntegerCoderTest.java   |    9 +
 .../beam/sdk/coders/BigEndianLongCoderTest.java |    9 +
 .../beam/sdk/coders/ByteArrayCoderTest.java     |    6 +
 .../apache/beam/sdk/coders/ByteCoderTest.java   |    9 +
 .../beam/sdk/coders/ByteStringCoderTest.java    |    8 +
 .../beam/sdk/coders/CoderRegistryTest.java      |    6 +
 .../org/apache/beam/sdk/coders/CoderTest.java   |    8 +
 .../beam/sdk/coders/CollectionCoderTest.java    |   16 +
 .../beam/sdk/coders/DefaultCoderTest.java       |    4 +-
 .../beam/sdk/coders/DelegateCoderTest.java      |   35 +-
 .../apache/beam/sdk/coders/DoubleCoderTest.java |    9 +
 .../beam/sdk/coders/DurationCoderTest.java      |   10 +
 .../beam/sdk/coders/InstantCoderTest.java       |    9 +
 .../beam/sdk/coders/IterableCoderTest.java      |   27 +-
 .../apache/beam/sdk/coders/JAXBCoderTest.java   |   26 +-
 .../org/apache/beam/sdk/coders/KvCoderTest.java |   29 +
 .../beam/sdk/coders/LengthPrefixCoderTest.java  |  129 ++
 .../apache/beam/sdk/coders/ListCoderTest.java   |   16 +-
 .../apache/beam/sdk/coders/MapCoderTest.java    |   21 +-
 .../beam/sdk/coders/NullableCoderTest.java      |   12 +
 .../beam/sdk/coders/SerializableCoderTest.java  |    9 +
 .../apache/beam/sdk/coders/SetCoderTest.java    |   16 +
 .../beam/sdk/coders/StandardCoderTest.java      |   40 +
 .../sdk/coders/StringDelegateCoderTest.java     |   11 +
 .../beam/sdk/coders/StringUtf8CoderTest.java    |    9 +
 .../beam/sdk/coders/TableRowJsonCoderTest.java  |    9 +
 .../sdk/coders/TextualIntegerCoderTest.java     |    9 +
 .../apache/beam/sdk/coders/VarIntCoderTest.java |    9 +
 .../beam/sdk/coders/VarLongCoderTest.java       |    9 +
 .../apache/beam/sdk/coders/VoidCoderTest.java   |   40 +
 .../beam/sdk/io/AvroIOGeneratedClassTest.java   |  285 -----
 .../apache/beam/sdk/io/AvroIOTransformTest.java |  324 +++++
 .../beam/sdk/io/CompressedSourceTest.java       |    5 -
 .../apache/beam/sdk/io/FileBasedSourceTest.java |    5 -
 .../org/apache/beam/sdk/io/FileSystemsTest.java |   33 +-
 .../beam/sdk/io/OffsetBasedSourceTest.java      |    5 -
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |   86 +-
 .../beam/sdk/io/PubsubUnboundedSinkTest.java    |   41 +-
 .../beam/sdk/io/PubsubUnboundedSourceTest.java  |   10 +-
 .../java/org/apache/beam/sdk/io/ReadTest.java   |    5 -
 .../java/org/apache/beam/sdk/io/WriteTest.java  |   10 +-
 .../apache/beam/sdk/options/GcpOptionsTest.java |    4 +-
 .../sdk/options/PipelineOptionsFactoryTest.java |    6 +-
 .../beam/sdk/options/PipelineOptionsTest.java   |    3 +-
 .../beam/sdk/options/ValueProviderTest.java     |   36 +-
 .../sdk/options/ValueProviderUtilsTest.java     |    2 +-
 .../sdk/runners/TransformHierarchyTest.java     |   30 +-
 .../apache/beam/sdk/testing/TestStreamTest.java |    5 +
 .../testing/ValueInSingleWindowCoderTest.java   |    7 +
 .../sdk/transforms/ApproximateUniqueTest.java   |  483 ++++----
 .../beam/sdk/transforms/CombineFnsTest.java     |   20 +-
 .../apache/beam/sdk/transforms/CombineTest.java |   99 +-
 .../apache/beam/sdk/transforms/CountTest.java   |    2 +-
 .../apache/beam/sdk/transforms/CreateTest.java  |    8 -
 .../DoFnDelegatingAggregatorTest.java           |  142 ---
 .../apache/beam/sdk/transforms/DoFnTest.java    |   15 +-
 .../beam/sdk/transforms/DoFnTesterTest.java     |    6 +-
 .../apache/beam/sdk/transforms/FlattenTest.java |   27 +
 .../apache/beam/sdk/transforms/KvSwapTest.java  |   13 +-
 .../org/apache/beam/sdk/transforms/MaxTest.java |   20 +-
 .../apache/beam/sdk/transforms/MeanTest.java    |    7 +-
 .../org/apache/beam/sdk/transforms/MinTest.java |   21 +-
 .../apache/beam/sdk/transforms/NoOpOldDoFn.java |   71 --
 .../beam/sdk/transforms/OldDoFnContextTest.java |   69 --
 .../apache/beam/sdk/transforms/OldDoFnTest.java |  188 ---
 .../apache/beam/sdk/transforms/ParDoTest.java   |   74 +-
 .../apache/beam/sdk/transforms/RegexTest.java   |  127 +-
 .../apache/beam/sdk/transforms/SampleTest.java  |  405 ++++---
 .../beam/sdk/transforms/SimpleStatsFnsTest.java |   36 +-
 .../org/apache/beam/sdk/transforms/SumTest.java |   24 +-
 .../beam/sdk/transforms/ToStringTest.java       |  125 ++
 .../org/apache/beam/sdk/transforms/TopTest.java |   13 +-
 .../apache/beam/sdk/transforms/ViewTest.java    |    2 +-
 .../transforms/join/CoGbkResultCoderTest.java   |   10 +-
 .../sdk/transforms/join/UnionCoderTest.java     |   24 +-
 .../transforms/reflect/DoFnInvokersTest.java    |   42 -
 .../transforms/windowing/GlobalWindowTest.java  |   64 +
 .../apache/beam/sdk/util/CombineFnUtilTest.java |    8 +-
 .../apache/beam/sdk/util/DefaultBucketTest.java |  112 ++
 .../org/apache/beam/sdk/util/GcsUtilTest.java   |   56 +
 .../org/apache/beam/sdk/util/NameUtilsTest.java |  177 +++
 .../beam/sdk/util/PubsubGrpcClientTest.java     |    8 +-
 .../beam/sdk/util/PubsubJsonClientTest.java     |    3 +-
 .../beam/sdk/util/PubsubTestClientTest.java     |    4 +-
 .../beam/sdk/util/SerializableUtilsTest.java    |    4 +-
 .../apache/beam/sdk/util/StringUtilsTest.java   |  100 --
 .../beam/sdk/util/TimerInternalsTest.java       |    5 +
 .../beam/sdk/util/ValueWithRecordIdTest.java    |   34 +
 .../apache/beam/sdk/util/WindowedValueTest.java |   23 +
 .../util/state/InMemoryStateInternalsTest.java  |    2 +-
 .../beam/sdk/util/state/StateTagTest.java       |   11 +-
 .../beam/sdk/values/TimestampedValueTest.java   |   19 +-
 sdks/java/extensions/join-library/README.md     |   10 -
 sdks/java/extensions/join-library/pom.xml       |    2 +-
 sdks/java/extensions/pom.xml                    |    2 +-
 sdks/java/extensions/sorter/pom.xml             |    2 +-
 sdks/java/io/elasticsearch/pom.xml              |  175 +++
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |  819 +++++++++++++
 .../beam/sdk/io/elasticsearch/package-info.java |   20 +
 .../elasticsearch/ElasticSearchIOTestUtils.java |  129 ++
 .../io/elasticsearch/ElasticsearchIOTest.java   |  358 ++++++
 sdks/java/io/google-cloud-platform/pom.xml      |    2 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    |  308 +++--
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |   16 +-
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |   76 +-
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |    8 -
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 -
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |  205 +++-
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  141 +++
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |    3 +-
 .../sdk/io/gcp/bigtable/BigtableIOTest.java     |    5 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 +
 sdks/java/io/hdfs/pom.xml                       |    2 +-
 .../beam/sdk/io/hdfs/AvroWrapperCoder.java      |    4 +-
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java |    5 -
 .../apache/beam/sdk/io/hdfs/WritableCoder.java  |    4 +-
 .../beam/sdk/io/hdfs/AvroWrapperCoderTest.java  |    1 -
 sdks/java/io/jdbc/pom.xml                       |    2 +-
 sdks/java/io/jms/pom.xml                        |    2 +-
 sdks/java/io/kafka/pom.xml                      |    2 +-
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   |    7 +-
 .../beam/sdk/io/kafka/KafkaRecordCoder.java     |    4 +-
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |    9 +-
 .../beam/sdk/io/kafka/KafkaRecordCoderTest.java |   34 +
 sdks/java/io/kinesis/pom.xml                    |    2 +-
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |    4 +-
 .../beam/sdk/io/kinesis/package-info.java       |    2 +-
 sdks/java/io/mongodb/pom.xml                    |    2 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |    5 -
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   |    5 -
 sdks/java/io/mqtt/pom.xml                       |  152 +++
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  588 +++++++++
 .../apache/beam/sdk/io/mqtt/package-info.java   |   22 +
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java |  197 +++
 sdks/java/io/pom.xml                            |    4 +-
 sdks/java/java8tests/pom.xml                    |    2 +-
 .../maven-archetypes/examples-java8/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   19 +-
 sdks/java/maven-archetypes/examples/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   19 +-
 sdks/java/maven-archetypes/pom.xml              |    2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    4 +-
 .../resources/projects/basic/reference/pom.xml  |    4 +-
 sdks/java/pom.xml                               |    2 +-
 sdks/pom.xml                                    |   15 +-
 sdks/python/pom.xml                             |    2 +-
 466 files changed, 13883 insertions(+), 8047 deletions(-)
----------------------------------------------------------------------



[03/50] beam git commit: [BEAM-1302] BigQueryServicesImpl: skip logging a warning if the exception does not need a retry.

Posted by dh...@apache.org.
[BEAM-1302] BigQueryServicesImpl: skip logging a warning if the exception does not need a retry.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2a23e8b5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2a23e8b5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2a23e8b5

Branch: refs/heads/python-sdk
Commit: 2a23e8b5dd11c825dfe13f79d69c2099069be724
Parents: 5b6dd91
Author: Pei He <pe...@google.com>
Authored: Mon Jan 23 17:39:20 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Jan 24 12:25:22 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/2a23e8b5/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 2098148..c9edf7c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -887,11 +887,11 @@ class BigQueryServicesImpl implements BigQueryServices {
       try {
         return request.execute();
       } catch (IOException e) {
-        LOG.warn("Ignore the error and retry the request.", e);
         lastException = e;
         if (!shouldRetry.apply(e)) {
           break;
         }
+        LOG.warn("Ignore the error and retry the request.", e);
       }
     } while (nextBackOff(sleeper, backoff));
     throw new IOException(
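
For context, the hunk above only reorders one statement, but the behavior change is real: the warning is now logged only after shouldRetry has confirmed the request will actually be retried, so a non-retryable failure is no longer preceded by a misleading "Ignore the error and retry" message. Below is a minimal sketch of the corrected pattern; the Request and RetryPredicate interfaces and the executeWithRetries helper are hypothetical stand-ins, not Beam's API, and the sleeper/backoff plumbing is reduced to a simple attempt counter.

  import java.io.IOException;

  public class RetryLoopSketch {
    interface Request<T> { T execute() throws IOException; }
    interface RetryPredicate { boolean apply(IOException e); }

    static <T> T executeWithRetries(
        Request<T> request, RetryPredicate shouldRetry, int maxAttempts) throws IOException {
      IOException lastException = null;
      for (int attempt = 0; attempt < maxAttempts; attempt++) {
        try {
          return request.execute();
        } catch (IOException e) {
          lastException = e;
          if (!shouldRetry.apply(e)) {
            break;  // Non-retryable: surface via the throw below, with no warning logged.
          }
          // Only now is it certain the request will be retried.
          System.err.println("Ignoring the error and retrying the request: " + e);
        }
      }
      throw new IOException("Unable to complete request after retries", lastException);
    }
  }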


[16/50] beam git commit: Address review comments on BoundedReadFromUnboundedSource

Posted by dh...@apache.org.
Address review comments on BoundedReadFromUnboundedSource


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/968c3112
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/968c3112
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/968c3112

Branch: refs/heads/python-sdk
Commit: 968c31122395d120117ed725aad83d5e3a47e3b1
Parents: eeec9f1
Author: Kai Jiang <ji...@gmail.com>
Authored: Wed Jan 25 04:49:35 2017 -0800
Committer: Kai Jiang <ji...@gmail.com>
Committed: Wed Jan 25 05:09:33 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/968c3112/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
index 7e25a01..f52b822 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedReadFromUnboundedSource.java
@@ -141,8 +141,8 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
   }
 
   /**
-   * An Adapter wraps the underlying {@link UnboundedSource} with the specified bounds on
-   * number of records and read time into {@link BoundedSource}.
+   * Adapter that wraps the underlying {@link UnboundedSource} with the specified bounds on
+   * number of records and read time into a {@link BoundedSource}.
    */
   @AutoValue
   public abstract static class UnboundedToBoundedSourceAdapter<T>
@@ -151,8 +151,6 @@ public class BoundedReadFromUnboundedSource<T> extends PTransform<PBegin, PColle
     @Nullable abstract long getMaxNumRecords();
     @Nullable abstract Duration getMaxReadTime();
 
-    public abstract String toString();
-
     abstract Builder<T> toBuilder();
 
     @AutoValue.Builder
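
For orientation, the adapter described in this Javadoc is not normally constructed by hand; as of this revision it is typically reached through Read.from(UnboundedSource) followed by withMaxNumRecords and/or withMaxReadTime. A hedged usage sketch, in which `source` is a placeholder for any concrete UnboundedSource implementation:

  import org.apache.beam.sdk.Pipeline;
  import org.apache.beam.sdk.io.Read;
  import org.apache.beam.sdk.io.UnboundedSource;
  import org.apache.beam.sdk.values.PCollection;
  import org.joda.time.Duration;

  // `source` is a hypothetical UnboundedSource<String, ?>; substitute any
  // real implementation (e.g. from a streaming connector).
  static PCollection<String> readBounded(Pipeline pipeline, UnboundedSource<String, ?> source) {
    return pipeline.apply(
        Read.from(source)
            .withMaxNumRecords(1000)                         // cap on record count
            .withMaxReadTime(Duration.standardMinutes(5)));  // cap on wall-clock read time
  }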


[20/50] beam git commit: This closes #1843

Posted by dh...@apache.org.
This closes #1843


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/979c9376
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/979c9376
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/979c9376

Branch: refs/heads/python-sdk
Commit: 979c9376f820577bad43c18cc1a7ee86fab9d942
Parents: bf9d454 e95335f
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 10:40:16 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 10:40:16 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml                     | 4 ++--
 .../org/apache/beam/runners/dataflow/dataflow.properties       | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------



[41/50] beam git commit: This closes #1830

Posted by dh...@apache.org.
This closes #1830


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/47304d1f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/47304d1f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/47304d1f

Branch: refs/heads/python-sdk
Commit: 47304d1fc75d3a7751883638efdaf9f9d8b40a25
Parents: 83f8c46 e01ce86
Author: Dan Halperin <dh...@google.com>
Authored: Thu Jan 26 22:52:12 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Thu Jan 26 22:52:12 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/sdk/transforms/ToString.java    | 168 ++++++++++++++++---
 .../java/org/apache/beam/sdk/io/WriteTest.java  |   2 +-
 .../beam/sdk/transforms/ToStringTest.java       |  86 ++++++++--
 3 files changed, 226 insertions(+), 30 deletions(-)
----------------------------------------------------------------------



[28/50] beam git commit: This closes #1847

Posted by dh...@apache.org.
This closes #1847


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1c6e6674
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1c6e6674
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1c6e6674

Branch: refs/heads/python-sdk
Commit: 1c6e667414788fe99f583fac39d458a4984ae162
Parents: 6413299 fee029f
Author: Dan Halperin <dh...@google.com>
Authored: Wed Jan 25 17:47:08 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 17:47:08 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml      |   5 -
 .../beam/runners/dataflow/util/GcsStager.java   |  18 +-
 .../beam/runners/dataflow/util/PackageUtil.java | 349 +++++++------------
 .../runners/dataflow/util/PackageUtilTest.java  |  42 +--
 .../org/apache/beam/sdk/options/GcsOptions.java |   4 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  12 -
 6 files changed, 149 insertions(+), 281 deletions(-)
----------------------------------------------------------------------



[17/50] beam git commit: Fix Flink RunnableOnService tests

Posted by dh...@apache.org.
Fix Flink RunnableOnService tests

* Check that a Multi-Output map contains the Tag, not the TaggedValue

* Return Inputs from getInputs

  Don't return outputs.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a361b65d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a361b65d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a361b65d

Branch: refs/heads/python-sdk
Commit: a361b65d6aa56d70769403d884abf48d1e1141a4
Parents: 7402d76
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jan 24 17:41:07 2017 -0800
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Jan 25 09:03:23 2017 -0800

----------------------------------------------------------------------
 .../runners/flink/translation/FlinkBatchTransformTranslators.java  | 2 +-
 .../flink/translation/FlinkStreamingTranslationContext.java        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a361b65d/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
index 654b464..f7f1878 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
@@ -580,7 +580,7 @@ class FlinkBatchTransformTranslators {
       outputMap.put(transform.getMainOutputTag(), 0);
       int count = 1;
       for (TaggedPValue taggedValue : outputs) {
-        if (!outputMap.containsKey(taggedValue)) {
+        if (!outputMap.containsKey(taggedValue.getTag())) {
           outputMap.put(taggedValue.getTag(), count++);
         }
       }
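
The first hunk is easy to misread as a no-op. It is not: Map.containsKey(Object) accepts any argument type, so passing the TaggedPValue itself compiled cleanly but never matched the map's TupleTag keys, and the main output's index 0 was silently overwritten. A self-contained sketch of the pitfall, with plain String keys and a hypothetical Tagged wrapper standing in for Beam's TupleTag and TaggedPValue:

  import java.util.HashMap;
  import java.util.Map;

  public class ContainsKeySketch {
    static class Tagged {
      final String tag;
      Tagged(String tag) { this.tag = tag; }
    }

    public static void main(String[] args) {
      Map<String, Integer> indexByTag = new HashMap<>();
      indexByTag.put("main", 0);

      Tagged tagged = new Tagged("main");
      // Map.containsKey(Object) accepts any type, so this compiles...
      System.out.println(indexByTag.containsKey(tagged));      // false -- wrong key type
      // ...the fix is to look up the key the map is actually indexed by:
      System.out.println(indexByTag.containsKey(tagged.tag));  // true
    }
  }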

http://git-wip-us.apache.org/repos/asf/beam/blob/a361b65d/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
index 6db252e..7932f68 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
@@ -107,7 +107,7 @@ public class FlinkStreamingTranslationContext {
   }
 
   public <T extends PInput> List<TaggedPValue> getInputs(PTransform<T, ?> transform) {
-    return currentTransform.getOutputs();
+    return currentTransform.getInputs();
   }
 
   @SuppressWarnings("unchecked")
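
The second hunk is a copy/paste fix: getInputs had been delegating to getOutputs. A minimal sketch of the corrected delegation, with a hypothetical Node class standing in for Beam's transform-hierarchy node (illustrative only, not the real types):

  import java.util.Arrays;
  import java.util.List;

  public class TranslationContextSketch {
    static class Node {
      List<String> getInputs() { return Arrays.asList("in"); }
      List<String> getOutputs() { return Arrays.asList("out"); }
    }

    private final Node currentTransform = new Node();

    public List<String> getInputs() {
      return currentTransform.getInputs();  // was: getOutputs() -- the copy/paste slip
    }

    public static void main(String[] args) {
      // Inputs and outputs must not be conflated; this should print [in].
      System.out.println(new TranslationContextSketch().getInputs());
    }
  }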